1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

Merge pull request #22 from rajbot/master

Add Vagrantfile
This commit is contained in:
ikreymer 2014-02-04 13:19:34 -08:00
commit 94f8f080c4
3 changed files with 166 additions and 5 deletions

2
.gitignore vendored
View File

@ -37,3 +37,5 @@ nosetests.xml
# Other
globalwb.py
.vagrant

View File

@ -58,17 +58,39 @@ To start a pywb with sample data
2. Install with `python setup.py install`
3. Run pywb by via script `run.sh` (script currently assumes a default python and uwsgi install, feel free to edit as needed)
4. Test pywb in your browser! (pywb is set to run on port 8080 by default.)
If everything worked, the following pages should be loading (served from *sample_archive* dir):
| Original Url | Latest Capture | List of All Captures |
| ------------- | ------------- | ----------------------- |
| ------------- | ------------- | ----------------------- |
| `http://example.com` | [http://localhost:8080/pywb/example.com](http://localhost:8080/pywb/example.com) | [http://localhost:8080/pywb/*/example.com](http://localhost:8080/pywb/*/example.com) |
| `http://iana.org` | [http://localhost:8080/pywb/iana.org](http://localhost:8080/pywb/iana.org) | [http://localhost:8080/pywb/*/iana.org](http://localhost:8080/pywb/*/iana.org) |
### Vagrant
pywb comes with a Vagrantfile to help you set up a VM quickly for testing.
If you have [Vagrant](http://www.vagrantup.com/) and [VirtualBox](https://www.virtualbox.org/)
installed, then you can start a test instance of pywb like so:
```bash
git clone https://github.com/ikreymer/pywb.git
cd pywb
vagrant up
```
After pywb and all its dependencies are installed, the uwsgi server will start up and you should see:
```
spawned uWSGI worker 1 (and the only) (pid: 123, cores: 1)
```
At this point, you can open a web browser and navigate to `http://localhost:8080` for testing.
### Automated Tests
Currently pywb consists of numerous doctests against the sample archive.
@ -129,8 +151,8 @@ the location of those files.
#### SURT
By default, pywb expects the cdx files to be Sort-Friendly-Url-Transform (SURT) ordering.
This is an ordering that transforms: `example.com` -> `com,example)/` to faciliate better search.
By default, pywb expects the cdx files to be Sort-Friendly-Url-Transform (SURT) ordering.
This is an ordering that transforms: `example.com` -> `com,example)/` to faciliate better search.
It is recommended for future indexing, but is not required.
Non-SURT ordered cdx indexs will work as well, but be sure to specify:
@ -164,7 +186,7 @@ The directions are for running in a shell:
4. Ensure sort order set to byte-order `export LC_ALL=C` to ensure proper sorting.
5. From the directory of the warc(s), run `<FULL PATH>/warctools/hanzo/cdx_writer mypath/warcs/mywarc.gz | sort > mypath/cdx/mywarc.cdx`
5. From the directory of the warc(s), run `<FULL PATH>/warctools/hanzo/cdx_writer mypath/warcs/mywarc.gz | sort > mypath/cdx/mywarc.cdx`
This will create a sorted `mywarc.cdx` for `mywarc.gz`. Then point `pywb` to the `mypath/warcs` and `mypath/cdx` directories in the yaml config.

137
Vagrantfile vendored Normal file
View File

@ -0,0 +1,137 @@
# -*- mode: ruby -*-
# vi: set ft=ruby :
$script = <<SCRIPT
apt-get update
apt-get install -y python-dev
apt-get install -y git
apt-get install -y python-pip
pip install virtualenv
sudo -u vagrant virtualenv pywb_env
echo Installing pywb and dependencies via pip... This may take a while.
if [ ! -d pywb ]; then
git clone https://github.com/ikreymer/pywb.git;
fi;
cd pywb
sudo -u vagrant ../pywb_env/bin/pip install .
sudo -u vagrant -H sh -c ". ../pywb_env/bin/activate; ./run.sh"
SCRIPT
# Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
VAGRANTFILE_API_VERSION = "2"
Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
# All Vagrant configuration is done here. The most common configuration
# options are documented and commented below. For a complete reference,
# please see the online documentation at vagrantup.com.
# Every Vagrant virtual environment requires a box to build off of.
config.vm.box = "precise32"
# The url from where the 'config.vm.box' box will be fetched if it
# doesn't already exist on the user's system.
config.vm.box_url = "http://files.vagrantup.com/precise32.box"
config.vm.provision "shell", inline: $script
# Create a forwarded port mapping which allows access to a specific port
# within the machine from a port on the host machine. In the example below,
# accessing "localhost:8080" will access port 80 on the guest machine.
# config.vm.network :forwarded_port, guest: 80, host: 8080
config.vm.network :forwarded_port, guest: 8080, host: 8080
# Create a private network, which allows host-only access to the machine
# using a specific IP.
# config.vm.network :private_network, ip: "192.168.33.10"
# Create a public network, which generally matched to bridged network.
# Bridged networks make the machine appear as another physical device on
# your network.
# config.vm.network :public_network
# If true, then any SSH connections made will enable agent forwarding.
# Default value: false
# config.ssh.forward_agent = true
# Share an additional folder to the guest VM. The first argument is
# the path on the host to the actual folder. The second argument is
# the path on the guest to mount the folder. And the optional third
# argument is a set of non-required options.
# config.vm.synced_folder "../data", "/vagrant_data"
# Provider-specific configuration so you can fine-tune various
# backing providers for Vagrant. These expose provider-specific options.
# Example for VirtualBox:
#
# config.vm.provider :virtualbox do |vb|
# # Don't boot with headless mode
# vb.gui = true
#
# # Use VBoxManage to customize the VM. For example to change memory:
# vb.customize ["modifyvm", :id, "--memory", "1024"]
# end
#
# View the documentation for the provider you're using for more
# information on available options.
# Enable provisioning with Puppet stand alone. Puppet manifests
# are contained in a directory path relative to this Vagrantfile.
# You will need to create the manifests directory and a manifest in
# the file precise32.pp in the manifests_path directory.
#
# An example Puppet manifest to provision the message of the day:
#
# # group { "puppet":
# # ensure => "present",
# # }
# #
# # File { owner => 0, group => 0, mode => 0644 }
# #
# # file { '/etc/motd':
# # content => "Welcome to your Vagrant-built virtual machine!
# # Managed by Puppet.\n"
# # }
#
# config.vm.provision :puppet do |puppet|
# puppet.manifests_path = "manifests"
# puppet.manifest_file = "site.pp"
# end
# Enable provisioning with chef solo, specifying a cookbooks path, roles
# path, and data_bags path (all relative to this Vagrantfile), and adding
# some recipes and/or roles.
#
# config.vm.provision :chef_solo do |chef|
# chef.cookbooks_path = "../my-recipes/cookbooks"
# chef.roles_path = "../my-recipes/roles"
# chef.data_bags_path = "../my-recipes/data_bags"
# chef.add_recipe "mysql"
# chef.add_role "web"
#
# # You may also specify custom JSON attributes:
# chef.json = { :mysql_password => "foo" }
# end
# Enable provisioning with chef server, specifying the chef server URL,
# and the path to the validation key (relative to this Vagrantfile).
#
# The Opscode Platform uses HTTPS. Substitute your organization for
# ORGNAME in the URL and validation key.
#
# If you have your own Chef Server, use the appropriate URL, which may be
# HTTP instead of HTTPS depending on your configuration. Also change the
# validation key to validation.pem.
#
# config.vm.provision :chef_client do |chef|
# chef.chef_server_url = "https://api.opscode.com/organizations/ORGNAME"
# chef.validation_key_path = "ORGNAME-validator.pem"
# end
#
# If you're using the Opscode platform, your validator client is
# ORGNAME-validator, replacing ORGNAME with your organization name.
#
# If you have your own Chef Server, the default validation client name is
# chef-validator, unless you changed the configuration.
#
# chef.validation_client_name = "ORGNAME-validator"
end