mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
commit
94f8f080c4
2
.gitignore
vendored
2
.gitignore
vendored
@ -37,3 +37,5 @@ nosetests.xml
|
||||
|
||||
# Other
|
||||
globalwb.py
|
||||
|
||||
.vagrant
|
||||
|
32
README.md
32
README.md
@ -58,17 +58,39 @@ To start a pywb with sample data
|
||||
2. Install with `python setup.py install`
|
||||
|
||||
3. Run pywb via the script `run.sh` (the script currently assumes a default python and uwsgi install; feel free to edit as needed)
|
||||
|
||||
|
||||
4. Test pywb in your browser! (pywb is set to run on port 8080 by default.)
|
||||
|
||||
|
||||
If everything worked, the following pages should be loading (served from *sample_archive* dir):
|
||||
|
||||
| Original Url | Latest Capture | List of All Captures |
|
||||
| ------------- | ------------- | ----------------------- |
|
||||
| ------------- | ------------- | ----------------------- |
|
||||
| `http://example.com` | [http://localhost:8080/pywb/example.com](http://localhost:8080/pywb/example.com) | [http://localhost:8080/pywb/*/example.com](http://localhost:8080/pywb/*/example.com) |
|
||||
| `http://iana.org` | [http://localhost:8080/pywb/iana.org](http://localhost:8080/pywb/iana.org) | [http://localhost:8080/pywb/*/iana.org](http://localhost:8080/pywb/*/iana.org) |
|
||||
|
||||
|
||||
### Vagrant
|
||||
|
||||
pywb comes with a Vagrantfile to help you set up a VM quickly for testing.
|
||||
If you have [Vagrant](http://www.vagrantup.com/) and [VirtualBox](https://www.virtualbox.org/)
|
||||
installed, then you can start a test instance of pywb like so:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/ikreymer/pywb.git
|
||||
cd pywb
|
||||
vagrant up
|
||||
```
|
||||
|
||||
After pywb and all its dependencies are installed, the uwsgi server will start up and you should see:
|
||||
|
||||
```
|
||||
spawned uWSGI worker 1 (and the only) (pid: 123, cores: 1)
|
||||
```
|
||||
|
||||
At this point, you can open a web browser and navigate to `http://localhost:8080` for testing.
|
||||
|
||||
|
||||
### Automated Tests
|
||||
|
||||
Currently pywb consists of numerous doctests against the sample archive.
|
||||
@ -129,8 +151,8 @@ the location of those files.
|
||||
|
||||
#### SURT
|
||||
|
||||
By default, pywb expects the cdx files to be in Sort-Friendly-Url-Transform (SURT) ordering.
|
||||
This is an ordering that transforms: `example.com` -> `com,example)/` to facilitate better search.
|
||||
By default, pywb expects the cdx files to be in Sort-Friendly-Url-Transform (SURT) ordering.
|
||||
This is an ordering that transforms: `example.com` -> `com,example)/` to facilitate better search.
|
||||
It is recommended for future indexing, but is not required.
|
||||
|
||||
Non-SURT ordered cdx indexes will work as well, but be sure to specify:
|
||||
@ -164,7 +186,7 @@ The directions are for running in a shell:
|
||||
|
||||
4. Ensure the sort order is set to byte order with `export LC_ALL=C` so that sorting is done correctly.
|
||||
|
||||
5. From the directory of the warc(s), run `<FULL PATH>/warctools/hanzo/cdx_writer mypath/warcs/mywarc.gz | sort > mypath/cdx/mywarc.cdx`
|
||||
5. From the directory of the warc(s), run `<FULL PATH>/warctools/hanzo/cdx_writer mypath/warcs/mywarc.gz | sort > mypath/cdx/mywarc.cdx`
|
||||
|
||||
This will create a sorted `mywarc.cdx` for `mywarc.gz`. Then point `pywb` to the `mypath/warcs` and `mypath/cdx` directories in the yaml config.
|
||||
|
||||
|
137
Vagrantfile
vendored
Normal file
137
Vagrantfile
vendored
Normal file
@ -0,0 +1,137 @@
|
||||
# -*- mode: ruby -*-
|
||||
# vi: set ft=ruby :
|
||||
|
||||
# Shell provisioning script, passed inline to the "shell" provisioner below.
# In order, it: refreshes the apt package index; installs python-dev, git and
# python-pip; installs virtualenv via pip; creates the pywb_env virtualenv as
# the vagrant user; clones the pywb repository unless a pywb directory already
# exists; pip-installs pywb into the virtualenv; and finally runs ./run.sh
# with the virtualenv activated.
$script = <<SCRIPT
|
||||
apt-get update
|
||||
apt-get install -y python-dev
|
||||
apt-get install -y git
|
||||
apt-get install -y python-pip
|
||||
pip install virtualenv
|
||||
sudo -u vagrant virtualenv pywb_env
|
||||
echo Installing pywb and dependencies via pip... This may take a while.
|
||||
if [ ! -d pywb ]; then
|
||||
git clone https://github.com/ikreymer/pywb.git;
|
||||
fi;
|
||||
cd pywb
|
||||
sudo -u vagrant ../pywb_env/bin/pip install .
|
||||
sudo -u vagrant -H sh -c ". ../pywb_env/bin/activate; ./run.sh"
|
||||
SCRIPT
|
||||
|
||||
# Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
|
||||
VAGRANTFILE_API_VERSION = "2"
|
||||
|
||||
Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
|
||||
# All Vagrant configuration is done here. The most common configuration
|
||||
# options are documented and commented below. For a complete reference,
|
||||
# please see the online documentation at vagrantup.com.
|
||||
|
||||
# Every Vagrant virtual environment requires a box to build off of.
|
||||
config.vm.box = "precise32"
|
||||
|
||||
# The url from where the 'config.vm.box' box will be fetched if it
|
||||
# doesn't already exist on the user's system.
|
||||
config.vm.box_url = "http://files.vagrantup.com/precise32.box"
|
||||
|
||||
# Provision the VM by running the inline shell script stored in $script.
config.vm.provision "shell", inline: $script
|
||||
|
||||
# Create a forwarded port mapping which allows access to a specific port
|
||||
# within the machine from a port on the host machine. In the example below,
|
||||
# accessing "localhost:8080" will access port 80 on the guest machine.
|
||||
# config.vm.network :forwarded_port, guest: 80, host: 8080
|
||||
# Active mapping: guest port 8080 is exposed on host port 8080
# (the README states that pywb runs on port 8080 by default).
config.vm.network :forwarded_port, guest: 8080, host: 8080
|
||||
|
||||
# Create a private network, which allows host-only access to the machine
|
||||
# using a specific IP.
|
||||
# config.vm.network :private_network, ip: "192.168.33.10"
|
||||
|
||||
# Create a public network, which is generally matched to a bridged network.
|
||||
# Bridged networks make the machine appear as another physical device on
|
||||
# your network.
|
||||
# config.vm.network :public_network
|
||||
|
||||
# If true, then any SSH connections made will enable agent forwarding.
|
||||
# Default value: false
|
||||
# config.ssh.forward_agent = true
|
||||
|
||||
# Share an additional folder to the guest VM. The first argument is
|
||||
# the path on the host to the actual folder. The second argument is
|
||||
# the path on the guest to mount the folder. And the optional third
|
||||
# argument is a set of non-required options.
|
||||
# config.vm.synced_folder "../data", "/vagrant_data"
|
||||
|
||||
# Provider-specific configuration so you can fine-tune various
|
||||
# backing providers for Vagrant. These expose provider-specific options.
|
||||
# Example for VirtualBox:
|
||||
#
|
||||
# config.vm.provider :virtualbox do |vb|
|
||||
# # Don't boot with headless mode
|
||||
# vb.gui = true
|
||||
#
|
||||
# # Use VBoxManage to customize the VM. For example to change memory:
|
||||
# vb.customize ["modifyvm", :id, "--memory", "1024"]
|
||||
# end
|
||||
#
|
||||
# View the documentation for the provider you're using for more
|
||||
# information on available options.
|
||||
|
||||
# Enable provisioning with Puppet stand alone. Puppet manifests
|
||||
# are contained in a directory path relative to this Vagrantfile.
|
||||
# You will need to create the manifests directory and a manifest in
|
||||
# the file precise32.pp in the manifests_path directory.
|
||||
#
|
||||
# An example Puppet manifest to provision the message of the day:
|
||||
#
|
||||
# # group { "puppet":
|
||||
# # ensure => "present",
|
||||
# # }
|
||||
# #
|
||||
# # File { owner => 0, group => 0, mode => 0644 }
|
||||
# #
|
||||
# # file { '/etc/motd':
|
||||
# # content => "Welcome to your Vagrant-built virtual machine!
|
||||
# # Managed by Puppet.\n"
|
||||
# # }
|
||||
#
|
||||
# config.vm.provision :puppet do |puppet|
|
||||
# puppet.manifests_path = "manifests"
|
||||
# puppet.manifest_file = "site.pp"
|
||||
# end
|
||||
|
||||
# Enable provisioning with chef solo, specifying a cookbooks path, roles
|
||||
# path, and data_bags path (all relative to this Vagrantfile), and adding
|
||||
# some recipes and/or roles.
|
||||
#
|
||||
# config.vm.provision :chef_solo do |chef|
|
||||
# chef.cookbooks_path = "../my-recipes/cookbooks"
|
||||
# chef.roles_path = "../my-recipes/roles"
|
||||
# chef.data_bags_path = "../my-recipes/data_bags"
|
||||
# chef.add_recipe "mysql"
|
||||
# chef.add_role "web"
|
||||
#
|
||||
# # You may also specify custom JSON attributes:
|
||||
# chef.json = { :mysql_password => "foo" }
|
||||
# end
|
||||
|
||||
# Enable provisioning with chef server, specifying the chef server URL,
|
||||
# and the path to the validation key (relative to this Vagrantfile).
|
||||
#
|
||||
# The Opscode Platform uses HTTPS. Substitute your organization for
|
||||
# ORGNAME in the URL and validation key.
|
||||
#
|
||||
# If you have your own Chef Server, use the appropriate URL, which may be
|
||||
# HTTP instead of HTTPS depending on your configuration. Also change the
|
||||
# validation key to validation.pem.
|
||||
#
|
||||
# config.vm.provision :chef_client do |chef|
|
||||
# chef.chef_server_url = "https://api.opscode.com/organizations/ORGNAME"
|
||||
# chef.validation_key_path = "ORGNAME-validator.pem"
|
||||
# end
|
||||
#
|
||||
# If you're using the Opscode platform, your validator client is
|
||||
# ORGNAME-validator, replacing ORGNAME with your organization name.
|
||||
#
|
||||
# If you have your own Chef Server, the default validation client name is
|
||||
# chef-validator, unless you changed the configuration.
|
||||
#
|
||||
# chef.validation_client_name = "ORGNAME-validator"
|
||||
end
|
Loading…
x
Reference in New Issue
Block a user