mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 08:04:49 +01:00
Compare commits
183 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
7b0f8b5860 | ||
|
b44c93bf6e | ||
|
97fffe3a34 | ||
|
6205646b9b | ||
|
23891be2f1 | ||
|
b190dddee9 | ||
|
b9f1609df9 | ||
|
e89924bd39 | ||
|
b4c91c6633 | ||
|
1e2665af13 | ||
|
fee14d7fe8 | ||
|
5712945991 | ||
|
2fd6190b72 | ||
|
791a8d1033 | ||
|
86ee3bd752 | ||
|
d1e1636ae3 | ||
|
b4955cca66 | ||
|
f40e7ef18c | ||
|
6b4f9b323e | ||
|
7879dd0222 | ||
|
013746c10a | ||
|
79140441df | ||
|
af92a9726e | ||
|
83b2113be2 | ||
|
ed36830dc5 | ||
|
81b6a57dfb | ||
|
5c427b9ff2 | ||
|
454486bf75 | ||
|
b8693307d1 | ||
|
98be48d6e4 | ||
|
c441d83435 | ||
|
4a3e7ddff7 | ||
|
02288db81c | ||
|
4fc2b451d7 | ||
|
c8e78fd7c1 | ||
|
d44d640b93 | ||
|
03f9708d8d | ||
|
406fad95c2 | ||
|
d207c76bae | ||
|
131732d238 | ||
|
59d9beac05 | ||
|
0758e81b62 | ||
|
d392a8d908 | ||
|
9bc8a2e1ef | ||
|
43e5c8bac0 | ||
|
cdab280669 | ||
|
e6ec8b4aeb | ||
|
1790fd006a | ||
|
3d0673e32a | ||
|
3050fd2b2b | ||
|
3c94da04a2 | ||
|
2d19b6b18d | ||
|
6cc9cdc3ad | ||
|
138e2b284d | ||
|
3b49c2229e | ||
|
fec9cef818 | ||
|
d81c2f0303 | ||
|
3d8015c444 | ||
|
91cf74a2a9 | ||
|
373eca641c | ||
|
2ad7eaee4b | ||
|
ca6587caac | ||
|
e20fac2c75 | ||
|
c28941a0b6 | ||
|
ff7783aa74 | ||
|
5e2f47a049 | ||
|
19032e4512 | ||
|
72cb588936 | ||
|
14e464bd1c | ||
|
dc81e78393 | ||
|
6260b226ce | ||
|
29860bcb24 | ||
|
790487ca15 | ||
|
028e7102c0 | ||
|
1dedc46dce | ||
|
f96707d039 | ||
|
ca68cf0da1 | ||
|
815ea92fc2 | ||
|
98378a8845 | ||
|
6e7a8b1e59 | ||
|
1fddec216d | ||
|
8ef4ff102d | ||
|
16135d956a | ||
|
1249b41dba | ||
|
2ccd8eb2c3 | ||
|
f0340c6898 | ||
|
c121198183 | ||
|
0cc912da95 | ||
|
f190190128 | ||
|
49393ce16a | ||
|
a97ad7ebbe | ||
|
4f1a6303fa | ||
|
7432299079 | ||
|
7b00d0627e | ||
|
510c9dc9f1 | ||
|
fbed87aa46 | ||
|
4ac580e401 | ||
|
8e06c2f351 | ||
|
12a9e32129 | ||
|
32e9020fd2 | ||
|
62633a48c4 | ||
|
4f44c2ec98 | ||
|
09f7084aa1 | ||
|
403167fbe0 | ||
|
63ac82ee6f | ||
|
0c3eb4ce94 | ||
|
42445562da | ||
|
0f05dbde55 | ||
|
825e4e54ab | ||
|
38b1952d34 | ||
|
c42833d4ad | ||
|
ddcbde573c | ||
|
6bde8fd8c4 | ||
|
7ff789f1a8 | ||
|
c0519a53c3 | ||
|
de9b9310d4 | ||
|
0c4e406876 | ||
|
c97a66703b | ||
|
5c35a43dac | ||
|
e64e58f040 | ||
|
a6be76642a | ||
|
96de80f83e | ||
|
b28c8f1748 | ||
|
b2a460c33c | ||
|
342007244b | ||
|
98c6fba44d | ||
|
a0faf904ef | ||
|
3e5d97f70b | ||
|
843fe28ed8 | ||
|
096850b41d | ||
|
81308780ec | ||
|
cff2a9efc5 | ||
|
3ca765f847 | ||
|
f9f5d2dc33 | ||
|
f7bd84cdac | ||
|
9587954856 | ||
|
12fcc87962 | ||
|
0eedd1502f | ||
|
d95b79a8ab | ||
|
f07d35709a | ||
|
818b518765 | ||
|
551b8fe026 | ||
|
abb76911f5 | ||
|
626da99899 | ||
|
106a9e9200 | ||
|
5d34018b9f | ||
|
ad9b431eaf | ||
|
c5c4a54e7d | ||
|
73d6735bed | ||
|
cdb17c4000 | ||
|
7ce4573c70 | ||
|
212691bd38 | ||
|
13ea5baee5 | ||
|
c62b1bc987 | ||
|
4224cdd7e5 | ||
|
ca14bdd8b2 | ||
|
084be82550 | ||
|
662fc747bf | ||
|
b475d85c4f | ||
|
78a9888b46 | ||
|
aee458b7f5 | ||
|
94f6273a91 | ||
|
087ef2f261 | ||
|
69654fd013 | ||
|
e1cad621b9 | ||
|
ddf3207e40 | ||
|
04d0586244 | ||
|
4683d95580 | ||
|
841c02c123 | ||
|
07fb6bbf1d | ||
|
a0aaa7558d | ||
|
f628b40e02 | ||
|
b66608c5f3 | ||
|
de81efac78 | ||
|
9e09bcd2a7 | ||
|
7b51101b04 | ||
|
195e85ea9d | ||
|
54d8bccf4a | ||
|
bb1c2a3ec9 | ||
|
3f3f8caef1 | ||
|
9b8c187b3a | ||
|
2e35c3e1ed | ||
|
94b7fdcf97 |
@ -4,6 +4,8 @@ karma-tests/
|
||||
tests_disabled/
|
||||
venv/
|
||||
collections/
|
||||
wombat/
|
||||
docs/
|
||||
|
||||
.cache/
|
||||
.eggs/
|
||||
|
32
.github/workflows/ci.yaml
vendored
Normal file
32
.github/workflows/ci.yaml
vendored
Normal file
@ -0,0 +1,32 @@
|
||||
name: CI
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
jobs:
|
||||
unit-tests:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
max-parallel: 3
|
||||
matrix:
|
||||
python-version: ['3.7', '3.8', '3.9', '3.10', '3.11']
|
||||
|
||||
steps:
|
||||
- name: checkout
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v1
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install tox tox-gh-actions
|
||||
|
||||
- name: Test with tox
|
||||
run: tox
|
||||
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v1
|
||||
|
33
.github/workflows/publish_pypi.yaml
vendored
Normal file
33
.github/workflows/publish_pypi.yaml
vendored
Normal file
@ -0,0 +1,33 @@
|
||||
name: Publish to PYPI
|
||||
on:
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
jobs:
|
||||
pypi-release:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: [3.9]
|
||||
|
||||
steps:
|
||||
- name: checkout
|
||||
uses: actions/checkout@v1
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v1
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: python -m pip install --upgrade pip wheel twine
|
||||
|
||||
- name: Build Dist
|
||||
run: python setup.py sdist bdist_wheel --universal
|
||||
|
||||
- name: Publish package to TestPyPI
|
||||
uses: pypa/gh-action-pypi-publish@master
|
||||
with:
|
||||
user: __token__
|
||||
password: ${{ secrets.PYPI_API_TOKEN }}
|
||||
|
43
.github/workflows/release.yaml
vendored
Normal file
43
.github/workflows/release.yaml
vendored
Normal file
@ -0,0 +1,43 @@
|
||||
name: Publish Docker image
|
||||
on:
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
jobs:
|
||||
push_to_registries:
|
||||
name: Build pywb Docker image for release and push to Dockerhub
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
-
|
||||
name: Check out the repo
|
||||
uses: actions/checkout@v2
|
||||
-
|
||||
name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v3
|
||||
with:
|
||||
images: webrecorder/pywb
|
||||
tags: |
|
||||
type=match,pattern=v-(.*),group=1
|
||||
-
|
||||
name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v1
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v1
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v1
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_PASSWORD }}
|
||||
-
|
||||
name: Build and push
|
||||
id: docker_build
|
||||
uses: docker/build-push-action@v2
|
||||
with:
|
||||
context: .
|
||||
push: true
|
||||
platforms: linux/amd64,linux/arm64
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
|
4
.gitignore
vendored
4
.gitignore
vendored
@ -53,3 +53,7 @@ git_hash.py
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/*
|
||||
|
||||
# virtualenvs
|
||||
env/
|
||||
venv/
|
||||
|
329
CHANGES.rst
329
CHANGES.rst
@ -1,3 +1,326 @@
|
||||
pywb 2.7.3 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* issue_792 catch warcio exception by @oskarhek in https://github.com/webrecorder/pywb/pull/793
|
||||
* Add ui.logo_home_url as config.yaml option by @tw4l in https://github.com/webrecorder/pywb/pull/791
|
||||
* [#795] Show error when adding duplicate warc file by @kuechensofa in https://github.com/webrecorder/pywb/pull/797
|
||||
* Make search page more intuitive by @krakan in https://github.com/webrecorder/pywb/pull/794
|
||||
* Modify search template buttons by @tw4l in https://github.com/webrecorder/pywb/pull/801
|
||||
* [#804] Use default_locale when lang not set in the request by @krakan in https://github.com/webrecorder/pywb/pull/805
|
||||
* feat: regex substitution on surt rules match by @mijho in https://github.com/webrecorder/pywb/pull/780
|
||||
* Bump minimatch from 3.0.4 to 3.1.2 in /pywb/vueui by @dependabot in https://github.com/webrecorder/pywb/pull/777
|
||||
* Bump decode-uri-component from 0.2.0 to 0.2.2 in /pywb/vueui by @dependabot in https://github.com/webrecorder/pywb/pull/786
|
||||
* rules: add 'debugNoBatch' rewrite for fb and insta by @ikreymer in https://github.com/webrecorder/pywb/pull/806
|
||||
* Vue main order by @tw4l in https://github.com/webrecorder/pywb/pull/809
|
||||
* wombat: bump to 3.4.4 https://github.com/webrecorder/pywb/pull/808
|
||||
|
||||
pywb 2.7.2 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* Fix regression introduced by improper wombat update in 2.7.1
|
||||
* Fix `redirect_to_exact: false` functionality: if not set, UI will stay on current timestamp, but will display info on actual capture.
|
||||
* Location bar nav now keeps current timestamp instead of defaulting to calendar view.
|
||||
* 'Live' mode fixes, no longer cache live cdx entry, don't add timestamp when navigating in live mode without timestamp
|
||||
* Calendar dropdown on replay now scrollable.
|
||||
* Timeline toggle on replay is 'sticky', will stay on if toggled on replay.
|
||||
* Capture text: use '|' as in 'Current Capture: [title] | [capture date]'
|
||||
* Document title: Add 'Archived Page: ' prefix to avoid confusion with live pages.
|
||||
|
||||
pywb 2.7.1 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* Add locale-dependent handling of first day of week by @krakan in https://github.com/webrecorder/pywb/pull/781
|
||||
* Make filter expressions translatable by @krakan in https://github.com/webrecorder/pywb/pull/783
|
||||
* Add title to top frame in framed replay
|
||||
* Add missing tooltip translation strings
|
||||
* Fix calendar and timeline rendering for replay URLs without a timestamp
|
||||
* Update template documentation
|
||||
|
||||
pywb 2.7.0 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* New banner and calendar implementation in Vue.js, which supports localization/internationalization and easier local theming by @vanecat @ikreymer @tw4l with helpful feedback from @ldko
|
||||
* New interactive timeline to assist in navigating between captures
|
||||
* Add basic development Docker Compose configuration file
|
||||
* Update documentation
|
||||
* Add contributing guide
|
||||
|
||||
pywb 2.6.9 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* eval rewrite update + latest wombat by @ikreymer in https://github.com/webrecorder/pywb/pull/763
|
||||
* Rewrite: Support target rewriting, open new windows in top-frame instead by @tw4l in https://github.com/webrecorder/pywb/pull/767
|
||||
* Add arm64 platform support by @luandro in https://github.com/webrecorder/pywb/pull/775
|
||||
* Add uwsgi virtualenv information by @tw4l in https://github.com/webrecorder/pywb/pull/770
|
||||
* update to wombat 3.3.11 to support additional replay improvements
|
||||
* automated pypi publish on release https://github.com/webrecorder/pywb/pull/776
|
||||
|
||||
pywb 2.6.8 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* Upgrade webassets to v2.0 by @m4rk3r in https://github.com/webrecorder/pywb/pull/730
|
||||
* Encoding image 'srcset' value including the intrinsic width by @yasarkunduz in https://github.com/webrecorder/pywb/pull/712
|
||||
* Prevent jinja2 from escaping HTML markup in collection metadata by @tw4l in https://github.com/webrecorder/pywb/pull/747
|
||||
* Increase uwsgi_buffer_size for nginx config by @edsu in https://github.com/webrecorder/pywb/pull/716
|
||||
* Add missing translation for the filter-expression field placeholder by @krakan in https://github.com/webrecorder/pywb/pull/721
|
||||
* Activate field validation when expanding the advanced options by @krakan in https://github.com/webrecorder/pywb/pull/722
|
||||
* S3 loader to use boto3 built-in credential configuration by @sebastian-nagel in https://github.com/webrecorder/pywb/pull/723
|
||||
* describing installation using pip by @sepastian in https://github.com/webrecorder/pywb/pull/726
|
||||
* Add missing org/image to docker run commands by @heyvito in https://github.com/webrecorder/pywb/pull/733
|
||||
* Format error messages by @edsu in https://github.com/webrecorder/pywb/pull/737
|
||||
* Ensure CDX status is a string by @edsu in https://github.com/webrecorder/pywb/pull/739
|
||||
* Improve replay banner's accessibility by @lwrubel in https://github.com/webrecorder/pywb/pull/742
|
||||
* Revisit headers load fix by @ikreymer in https://github.com/webrecorder/pywb/pull/751
|
||||
* Enable translation for the remaining strings on the search results page by @krakan in https://github.com/webrecorder/pywb/pull/752
|
||||
* revisit of redirect optimization: by @ikreymer in https://github.com/webrecorder/pywb/pull/753
|
||||
* proxy: add COEP header for proxy mode to avoid errors by @ikreymer in https://github.com/webrecorder/pywb/pull/755
|
||||
* tests run improvements: update from python setup.py test -> tox by @ikreymer in https://github.com/webrecorder/pywb/pull/754
|
||||
* rewrite: detect edge-case where html starts with bom followed by @ikreymer in https://github.com/webrecorder/pywb/pull/758
|
||||
* tests options: add PYWB_NO_VERIFY_SSL env var for tests to avoid fail… by @ikreymer in https://github.com/webrecorder/pywb/pull/760
|
||||
* rewriting fix: twitter video in embedded tweets by @ikreymer in https://github.com/webrecorder/pywb/pull/761
|
||||
* Add ir_ modifier by @ikreymer in https://github.com/webrecorder/pywb/pull/759
|
||||
* Remove unused Appveyor badge
|
||||
|
||||
pywb 2.6.7 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* dependency: bump gevent to latest (21.12.0)
|
||||
* rewrite: fix eval rewriting where '._eval' was accidentally being rewritten
|
||||
* post-to-get conversion: properly handle json with top-level lists, to match cdxj-indexer, print parse errors, fixes `#709 <https://github.com/webrecorder/pywb/pull/709>`_
|
||||
|
||||
pywb 2.6.6 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* dependency: don't use obsolete werkzeug useragent package `#704 <https://github.com/webrecorder/pywb/pull/704>`_
|
||||
* fix user-agent detection: use ua-parser module, default to new js-proxy mode, unless older browser detected `#707 <https://github.com/webrecorder/pywb/pull/707>`_
|
||||
* fix tests: disable broken s3 tests for now
|
||||
* Dockerfile: use python 3.8 by default
|
||||
|
||||
pywb 2.6.5 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* fix build: add 'markupsafe<2.1.0' to requirements
|
||||
|
||||
|
||||
pywb 2.6.4 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* wombat.js: actually update to 3.3.6, update built wombat.js
|
||||
|
||||
* Fix live mode when ``redirect_to_exact`` is enabled `#692 <https://github.com/webrecorder/pywb/pull/692>`_
|
||||
|
||||
* Rules: additional fuzzy ignore of facebook query param: `#691 <https://github.com/webrecorder/pywb/pull/691>`_
|
||||
|
||||
* Docs: typo fixes: `#669 <https://github.com/webrecorder/pywb/pull/669>`_, `#670 <https://github.com/webrecorder/pywb/pull/670>`_
|
||||
|
||||
|
||||
pywb 2.6.3 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* Fix false-positive rewriting of ``location`` through additional check if local var is used, fixes `#684 <https://github.com/webrecorder/pywb/pull/684>`_
|
||||
|
||||
* Fix missing localization of placeholder, fixes `#685 <https://github.com/webrecorder/pywb/pull/685>`_
|
||||
|
||||
* Fix regression caused by 2.6.2, ensure pywb.app_prefix, pywb.host_prefix and pywb.static_prefix paths set correctly for all pages `#688 <https://github.com/webrecorder/pywb/pull/688>`_, fixes `#686 <https://github.com/webrecorder/pywb/pull/686>`_
|
||||
|
||||
* Documentation: Fixes to ``cdx-indexer`` help (from @ldko) `#683 <https://github.com/webrecorder/pywb/pull/683>`_
|
||||
|
||||
* Update wombat.js to 3.3.6
|
||||
|
||||
* Add automatic Docker push on new GitHub release
|
||||
|
||||
|
||||
pywb 2.6.2 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Fix regression caused by 2.6.1, with static files not being loaded correctly. `#678 <https://github.com/webrecorder/pywb/pull/678>`_
|
||||
|
||||
|
||||
pywb 2.6.1 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* Domain-Specific Rewriting Rules: Rewrite twitter video to capture full videos.
|
||||
|
||||
* Disable rewriting ``data-`` attributes, better fidelity without rewriting, fixes `#676 <https://github.com/webrecorder/pywb/pull/676>`_
|
||||
|
||||
* Fix regression in autoescaping URL in frame_insert.html
|
||||
|
||||
* Feature: ability to set path used to serve static assets (default ``static``) via ``static_prefix`` config option.
|
||||
|
||||
* Update wombat.js 3.3.4 (includes various rewriting fixes)
|
||||
|
||||
|
||||
pywb 2.6.0 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* Improvements for eval() rewriting + extra unnamed scope to avoid variable collision `#668 <https://github.com/webrecorder/pywb/pull/668>`_
|
||||
|
||||
* fix for documentation links `#666 <https://github.com/webrecorder/pywb/pull/666>`_
|
||||
|
||||
* Update to latest wombat.js (3.3.0)
|
||||
|
||||
|
||||
pywb 2.6.0b4 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* Update rules for IG rewriting to disable Dash `#662 <https://github.com/webrecorder/pywb/pull/662>`_
|
||||
|
||||
* Support for adding custom resource records via PUT ``/<coll>/record`` `#661 <https://github.com/webrecorder/pywb/pull/661>`_
|
||||
|
||||
* Fixes for URL encoding for query and remote index `#657 <https://github.com/webrecorder/pywb/pull/657>`_ and `#658 <https://github.com/webrecorder/pywb/pull/658>`_
|
||||
|
||||
* Doc fixes for incorrect param name `#651 <https://github.com/webrecorder/pywb/pull/651>`_
|
||||
|
||||
* Update to latest wombat.js (3.2.2)
|
||||
|
||||
|
||||
pywb 2.6.0b3 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* Display 'ignoring locales' warning only if locales specified (don't specify any by default)
|
||||
|
||||
* Add -V flag to wb-manager and pywb/wayback commands to display current version and exit
|
||||
|
||||
|
||||
pywb 2.6.0b2 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* Update documentation for CDX Server API (by @sebastian-nagel) `#651 <https://github.com/webrecorder/pywb/pull/651>`_
|
||||
|
||||
Localization fixes: `#653 <https://github.com/webrecorder/pywb/pull/653>`_
|
||||
|
||||
* Ensure banner template is not autoescaped
|
||||
|
||||
* Don't show locale switch on not found pages (redundant with banner)
|
||||
|
||||
* Ensure wb-manager works when optional i18n dependencies are not installed
|
||||
|
||||
|
||||
pywb 2.6.0b1 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Additional documentation / localization fixes `#650 <https://github.com/webrecorder/pywb/pull/650>`_
|
||||
|
||||
* Ensure home page and error page keeps locale, language switching is working.
|
||||
|
||||
* Add autoescaping to Jinja2 to avoid XSS issues (suggested by @sebastian-nagel)
|
||||
|
||||
* Add support for 'pywb[i18n]' extra to install localization dependencies
|
||||
|
||||
Documentation typo fixes (by @ldko, `#649 <https://github.com/webrecorder/pywb/pull/649>`_)
|
||||
|
||||
|
||||
pywb 2.6.0b0 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Documentation Updates:
|
||||
|
||||
* `Embargo + ACL system updates <https://pywb.readthedocs.io/en/latest/manual/access-control.html>`_
|
||||
|
||||
* `New ACL header configuration <https://pywb.readthedocs.io/en/latest/manual/usage.html#config-acl-header>`_
|
||||
|
||||
* `Localization / Multi-lingual Support Guide <https://pywb.readthedocs.io/en/latest/manual/localization.html>`_
|
||||
|
||||
|
||||
Localization Improvements: (`#647 <https://github.com/webrecorder/pywb/pull/647>`_)
|
||||
|
||||
* Support for extracting, updating, listing and removing localizable commands via ``wb-manager i18n`` command.
|
||||
|
||||
* UI: Add language switch header to all UI templates.
|
||||
|
||||
* Mark localizable strings as translatable in existing templates.
|
||||
|
||||
|
||||
Access Control Improvements:
|
||||
|
||||
* Support for Embargo System for date-based embargo, overridable via ACL ``allow_ignore_embargo`` `#642 <https://github.com/webrecorder/pywb/pull/642>`_
|
||||
|
||||
* Support for custom ACL 'user' specified via ``X-pywb-ACL-User`` header passed from frontend proxies.
|
||||
|
||||
* Fixes for exact rule matching `#629 <https://github.com/webrecorder/pywb/pull/629>`_
|
||||
|
||||
* Fixes for ACL for auto-collections `#620 <https://github.com/webrecorder/pywb/pull/620>`_
|
||||
|
||||
|
||||
Rewriting Improvements:
|
||||
|
||||
* Updated YT rewriting rules `#635 <https://github.com/webrecorder/pywb/pull/635>`_
|
||||
|
||||
* POST-to-get rewriting consistent with cdxj-indexer, wabac.js/replayweb.page `#636 <https://github.com/webrecorder/pywb/pull/636>`_
|
||||
|
||||
* Improved fuzzy matching to ensure non-POST requests handled via fuzzy matching.
|
||||
|
||||
* Live web: never truncate when reading POST request to avoid hung requests! (Apply limit only on indexing)
|
||||
|
||||
|
||||
CDX Server / API Compatibility Fixes:
|
||||
|
||||
* XmlQuery: set WARC record length field, if available `#633 <https://github.com/webrecorder/pywb/pull/633>`_
|
||||
|
||||
* ZipNum: Don't count pages with filter `#631 <https://github.com/webrecorder/pywb/pull/631>`_
|
||||
|
||||
* Better handle of CDX Server HTTP status `#624 <https://github.com/webrecorder/pywb/pull/624>`_
|
||||
|
||||
* Better handling of errors from CDX Server API with 400 `#623 <https://github.com/webrecorder/pywb/pull/623>`_, `#625 <https://github.com/webrecorder/pywb/pull/625>`_, `#626 <https://github.com/webrecorder/pywb/pull/626>`_, `#630 <https://github.com/webrecorder/pywb/pull/630>`_
|
||||
|
||||
* Backwards compatibility of ``fl`` param `#621 <https://github.com/webrecorder/pywb/pull/621>`_
|
||||
|
||||
|
||||
Recording Redis Dedup mode:
|
||||
|
||||
* Fix dedup index config loading `#617 <https://github.com/webrecorder/pywb/pull/617>`_
|
||||
|
||||
* Add recording size counter to track any in-flight requests `#637 <https://github.com/webrecorder/pywb/pull/637>`_
|
||||
|
||||
|
||||
pywb 2.5.0 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* Update to latest wombat.js (3.0.3)
|
||||
|
||||
* Dedup Mode: Support for Redis-based dedup index to skip or write revisit records for duplicates, replay from Redis-based index `#597 <https://github.com/webrecorder/pywb/pull/597>`_, `#611 <https://github.com/webrecorder/pywb/pull/611>`_
|
||||
|
||||
* Rewriting: Updated Rules for youtube and vimeo replay `#610 <https://github.com/webrecorder/pywb/pull/610>`_
|
||||
|
||||
* CDX Indexing: More efficient cdx sorting `#609 <https://github.com/webrecorder/pywb/pull/609>`_
|
||||
|
||||
* Set default CDX closest lookup limit to 100 instead of 10 `#606 <https://github.com/webrecorder/pywb/pull/606>`_
|
||||
|
||||
* UI: Try to avoid css class conflicts in injected banner `#604 <https://github.com/webrecorder/pywb/pull/604>`_
|
||||
|
||||
* Catch invalid headers in uWSGI `#603 <https://github.com/webrecorder/pywb/pull/603>`_
|
||||
|
||||
* Config option to support certificate validation when capturing `#596 <https://github.com/webrecorder/pywb/pull/596>`_
|
||||
|
||||
* Fix indexing POST requests with multipart/form-data without boundary `#599 <https://github.com/webrecorder/pywb/pull/599>`_
|
||||
|
||||
* New OpenWayback->pywb Transition Guide: `https://pywb.readthedocs.io/en/latest/manual/owb-transition.html <https://pywb.readthedocs.io/en/latest/manual/owb-transition.html>`_
|
||||
|
||||
* Sample deployments with Docker Compose for running with Apache, Nginx and OutbackCDX in ``sample-deploy`` directory.
|
||||
|
||||
* Update to latest gevent to fix issues with latest python `#583 <https://github.com/webrecorder/pywb/pull/583>`_
|
||||
|
||||
|
||||
pywb 2.4.2 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* ensure RemoteCDXIndexSource also passes ``matchType`` to upstream
|
||||
|
||||
* cdx-indexer: use ``-o`` flag to specify output, not first param (output to stdout by default)
|
||||
|
||||
* static paths cleanup, move ``url-polyfill.min.js`` to correct dir (fixes `#571 <https://github.com/webrecorder/pywb/issues/571>`_)
|
||||
|
||||
* minor fixes to docs
|
||||
|
||||
* logo: resize new logo to actual size, add logo via absolute link to ensure it works on pypi also
|
||||
|
||||
|
||||
pywb 2.4.1 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* Minor fix: allow timegate content check in `#564 <https://github.com/webrecorder/pywb/pull/564>`_ to be ignored (for use with derived classes)
|
||||
|
||||
|
||||
pywb 2.4.0 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
@ -858,7 +1181,7 @@ pywb 0.9.6 changelist
|
||||
pywb 0.9.5 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* s3 loading: support ``s3://`` scheme in block loader, allowing for loading index and archive files from s3. ``boto`` library must be installed seperately
|
||||
* s3 loading: support ``s3://`` scheme in block loader, allowing for loading index and archive files from s3. ``boto`` library must be installed separately
|
||||
via ``pip install boto``. Attempt default boto auth path, and if that fails, attempt anonymous s3 connection.
|
||||
|
||||
* Wombat/Client-Side Rewrite Customizations: New ``rewrite_opts.client`` settings from ``config.yaml`` are passed directly to wombat as json.
|
||||
@ -954,7 +1277,7 @@ pywb 0.9.1 changelist
|
||||
|
||||
* cdx server query: add support for ``url=*.host`` and ``url=host/*`` as shortcuts for ``matchType=domain`` and ``matchType=prefix``
|
||||
|
||||
* zipnum cdx cluster: support loading index shared from prefix path instead of seperate location file.
|
||||
* zipnum cdx cluster: support loading index shared from prefix path instead of separate location file.
|
||||
|
||||
The ``shard_index_loc`` config property may contain match and replace properties.
|
||||
Regex replacement is then used to obtain path prefix from the shard prefix path.
|
||||
@ -1320,7 +1643,7 @@ pywb 0.4.7 changelist
|
||||
|
||||
* Rewrite: Parsing of html as raw bytes instead of decode/encode, detection still needed for non-ascii compatible encoding.
|
||||
|
||||
* Indexing: Refactoring of cdx-indexer using a seperate 'archive record iterator' and pluggable cdx writer classes. Groundwork for creating custom indexers.
|
||||
* Indexing: Refactoring of cdx-indexer using a separate 'archive record iterator' and pluggable cdx writer classes. Groundwork for creating custom indexers.
|
||||
|
||||
* Indexing: Support for 9 field cdx formats with -9 flag.
|
||||
|
||||
|
68
CONTRIBUTING.md
Normal file
68
CONTRIBUTING.md
Normal file
@ -0,0 +1,68 @@
|
||||
# pywb contributing guide
|
||||
|
||||
Thank you for your interest in contributing to pywb and open source web archiving tools!
|
||||
|
||||
If you have a question not covered below or are interested in collaborating, please feel free to reach out via any of our [contact points](https://webrecorder.net/contact).
|
||||
|
||||
## How to contribute to pywb
|
||||
|
||||
### I found a bug
|
||||
|
||||
Please take a look at the [open issues](https://github.com/webrecorder/pywb/issues) to see if someone else has already described the same issue and if so, leave any comments or suggestions there.
|
||||
|
||||
If no such issue already exists, feel free to [open a new issue](https://github.com/webrecorder/pywb/issues/new/choose) using the Bug Report template. If the bug is specifically related to replay of a particular site, instead use the Replay Issue template.
|
||||
|
||||
When opening an issue or commenting on an open issue, please describe the problem you are having, any steps required to reproduce the bug (including the pywb version affected), and include any contextual information or screenshots that may be helpful.
|
||||
|
||||
### I wrote a patch to fix a bug
|
||||
|
||||
Please open a new pull request with a description of the changes and a link to the related issue (if no issue yet exists, please create one first).
|
||||
|
||||
Create a new branch with a short descriptive name including the issue number, based on the latest `main` branch.
|
||||
|
||||
All changes should be submitted with test coverage for the change as well as updates to the project documentation if appropriate.
|
||||
|
||||
Avoid making unnecessary changes such as reformatting code or otherwise touching parts of the codebase that are not directly relevant to the issue at hand.
|
||||
|
||||
We do our best to review pull requests in a timely manner but as we are a small team with many projects we cannot guarantee a response or merging timeline. Webrecorder reserves the right to reject pull requests that do not fit the direction of the project or ethics of the Webrecorder project.
|
||||
|
||||
The Development section below has information on how to get started with working on pywb in a local development environment.
|
||||
|
||||
### I want to propose a new feature
|
||||
|
||||
Please take a look at the [open issues](https://github.com/webrecorder/pywb/issues) to see if someone else has already proposed a similar feature and if so, leave any comments or suggestions there.
|
||||
|
||||
If no such issue already exists, feel free to [open a new issue](https://github.com/webrecorder/pywb/issues/new/choose) using the Feature Request template.
|
||||
|
||||
## Development
|
||||
|
||||
The [pywb documentation](https://pywb.readthedocs.io/en/latest/) contains information on pywb's architecture, configuration file, and how to get started with the software locally or in a Docker container.
|
||||
|
||||
The project root directory contains a basic [Docker Compose](https://docs.docker.com/compose/) configuration file, which can be used to easily start a development environment. After installing Docker Desktop and Docker Compose (if not installed with Desktop), to run pywb in detached mode on `localhost:8080`, run:
|
||||
|
||||
```bash
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
(Note: this example assumes a newer version of Docker Desktop. For older versions that did not bundle Compose, you may need to replace `docker compose` with `docker-compose`)
|
||||
|
||||
The first time you run this command, it may take some time to build.
|
||||
|
||||
Changes to the [Vue](https://vuejs.org/) frontend components require rebuilding the Vue bundle (`pywb/static/vue/vueui.js`) to take effect. After making changes to one or more Vue components, you can rebuild the static bundle and view the changes in your development environment like so:
|
||||
|
||||
```bash
|
||||
./build-vue-ui.sh
|
||||
docker compose up -d --build --force-recreate
|
||||
```
|
||||
|
||||
Changes that modify pywb's Python dependencies or the operating system also require rebuilding the container:
|
||||
|
||||
```bash
|
||||
docker compose up -d --build --force-recreate
|
||||
```
|
||||
|
||||
To stop the container:
|
||||
|
||||
```bash
|
||||
docker compose down
|
||||
```
|
@ -1,4 +1,4 @@
|
||||
ARG PYTHON=python:3.7.2
|
||||
ARG PYTHON=python:3.8
|
||||
|
||||
FROM $PYTHON
|
||||
|
||||
|
5
NOTICE
Normal file
5
NOTICE
Normal file
@ -0,0 +1,5 @@
|
||||
pywb
|
||||
Copyright 2014-2020 Webrecorder Software, Rhizome, and Contributors.
|
||||
|
||||
Distributed under the GNU General Public License v3.
|
||||
See LICENSE for details.
|
75
README.rst
75
README.rst
@ -1,11 +1,11 @@
|
||||
Webrecorder pywb 2.4
|
||||
Webrecorder pywb 2.8
|
||||
====================
|
||||
|
||||
.. image:: https://travis-ci.org/webrecorder/pywb.svg?branch=master
|
||||
:target: https://travis-ci.org/webrecorder/pywb
|
||||
.. image:: https://ci.appveyor.com/api/projects/status/qxnbunw65o929599/branch/master?svg=true
|
||||
:target: https://ci.appveyor.com/project/webrecorder/pywb/branch/master
|
||||
.. image:: https://codecov.io/gh/webrecorder/pywb/branch/master/graph/badge.svg
|
||||
.. image:: https://raw.githubusercontent.com/webrecorder/pywb/main/pywb/static/pywb-logo.png
|
||||
|
||||
.. image:: https://github.com/webrecorder/pywb/workflows/CI/badge.svg
|
||||
:target: https://github.com/webrecorder/pywb/actions
|
||||
.. image:: https://codecov.io/gh/webrecorder/pywb/branch/main/graph/badge.svg
|
||||
:target: https://codecov.io/gh/webrecorder/pywb
|
||||
|
||||
Web Archiving Tools for All
|
||||
@ -13,7 +13,7 @@ Web Archiving Tools for All
|
||||
|
||||
`View the full pywb documentation <https://pywb.readthedocs.org>`_
|
||||
|
||||
**pywb** is a Python (2 and 3) web archiving toolkit for replaying web archives large and small as accurately as possible.
|
||||
**pywb** is a Python 3 web archiving toolkit for replaying web archives large and small as accurately as possible.
|
||||
The toolkit now also includes new features for creating high-fidelity web archives.
|
||||
|
||||
This toolset forms the foundation of the Webrecorder project, but also provides a generic web archiving toolkit
|
||||
@ -41,38 +41,67 @@ The 2.x release included a major overhaul of pywb and introduces many new featur
|
||||
|
||||
* Improved 'calendar' query UI with incremental loading, grouping results by year and month, and updated replay banner.
|
||||
|
||||
* New in 2.4: Extensible UI customizations system for modifying all aspects of the UI.
|
||||
* Extensible UI customizations system for modifying all aspects of the UI.
|
||||
|
||||
* New in 2.4: Robust access control system for blocking or excluding URLs, by prefix or by exact match.
|
||||
* Robust access control system for blocking or excluding URLs, by prefix or by exact match.
|
||||
|
||||
* New in 2.6: Access Control embargo and http-header control access settings.
|
||||
|
||||
* New in 2.6: Support for localization and multi-language deployment.
|
||||
|
||||
* New in 2.7: New banner/calendar UI written in `Vue <https://vuejs.org/>`_, with interactive timeline and easier theming of colors and logo via ``config.yaml``.
|
||||
|
||||
|
||||
Please see the `full documentation <https://pywb.readthedocs.org>`_ for more detailed info on all these features.
|
||||
|
||||
|
||||
Installation
|
||||
------------
|
||||
Installation for Deployment
|
||||
---------------------------
|
||||
|
||||
To run and install locally you can:
|
||||
To install pywb for usage, you can use:
|
||||
|
||||
* Install with ``python setup.py install``
|
||||
``pip install pywb``
|
||||
|
||||
* Run tests with ``python setup.py test``
|
||||
Note: depending on your Python installation, you may have to use ``pip3`` instead of ``pip``.
|
||||
|
||||
* Run Wayback with ``wayback`` (see docs for info on how to setup collections)
|
||||
|
||||
* Build docs locally with: ``cd docs; make html``. (The docs will be built in ``./_build/html/index.html``)
|
||||
Installation from local copy
|
||||
----------------------------
|
||||
|
||||
``git clone https://github.com/webrecorder/pywb``
|
||||
|
||||
To install from a locally cloned copy, install with ``pip install -e .`` or ``python setup.py install``.
|
||||
|
||||
To run tests, we recommend installing ``pip install tox tox-current-env`` and then running ``tox --current-env`` to test in your current Python environment.
|
||||
|
||||
To build docs locally, run: ``cd docs; make html``. (The docs will be built in ``./_build/html/index.html``)
|
||||
|
||||
|
||||
Running
|
||||
-------
|
||||
|
||||
After installation, you can run ``pywb`` or ``wayback``.
|
||||
|
||||
Consult the local or `online docs <https://pywb.readthedocs.org>`_ for latest usage and configuration details.
|
||||
|
||||
|
||||
Documentation
|
||||
-------------
|
||||
|
||||
The pywb documentation is extensive. Some links to a few key guides:
|
||||
|
||||
* `Getting Started Guide <https://pywb.readthedocs.io/en/latest/manual/usage.html#getting-started>`_
|
||||
|
||||
* `Embargo and Access Control Guide <https://pywb.readthedocs.io/en/latest/manual/access-control.html>`_
|
||||
|
||||
* `Localization and Multi-Language Guide <https://pywb.readthedocs.io/en/latest/manual/localization.html>`_
|
||||
|
||||
* `Deployment Guide <https://pywb.readthedocs.io/en/latest/manual/usage.html#deployment>`_
|
||||
|
||||
* `OpenWayback Transition Guide <https://pywb.readthedocs.io/en/latest/manual/owb-transition.html>`_
|
||||
|
||||
|
||||
Contributions & Bug Reports
|
||||
---------------------------
|
||||
|
||||
Users are encouraged to fork and contribute to this project to keep improving web archiving tools.
|
||||
|
||||
If you are interested in contributing, especially to any of these areas, please let us know!
|
||||
|
||||
Otherwise, please take a look at `list of current issues <https://github.com/webrecorder/pywb/issues>`_ and feel free to open new ones about any aspect of pywb, including the new documentation.
|
||||
|
||||
|
||||
Users are encouraged to fork and contribute to this project to keep improving web archiving tools. Please consult the `contributing guide <CONTRIBUTING.md>`_ for information on how to contribute to pywb.
|
||||
|
2
babel.ini
Normal file
2
babel.ini
Normal file
@ -0,0 +1,2 @@
|
||||
[jinja2: pywb/templates/**.html]
|
||||
extensions=jinja2.ext.i18n,jinja2.ext.autoescape,jinja2.ext.with_
|
7
build-vue-ui.sh
Executable file
7
build-vue-ui.sh
Executable file
@ -0,0 +1,7 @@
|
||||
#!/bin/bash
|
||||
|
||||
CURR_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
|
||||
|
||||
cd $CURR_DIR/pywb/vueui/
|
||||
yarn install
|
||||
yarn run build
|
21
config.yaml
21
config.yaml
@ -1,6 +1,16 @@
|
||||
# pywb config file
|
||||
# ========================================
|
||||
#
|
||||
debug: true
|
||||
|
||||
# Uncomment to set banner colors and logo
|
||||
# ui:
|
||||
# logo: path/relative/from/static/logo.png
|
||||
# logo_home_url: https://example.com
|
||||
# navbar_background_hex: 0c49b0
|
||||
# navbar_color_hex: fff
|
||||
# navbar_light_buttons: true
|
||||
# disable_printing: true
|
||||
|
||||
collections:
|
||||
all: $all
|
||||
@ -11,9 +21,18 @@ collections:
|
||||
# Settings for each collection
|
||||
use_js_obj_proxy: true
|
||||
|
||||
# Memento support, enable
|
||||
# Enable Memento support
|
||||
enable_memento: true
|
||||
|
||||
# Replay content in an iframe
|
||||
framed_replay: true
|
||||
|
||||
redirect_to_exact: true
|
||||
|
||||
# Uncomment and change to set default locale
|
||||
# default_locale: en
|
||||
|
||||
# Uncomment to set available locales
|
||||
# locales:
|
||||
# - en
|
||||
# - ru
|
||||
|
10
docker-compose.yaml
Normal file
10
docker-compose.yaml
Normal file
@ -0,0 +1,10 @@
|
||||
version: '3'
|
||||
|
||||
services:
|
||||
pywb:
|
||||
build: .
|
||||
ports:
|
||||
- 8080:8080
|
||||
volumes:
|
||||
- ./config.yaml:/webarchive/config.yaml
|
||||
- ./sample_archive/:/webarchive/sample_archive/
|
@ -1,74 +1,73 @@
|
||||
pywb\.apps package
|
||||
==================
|
||||
pywb.apps package
|
||||
=================
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
pywb\.apps\.cli module
|
||||
----------------------
|
||||
pywb.apps.cli module
|
||||
--------------------
|
||||
|
||||
.. automodule:: pywb.apps.cli
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.apps\.frontendapp module
|
||||
------------------------------
|
||||
pywb.apps.frontendapp module
|
||||
----------------------------
|
||||
|
||||
.. automodule:: pywb.apps.frontendapp
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.apps\.live module
|
||||
-----------------------
|
||||
pywb.apps.live module
|
||||
---------------------
|
||||
|
||||
.. automodule:: pywb.apps.live
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.apps\.rewriterapp module
|
||||
------------------------------
|
||||
pywb.apps.rewriterapp module
|
||||
----------------------------
|
||||
|
||||
.. automodule:: pywb.apps.rewriterapp
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.apps\.static\_handler module
|
||||
----------------------------------
|
||||
pywb.apps.static\_handler module
|
||||
--------------------------------
|
||||
|
||||
.. automodule:: pywb.apps.static_handler
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.apps\.warcserverapp module
|
||||
--------------------------------
|
||||
pywb.apps.warcserverapp module
|
||||
------------------------------
|
||||
|
||||
.. automodule:: pywb.apps.warcserverapp
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.apps\.wayback module
|
||||
--------------------------
|
||||
pywb.apps.wayback module
|
||||
------------------------
|
||||
|
||||
.. automodule:: pywb.apps.wayback
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.apps\.wbrequestresponse module
|
||||
------------------------------------
|
||||
pywb.apps.wbrequestresponse module
|
||||
----------------------------------
|
||||
|
||||
.. automodule:: pywb.apps.wbrequestresponse
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
Module contents
|
||||
---------------
|
||||
|
||||
|
@ -1,26 +1,25 @@
|
||||
pywb\.indexer package
|
||||
=====================
|
||||
pywb.indexer package
|
||||
====================
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
pywb\.indexer\.archiveindexer module
|
||||
------------------------------------
|
||||
pywb.indexer.archiveindexer module
|
||||
----------------------------------
|
||||
|
||||
.. automodule:: pywb.indexer.archiveindexer
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.indexer\.cdxindexer module
|
||||
--------------------------------
|
||||
pywb.indexer.cdxindexer module
|
||||
------------------------------
|
||||
|
||||
.. automodule:: pywb.indexer.cdxindexer
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
Module contents
|
||||
---------------
|
||||
|
||||
|
@ -1,42 +1,49 @@
|
||||
pywb\.manager package
|
||||
=====================
|
||||
pywb.manager package
|
||||
====================
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
pywb\.manager\.aclmanager module
|
||||
--------------------------------
|
||||
pywb.manager.aclmanager module
|
||||
------------------------------
|
||||
|
||||
.. automodule:: pywb.manager.aclmanager
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.manager\.autoindex module
|
||||
-------------------------------
|
||||
pywb.manager.autoindex module
|
||||
-----------------------------
|
||||
|
||||
.. automodule:: pywb.manager.autoindex
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.manager\.manager module
|
||||
-----------------------------
|
||||
pywb.manager.locmanager module
|
||||
------------------------------
|
||||
|
||||
.. automodule:: pywb.manager.locmanager
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb.manager.manager module
|
||||
---------------------------
|
||||
|
||||
.. automodule:: pywb.manager.manager
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.manager\.migrate module
|
||||
-----------------------------
|
||||
pywb.manager.migrate module
|
||||
---------------------------
|
||||
|
||||
.. automodule:: pywb.manager.migrate
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
Module contents
|
||||
---------------
|
||||
|
||||
|
@ -1,42 +1,41 @@
|
||||
pywb\.recorder package
|
||||
======================
|
||||
pywb.recorder package
|
||||
=====================
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
pywb\.recorder\.filters module
|
||||
------------------------------
|
||||
pywb.recorder.filters module
|
||||
----------------------------
|
||||
|
||||
.. automodule:: pywb.recorder.filters
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.recorder\.multifilewarcwriter module
|
||||
------------------------------------------
|
||||
pywb.recorder.multifilewarcwriter module
|
||||
----------------------------------------
|
||||
|
||||
.. automodule:: pywb.recorder.multifilewarcwriter
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.recorder\.recorderapp module
|
||||
----------------------------------
|
||||
pywb.recorder.recorderapp module
|
||||
--------------------------------
|
||||
|
||||
.. automodule:: pywb.recorder.recorderapp
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.recorder\.redisindexer module
|
||||
-----------------------------------
|
||||
pywb.recorder.redisindexer module
|
||||
---------------------------------
|
||||
|
||||
.. automodule:: pywb.recorder.redisindexer
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
Module contents
|
||||
---------------
|
||||
|
||||
|
@ -1,146 +1,145 @@
|
||||
pywb\.rewrite package
|
||||
=====================
|
||||
pywb.rewrite package
|
||||
====================
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
pywb\.rewrite\.content\_rewriter module
|
||||
---------------------------------------
|
||||
pywb.rewrite.content\_rewriter module
|
||||
-------------------------------------
|
||||
|
||||
.. automodule:: pywb.rewrite.content_rewriter
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.rewrite\.cookie\_rewriter module
|
||||
--------------------------------------
|
||||
pywb.rewrite.cookie\_rewriter module
|
||||
------------------------------------
|
||||
|
||||
.. automodule:: pywb.rewrite.cookie_rewriter
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.rewrite\.cookies module
|
||||
-----------------------------
|
||||
pywb.rewrite.cookies module
|
||||
---------------------------
|
||||
|
||||
.. automodule:: pywb.rewrite.cookies
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.rewrite\.default\_rewriter module
|
||||
---------------------------------------
|
||||
pywb.rewrite.default\_rewriter module
|
||||
-------------------------------------
|
||||
|
||||
.. automodule:: pywb.rewrite.default_rewriter
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.rewrite\.header\_rewriter module
|
||||
--------------------------------------
|
||||
pywb.rewrite.header\_rewriter module
|
||||
------------------------------------
|
||||
|
||||
.. automodule:: pywb.rewrite.header_rewriter
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.rewrite\.html\_insert\_rewriter module
|
||||
--------------------------------------------
|
||||
pywb.rewrite.html\_insert\_rewriter module
|
||||
------------------------------------------
|
||||
|
||||
.. automodule:: pywb.rewrite.html_insert_rewriter
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.rewrite\.html\_rewriter module
|
||||
------------------------------------
|
||||
pywb.rewrite.html\_rewriter module
|
||||
----------------------------------
|
||||
|
||||
.. automodule:: pywb.rewrite.html_rewriter
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.rewrite\.jsonp\_rewriter module
|
||||
-------------------------------------
|
||||
pywb.rewrite.jsonp\_rewriter module
|
||||
-----------------------------------
|
||||
|
||||
.. automodule:: pywb.rewrite.jsonp_rewriter
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.rewrite\.regex\_rewriters module
|
||||
--------------------------------------
|
||||
pywb.rewrite.regex\_rewriters module
|
||||
------------------------------------
|
||||
|
||||
.. automodule:: pywb.rewrite.regex_rewriters
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.rewrite\.rewrite\_amf module
|
||||
----------------------------------
|
||||
pywb.rewrite.rewrite\_amf module
|
||||
--------------------------------
|
||||
|
||||
.. automodule:: pywb.rewrite.rewrite_amf
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.rewrite\.rewrite\_dash module
|
||||
-----------------------------------
|
||||
pywb.rewrite.rewrite\_dash module
|
||||
---------------------------------
|
||||
|
||||
.. automodule:: pywb.rewrite.rewrite_dash
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.rewrite\.rewrite\_hls module
|
||||
----------------------------------
|
||||
pywb.rewrite.rewrite\_hls module
|
||||
--------------------------------
|
||||
|
||||
.. automodule:: pywb.rewrite.rewrite_hls
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.rewrite\.rewrite\_js\_workers module
|
||||
------------------------------------------
|
||||
pywb.rewrite.rewrite\_js\_workers module
|
||||
----------------------------------------
|
||||
|
||||
.. automodule:: pywb.rewrite.rewrite_js_workers
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.rewrite\.rewriteinputreq module
|
||||
-------------------------------------
|
||||
pywb.rewrite.rewriteinputreq module
|
||||
-----------------------------------
|
||||
|
||||
.. automodule:: pywb.rewrite.rewriteinputreq
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.rewrite\.templateview module
|
||||
----------------------------------
|
||||
pywb.rewrite.templateview module
|
||||
--------------------------------
|
||||
|
||||
.. automodule:: pywb.rewrite.templateview
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.rewrite\.url\_rewriter module
|
||||
-----------------------------------
|
||||
pywb.rewrite.url\_rewriter module
|
||||
---------------------------------
|
||||
|
||||
.. automodule:: pywb.rewrite.url_rewriter
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.rewrite\.wburl module
|
||||
---------------------------
|
||||
pywb.rewrite.wburl module
|
||||
-------------------------
|
||||
|
||||
.. automodule:: pywb.rewrite.wburl
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
Module contents
|
||||
---------------
|
||||
|
||||
|
@ -5,6 +5,7 @@ Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
pywb.apps
|
||||
pywb.indexer
|
||||
@ -17,15 +18,14 @@ Subpackages
|
||||
Submodules
|
||||
----------
|
||||
|
||||
pywb\.version module
|
||||
--------------------
|
||||
pywb.version module
|
||||
-------------------
|
||||
|
||||
.. automodule:: pywb.version
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
Module contents
|
||||
---------------
|
||||
|
||||
|
@ -1,82 +1,81 @@
|
||||
pywb\.utils package
|
||||
===================
|
||||
pywb.utils package
|
||||
==================
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
pywb\.utils\.binsearch module
|
||||
-----------------------------
|
||||
pywb.utils.binsearch module
|
||||
---------------------------
|
||||
|
||||
.. automodule:: pywb.utils.binsearch
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.utils\.canonicalize module
|
||||
--------------------------------
|
||||
pywb.utils.canonicalize module
|
||||
------------------------------
|
||||
|
||||
.. automodule:: pywb.utils.canonicalize
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.utils\.format module
|
||||
--------------------------
|
||||
pywb.utils.format module
|
||||
------------------------
|
||||
|
||||
.. automodule:: pywb.utils.format
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.utils\.geventserver module
|
||||
--------------------------------
|
||||
pywb.utils.geventserver module
|
||||
------------------------------
|
||||
|
||||
.. automodule:: pywb.utils.geventserver
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.utils\.io module
|
||||
----------------------
|
||||
pywb.utils.io module
|
||||
--------------------
|
||||
|
||||
.. automodule:: pywb.utils.io
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.utils\.loaders module
|
||||
---------------------------
|
||||
pywb.utils.loaders module
|
||||
-------------------------
|
||||
|
||||
.. automodule:: pywb.utils.loaders
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.utils\.memento module
|
||||
---------------------------
|
||||
pywb.utils.memento module
|
||||
-------------------------
|
||||
|
||||
.. automodule:: pywb.utils.memento
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.utils\.merge module
|
||||
-------------------------
|
||||
pywb.utils.merge module
|
||||
-----------------------
|
||||
|
||||
.. automodule:: pywb.utils.merge
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.utils\.wbexception module
|
||||
-------------------------------
|
||||
pywb.utils.wbexception module
|
||||
-----------------------------
|
||||
|
||||
.. automodule:: pywb.utils.wbexception
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
Module contents
|
||||
---------------
|
||||
|
||||
|
@ -1,66 +1,65 @@
|
||||
pywb\.warcserver\.index package
|
||||
===============================
|
||||
pywb.warcserver.index package
|
||||
=============================
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
pywb\.warcserver\.index\.aggregator module
|
||||
------------------------------------------
|
||||
pywb.warcserver.index.aggregator module
|
||||
---------------------------------------
|
||||
|
||||
.. automodule:: pywb.warcserver.index.aggregator
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.warcserver\.index\.cdxobject module
|
||||
-----------------------------------------
|
||||
pywb.warcserver.index.cdxobject module
|
||||
--------------------------------------
|
||||
|
||||
.. automodule:: pywb.warcserver.index.cdxobject
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.warcserver\.index\.cdxops module
|
||||
--------------------------------------
|
||||
pywb.warcserver.index.cdxops module
|
||||
-----------------------------------
|
||||
|
||||
.. automodule:: pywb.warcserver.index.cdxops
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.warcserver\.index\.fuzzymatcher module
|
||||
--------------------------------------------
|
||||
pywb.warcserver.index.fuzzymatcher module
|
||||
-----------------------------------------
|
||||
|
||||
.. automodule:: pywb.warcserver.index.fuzzymatcher
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.warcserver\.index\.indexsource module
|
||||
-------------------------------------------
|
||||
pywb.warcserver.index.indexsource module
|
||||
----------------------------------------
|
||||
|
||||
.. automodule:: pywb.warcserver.index.indexsource
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.warcserver\.index\.query module
|
||||
-------------------------------------
|
||||
pywb.warcserver.index.query module
|
||||
----------------------------------
|
||||
|
||||
.. automodule:: pywb.warcserver.index.query
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.warcserver\.index\.zipnum module
|
||||
--------------------------------------
|
||||
pywb.warcserver.index.zipnum module
|
||||
-----------------------------------
|
||||
|
||||
.. automodule:: pywb.warcserver.index.zipnum
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
Module contents
|
||||
---------------
|
||||
|
||||
|
@ -1,42 +1,41 @@
|
||||
pywb\.warcserver\.resource package
|
||||
==================================
|
||||
pywb.warcserver.resource package
|
||||
================================
|
||||
|
||||
Submodules
|
||||
----------
|
||||
|
||||
pywb\.warcserver\.resource\.blockrecordloader module
|
||||
----------------------------------------------------
|
||||
pywb.warcserver.resource.blockrecordloader module
|
||||
-------------------------------------------------
|
||||
|
||||
.. automodule:: pywb.warcserver.resource.blockrecordloader
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.warcserver\.resource\.pathresolvers module
|
||||
------------------------------------------------
|
||||
pywb.warcserver.resource.pathresolvers module
|
||||
---------------------------------------------
|
||||
|
||||
.. automodule:: pywb.warcserver.resource.pathresolvers
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.warcserver\.resource\.resolvingloader module
|
||||
--------------------------------------------------
|
||||
pywb.warcserver.resource.resolvingloader module
|
||||
-----------------------------------------------
|
||||
|
||||
.. automodule:: pywb.warcserver.resource.resolvingloader
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.warcserver\.resource\.responseloader module
|
||||
-------------------------------------------------
|
||||
pywb.warcserver.resource.responseloader module
|
||||
----------------------------------------------
|
||||
|
||||
.. automodule:: pywb.warcserver.resource.responseloader
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
Module contents
|
||||
---------------
|
||||
|
||||
|
@ -1,10 +1,11 @@
|
||||
pywb\.warcserver package
|
||||
========================
|
||||
pywb.warcserver package
|
||||
=======================
|
||||
|
||||
Subpackages
|
||||
-----------
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 4
|
||||
|
||||
pywb.warcserver.index
|
||||
pywb.warcserver.resource
|
||||
@ -12,71 +13,70 @@ Subpackages
|
||||
Submodules
|
||||
----------
|
||||
|
||||
pywb\.warcserver\.access\_checker module
|
||||
----------------------------------------
|
||||
pywb.warcserver.access\_checker module
|
||||
--------------------------------------
|
||||
|
||||
.. automodule:: pywb.warcserver.access_checker
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.warcserver\.amf module
|
||||
----------------------------
|
||||
pywb.warcserver.amf module
|
||||
--------------------------
|
||||
|
||||
.. automodule:: pywb.warcserver.amf
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.warcserver\.basewarcserver module
|
||||
---------------------------------------
|
||||
pywb.warcserver.basewarcserver module
|
||||
-------------------------------------
|
||||
|
||||
.. automodule:: pywb.warcserver.basewarcserver
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.warcserver\.handlers module
|
||||
---------------------------------
|
||||
pywb.warcserver.handlers module
|
||||
-------------------------------
|
||||
|
||||
.. automodule:: pywb.warcserver.handlers
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.warcserver\.http module
|
||||
-----------------------------
|
||||
pywb.warcserver.http module
|
||||
---------------------------
|
||||
|
||||
.. automodule:: pywb.warcserver.http
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.warcserver\.inputrequest module
|
||||
-------------------------------------
|
||||
pywb.warcserver.inputrequest module
|
||||
-----------------------------------
|
||||
|
||||
.. automodule:: pywb.warcserver.inputrequest
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.warcserver\.upstreamindexsource module
|
||||
--------------------------------------------
|
||||
pywb.warcserver.upstreamindexsource module
|
||||
------------------------------------------
|
||||
|
||||
.. automodule:: pywb.warcserver.upstreamindexsource
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
pywb\.warcserver\.warcserver module
|
||||
-----------------------------------
|
||||
pywb.warcserver.warcserver module
|
||||
---------------------------------
|
||||
|
||||
.. automodule:: pywb.warcserver.warcserver
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
|
||||
Module contents
|
||||
---------------
|
||||
|
||||
|
@ -53,7 +53,7 @@ master_doc = 'index'
|
||||
|
||||
# General information about the project.
|
||||
project = 'pywb'
|
||||
copyright = 'A Webrecorder Project, Ilya Kreymer, Rhizome'
|
||||
copyright = '2014-2021, Webrecorder Software, Rhizome, and Contributors'
|
||||
author = 'Ilya Kreymer'
|
||||
|
||||
# The version info for the project you're documenting, acts as replacement for
|
||||
@ -61,9 +61,9 @@ author = 'Ilya Kreymer'
|
||||
# built documents.
|
||||
#
|
||||
# The short X.Y version.
|
||||
version = '2.0'
|
||||
version = '2.7'
|
||||
# The full version, including alpha/beta/rc tags.
|
||||
release = '2.0'
|
||||
release = '2.7'
|
||||
|
||||
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||
# for a list of supported languages.
|
||||
|
@ -18,8 +18,10 @@ A subset of features provides the basic functionality of a "Wayback Machine".
|
||||
manual/configuring
|
||||
manual/access-control
|
||||
manual/ui-customization
|
||||
manual/localization
|
||||
manual/architecture
|
||||
manual/apis
|
||||
manual/owb-transition
|
||||
code/pywb
|
||||
|
||||
|
||||
|
@ -1,16 +1,89 @@
|
||||
.. _access-control:
|
||||
|
||||
Access Control System
|
||||
---------------------
|
||||
Embargo and Access Control
|
||||
--------------------------
|
||||
|
||||
The access controls system allows for a flexible configuration of rules to allow,
|
||||
block or exclude access to individual urls by longest-prefix match.
|
||||
The embargo system allows for date-based rules to block access to captures based on their capture dates.
|
||||
|
||||
The access controls system provides additional URL-based rules to allow, block or exclude access to specific URL prefixes or exact URLs.
|
||||
|
||||
The embargo and access control rules are configured per collection.
|
||||
|
||||
Embargo Settings
|
||||
================
|
||||
|
||||
The embargo system allows restricting access to all URLs within a collection based on the timestamp of each URL.
|
||||
Access to these resources is 'embargoed' until the date range is adjusted or the time interval passes.
|
||||
|
||||
The embargo can be used to disallow access to captures based on the following criteria:
|
||||
|
||||
- Captures before an exact date
|
||||
- Captures after an exact date
|
||||
- Captures newer than a time interval
|
||||
- Captures older than a time interval
|
||||
|
||||
Embargo Before/After Exact Date
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
To block access to all captures before or after a specific date, use the ``before`` or ``after`` embargo blocks
|
||||
with a specific timestamp.
|
||||
|
||||
For example, the following blocks access to all URLs captured before 2020-12-26 in the collection ``embargo-before``::
|
||||
|
||||
embargo-before:
|
||||
index_paths: ...
|
||||
archive_paths: ...
|
||||
embargo:
|
||||
before: '20201226'
|
||||
|
||||
|
||||
The following blocks access to all URLs captured on or after 2020-12-26 in collection ``embargo-after``::
|
||||
|
||||
embargo-after:
|
||||
index_paths: ...
|
||||
archive_paths: ...
|
||||
embargo:
|
||||
after: '20201226'
|
||||
|
||||
Embargo By Time Interval
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The embargo can also be set for a relative time interval, consisting of years, months, weeks and/or days.
|
||||
|
||||
|
||||
For example, the following blocks access to all URLs newer than 1 year::
|
||||
|
||||
embargo-newer:
|
||||
...
|
||||
embargo:
|
||||
newer:
|
||||
years: 1
|
||||
|
||||
|
||||
|
||||
The following blocks access to all URLs older than 1 year, 2 months, 3 weeks and 4 days::
|
||||
|
||||
embargo-older:
|
||||
...
|
||||
embargo:
|
||||
older:
|
||||
years: 1
|
||||
months: 2
|
||||
weeks: 3
|
||||
days: 4
|
||||
|
||||
|
||||
Any combination of years, months, weeks and days can be used (as long as at least one is provided) for the ``newer`` or ``older`` embargo settings.
|
||||
|
||||
|
||||
Access Control Settings
|
||||
=======================
|
||||
|
||||
Access Control Files (.aclj)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Access controls are set in one or more access control json files (.aclj), sorted in reverse alphabetical order.
|
||||
To determine the best match, a binary search is used (similar to CDXJ) lookup and then the best match is found forward.
|
||||
URL-based access controls are set in one or more access control JSON files (.aclj), sorted in reverse alphabetical order.
|
||||
To determine the best match, a binary search is used (similar to CDXJ lookup) and then the best match is found forward.
|
||||
|
||||
An .aclj file may look as follows::
|
||||
|
||||
@ -22,34 +95,83 @@ An .aclj file may look as follows::
|
||||
|
||||
Each JSON entry contains an ``access`` field and the original ``url`` field that was used to convert to the SURT (if any).
|
||||
|
||||
The prefix consists of a SURT key and a ``-`` (currently reserved for a timestamp/date range field to be added later)
|
||||
The JSON entry may also contain a ``user`` field, as explained below.
|
||||
|
||||
The prefix consists of a SURT key and a ``-`` (currently reserved for a timestamp/date range field to be added later).
|
||||
|
||||
Given these rules, a user would:
|
||||
|
||||
* be allowed to visit ``http://httpbin.org/anything/something`` (allow)
|
||||
* but would receive an 'access blocked' error message when viewing ``http://httpbin.org/`` (block)
|
||||
* would receive a 404 not found error when viewing ``http://httpbin.org/anything`` (exclude)
|
||||
|
||||
To match any possible URL in an .aclj file, set ``*,`` as the leading SURT, for example::
|
||||
|
||||
Access Types: allow, block, exclude
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
*, - {"access": "allow"}
|
||||
|
||||
Lines starting with ``*,`` should generally be at the end of the file, respecting the reverse alphabetical order.
|
||||
|
||||
|
||||
Access Types: allow, block, exclude, allow_ignore_embargo
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The available access types are as follows:
|
||||
|
||||
- ``exclude`` - when matched, results are excluded from the index, as if they do not exist. User will receive a 404.
|
||||
- ``block`` - when matched, results are not excluded from the index, marked with ``access: block``, but access to the actual is blocked. User will see a 451
|
||||
- ``allow`` - full access to the index and the resource.
|
||||
- ``block`` - when matched, results are not excluded from the index, but access to the actual content is blocked. User will see a 451.
|
||||
- ``allow`` - full access to the index and the resource, but may be overridden by embargo.
|
||||
- ``allow_ignore_embargo`` - full access to the index and resource, overriding any embargo settings.
|
||||
|
||||
The difference between ``exclude`` and ``block`` is that when blocked, the user can be notified that access is blocked, while
|
||||
with exclude, no trace of the resource is presented to the user.
|
||||
|
||||
The use of ``allow`` is useful to provide access to more specific resources within a broader block/exclude rule.
|
||||
The use of ``allow`` is useful to provide access to more specific resources within a broader block/exclude rule, while ``allow_ignore_embargo``
|
||||
can be used to override any embargo settings.
|
||||
|
||||
If both are present, the embargo restrictions are checked first and take precedence, unless the ``allow_ignore_embargo`` option is used
|
||||
to override the embargo.
|
||||
|
||||
|
||||
User-Based Access Controls
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The access control rules can further be customized by specifying different permissions for different 'users'. Since pywb does not have a user system,
|
||||
a special header, ``X-Pywb-ACL-User`` can be used to indicate a specific user.
|
||||
|
||||
This setting is designed to allow a more privileged user to access additional content or override an embargo.
|
||||
|
||||
For example, the following access control settings restrict access to ``https://example.com/restricted/`` by default, but allow access for the ``staff`` user::
|
||||
|
||||
com,example)/restricted - {"access": "allow", "user": "staff"}
|
||||
com,example)/restricted - {"access": "block"}
|
||||
|
||||
|
||||
Combined with the embargo settings, this can also be used to override the embargo for internal organizational users, while keeping the embargo for general access::
|
||||
|
||||
com,example)/restricted - {"access": "allow_ignore_embargo", "user": "staff"}
|
||||
com,example)/restricted - {"access": "allow"}
|
||||
|
||||
To make this work, pywb must be running behind an Apache or Nginx system that is configured to set ``X-Pywb-ACL-User: staff`` based on certain settings.
|
||||
|
||||
For example, this header may be set based on IP range, or based on password authentication.
|
||||
|
||||
To allow a user access to all URLs, overriding more specific rules and the ``default_access`` configuration setting, use the ``*,`` SURT::
|
||||
|
||||
*, - {"access": "allow", "user": "staff"}
|
||||
|
||||
Further examples of how to set this header will be provided in the deployments section.
|
||||
|
||||
**Note: Do not use the user-based rules without configuring proper authentication on an Apache or Nginx frontend to set or remove this header, otherwise the 'X-Pywb-ACL-User' can easily be faked.**
|
||||
|
||||
See the :ref:`config-acl-header` section in Usage for examples on how to configure this header.
|
||||
|
||||
|
||||
Access Error Messages
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The special error code 451 is used to indicate that a resource has been blocked (access setting ``block``)
|
||||
The special error code 451 is used to indicate that a resource has been blocked (access setting ``block``).
|
||||
|
||||
The [error.html](https://github.com/webrecorder/pywb/blob/master/pywb/templates/error.html) template contains a special message for this access and can be customized further.
|
||||
The `error.html <https://github.com/webrecorder/pywb/blob/master/pywb/templates/error.html>`_ template contains a special message for this access and can be customized further.
|
||||
|
||||
By design, resources that are ``exclude``-ed simply appear as 404 not found and no special error is provided.
|
||||
|
||||
@ -61,7 +183,7 @@ The .aclj files need not ever be added or edited manually.
|
||||
|
||||
The pywb ``wb-manager`` utility has been extended to provide tools for adding, removing and checking access control rules.
|
||||
|
||||
The access rules are written to ``<collection>/acl/access-rules.acl`` for a given collection ``<collection>`` for automatic collections.
|
||||
The access rules are written to ``<collection>/acl/access-rules.aclj`` for a given collection ``<collection>`` for automatic collections.
|
||||
|
||||
For example, to add the first line to an ACL file ``access.aclj``, one could run::
|
||||
|
||||
@ -73,6 +195,11 @@ The URL supplied can be a URL or a SURT prefix. If a SURT is supplied, it is use
|
||||
wb-manager acl add <collection> com, allow
|
||||
|
||||
|
||||
A specific user for user-based rules can also be specified, for example to add ``allow_ignore_embargo`` for user ``staff`` only, run::
|
||||
|
||||
wb-manager acl add <collection> http://httpbin.org/anything/something allow_ignore_embargo -u staff
|
||||
|
||||
|
||||
By default, access control rules apply to a prefix of a given URL or SURT.
|
||||
|
||||
To have the rule apply only to the exact match, use::
|
||||
@ -104,7 +231,7 @@ Access Controls for Custom Collections
|
||||
|
||||
For manually configured collections, there are additional options for configuring access controls.
|
||||
The access control files can be specified explicitly using the ``acl_paths`` key and allow specifying multiple ACL files,
|
||||
and allowing sharing access control files between different collections.
|
||||
and allow sharing access control files between different collections.
|
||||
|
||||
Single ACLJ::
|
||||
|
||||
@ -134,7 +261,21 @@ When finding the best rule from multiple ``.aclj`` files, each file is binary se
|
||||
set merge-sorted to find the best match (very similar to the CDXJ index lookup).
|
||||
|
||||
Note: It might make sense to separate ``allows.aclj`` and ``blocks.aclj`` into individual files for organizational reasons,
|
||||
but there is no specific need to keep more than one access control files.
|
||||
but there is no specific need to keep more than one access control file.
|
||||
|
||||
Finally, ACLJ and embargo settings combined for the same collection might look as follows::
|
||||
|
||||
collections:
|
||||
test:
|
||||
...
|
||||
embargo:
|
||||
newer:
|
||||
days: 366
|
||||
|
||||
acl_paths:
|
||||
- ./path/to/allows.aclj
|
||||
- ./path/to/blocks.aclj
|
||||
|
||||
|
||||
Default Access
|
||||
^^^^^^^^^^^^^^
|
||||
|
@ -46,6 +46,7 @@ It can be used to:
|
||||
|
||||
* Create a new collection -- ``wb-manager init <coll>``
|
||||
* Add WARCs to collection -- ``wb-manager add <coll> <warc>``
|
||||
* Unpack WACZs to add their WARCs and indices to collection -- ``wb-manager add --unpack-wacz <coll> <wacz>``
|
||||
* Add override templates
|
||||
* Add and remove metadata in a collection's ``metadata.yaml``
|
||||
* List all collections
|
||||
|
@ -19,7 +19,7 @@ For example, the following query might return the first 10 results from host ``h
|
||||
http://localhost:8080/coll/cdx?url=http://example.com/*&page=1&filter=mime:text/html&limit=10
|
||||
|
||||
|
||||
By default, the api endpoint is available at ``/<coll>/cdx`` for every collection.
|
||||
By default, the api endpoint is available at ``/<coll>/cdx`` for a collection named ``<coll>``.
|
||||
|
||||
The setting can be changed by setting ``cdx_api_endpoint`` in ``config.yaml``.
|
||||
|
||||
@ -36,9 +36,10 @@ API Reference
|
||||
^^^^^^^
|
||||
|
||||
| The only required parameter to the cdx server api is the url, ex:
|
||||
| ``http://localhost:8080/coll-cdx?url=example.com``
|
||||
| ``http://localhost:8080/coll/cdx?url=example.com``
|
||||
|
||||
will return a list of captures for ‘example.com’
|
||||
will return a list of captures for ‘example.com’ in the collection
|
||||
``coll`` (see above regarding per-collection api endpoints).
|
||||
|
||||
|
||||
``from, to``
|
||||
@ -50,7 +51,7 @@ given date/time range (inclusive).
|
||||
Timestamps may be <=14 digits and will be padded to either lower or
|
||||
upper bound.
|
||||
|
||||
| For example, ``...coll-cdx?url=example.com&from=2014&to=2014`` will
|
||||
| For example, ``...?url=example.com&from=2014&to=2014`` will
|
||||
return results of ``example.com`` that
|
||||
| have a timestamp between ``20140101000000`` and ``20141231235959``
|
||||
|
||||
@ -75,11 +76,11 @@ The cdx server supports the following ``matchType``
|
||||
As a shortcut, instead of specifying a separate ``matchType`` parameter,
|
||||
wildcards may be used in the url:
|
||||
|
||||
- ``...coll-cdx?url=http://example.com/path/*`` is equivalent to
|
||||
``...coll-cdx?url=http://example.com/path/&matchType=prefix``
|
||||
- ``...?url=http://example.com/path/*`` is equivalent to
|
||||
``...?url=http://example.com/path/&matchType=prefix``
|
||||
|
||||
- ``...coll-cdx?url=*.example.com`` is equivalent to
|
||||
``...coll-cdx?url=example.com&matchType=domain``
|
||||
- ``...?url=*.example.com`` is equivalent to
|
||||
``...?url=example.com&matchType=domain``
|
||||
|
||||
*Note: if you are using legacy cdx index files which are not
|
||||
SURT-ordered, the ``domain`` option will not be available. if this is
|
||||
@ -141,10 +142,10 @@ The ``filter`` param can be specified multiple times to filter by
|
||||
specific fields in the cdx index. Field names correspond to the fields
|
||||
returned in the JSON output. Filters can be specified as follows:
|
||||
|
||||
- ``...coll-cdx?url=example.com/*&filter==mime:text/html&filter=!=status:200``
|
||||
- ``...?url=example.com/*&filter==mime:text/html&filter=!=status:200``
|
||||
Return captures from example.com/\* where mime is text/html and http
|
||||
status is not 200.
|
||||
- ``...coll-cdx?url=example.com&matchType=domain&filter=~url:.*\.php$``
|
||||
- ``...?url=example.com&matchType=domain&filter=~url:.*\.php$``
|
||||
Return captures from the domain example.com whose URL ends in
|
||||
``.php``.
|
||||
|
||||
@ -182,7 +183,7 @@ the following modifiers:
|
||||
|
||||
|
||||
``fields``
|
||||
^^^^^^
|
||||
^^^^^^^^^^
|
||||
|
||||
The ``fields`` param can be used to specify which fields to include in the
|
||||
output. The standard available fields are usually: ``urlkey``,
|
||||
|
@ -16,8 +16,19 @@ With **framed replay**, the archived content is loaded into an iframe, and a top
|
||||
In this mode, the top frame url is for example, ``http://my-archive.example.com/<coll name>/http://example.com/`` while
|
||||
the actual content is served at ``http://my-archive.example.com/<coll name>/mp_/http://example.com/``
|
||||
|
||||
With **frameless replay**, the archived content is loaded directly. As of pywb 2.7, frameless replay is bannerless
|
||||
unless a custom banner is added via the ``custom_banner.html`` template.
|
||||
|
||||
|
||||
.. warning::
|
||||
pywb 2.7 introduces a breaking change around frameless replay and banners.
|
||||
Any custom banner intended to be used with frameless replay in pywb 2.7 and
|
||||
higher must be specified in the ``custom_banner.html`` template. This may
|
||||
require moving custom content from ``banner.html`` to the new
|
||||
``custom_banner.html``.
|
||||
|
||||
The default banner will no longer be served in frameless replay.
|
||||
|
||||
With **frameless replay**, the archived content is loaded directly, and a banner UI is injected into the page.
|
||||
|
||||
In this mode, the content is served directly at ``http://my-archive.example.com/<coll name>/http://example.com/``
|
||||
|
||||
@ -34,6 +45,8 @@ To disable framed replay add:
|
||||
Note: pywb also supports HTTP/S **proxy mode** which requires additional setup. See :ref:`https-proxy` for more details.
|
||||
|
||||
|
||||
.. _dir_structure:
|
||||
|
||||
Directory Structure
|
||||
-------------------
|
||||
|
||||
@ -264,7 +277,7 @@ The full set of configurable options (with their default settings) is as follows
|
||||
rollover_idle_secs: 600
|
||||
filename_template: my-warc-{timestamp}-{hostname}-{random}.warc.gz
|
||||
source_filter: live
|
||||
|
||||
enable_put_custom_record: false
|
||||
|
||||
The required ``source_coll`` setting specifies the source collection from which to load content that will be recorded.
|
||||
Most likely this will be the :ref:`live-web` collection, which should also be defined.
|
||||
@ -294,6 +307,70 @@ If running with auto indexing, the WARC will also get automatically indexed and
|
||||
As a shortcut, ``recorder: live`` can also be used to specify only the ``source_coll`` option.
|
||||
|
||||
|
||||
Dedup Options for Recording
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
By default, recording mode will record every URL.
|
||||
|
||||
Starting with pywb 2.5.0, it is possible to configure pywb to either write revisit records or skip duplicate URLs altogether using the ``dedup_policy`` key.
|
||||
|
||||
Using deduplication requires a Redis instance, which will keep track of the index for deduplication in a sorted-set key.
|
||||
The default Redis key used is ``redis://localhost:6379/0/pywb:{coll}:cdxj`` where ``{coll}`` is replaced with current collection id.
|
||||
|
||||
The field can be customized using the ``dedup_index_url`` field in the recorder config. The URL must start with ``redis://``, as that is the only
|
||||
supported dedup index at this time.
|
||||
|
||||
- To skip duplicate URLs, set ``dedup_policy: skip``. With this setting, only one instance of any URL will be recorded.
|
||||
|
||||
- To write revisit records, set ``dedup_policy: revisit``. With this setting, WARC ``revisit`` records will be written when a duplicate URL is detected
|
||||
and has the same digest as a previous response.
|
||||
|
||||
- To keep all duplicates, use ``dedup_policy: keep``. All WARC records are written to disk normally as with no policy, however, the Redis dedup index is still populated,
|
||||
which allows for instant replay (see below).
|
||||
|
||||
- To disable the dedup system, set to ``dedup_policy: none`` or omit the field. This is the default, and no Redis is required.
|
||||
|
||||
As another option, pywb can add an aggressive Cache-Control header to force the browser to cache all responses on a page.
|
||||
This feature is still experimental, but can be enabled via ``cache: always`` setting.
|
||||
|
||||
|
||||
For example, the following will enable ``revisit`` records to be written using the given Redis URL, and also enable aggressive caching when recording::
|
||||
|
||||
recorder:
|
||||
...
|
||||
cache: always
|
||||
dedup_policy: revisit
|
||||
dedup_index_url: 'redis://localhost:6379/0/pywb:{coll}:cdxj' # default when omitted
|
||||
|
||||
|
||||
Instant Replay (experimental)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Starting with pywb 2.5.0, when the ``dedup_policy`` is set, pywb can do 'instant replay' after recording, without having to regenerate the CDX or waiting for it to be updated with auto-indexing.
|
||||
|
||||
When any ``dedup_policy`` is set, pywb can also access the dedup Redis index, along with any on-disk CDX, when replaying the collection.
|
||||
|
||||
This feature is still experimental but should generally work. Additional options for working with the Redis Dedup index will be added in the future.
|
||||
|
||||
|
||||
.. _put-custom-record:
|
||||
|
||||
Adding Custom Resource Records
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
pywb now also supports adding custom data to a WARC ``resource`` record. This can be used to add custom resources, such as screenshots, logs, error messages,
|
||||
etc. that are not normally captured as part of recording, but are still useful to store in WARCs.
|
||||
|
||||
To add a custom resource, simply call ``PUT /<coll>/record`` with the data to be added as the request body and the type of the data specified as the content-type. The ``url`` can be specified as a query param.
|
||||
|
||||
For example, adding a custom record ``file:///my-custom-resource`` containing ``Some Custom Data`` can be done using ``curl`` as follows::
|
||||
|
||||
curl -XPUT "localhost:8080/my-web-archive/record?url=file:///my-custom-resource" --data "Some Custom Data"
|
||||
|
||||
|
||||
This feature is only available if ``enable_put_custom_record: true`` is set in the recorder config.
|
||||
|
||||
|
||||
.. _auto-fetch:
|
||||
|
||||
Auto-Fetch Responsive Recording
|
||||
@ -553,3 +630,15 @@ To enable the previous behavior, add to config::
|
||||
enable_flash_video_rewrite: true
|
||||
|
||||
The system may be revamped in the future and enabled by default, but for now, it is provided "as-is" for compatibility reasons.
|
||||
|
||||
Verify SSL-Certificates
|
||||
-----------------------
|
||||
|
||||
By default, SSL-Certificates of websites are not verified. To enable verification, add the following to the config::
|
||||
|
||||
certificates:
|
||||
cert_reqs: 'CERT_REQUIRED'
|
||||
ca_cert_dir: '/etc/ssl/certs'
|
||||
|
||||
``ca_cert_dir`` can optionally point to a directory containing the CA certificates that you trust. Most linux distributions provide CA certificates via a package called ``ca-certificates``.
|
||||
If omitted, the default system CA used by Python is used.
|
||||
|
BIN
docs/manual/images/vue-banner.png
Normal file
BIN
docs/manual/images/vue-banner.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.3 MiB |
BIN
docs/manual/images/vue-cal.png
Normal file
BIN
docs/manual/images/vue-cal.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 330 KiB |
152
docs/manual/localization.rst
Normal file
152
docs/manual/localization.rst
Normal file
@ -0,0 +1,152 @@
|
||||
.. _localization:
|
||||
|
||||
Localization / Multi-lingual Support
|
||||
------------------------------------
|
||||
|
||||
pywb supports configuring different language locales and loading different language translations, and dynamically switching languages.
|
||||
|
||||
pywb can extract all text from templates and generate CSV files for translation and convert them back into a binary format used for localization/internationalization.
|
||||
|
||||
(pywb uses the `Babel library <http://babel.pocoo.org/en/latest/>`_ which extends the `standard Python i18n system <https://docs.python.org/3/library/gettext.html>`_)
|
||||
|
||||
To ensure all localization related dependencies are installed, first run::
|
||||
|
||||
pip install pywb[i18n]
|
||||
|
||||
Locales to use are configured in the ``config.yaml``.
|
||||
|
||||
The command-line ``wb-manager`` utility provides a way to manage locales for translation, including generating extracted text, and to update translated text.
|
||||
|
||||
|
||||
Adding a Locale and Extracting Text
|
||||
===================================
|
||||
|
||||
To add a new locale for translation and automatically extract all text that needs to be translated, run::
|
||||
|
||||
wb-manager i18n extract <loc>
|
||||
|
||||
The ``<loc>`` can be one or more supported two-letter locales or CLDR language codes. To list available codes, you can run ``pybabel --list-locales``.
|
||||
|
||||
Localization data is placed in the ``i18n`` directory, and translatable strings can be found in ``i18n/translations/<locale>/LC_MESSAGES/messages.csv``
|
||||
|
||||
Each CSV file looks as follows, listing each source string and an empty string for the translated version::
|
||||
|
||||
"location","source","target"
|
||||
"pywb/templates/banner.html:6","Live on",""
|
||||
"pywb/templates/banner.html:8","Calendar icon",""
|
||||
"pywb/templates/banner.html:9 pywb/templates/query.html:45","View All Captures",""
|
||||
"pywb/templates/banner.html:10 pywb/templates/header.html:4","Language:",""
|
||||
"pywb/templates/banner.html:11","Loading...",""
|
||||
...
|
||||
|
||||
|
||||
This CSV can then be passed to translators to translate the text.
|
||||
|
||||
(The extraction parameters are configured to load data from ``pywb/templates/*.html`` in ``babel.ini``)
|
||||
|
||||
|
||||
For example, the following will generate translation strings for ``es`` and ``pt`` locales::
|
||||
|
||||
wb-manager i18n extract es pt
|
||||
|
||||
|
||||
The translatable text can then be found in ``i18n/translations/es/LC_MESSAGES/messages.csv`` and ``i18n/translations/pt/LC_MESSAGES/messages.csv``.
|
||||
|
||||
|
||||
The CSV files should be updated with a translation for each string in the ``target`` column.
|
||||
|
||||
The extract command adds any new strings without overwriting existing translations, so after running the update command to compile translated strings (described below), it is safe to run the extract command again.
|
||||
|
||||
|
||||
Updating Locale Catalog
|
||||
=======================
|
||||
|
||||
Once the text has been translated, and the CSV files updated, simply run::
|
||||
|
||||
wb-manager i18n update <loc>
|
||||
|
||||
This will parse the CSVs and compile the translated string tables for use with pywb.
|
||||
|
||||
|
||||
Specifying locales in pywb
|
||||
==========================
|
||||
|
||||
To enable the locales in pywb, one or more locales can be added to the ``locales`` key in ``config.yaml``, ex::
|
||||
|
||||
locales:
|
||||
- en
|
||||
- es
|
||||
|
||||
Single Language Default Locale
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
pywb can be configured with a default, single-language locale, by setting the ``default_locale`` property in ``config.yaml``::
|
||||
|
||||
|
||||
default_locale: es
|
||||
locales:
|
||||
- es
|
||||
|
||||
|
||||
With this configuration, pywb will automatically use the ``es`` locale for all text strings in pywb pages.
|
||||
|
||||
pywb will also set the ``<html lang="es">`` so that the browser will recognize the correct locale.
|
||||
|
||||
|
||||
Multi-language Translations
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
If more than one locale is specified, pywb will automatically show a language switching UI at the top of collection and search pages, with an option
|
||||
for each locale listed. To include English as an option, it should also be added as a locale (and no strings translated). For example::
|
||||
|
||||
locales:
|
||||
- en
|
||||
- es
|
||||
- pt
|
||||
|
||||
will configure pywb to show a language switch option on all pages.
|
||||
|
||||
|
||||
Localized Collection Paths
|
||||
==========================
|
||||
|
||||
When localization is enabled, pywb supports the locale prefix for accessing each collection with a localized language:
|
||||
If pywb has a collection ``my-web-archive``, then:
|
||||
|
||||
* ``/my-web-archive/`` - loads UI with default language (set via ``default_locale``)
|
||||
* ``/en/my-web-archive/`` - loads UI with ``en`` locale
|
||||
* ``/es/my-web-archive/`` - loads UI with ``es`` locale
|
||||
* ``/pt/my-web-archive/`` - loads UI with ``pt`` locale
|
||||
|
||||
The language switch options work by changing the locale prefix for the same page.
|
||||
|
||||
Listing and Removing Locales
|
||||
============================
|
||||
|
||||
To list the locales that have previously been added, you can also run ``wb-manager i18n list``.
|
||||
|
||||
To disable a locale from being used in pywb, simply remove it from the ``locales`` key in ``config.yaml``.
|
||||
|
||||
To remove data for a locale permanently, you can run: ``wb-manager i18n remove <loc>``. This will remove the locale directory on disk.
|
||||
|
||||
To remove all localization data, you can manually delete the ``i18n`` directory.
|
||||
|
||||
|
||||
UI Templates: Adding Localizable Text
|
||||
=====================================
|
||||
|
||||
Text that can be translated, localizable text, can be marked as such directly in the UI templates:
|
||||
|
||||
1. By wrapping the text in ``{% trans %}``/``{% endtrans %}`` tags. For example::
|
||||
|
||||
{% trans %}Collection {{ coll }} Search Page{% endtrans %}
|
||||
|
||||
2. Short-hand by calling a special ``_()`` function, which can be used in attributes or more dynamically. For example::
|
||||
|
||||
... title="{{ _('Enter a URL to search for') }}">
|
||||
|
||||
|
||||
These methods can be used in all UI templates and are supported by the Jinja2 templating system.
|
||||
|
||||
See :ref:`ui-customizations` for a list of all available UI templates.
|
||||
|
31
docs/manual/migrating-cdx.rst
Normal file
31
docs/manual/migrating-cdx.rst
Normal file
@ -0,0 +1,31 @@
|
||||
.. _migrating-cdx:
|
||||
|
||||
Migrating CDX
|
||||
=============
|
||||
|
||||
If you are not using OutbackCDX, you may need to check on the format of the CDX files that you are using.
|
||||
|
||||
Over the years, there have been many variations on the CDX (capture index) format which is used by OpenWayback and pywb to look up captures in WARC/ARC files.
|
||||
|
||||
When migrating CDX from OpenWayback, there are a few options.
|
||||
|
||||
pywb currently supports:
|
||||
|
||||
- 9 field CDX (surt-ordered)
|
||||
- 11 field CDX (surt-ordered)
|
||||
- CDXJ (surt-ordered)
|
||||
|
||||
pywb will support the 11-field and 9-field `CDX format <http://iipc.github.io/warc-specifications/specifications/cdx-format/cdx-2015/>`_ that is also used in OpenWayback.
|
||||
|
||||
Non-SURT ordered CDXs are not currently supported, though they may be supported in the future (see this `pending pull request <https://github.com/webrecorder/pywb/pull/586>`_).
|
||||
|
||||
CDXJ Conversion
|
||||
---------------
|
||||
|
||||
The native format used by pywb is the :ref:`cdxj-index` with SURT-ordering, which uses JSON to encode the fields, allowing for more flexibility by storing most of the index in a JSON, allowing support for optional fields as needed.
|
||||
|
||||
If your CDX are not SURT-ordered, 11 or 9 field CDX, or if there is a mix, pywb also offers a conversion utility which will convert all CDX to the pywb native CDXJ: ::
|
||||
|
||||
wb-manager cdx-convert <dir-of-cdx-files>
|
||||
|
||||
The converter will read the CDX files and create a corresponding .cdxj file for every cdx file. Since the conversion happens on the .cdx itself, it does not require reindexing the source WARC/ARC files and can happen fairly quickly. The converted CDXJ are guaranteed to be in the right format to work with pywb.
|
74
docs/manual/outbackcdx.rst
Normal file
74
docs/manual/outbackcdx.rst
Normal file
@ -0,0 +1,74 @@
|
||||
.. _using-outback:
|
||||
|
||||
|
||||
Using OutbackCDX with pywb
|
||||
==========================
|
||||
|
||||
The recommended setup is to run `OutbackCDX <https://github.com/nla/outbackcdx>`_ alongside pywb.
|
||||
OutbackCDX provides an index (CDX) server and can efficiently store and look up web archive data by URL.
|
||||
|
||||
|
||||
Adding CDX to OutbackCDX
|
||||
------------------------
|
||||
|
||||
To set up OutbackCDX, please follow the instructions on the `OutbackCDX README <https://github.com/nla/outbackcdx>`_.
|
||||
|
||||
Since pywb also uses the default port 8080, be sure to use a different port for OutbackCDX, eg. ``java -jar outbackcdx*.jar -p 8084``.
|
||||
|
||||
OutbackCDX can generally ingest existing CDX used in OpenWayback simply by POSTing to OutbackCDX at a new index endpoint.
|
||||
|
||||
For example, assuming OutbackCDX is running on port 8084, to add CDX for ``index1.cdx``, ``index2.cdx``, run:
|
||||
|
||||
.. code:: console
|
||||
|
||||
curl -X POST --data-binary @index1.cdx http://localhost:8084/mycoll
|
||||
curl -X POST --data-binary @index2.cdx http://localhost:8084/mycoll
|
||||
|
||||
The contents of each CDX file are added to the ``mycoll`` OutbackCDX index, which can correspond to the web archive collection ``mycoll``.
|
||||
The index is created automatically if it does not exist.
|
||||
|
||||
See the `OutbackCDX Docs <https://github.com/nla/outbackcdx#loading-records>`_ for more info on ingesting CDX.
|
||||
|
||||
|
||||
(Re)generating CDX from WARCs
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
There are some exceptions where it may be useful to re-generate the CDX with pywb for existing WARCs:
|
||||
|
||||
- If your CDX is 9-field and does not include the compressed length, regenerating the CDX will result in more efficient HTTP range requests
|
||||
- If you want to replay pages with POST requests, pywb generated CDX will soon be supported in OutbackCDX (see: `Issue #585 <https://github.com/webrecorder/pywb/issues/585>`_, `Issue #91 <https://github.com/nla/outbackcdx/pull/91>`_ )
|
||||
|
||||
|
||||
To generate the CDX, run the ``cdx-indexer`` command (with ``-p`` flag for POST request handling) for each WARC or set of WARCs you wish to index:
|
||||
|
||||
.. code:: console
|
||||
|
||||
cdx-indexer /path/to/mywarcs/my.warc.gz > ./index1.cdx
|
||||
cdx-indexer /path/to/all_warcs/*warc.gz > ./index2.cdx
|
||||
|
||||
|
||||
Then, run the POST command as shown above to ingest to OutbackCDX.
|
||||
|
||||
The above can be repeated for each WARC file, or for a set of WARCs using the ``*.warc.gz`` wildcard.
|
||||
|
||||
If a CDX index is too big, OutbackCDX may fail and ingesting an index per-WARC may be needed.
|
||||
|
||||
|
||||
Configure pywb with OutbackCDX
|
||||
------------------------------
|
||||
|
||||
The ``config.yaml`` should be configured to point to OutbackCDX.
|
||||
|
||||
Assuming a collection named ``mycoll``, the ``config.yaml`` can be configured as follows to use OutbackCDX:
|
||||
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
collections:
|
||||
mycoll:
|
||||
index_paths: cdx+http://localhost:8084/mycoll
|
||||
archive_paths: /path/to/mywarcs/
|
||||
|
||||
|
||||
The ``archive_paths`` can be configured to point to a directory of WARCs or a path index.
|
||||
|
42
docs/manual/owb-pywb-terms.rst
Normal file
42
docs/manual/owb-pywb-terms.rst
Normal file
@ -0,0 +1,42 @@
|
||||
OpenWayback vs pywb Terms
|
||||
=========================
|
||||
|
||||
pywb and OpenWayback use slightly different terms to describe the configuration options, as explained below.
|
||||
|
||||
Some differences are:
|
||||
- The ``wayback.xml`` config file in OpenWayback is replaced with the ``config.yaml`` YAML config file in pywb
|
||||
- The terms ``Access Point`` and ``Wayback Collection`` are replaced with ``Collection`` in pywb. The collection configuration represents a unique path (access point) and the data that is accessed at that path.
|
||||
- The ``Resource Store`` in OpenWayback is known in pywb as the archive paths, configured under ``archive_paths``
|
||||
- The ``Resource Index`` in OpenWayback is known in pywb as the index paths, configurable under ``index_paths``
|
||||
- The ``Exclusions`` in OpenWayback are replaced with general :ref:`access-control`
|
||||
|
||||
|
||||
|
||||
Pywb Collection Basics
|
||||
----------------------
|
||||
|
||||
A pywb collection must consist of a minimum of three parts: the collection name, the ``index_paths`` (where to read the index), and the ``archive_paths`` (where to read the WARC files).
|
||||
|
||||
The collection is accessed by name, so there is no distinct access point.
|
||||
|
||||
The collections are configured in the ``config.yaml`` under the ``collections`` key:
|
||||
|
||||
For example, a basic collection definition can be specified via:
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
collections:
|
||||
wayback:
|
||||
index_paths: /archive/cdx/
|
||||
archive_paths: /archive/storage/warcs/
|
||||
|
||||
|
||||
Pywb also supports a convention-based directory structure. Collections created in this structure can be detected automatically
|
||||
and need not be specified in the ``config.yaml``. This structure is designed for smaller collections that are all stored locally in a subdirectory.
|
||||
|
||||
See the :ref:`dir_structure` for the default pywb directory structure.
|
||||
|
||||
However, for importing existing collections from OpenWayback, it is probably easier to specify the existing paths as shown above.
|
||||
|
||||
|
||||
|
308
docs/manual/owb-to-pywb-config.rst
Normal file
308
docs/manual/owb-to-pywb-config.rst
Normal file
@ -0,0 +1,308 @@
|
||||
Converting OpenWayback Config to pywb Config
|
||||
============================================
|
||||
|
||||
OpenWayback includes many different types of configurations.
|
||||
|
||||
For most use cases, using OutbackCDX with pywb is the recommended approach, as explained in :ref:`using-outback`.
|
||||
|
||||
The following are a few specific examples of WaybackCollections gathered from active OpenWayback configurations
|
||||
and how they can be configured for use with pywb.
|
||||
|
||||
|
||||
Remote Collection / Access Point
|
||||
--------------------------------
|
||||
|
||||
A collection configured with a remote index and WARC access can be converted to use OutbackCDX
|
||||
for the remote index, while pywb can load WARCs directly from an HTTP endpoint.
|
||||
|
||||
For example, a configuration similar to:
|
||||
|
||||
.. code:: xml
|
||||
|
||||
<bean name="standardaccesspoint" class="org.archive.wayback.webapp.AccessPoint">
|
||||
<property name="accessPointPath" value="/wayback/"/>
|
||||
<property name="collection" ref="remotecollection" />
|
||||
...
|
||||
</bean>
|
||||
|
||||
<bean id="remotecollection" class="org.archive.wayback.webapp.WaybackCollection">
|
||||
<property name="resourceStore">
|
||||
<bean class="org.archive.wayback.resourcestore.SimpleResourceStore">
|
||||
<property name="prefix" value="http://myarchive.example.com/RemoteStore/" />
|
||||
</bean>
|
||||
</property>
|
||||
<property name="resourceIndex">
|
||||
<bean class="org.archive.wayback.resourceindex.RemoteResourceIndex">
|
||||
<property name="searchUrlBase" value="http://myarchive.example.com/RemoteIndex" />
|
||||
</bean>
|
||||
</property>
|
||||
</bean>
|
||||
|
||||
can be converted to the following config, with OutbackCDX assumed to be running
|
||||
at: ``http://myarchive.example.com/RemoteIndex``
|
||||
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
collections:
|
||||
wayback:
|
||||
index_paths: cdx+http://myarchive.example.com/RemoteIndex
|
||||
archive_paths: http://myarchive.example.com/RemoteStore/
|
||||
|
||||
Local Collection / Access Point
|
||||
-------------------------------
|
||||
|
||||
An OpenWayback configuration with a local collection and local CDX, for example:
|
||||
|
||||
.. code:: xml
|
||||
|
||||
<bean id="collection" class="org.archive.wayback.webapp.WaybackCollection">
|
||||
<property name="resourceIndex">
|
||||
<bean class="org.archive.wayback.resourceindex.cdxserver.EmbeddedCDXServerIndex">
|
||||
...
|
||||
<property name="cdxServer">
|
||||
<bean class="org.archive.cdxserver.CDXServer">
|
||||
<property name="cdxSource">
|
||||
<bean class="org.archive.format.cdx.MultiCDXInputSource">
|
||||
<property name="cdxUris">
|
||||
<list>
|
||||
<value>/wayback/cdx/mycdx1.cdx</value>
|
||||
<value>/wayback/cdx/mycdx2.cdx</value>
|
||||
</list>
|
||||
</property>
|
||||
</bean>
|
||||
</property>
|
||||
<property name="cdxFormat" value="cdx11"/>
|
||||
<property name="surtMode" value="true"/>
|
||||
</bean>
|
||||
</property>
|
||||
...
|
||||
</bean>
|
||||
</property>
|
||||
</bean>
|
||||
|
||||
|
||||
can be configured in pywb using the ``index_paths`` key.
|
||||
|
||||
Note that the CDX files should all be in the same format. See :ref:`migrating-cdx` for more info on converting
|
||||
CDX to pywb native CDXJ format.
|
||||
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
collections:
|
||||
wayback:
|
||||
index_paths: /wayback/cdx/
|
||||
archive_paths: ...
|
||||
|
||||
|
||||
It's also possible to combine directories, individual CDX files, and even a remote index from OutbackCDX in a single collection
|
||||
(as long as all CDX are in the same format).
|
||||
|
||||
pywb will query all the sources simultaneously to find the best match.
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
collections:
|
||||
wayback:
|
||||
index_group:
|
||||
cdx1: /wayback/cdx1/
|
||||
cdx2: /wayback/cdx2/mycdx.cdx
|
||||
remote: cdx+https://myarchive.example.com/outbackcdx
|
||||
|
||||
archive_paths: ...
|
||||
|
||||
However, OutbackCDX is still recommended to avoid more complex CDX configurations.
|
||||
|
||||
|
||||
WatchedCDXSource
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
OpenWayback includes a 'Watched CDX Source' option which watches a directory for new CDX indexes.
|
||||
This functionality is default in pywb when specifying a directory for the index path:
|
||||
|
||||
For example, the config:
|
||||
|
||||
.. code:: xml
|
||||
|
||||
<property name="source">
|
||||
<bean class="org.archive.wayback.resourceindex.WatchedCDXSource">
|
||||
<property name="recursive" value="false" />
|
||||
<property name="filters">
|
||||
<list>
|
||||
<value>^.+\.cdx$</value>
|
||||
</list>
|
||||
</property>
|
||||
<property name="path" value="/wayback/cdx-index/" />
|
||||
</bean>
|
||||
</property>
|
||||
|
||||
can be replaced with:
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
collections:
|
||||
wayback:
|
||||
index_paths: /wayback/cdx-index/
|
||||
archive_paths: ...
|
||||
|
||||
|
||||
pywb will load all CDX from that directory.
|
||||
|
||||
|
||||
ZipNum Cluster Index
|
||||
--------------------
|
||||
|
||||
pywb also supports using a compressed :ref:`zipnum` instead of a plain text CDX. For example, the following OpenWayback configuration:
|
||||
|
||||
.. code:: xml
|
||||
|
||||
<bean id="collection" class="org.archive.wayback.webapp.WaybackCollection">
|
||||
<property name="resourceIndex">
|
||||
<bean class="org.archive.wayback.resourceindex.LocalResourceIndex">
|
||||
...
|
||||
<property name="source">
|
||||
<bean class="org.archive.wayback.resourceindex.ZipNumClusterSearchResultSource">
|
||||
<property name="cluster">
|
||||
<bean class="org.archive.format.gzip.zipnum.ZipNumCluster">
|
||||
<property name="summaryFile" value="/webarchive/zipnum-cdx/all.summary"></property>
|
||||
<property name="locFile" value="/webarchive/zipnum-cdx/all.loc"></property>
|
||||
</bean>
|
||||
</property>
|
||||
...
|
||||
</bean>
|
||||
</property>
|
||||
</bean>
|
||||
|
||||
can simply be converted to the pywb config:
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
collections:
|
||||
wayback:
|
||||
index_paths: /webarchive/zipnum-cdx
|
||||
|
||||
# if the index is not surt ordered
|
||||
surt_ordered: false
|
||||
|
||||
|
||||
pywb will automatically determine the ``.summary`` and use the ``.loc`` files for the ZipNum Cluster if they are present in the directory.
|
||||
|
||||
Note that if the ZipNum index is **not** SURT ordered, the ``surt_ordered: false`` flag must be added to support this format.
|
||||
|
||||
|
||||
|
||||
Path Index Configuration
|
||||
------------------------
|
||||
|
||||
OpenWayback supports a 'path index' that can be used to look up a WARC by filename and map to an exact path.
|
||||
For compatibility, pywb supports the same path index lookup, as well as loading WARC files by path or URL prefix.
|
||||
|
||||
|
||||
For example, an OpenWayback configuration that includes a path index:
|
||||
|
||||
.. code:: xml
|
||||
|
||||
<bean id="resourcefilelocationdb" class="org.archive.wayback.resourcestore.locationdb.FlatFileResourceFileLocationDB">
|
||||
<property name="path" value="/archive/warc-paths.txt"/>
|
||||
</bean>
|
||||
|
||||
<bean id="resourceStore" class="org.archive.wayback.resourcestore.LocationDBResourceStore">
|
||||
<property name="db" ref="resourcefilelocationdb" />
|
||||
</bean>
|
||||
|
||||
|
||||
can be configured in the ``archive_paths`` field of pywb collection configuration:
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
collections:
|
||||
wayback:
|
||||
index_paths: ...
|
||||
archive_paths: /archive/warc-paths.txt
|
||||
|
||||
|
||||
The path index is a tab-delimited text file for mapping WARC filenames to full file paths or URLs, eg:
|
||||
|
||||
.. code::
|
||||
|
||||
example.warc.gz<tab>/some/path/to/example.warc.gz
|
||||
another.warc.gz<tab>/some-other/path/another.warc.gz
|
||||
remote.warc.gz<tab>http://warcstore.example.com/serve/remote.warc.gz
|
||||
|
||||
|
||||
However, if all WARC files are stored in the same directory, or in a few directories, a path index is not needed and pywb will try loading the WARC by prefix.
|
||||
|
||||
The ``archive_paths`` can accept a list of entries. For example, given the config:
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
collections:
|
||||
wayback:
|
||||
index_paths: ...
|
||||
archive_paths:
|
||||
- /archive/warcs1/
|
||||
- /archive/warcs2/
|
||||
- https://myarchive.example.com/warcs/
|
||||
- /archive/warc-paths.txt
|
||||
|
||||
|
||||
And the WARC file: ``example.warc.gz``, pywb will try to find the WARC in order from:
|
||||
|
||||
.. code::
|
||||
|
||||
1. /archive/warcs1/example.warc.gz
|
||||
2. /archive/warcs2/example.warc.gz
|
||||
3. https://myarchive.example.com/warcs/example.warc.gz
|
||||
4. Looking up example.warc.gz in /archive/warc-paths.txt
|
||||
|
||||
|
||||
Proxy Mode Access
|
||||
-----------------
|
||||
|
||||
An OpenWayback configuration may include many beans to support proxy mode, eg:
|
||||
|
||||
.. code:: xml
|
||||
|
||||
<bean id="proxyreplaydispatcher" class="org.archive.wayback.replay.SelectorReplayDispatcher">
|
||||
...
|
||||
<property name="renderer">
|
||||
<bean class="org.archive.wayback.proxy.HttpsRedirectAndLinksRewriteProxyHTMLMarkupReplayRenderer">
|
||||
...
|
||||
<property name="uriConverter">
|
||||
<bean class="org.archive.wayback.proxy.ProxyHttpsResultURIConverter"/>
|
||||
</property>
|
||||
</bean>
|
||||
</property>
|
||||
</bean>
|
||||
<bean name="proxy" class="org.archive.wayback.webapp.AccessPoint">
|
||||
<property name="internalPort" value="${proxy.port}"/>
|
||||
<property name="accessPointPath" value="${proxy.port}" />
|
||||
<property name="collection" ref="localcdxcollection" />
|
||||
...
|
||||
</bean>
|
||||
|
||||
|
||||
In pywb, the proxy mode can be enabled by adding to the main ``config.yaml`` the name of the collection
|
||||
that should be served in proxy mode:
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
proxy:
|
||||
source_coll: wayback
|
||||
|
||||
|
||||
There are some differences between OpenWayback and pywb proxy mode support.
|
||||
|
||||
In OpenWayback, proxy mode is configured using separate access points for different collections on different ports.
|
||||
OpenWayback only supports HTTP proxy and attempts to rewrite HTTPS URLs to HTTP.
|
||||
|
||||
In pywb, proxy mode is enabled on the same port as regular access, and pywb supports HTTP and HTTPS proxy.
|
||||
pywb does not attempt to rewrite HTTPS to HTTP, as most browsers disallow HTTP access as insecure for many sites.
|
||||
pywb supports a default collection that is enabled for proxy mode, and a default timestamp accessed by the proxy mode.
|
||||
(Switching the collection and date accessed is possible but not currently supported without extensions to pywb).
|
||||
|
||||
To support HTTPS access, pywb provides a certificate authority that can be trusted by a browser to rewrite HTTPS content.
|
||||
|
||||
See :ref:`https-proxy` for all of the options of pywb proxy mode configuration.
|
||||
|
80
docs/manual/owb-to-pywb-deploy.rst
Normal file
80
docs/manual/owb-to-pywb-deploy.rst
Normal file
@ -0,0 +1,80 @@
|
||||
Deploying pywb: Collection Paths and routing with Nginx/Apache
|
||||
==============================================================
|
||||
|
||||
In pywb, the collection name is also the access point, and each of the collections in ``config.yaml``
|
||||
can be accessed by their name as the subpath:
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
collections:
|
||||
wayback:
|
||||
...
|
||||
|
||||
another-collection:
|
||||
...
|
||||
|
||||
If pywb is deployed on port 8080, each collection will be available under:
|
||||
``http://<hostname>/wayback/*/https://example.com/`` and ``http://<hostname>/another-collection/*/https://example.com/``
|
||||
|
||||
To make a collection available under the root, simply set its name to: ``$root``
|
||||
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
collections:
|
||||
$root:
|
||||
...
|
||||
|
||||
another-collection:
|
||||
...
|
||||
|
||||
|
||||
Now, the first collection is available at: ``http://<hostname>/*/https://example.com/``.
|
||||
|
||||
|
||||
To deploy pywb on a subdirectory, eg. ``http://<hostname>/pywb/another-collection/*/https://example.com/``,
|
||||
|
||||
and in general, for production use, it is recommended to deploy pywb behind an Nginx or Apache reverse proxy.
|
||||
|
||||
|
||||
Nginx and Apache Reverse Proxy
|
||||
------------------------------
|
||||
|
||||
The recommended deployment for pywb is with uWSGI and behind an Nginx or Apache frontend.
|
||||
|
||||
This configuration allows for a more robust deployment, and allows these servers to handle static files.
|
||||
|
||||
|
||||
See the :ref:`nginx-deploy` and :ref:`apache-deploy` sections for more info on deploying with Nginx and Apache.
|
||||
|
||||
|
||||
Working Docker Compose Examples
|
||||
-------------------------------
|
||||
|
||||
The pywb `Deployment Examples <https://github.com/webrecorder/pywb/blob/main/sample-deploy/>`_ include working examples of deploying pywb with Nginx, Apache and OutbackCDX
|
||||
in Docker using Docker Compose, widely available container orchestration tools.
|
||||
|
||||
See `Installing Docker <https://docs.docker.com/get-docker/>`_ and `Installing Docker Compose <https://docs.docker.com/compose/install/>`_ for instructions on how to install these tools.
|
||||
|
||||
The examples are available in the ``sample-deploy`` directory of the pywb repo. The examples include:
|
||||
|
||||
- ``docker-compose-outback.yaml`` -- Docker Compose config to start OutbackCDX and pywb, and ingest sample data into OutbackCDX
|
||||
- ``docker-compose-nginx.yaml`` -- Docker Compose config to launch pywb and latest Nginx, with pywb running on subdirectory ``/wayback`` and Nginx serving static files from pywb.
|
||||
- ``docker-compose-apache.yaml`` -- Docker Compose config to launch pywb and latest Apache, with pywb running on subdirectory ``/wayback`` and Apache serving static files from pywb.
|
||||
|
||||
|
||||
The examples are designed to be run one at a time, and assume port 8080 is available.
|
||||
|
||||
After installing Docker and Docker Compose, run one of:
|
||||
|
||||
- ``docker-compose -f docker-compose-outback.yaml up``
|
||||
- ``docker-compose -f docker-compose-nginx.yaml up``
|
||||
- ``docker-compose -f docker-compose-apache.yaml up``
|
||||
|
||||
This will download the standard Docker images and start all of the components in Docker.
|
||||
|
||||
If everything works correctly, you should be able to access: ``http://localhost:8080/pywb/https://example.com/`` to view the sample pywb collection.
|
||||
|
||||
Press CTRL+C to interrupt and stop the example in the console.
|
||||
|
||||
|
68
docs/manual/owb-to-pywb-exclusions.rst
Normal file
68
docs/manual/owb-to-pywb-exclusions.rst
Normal file
@ -0,0 +1,68 @@
|
||||
Migrating Exclusion Rules
|
||||
=========================
|
||||
|
||||
pywb includes a new :ref:`access-control` system, which allows granular allow/block/exclude access control rules on paths and subpaths.
|
||||
|
||||
The rules are configured in .aclj files, and a command-line utility exists to import OpenWayback exclusions
|
||||
into the pywb ACLJ format.
|
||||
|
||||
For example, given an OpenWayback exclusion list configuration for a static file:
|
||||
|
||||
.. code:: xml
|
||||
|
||||
<bean id="excluder-factory-static" class="org.archive.wayback.accesscontrol.staticmap.StaticMapExclusionFilterFactory">
|
||||
<property name="file" value="/archive/exclusions.txt"/>
|
||||
<property name="checkInterval" value="600000" />
|
||||
</bean>
|
||||
|
||||
|
||||
The exclusions file can be converted to an .aclj file by running: ::
|
||||
|
||||
wb-manager acl importtxt /archive/exclusions.aclj /archive/exclusions.txt exclude
|
||||
|
||||
|
||||
Then, in the pywb config, specify:
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
collections:
|
||||
wayback:
|
||||
index_paths: ...
|
||||
archive_paths: ...
|
||||
acl_paths: /archive/exclusions.aclj
|
||||
|
||||
|
||||
It is possible to specify multiple access control files, which will all be applied.
|
||||
|
||||
Using ``block`` instead of ``exclude`` will result in pywb returning a 451 error, indicating that URLs are in the index but blocked.
|
||||
|
||||
|
||||
CLI Tool
|
||||
--------
|
||||
|
||||
After exclusions have been imported, it is recommended to use ``wb-manager acl`` command-line tool for managing exclusions:
|
||||
|
||||
|
||||
To add an exclusion, run: ::
|
||||
|
||||
wb-manager acl add /archive/exclusions.aclj http://httpbin.org/anything/something exclude
|
||||
|
||||
To remove an exclusion, run: ::
|
||||
|
||||
wb-manager acl remove /archive/exclusions.aclj http://httpbin.org/anything/something
|
||||
|
||||
|
||||
For more options, see the full :ref:`access-control` documentation or run ``wb-manager acl --help``.
|
||||
|
||||
|
||||
Not Yet Supported
|
||||
-----------------
|
||||
|
||||
Some OpenWayback exclusion options are not yet supported in pywb.
|
||||
The following is not yet supported in the access control system:
|
||||
|
||||
- Exclusions/Access Control By specific date range
|
||||
- Regex based exclusions
|
||||
- Date Range Embargo on All URLs
|
||||
- Robots.txt-based exclusions
|
||||
|
21
docs/manual/owb-transition.rst
Normal file
21
docs/manual/owb-transition.rst
Normal file
@ -0,0 +1,21 @@
|
||||
.. _transition-openwayback:
|
||||
|
||||
OpenWayback Transition Guide
|
||||
============================
|
||||
|
||||
This guide provides guidelines for transitioning from OpenWayback to pywb,
|
||||
with additional recommendations. The main recommendation is to run pywb along
|
||||
with OutbackCDX and nginx, and this configuration is covered below, along with additional options.
|
||||
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
owb-pywb-terms
|
||||
outbackcdx
|
||||
migrating-cdx
|
||||
owb-to-pywb-config
|
||||
owb-to-pywb-exclusions
|
||||
owb-to-pywb-deploy
|
||||
|
||||
|
@ -7,7 +7,7 @@ pywb includes a sophisticated server and client-side rewriting systems, includin
|
||||
configuration for domain and content-specific rewriting rules, fuzzy index matching for replay,
|
||||
and a thorough client-side JS rewriting system.
|
||||
|
||||
With pywb 2.3.0, the client-side rewriting system exists in a separate module at `https://github.com/webrecorder/wombat``
|
||||
With pywb 2.3.0, the client-side rewriting system exists in a separate module at ``https://github.com/webrecorder/wombat``
|
||||
|
||||
|
||||
URL Rewriting
|
||||
@ -92,7 +92,7 @@ Configuring Rewriters
|
||||
---------------------
|
||||
|
||||
pywb provides customizable rewriting based on content-type, the available types are configured
|
||||
in the :py:mod:`pywb.rewriter.default_rewriter`, which specifies rewriter classes per known type,
|
||||
in the :py:mod:`pywb.rewrite.default_rewriter`, which specifies rewriter classes per known type,
|
||||
and mapping of content-types to rewriters.
|
||||
|
||||
|
||||
@ -118,6 +118,7 @@ JS Rewriting
|
||||
The JS rewriter is applied to inline ``<script>`` blocks, or inline attribute js, and any files determined to be javascript (based on content type and ``js_`` modifier).
|
||||
|
||||
The default JS rewriter does not rewrite any links. Instead, the JS rewriter performs limited regular expression rewriting on the following:
|
||||
|
||||
* ``postMessage`` calls
|
||||
* certain ``this`` property accessors
|
||||
* specific ``location =`` assignment
|
||||
@ -126,7 +127,7 @@ Then, the entire script block is wrapped in a special code block to be executed
|
||||
|
||||
The server-side rewriting is to aid the client-side execution of wrapped code.
|
||||
|
||||
For more information, see :py:mod:`pywb.rewriter.regex_rewriters.JSWombatProxyRewriterMixin`
|
||||
For more information, see :py:mod:`pywb.rewrite.regex_rewriters.JSWombatProxyRewriterMixin`
|
||||
|
||||
|
||||
JSONP Rewriting
|
||||
@ -140,7 +141,7 @@ For example, a requested url might be ``/my-coll/http://example.com?callback=jQu
|
||||
|
||||
To ensure the JSONP callback works as expected, the content is rewritten to ``jQuery123(...)`` -> ``jQuery456(...)``
|
||||
|
||||
For more information, see :py:mod:`pywb.rewriter.jsonp_rewriter`
|
||||
For more information, see :py:mod:`pywb.rewrite.jsonp_rewriter`
|
||||
|
||||
|
||||
DASH and HLS Rewriting
|
||||
@ -148,5 +149,5 @@ DASH and HLS Rewriting
|
||||
|
||||
To support recording and replaying adaptive streaming formats (DASH and HLS), pywb can perform special rewriting on the manifests for these formats to remove all but one possible resolution/format. As a result, the non-deterministic format selection is reduced to a single consistent format.
|
||||
|
||||
For more information, see :py:mod:`pywb.rewriter.rewrite_hls` and :py:mod:`pywb.rewriter.rewrite_dash` and the tests in ``pywb/rewrite/test/test_content_rewriter.py``
|
||||
For more information, see :py:mod:`pywb.rewrite.rewrite_hls` and :py:mod:`pywb.rewrite.rewrite_dash` and the tests in ``pywb/rewrite/test/test_content_rewriter.py``
|
||||
|
||||
|
367
docs/manual/template-guide.rst
Normal file
367
docs/manual/template-guide.rst
Normal file
@ -0,0 +1,367 @@
|
||||
.. _template-guide:
|
||||
|
||||
Template Guide
|
||||
==============
|
||||
|
||||
Introduction
|
||||
------------
|
||||
|
||||
This guide provides a reference of all of the templates available in pywb and how they could be modified.
|
||||
|
||||
These templates are found in the ``pywb/templates`` directory and can be overridden as needed, one HTML page at a time.
|
||||
|
||||
Template variables are listed as ``{{ variable }}`` to indicate the syntax used for rendering the value of the variable in Jinja2.
|
||||
|
||||
Copying a Template For Modification
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
To modify a template, it is often useful to start with the default template. To do so, simply copy a default template
|
||||
to a local ``templates`` directory.
|
||||
|
||||
For convenience, you can also run: ``wb-manager template --add <template-name>`` to add the template automatically.
|
||||
|
||||
For a list of available templates that can be overridden in this way, run ``wb-manager template --list``.
|
||||
|
||||
|
||||
Per-Collection Templates
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Certain templates can be customized per-collection, instead of for all of pywb.
|
||||
|
||||
To override a template for a specific collection only, run ``wb-manager template --add <template-name> <coll-name>``
|
||||
|
||||
For example:
|
||||
|
||||
|
||||
.. code:: console
|
||||
|
||||
wb-manager init my-coll
|
||||
wb-manager template --add search_html my-coll
|
||||
|
||||
This will create the file ``collections/my-coll/templates/search.html``, a copy of the default search.html, but configured to be used only
|
||||
for the collection ``my-coll``.
|
||||
|
||||
|
||||
|
||||
Base Templates (and supporting templates)
|
||||
-----------------------------------------
|
||||
|
||||
File: ``base.html``
|
||||
|
||||
This template includes the HTML added to all pages other than framed replay. Shared JS and CSS includes meant for pages other than framed replay can be added here.
|
||||
|
||||
To customize the default pywb UI across multiple pages, the following additional templates
|
||||
can also be overridden:
|
||||
|
||||
* ``head.html`` -- Template containing content to be added to the ``<head>`` of the ``base`` template
|
||||
|
||||
* ``header.html`` -- Template to be added as the first content of the ``<body>`` tag of the ``base`` template
|
||||
|
||||
* ``footer.html`` -- Template for adding content as the "footer" of the ``<body>`` tag of the ``base`` template
|
||||
|
||||
|
||||
Note: The default pywb ``head.html`` and ``footer.html`` are currently blank. They can be populated to customize the rendering, add analytics, etc... as needed. Content such as styles or JS code (for example for analytics) must be added to the ``frame_insert.html`` template as well (details on that template below) to also be included in framed replay.
|
||||
|
||||
|
||||
The ``base.html`` template also provides five blocks that can be supplied by templates that extend it.
|
||||
|
||||
* ``title`` -- Block for supplying the title for the page
|
||||
|
||||
* ``head`` -- Block for adding content to the ``<head>``, includes ``head.html`` template
|
||||
|
||||
* ``header`` -- Block for adding content to the ``<body>`` before the ``body`` block, includes the ``header.html`` template
|
||||
|
||||
* ``body`` -- Block for adding the primary content to template
|
||||
|
||||
* ``footer`` -- Block for adding content to the ``<body>`` after the ``body`` block, includes the ``footer.html`` template
|
||||
|
||||
|
||||
Home, Collection and Search Templates
|
||||
-------------------------------------
|
||||
|
||||
|
||||
Home Page Template
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
|
||||
File: ``index.html``
|
||||
|
||||
This template renders the home page for pywb, and by default renders a list of available collections.
|
||||
|
||||
|
||||
Template variables:
|
||||
|
||||
* ``{{ routes }}`` - a list of available collection routes.
|
||||
|
||||
* ``{{ all_metadata }}`` - a dictionary of all metadata for all collections, keyed by collection id. See :ref:`custom-metadata` for more info on the custom metadata.
|
||||
|
||||
|
||||
Additionally, the :ref:`shared-template-vars` are also available to the home page template, as well as all other templates.
|
||||
|
||||
|
||||
Collection Page Template
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
File: ``search.html``
|
||||
|
||||
The 'collection page' template is the page rendered when no URL is specified, e.g. ``http://localhost:8080/my-collection/``.
|
||||
|
||||
The default template renders a search page that can be used to start searching for URLs.
|
||||
|
||||
Template variables:
|
||||
|
||||
* ``{{ coll }}`` - the collection name identifier.
|
||||
|
||||
* ``{{ metadata }}`` - an optional dictionary of metadata. See :ref:`custom-metadata` for more info.
|
||||
|
||||
* ``{{ ui }}`` - an optional ``ui`` dictionary from ``config.yaml``, if any
|
||||
|
||||
|
||||
.. _custom-metadata:
|
||||
|
||||
Custom Metadata
|
||||
"""""""""""""""
|
||||
|
||||
If custom collection metadata is provided, this page will automatically show this metadata as well.
|
||||
|
||||
It is possible to also add custom metadata per-collection that will be available to the collection.
|
||||
|
||||
For dynamic collections, any fields placed in ``<coll_name>/metadata.yaml`` files can be accessed
|
||||
|
||||
via the ``{{ metadata }}`` variable.
|
||||
|
||||
For example, if the metadata file contains:
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
somedata: value
|
||||
|
||||
Accessing ``{{ metadata.somedata }}`` will resolve to ``value``.
|
||||
|
||||
The metadata can also be added via commandline: ``wb-manager metadata myCollection --set somedata=value``.
|
||||
|
||||
|
||||
URL Query/Calendar Page Template
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
File: ``query.html``
|
||||
|
||||
This template is rendered for any URL search response pages, either a single URL or more complex queries.
|
||||
|
||||
For example, the page ``http://localhost:8080/my-collection/*/https://example.com/`` will be rendered using this template, with functionality provided by a Vue application.
|
||||
|
||||
Template variables:
|
||||
|
||||
* ``{{ url }}`` - the URL being queried, e.g. ``https://example.com/``
|
||||
|
||||
* ``{{ prefix }}`` - the collection prefix that will be used for replay, e.g. ``http://localhost:8080/my-collection/``
|
||||
|
||||
* ``{{ ui }}`` - an optional ``ui`` dictionary from ``config.yaml``, if any
|
||||
|
||||
* ``{{ static_prefix }}`` - the prefix from which static files will be accessed, e.g. ``http://localhost:8080/static/``.
|
||||
|
||||
|
||||
Replay and Banner Templates
|
||||
---------------------------
|
||||
|
||||
The following templates are used to configure the replay view itself.
|
||||
|
||||
|
||||
Banner Template
|
||||
^^^^^^^^^^^^^^^
|
||||
|
||||
File: ``banner.html``
|
||||
|
||||
This template is used to render the banner for framed replay. It is only rendered in the top/outer frame.
|
||||
|
||||
Template variables:
|
||||
|
||||
* ``{{ url }}`` - the URL being replayed.
|
||||
|
||||
* ``{{ timestamp }}`` - the timestamp being replayed, e.g. ``20211226`` in ``http://localhost:8080/pywb/20211226/mp_/https://example.com/``
|
||||
|
||||
* ``{{ is_framed }}`` - true/false if currently in framed mode.
|
||||
|
||||
* ``{{ wb_prefix }}`` - the collection prefix, e.g. ``http://localhost:8080/pywb/``
|
||||
|
||||
* ``{{ host_prefix }}`` - the pywb server origin, e.g. ``http://localhost:8080``
|
||||
|
||||
* ``{{ config }}`` - provides the contents of the ``config.yaml`` as a dictionary.
|
||||
|
||||
* ``{{ ui }}`` - an optional ``ui`` dictionary from ``config.yaml``, if any.
|
||||
|
||||
The default banner creates the UI dynamically in JavaScript using Vue in the ``frame_insert.html`` template.
|
||||
|
||||
|
||||
Custom Banner Template
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
File: ``custom_banner.html``
|
||||
|
||||
This template can be used to render a custom banner for frameless replay. It is blank by default.
|
||||
|
||||
In frameless replay, the content of this template is injected into the ``head_insert.html`` template to render the banner.
|
||||
|
||||
|
||||
Head Insert Template
|
||||
^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
File: ``head_insert.html``
|
||||
|
||||
This template represents the HTML injected into every replay page to add support for client-side rewriting via ``wombat.js``.
|
||||
|
||||
This template is part of the core pywb replay, and modifying this template is not recommended.
|
||||
|
||||
For customizing the banner, modify the ``banner.html`` (framed replay) or ``custom_banner.html`` (frameless replay) template instead.
|
||||
|
||||
|
||||
Top Frame Template
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
|
||||
File: ``frame_insert.html``
|
||||
|
||||
This template represents the top-level frame that is inserted to render the replay in framed mode.
|
||||
|
||||
By design, this template does *not* extend from the base template.
|
||||
|
||||
This template is responsible for creating the iframe that will render the content.
|
||||
|
||||
This template only renders the banner and is designed *not* to set the encoding to allow the browser to 'detect' the encoding for the containing iframe.
|
||||
For this reason, the template should only contain ASCII text, and %-encode any non-ASCII characters.
|
||||
|
||||
Content such as analytics code that is desired in the top frame of framed replay pages should be added to this template.
|
||||
|
||||
Template variables:
|
||||
|
||||
* ``{{ url }}`` - the URL being replayed.
|
||||
|
||||
* ``{{ timestamp }}`` - the timestamp being replayed, e.g. ``20211226`` in ``http://localhost:8080/pywb/20211226/mp_/https://example.com/``
|
||||
|
||||
* ``{{ wb_url }}`` - A complete ``WbUrl`` object, which contains the ``url``, ``timestamp`` and ``mod`` properties, representing the replay url.
|
||||
|
||||
* ``{{ wb_prefix }}`` - the collection prefix, e.g. ``http://localhost:8080/pywb/``
|
||||
|
||||
* ``{{ is_proxy }}`` - set to true if page is being loaded via an HTTP/S proxy (checks if WSGI env has ``wsgiprox.proxy_host`` set)
|
||||
|
||||
* ``{{ ui }}`` - an optional ``ui`` dictionary from ``config.yaml``, if any.
|
||||
|
||||
|
||||
.. _custom-top-frame:
|
||||
|
||||
Customizing the Top Frame Template
|
||||
""""""""""""""""""""""""""""""""""
|
||||
|
||||
The top-frame used for framed replay can be replaced or augmented
|
||||
by modifying the ``frame_insert.html``.
|
||||
|
||||
To start with modifying the default outer page, you can add it to the current
|
||||
templates directory by running ``wb-manager template --add frame_insert_html``
|
||||
|
||||
To initialize the replay, the outer page should include ``wb_frame.js``,
|
||||
create an ``<iframe>`` element and pass the id (or element itself) to the ``ContentFrame`` constructor:
|
||||
|
||||
.. code-block:: html
|
||||
|
||||
<script src='{{ host_prefix }}/{{ static_path }}/wb_frame.js'> </script>
|
||||
<script>
|
||||
var cframe = new ContentFrame({"url": "{{ url }}" + window.location.hash,
|
||||
"prefix": "{{ wb_prefix }}",
|
||||
"request_ts": "{{ wb_url.timestamp }}",
|
||||
"iframe": "#replay_iframe"});
|
||||
</script>
|
||||
|
||||
|
||||
The outer frame can receive notifications of changes to the replay via ``postMessage``
|
||||
|
||||
For example, to detect when the content frame changed and log the new url and timestamp,
|
||||
use the following script in the outer frame html:
|
||||
|
||||
.. code-block:: javascript
|
||||
|
||||
window.addEventListener("message", function(event) {
|
||||
if (event.data.wb_type == "load" || event.data.wb_type == "replace-url") {
|
||||
console.log("New Url: " + event.data.url);
|
||||
console.log("New Timestamp: " + event.data.ts);
|
||||
}
|
||||
});
|
||||
|
||||
The ``load`` message is sent when a new page is first loaded, while ``replace-url`` is used
|
||||
for url changes caused by content frame History navigation.
|
||||
|
||||
|
||||
Error Templates
|
||||
---------------
|
||||
|
||||
The following templates are used to render errors.
|
||||
|
||||
|
||||
Page Not Found Template
|
||||
^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
File: ``not_found.html`` - template for 404 error pages.
|
||||
|
||||
This template is used to render any 404/page not found errors that can occur when loading a URL that is not in the web archive.
|
||||
|
||||
Template variables:
|
||||
|
||||
* ``{{ url }}`` - the URL of the page
|
||||
|
||||
* ``{{ wbrequest }}`` - the full ``WbRequest`` object which can be used to get additional info about the request.
|
||||
|
||||
|
||||
(The default template checks ``{{ wbrequest and wbrequest.env.pywb_proxy_magic }}`` to determine if the request is via an :ref:`https-proxy` connection or a regular request).
|
||||
|
||||
|
||||
Generic Error Template
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
File: ``error.html`` - generic error template.
|
||||
|
||||
|
||||
This template is used to render all other errors that are not 'page not found'.
|
||||
|
||||
Template variables:
|
||||
|
||||
* ``{{ err_msg }}`` - a shorter error message indicating what went wrong.
|
||||
|
||||
* ``{{ err_details }}`` - additional details about the error.
|
||||
|
||||
|
||||
|
||||
|
||||
.. _shared-template-vars:
|
||||
|
||||
Shared Template Variables
|
||||
-------------------------
|
||||
|
||||
The following template variables are available to all templates.
|
||||
|
||||
* ``{{ env }}`` - contains environment variables passed to pywb.
|
||||
|
||||
* ``{{ env.pywb_proxy_magic }}`` - if set, indicates pywb is accessed via proxy. See :ref:`https-proxy`
|
||||
|
||||
* ``{{ static_prefix }}`` - URL path to use for loading static files.
|
||||
|
||||
|
||||
UI Configuration
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
Starting with pywb 2.7.0, the ``ui`` block in ``config.yaml`` can contain any custom ui-specific settings.
|
||||
|
||||
This block is provided to the ``search.html``, ``query.html`` and ``banner.html`` templates.
|
||||
|
||||
|
||||
Localization Globals
|
||||
^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The Localization system (see: :ref:`localization`) adds several additional template globals, to facilitate listing available locales and getting URLs to switch locales, including:
|
||||
|
||||
* ``{{ _Q() }}`` - a function used to mark certain text for localization, e.g. ``{{ _Q('localize this text') }}``
|
||||
|
||||
* ``{{ env.pywb_lang }}`` - indicates current locale language code used for localization.
|
||||
|
||||
* ``{{ locales }}`` - a list of all available locale language codes, used for iterating over all locales.
|
||||
|
||||
* ``{{ get_locale_prefixes() }}`` - a function which returns the prefixes to use to switch locales.
|
||||
|
||||
* ``{{ switch_locale() }}`` - a function used to render a URL to switch locale for the current page. Ex: ``<a href="{{ switch_locale(locale) }}">{{ locale }}</a>`` renders a link to switch to a specific locale.
|
||||
|
@ -1,141 +1,9 @@
|
||||
.. _ui-customizations:
|
||||
UI Customization
|
||||
================
|
||||
|
||||
UI Customizations
|
||||
-----------------
|
||||
.. toctree::
|
||||
|
||||
pywb supports UI customizations, either for an entire archive,
|
||||
or per-collection. Jinja2 templates are used for rendering all views,
|
||||
and static files can also be added as needed.
|
||||
ui-guide
|
||||
vue-ui
|
||||
template-guide
|
||||
|
||||
Templates
|
||||
^^^^^^^^^
|
||||
|
||||
Default templates, listed below, are found in the ``./pywb/templates/`` directory.
|
||||
|
||||
Custom template files placed in the ``templates`` directory, either in the root or per collection, will override that template.
|
||||
|
||||
To copy the default pywb template to the template directory using the cli tools, run:
|
||||
|
||||
``wb-manager template --add search_html``
|
||||
|
||||
The following page-level templates are available, corresponding to home page, collection page or search results:
|
||||
|
||||
* ``index.html`` -- Home Page Template, used for ``http://my-archive.example.com/``
|
||||
|
||||
* ``search.html`` -- Collection Template, used for each collection page ``http://my-archive.example.com/<coll name>/``
|
||||
|
||||
* ``query.html`` -- Capture Query Page for a given url, used for ``http://my-archive.example.com/<coll name>/*/<url>``
|
||||
|
||||
Error Pages:
|
||||
|
||||
* ``not_found.html`` -- Page to show when a url is not found in the archive
|
||||
|
||||
* ``error.html`` -- Generic Error Page for any error (except not found)
|
||||
|
||||
Replay and Banner templates:
|
||||
|
||||
* ``frame_insert.html`` -- Top-frame for framed replay mode (not used with frameless mode)
|
||||
|
||||
* ``head_insert.html`` -- Rewriting code injected into ``<head>`` of each replayed page.
|
||||
This template includes the banner template and itself should generally not need to be modified.
|
||||
|
||||
* ``banner.html`` -- The banner used for frameless replay. Can be set to blank to disable the banner.
|
||||
|
||||
|
||||
To customize the default pywb UI across multiple pages, the following generic templates
|
||||
can also be overridden:
|
||||
|
||||
* ``base.html`` -- The base template used for non-replay related pages.
|
||||
|
||||
* ``head.html`` -- Template containing content to be added to the ``<head>`` of the ``base`` template
|
||||
|
||||
* ``header.html`` -- Template to be added as the first content of the ``<body>`` tag of the ``base`` template
|
||||
|
||||
* ``footer.html`` -- Template for adding content as the "footer" of the ``<body>`` tag of the ``base`` template
|
||||
|
||||
|
||||
The ``base.html`` template also provides five blocks that can be supplied by templates that extend it.
|
||||
|
||||
* ``title`` -- Block for supplying the title for the page
|
||||
|
||||
* ``head`` -- Block for adding content to the ``<head>``, includes ``head.html`` template
|
||||
|
||||
* ``header`` -- Block for adding content to the ``<body>`` before the ``body`` block, includes the ``header.html`` template
|
||||
|
||||
* ``body`` -- Block for adding the primary content to template
|
||||
|
||||
* ``footer`` -- Block for adding content to the ``<body>`` after the ``body`` block, includes the ``footer.html`` template
|
||||
|
||||
Static Files
|
||||
^^^^^^^^^^^^
|
||||
|
||||
The pywb server will automatically support static files placed under the following directories:
|
||||
|
||||
* Files under the root ``static`` directory can be accessed via ``http://my-archive.example.com/static/<filename>``
|
||||
|
||||
* Files under the per-collection ``./collections/<coll name>/static`` directory can be accessed via ``http://my-archive.example.com/static/_/<coll name>/<filename>``
|
||||
|
||||
|
||||
Custom Metadata
|
||||
^^^^^^^^^^^^^^^
|
||||
|
||||
It is possible to also add custom metadata that will be available in the Jinja2 template.
|
||||
|
||||
For dynamic collections, any fields placed in the ``<coll_name>/metadata.yaml`` file can be accessed
|
||||
|
||||
via the ``{{ metadata }}`` variable.
|
||||
|
||||
For example, if metadata file contains:
|
||||
|
||||
.. ex-block:: yaml
|
||||
|
||||
somedata: value
|
||||
|
||||
Accessing ``{{ metadata.somedata }}`` will resolve to ``value``
|
||||
|
||||
The metadata can also be added via commandline: ``wb-manager metadata myCollection --set somedata=value``
|
||||
|
||||
|
||||
|
||||
The default collection UI template (search.html) currently lists all of the available metadata fields.
|
||||
|
||||
|
||||
Custom Outer Replay Frame
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The top-frame used for framed replay can be replaced or augmented
|
||||
by modifying the ``frame_insert.html``.
|
||||
|
||||
To start with modifying the default outer page, you can add it to the current
|
||||
templates directory by running ``wb-manager template --add frame_insert_html``
|
||||
|
||||
To initialize the replay, the outer page should include ``wb_frame.js``,
|
||||
create an ``<iframe>`` element and pass the id (or element itself) to the ``ContentFrame`` constructor:
|
||||
|
||||
.. code-block:: html
|
||||
|
||||
<script src='{{ host_prefix }}/{{ static_path }}/wb_frame.js'> </script>
|
||||
<script>
|
||||
var cframe = new ContentFrame({"url": "{{ url }}" + window.location.hash,
|
||||
"prefix": "{{ wb_prefix }}",
|
||||
"request_ts": "{{ wb_url.timestamp }}",
|
||||
"iframe": "#replay_iframe"});
|
||||
</script>
|
||||
|
||||
|
||||
The outer frame can receive notifications of changes to the replay via ``postMessage``
|
||||
|
||||
For example, to detect when the content frame changed and log the new url and timestamp,
|
||||
use the following script to the outer frame html:
|
||||
|
||||
.. code-block:: javascript
|
||||
|
||||
window.addEventListener("message", function(event) {
|
||||
if (event.data.wb_type == "load" || event.data.wb_type == "replace-url") {
|
||||
console.log("New Url: " + event.data.url);
|
||||
console.log("New Timestamp: " + event.data.ts);
|
||||
}
|
||||
});
|
||||
|
||||
The ``load`` message is sent when a new page is first loaded, while ``replace-url`` is used
|
||||
for url changes caused by content frame History navigation.
|
||||
|
91
docs/manual/ui-guide.rst
Normal file
91
docs/manual/ui-guide.rst
Normal file
@ -0,0 +1,91 @@
|
||||
.. _ui-customizations:
|
||||
|
||||
Customization Guide
|
||||
===================
|
||||
|
||||
Most aspects of the pywb user-interface can be customized by changing the default styles, or overriding the HTML templates.
|
||||
|
||||
This guide covers a few different options for customizing the UI.
|
||||
|
||||
|
||||
New Vue-based UI
|
||||
----------------
|
||||
|
||||
With pywb 2.7.0, pywb includes a brand new UI which includes a visual calendar mode and a histogram-based banner.
|
||||
|
||||
See :ref:`vue-ui` for more information on how to enable this UI.
|
||||
|
||||
|
||||
Customizing UI Templates
|
||||
------------------------
|
||||
|
||||
pywb renders HTML using the Jinja2 templating engine, loading default templates from the ``pywb/templates`` directory.
|
||||
|
||||
If running from a custom directory, templates can be placed in the ``templates`` directory and will override the defaults.
|
||||
|
||||
See :ref:`template-guide` for more details on customizing the templates.
|
||||
|
||||
|
||||
Static Files
|
||||
------------
|
||||
|
||||
pywb will automatically support static files placed under the following directories:
|
||||
|
||||
* Files under the root ``static`` directory: ``static/my-file.js`` can be accessed via ``http://localhost:8080/static/my-file.js``
|
||||
|
||||
|
||||
* Files under the per-collection directory: ``./collections/my-coll/static/my-file.js`` can be accessed via ``http://localhost:8080/static/_/my-coll/my-file.js``
|
||||
|
||||
|
||||
It is possible to change these settings via ``config.yaml``:
|
||||
|
||||
* ``static_prefix`` - sets the URL path used in pywb to serve static content (default ``static``)
|
||||
|
||||
* ``static_dir`` - sets the directory name used to read static files on disk (default ``static``)
|
||||
|
||||
While pywb can serve static files, it is recommended to use an existing web server to serve static files, especially if already using it in production.
|
||||
|
||||
For example, this can be done via nginx with:
|
||||
|
||||
|
||||
.. code:: text
|
||||
|
||||
location /wayback/static {
|
||||
alias /pywb/pywb/static;
|
||||
}
|
||||
|
||||
|
||||
Loading Custom Metadata
|
||||
-----------------------
|
||||
|
||||
pywb includes a default mechanism for loading externally defined metadata, loaded from a per-collection ``metadata.yaml`` YAML file at runtime.
|
||||
|
||||
See :ref:`custom-metadata` for more details.
|
||||
|
||||
Additionally, the banner template has access to the contents of the ``config.yaml`` via the ``{{ config }}`` template variable,
|
||||
allowing for passing in arbitrary config information.
|
||||
|
||||
For more dynamic loading of data, the banner and all of the templates can load additional data via JS ``fetch()`` calls.
|
||||
|
||||
|
||||
Embedding pywb in frames
|
||||
------------------------
|
||||
|
||||
It should be possible to embed pywb replay itself as an iframe as needed.
|
||||
|
||||
For customizing the top-level page and banner, see :ref:`custom-top-frame`.
|
||||
|
||||
However, there may be other reasons to embed pywb in an iframe.
|
||||
|
||||
This can be done simply by including something like:
|
||||
|
||||
.. code:: html
|
||||
|
||||
<html>
|
||||
<head></head>
|
||||
<body>
|
||||
<div>Embedding pywb replay</div>
|
||||
<iframe style="width: 100%; height: 100%" src="http://localhost:8080/pywb/20130729195151/http://test@example.com/"></iframe>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -95,8 +95,8 @@ add the WARC to a new collection and start pywb:
|
||||
|
||||
docker pull webrecorder/pywb
|
||||
docker run -e INIT_COLLECTION=my-web-archive -v /pywb-data:/webarchive \
|
||||
-v /path/to:/source webrecorder/pywb wb-manager add default /path/to/my_warc.warc.gz
|
||||
docker run -p 8080:8080 -v /pywb-data/:/webarchive wayback
|
||||
-v /path/to:/source webrecorder/pywb wb-manager add my-web-archive /source/my_warc.warc.gz
|
||||
docker run -p 8080:8080 -v /pywb-data/:/webarchive webrecorder/pywb wayback
|
||||
|
||||
This example is equivalent to the non-Docker example above.
|
||||
|
||||
@ -114,6 +114,8 @@ Using Existing Web Archive Collections
|
||||
Existing archives of WARC/ARC files can be used with pywb with a minimal amount of setup. By using ``wb-manager add``,
|
||||
WARC/ARC files will automatically be placed in the collection archive directory and indexed.
|
||||
|
||||
In pywb 2.8.0 and later, preliminary support for WACZ files is also added with ``wb-manager add --unpack-wacz``. This will unpack the provided WACZ file, adding its WARCs and indices to the collection.
|
||||
|
||||
By default, ``wb-manager`` places new collections in the ``collections/<coll name>`` subdirectory of the current working directory. To specify a different root directory, use ``wb-manager -d <dir>``. Other options can be set in the config file.
|
||||
|
||||
If you have a large number of existing CDX index files, pywb will be able to read them as well after running through a simple conversion process.
|
||||
@ -154,32 +156,40 @@ To enable auto-indexing, run with ``wayback -a`` or ``wayback -a --auto-interval
|
||||
Creating a Web Archive
|
||||
----------------------
|
||||
|
||||
Using Webrecorder
|
||||
^^^^^^^^^^^^^^^^^
|
||||
Using ArchiveWeb.page
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
If you do not have a web archive to test, one easy way to create one is to use `Webrecorder <https://webrecorder.io>`_
|
||||
If you do not have a web archive to test, one easy way to create one is to use the `ArchiveWeb.page <https://archiveweb.page>`_ browser extension for Chrome and other Chromium-based browsers such as Brave Browser. ArchiveWeb.page records pages visited during an archiving session in the browser, and provides means of both replaying and downloading the archived items created.
|
||||
|
||||
After recording, you can click **Stop** and then click `Download Collection` to receive a WARC (`.warc.gz`) file.
|
||||
Follow the instructions in `How To Create Web Archives with ArchiveWeb.page <https://archiveweb.page/en/usage/>`_. After recording, press **Stop** and then `download your collection <https://archiveweb.page/en/download/>`_ to receive a WARC (`.warc.gz`) file. If you choose to download your collection in the WACZ format, the WARC files can be found inside the zipped WACZ in the ``archive/`` directory.
|
||||
|
||||
You can then use this with work with pywb.
|
||||
You can then use your WARCs to work with pywb.
|
||||
|
||||
|
||||
Using pywb Recorder
|
||||
^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The core recording functionality in Webrecorder is also part of :mod:`pywb`. If you want to create a WARC locally, this can be
|
||||
Recording functionality is also part of :mod:`pywb`. If you want to create a WARC locally, this can be
|
||||
done by directly recording into your pywb collection:
|
||||
|
||||
1. Create a collection: ``wb-manager init my-web-archive`` (if you haven't already created a web archive collection)
|
||||
2. Run: ``wayback --record --live -a --auto-interval 10``
|
||||
3. Point your browser to ``http://localhost:8080/my-web-archive/record/<url>``
|
||||
|
||||
For example, to record ``http://example.com/``, visit ``http://localhost:8080/my-web-archive/record/<url>``
|
||||
For example, to record ``http://example.com/``, visit ``http://localhost:8080/my-web-archive/record/http://example.com/``
|
||||
|
||||
In this configuration, the indexing happens every 10 seconds. After 10 seconds, the recorded url will be accessible for replay, e.g.:
|
||||
``http://localhost:8080/my-web-archive/http://example.com/``
|
||||
|
||||
|
||||
Using Browsertrix
|
||||
^^^^^^^^^^^^^^^^^
|
||||
|
||||
For a more automated browser-based web archiving experience, `Browsertrix <https://browsertrix.com/>`_ provides a web interface for configuring, scheduling, running, reviewing, and curating crawls of web content. Crawl activity is shown in a live screencast of the browsers used for crawling and all web archives created in Browsertrix can be easily downloaded from the application in the WACZ format.
|
||||
|
||||
`Browsertrix Crawler <https://crawler.docs.browsertrix.com/>`_, which provides the underlying crawling functionality of Browsertrix, can also be run standalone in a Docker container on your local computer.
|
||||
|
||||
|
||||
HTTP/S Proxy Mode Access
|
||||
------------------------
|
||||
|
||||
@ -206,6 +216,21 @@ pywb uses the gevent coroutine library, and the default app will support many co
|
||||
|
||||
For larger scale production deployments, running with `uwsgi <http://uwsgi-docs.readthedocs.io/>`_ server application is recommended. The ``uwsgi.ini`` script provided can be used to launch pywb with uwsgi. uwsgi can be scaled to multiple processes to support the necessary workload, and pywb must be run with the `Gevent Loop Engine <http://uwsgi-docs.readthedocs.io/en/latest/Gevent.html>`_. Nginx or Apache can be used as an additional frontend for uwsgi.
|
||||
|
||||
It is recommended to install uwsgi and its dependencies in a Python virtual environment (virtualenv). Consult the uwsgi documentation for `virtualenv support <https://uwsgi-docs.readthedocs.io/en/latest/Python.html#virtualenv-support>`_ for details on how to specify the virtualenv to uwsgi.
|
||||
|
||||
Installation of uwsgi in a virtualenv will avoid known issues with installing uwsgi in some Debian-based OSes with Python 3.9+. As an example, in Ubuntu 22.04 with Python 3.10, it is recommended to install uwsgi like so: ::
|
||||
|
||||
sudo apt install -y python3-pip \
|
||||
python3-dev \
|
||||
build-essential \
|
||||
libssl-dev \
|
||||
libffi-dev \
|
||||
python3-setuptools \
|
||||
python3-venv
|
||||
python3 -m venv pywbenv
|
||||
source pywbenv/bin/activate
|
||||
pip install wheel uwsgi pywb
|
||||
|
||||
Although uwsgi does not provide a way to specify pywb command line options, all command line options can alternatively be configured via ``config.yaml``. See :ref:`configuring-pywb` for more info on available configuration options.
|
||||
|
||||
Docker Deployment
|
||||
@ -218,18 +243,20 @@ The following will run pywb in Docker directly on port 80:
|
||||
|
||||
.. code:: console
|
||||
|
||||
docker run -p 80:8080 -v /webarchive-data/:/webarchive
|
||||
docker run -p 80:8080 -v /webarchive-data/:/webarchive webrecorder/pywb
|
||||
|
||||
To run pywb in Docker behind a local nginx (as shown below), port 8081 should also be mapped:
|
||||
|
||||
.. code:: console
|
||||
|
||||
docker run -p 8081:8081 -v /webarchive-data/:/webarchive
|
||||
docker run -p 8081:8081 -v /webarchive-data/:/webarchive webrecorder/pywb
|
||||
|
||||
|
||||
See :ref:`getting-started-docker` for more info on using pywb with Docker.
|
||||
|
||||
|
||||
.. _nginx-deploy:
|
||||
|
||||
Sample Nginx Configuration
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
@ -263,29 +290,99 @@ See the `Nginx Docs <https://nginx.org/en/docs/>`_ for a lot more details on how
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
.. _apache-deploy:
|
||||
|
||||
Sample Apache Configuration
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The following Apache configuration snippet can be used to deploy pywb *without* uwsgi. A configuration with uwsgi is also probably possible but this covers the simplest case of launching the `wayback` binary directly.
|
||||
The recommended Apache configuration is to use pywb with ``mod_proxy`` and ``mod_proxy_uwsgi``.
|
||||
|
||||
The configuration assumes pywb is running on port 8080 on localhost, but it could be on a different machine as well.
|
||||
To enable these, ensure that your httpd.conf includes:
|
||||
|
||||
.. code:: apache
|
||||
|
||||
LoadModule proxy_module modules/mod_proxy.so
|
||||
LoadModule proxy_uwsgi_module modules/mod_proxy_uwsgi.so
|
||||
|
||||
|
||||
|
||||
Then, in your config, simply include:
|
||||
|
||||
.. code:: apache
|
||||
|
||||
<VirtualHost *:80>
|
||||
ServerName proxy.example.com
|
||||
Redirect / https://proxy.example.com/
|
||||
DocumentRoot /var/www/html/
|
||||
ProxyPass / uwsgi://pywb:8081/
|
||||
</VirtualHost>
|
||||
|
||||
<VirtualHost *:443>
|
||||
ServerName proxy.example.com
|
||||
SSLEngine on
|
||||
DocumentRoot /var/www/html/
|
||||
ErrorDocument 404 /404.html
|
||||
ProxyPreserveHost On
|
||||
ProxyPass /.well-known/ !
|
||||
ProxyPass / http://localhost:8080/
|
||||
ProxyPassReverse / http://localhost:8080/
|
||||
RequestHeader set "X-Forwarded-Proto" expr=%{REQUEST_SCHEME}
|
||||
</VirtualHost>
|
||||
The configuration assumes uwsgi is started with ``uwsgi uwsgi.ini``
|
||||
|
||||
|
||||
.. _config-acl-header:
|
||||
|
||||
Configuring Access Control Header
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The :ref:`access-control` system allows users to be granted different access settings based on the value of an ACL header, ``X-pywb-ACL-user``.
|
||||
|
||||
The header can be set via Nginx or Apache to grant custom access privileges based on IP address, password, or other combination of rules.
|
||||
|
||||
For example, to set the value of the header to ``staff`` if the IP of the request is from designated local IP ranges (127.0.0.1, 192.168.1.0/24), the following settings can be added to the configs:
|
||||
|
||||
For Nginx::
|
||||
|
||||
geo $acl_user {
|
||||
# ensure user is set to empty by default
|
||||
default "";
|
||||
|
||||
# optional: add IP ranges to allow privileged access
|
||||
127.0.0.1 "staff";
|
||||
192.168.1.0/24 "staff";
|
||||
}
|
||||
|
||||
...
|
||||
location /wayback/ {
|
||||
...
|
||||
uwsgi_param HTTP_X_PYWB_ACL_USER $acl_user;
|
||||
}
|
||||
|
||||
|
||||
For Apache::
|
||||
|
||||
<If "-R '192.168.1.0/24' || -R '127.0.0.1'">
|
||||
RequestHeader set X-Pywb-ACL-User staff
|
||||
</If>
|
||||
# ensure header is cleared if no match
|
||||
<Else>
|
||||
RequestHeader set X-Pywb-ACL-User ""
|
||||
</Else>
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
Running on Subdirectory Path
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
To run pywb on a subdirectory, rather than at the root of the web server, the recommended configuration is to adjust the ``uwsgi.ini`` to include the subdirectory.
|
||||
For example, to deploy pywb under the ``/wayback`` subdirectory, the ``uwsgi.ini`` can be configured as follows:
|
||||
|
||||
.. code:: ini
|
||||
|
||||
mount = /wayback=./pywb/apps/wayback.py
|
||||
manage-script-name = true
|
||||
|
||||
|
||||
.. _example-deploy:
|
||||
|
||||
Deployment Examples
|
||||
^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The ``sample-deploy`` directory includes working Docker Compose examples for deploying pywb with Nginx and Apache on the ``/wayback`` subdirectory.
|
||||
|
||||
See:
|
||||
- `Docker Compose Nginx <https://github.com/webrecorder/pywb/blob/main/sample-deploy/docker-compose-nginx.yaml>`_ for sample Nginx config.
|
||||
- `Docker Compose Apache <https://github.com/webrecorder/pywb/blob/main/sample-deploy/docker-compose-apache.yaml>`_ for sample Apache config.
|
||||
- `uwsgi_subdir.ini <https://github.com/webrecorder/pywb/blob/main/sample-deploy/uwsgi_subdir.ini>`_ for example subdirectory uwsgi config.
|
||||
|
||||
|
126
docs/manual/vue-ui.rst
Normal file
126
docs/manual/vue-ui.rst
Normal file
@ -0,0 +1,126 @@
|
||||
.. _vue-ui:
|
||||
|
||||
|
||||
Vue-based UI
|
||||
================
|
||||
|
||||
With 2.7.0, pywb introduces a new `Vue UI <https://vuejs.org/>`_ based system, which provides a more feature-rich representation of a web archive.
|
||||
|
||||
|
||||
Overview
|
||||
--------
|
||||
|
||||
Calendar UI
|
||||
^^^^^^^^^^^
|
||||
|
||||
The new calendar UI provides a histogram and a clickable calendar representation of a web archive.
|
||||
|
||||
The calendar is rendered in place of the URL query page from versions before 2.7.0.
|
||||
|
||||
.. image:: images/vue-cal.png
|
||||
:width: 600
|
||||
:alt: Calendar UI Screenshot
|
||||
|
||||
|
||||
Banner Replay UI
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
The new banner histogram allows for zooming in on captures per year, month, week, and day.
|
||||
|
||||
Navigation preserves the different levels. The full calendar UI is also available as a dropdown by clicking the calendar icon.
|
||||
|
||||
The new banner should allow for faster navigation across multiple captures.
|
||||
|
||||
.. image:: images/vue-banner.png
|
||||
:width: 600
|
||||
:alt: Banner UI Screenshot
|
||||
|
||||
|
||||
Custom Logo
|
||||
^^^^^^^^^^^
|
||||
|
||||
It is possible to configure a custom logo by setting ``ui.logo`` in ``config.yaml`` to a static file.
|
||||
|
||||
If omitted, the standard pywb logo will be used by default.
|
||||
|
||||
If set, the logo should point to a file in the static directory (default is ``static`` but can be changed via the ``static_dir`` config option).
|
||||
|
||||
For example, to use the file ``./static/my-logo.png`` as the logo, set:
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
ui:
|
||||
logo: my-logo.png
|
||||
|
||||
|
||||
Logo URL
|
||||
^^^^^^^^
|
||||
|
||||
It is possible to configure the logo to link to any URL by setting ``ui.logo_home_url`` in ``config.yaml`` to the URL of your choice.
|
||||
|
||||
If omitted, the logo will not link to any page.
|
||||
|
||||
For example, to have the logo redirect to ``https://example.com/web-archive-landing-page``, set:
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
ui:
|
||||
logo_home_url: https://example.com/web-archive-landing-page
|
||||
|
||||
|
||||
Printing
|
||||
^^^^^^^^
|
||||
|
||||
As of pywb 2.8, the replay header includes a print button that prints the contents of the replay iframe.
|
||||
|
||||
This button can be disabled by setting ``ui.disable_printing`` in ``config.yaml`` to any value.
|
||||
|
||||
For example:
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
ui:
|
||||
disable_printing: true
|
||||
|
||||
|
||||
Banner Colors
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
It is possible to configure the background color, text color, and button outlines of the header by setting values in the ``ui`` section of ``config.yaml``.
|
||||
|
||||
To customize the header background color, set ``ui.navbar_background_hex`` to the color's hex value, with the initial hash symbol (``#``) omitted. If this setting is not provided, ``#f8f9fa`` (Bootstrap 4's ``light``) will be used by default.
|
||||
|
||||
For example, to use the color ``#cff3ff`` as the banner color, set:
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
ui:
|
||||
navbar_background_hex: cff3ff
|
||||
|
||||
The navbar text color can similarly be set using the ``ui.navbar_color_hex`` setting.
|
||||
|
||||
The banner's buttons default to Bootstrap 4's ``btn-outline-dark``. To use light-outlined buttons instead, set ``ui.navbar_light_buttons`` equal to any value.
|
||||
|
||||
|
||||
Updating the Vue UI
|
||||
-------------------
|
||||
|
||||
The UI is contained within the ``pywb/vueui`` directory.
|
||||
|
||||
The Vue component sources can be found in ``pywb/vueui/src``.
|
||||
|
||||
Updating the UI requires ``node`` and ``yarn``.
|
||||
|
||||
To install and build, run:
|
||||
|
||||
|
||||
.. code:: console
|
||||
|
||||
cd pywb/vueui
|
||||
yarn install
|
||||
yarn build
|
||||
|
||||
|
||||
This will generate the output to ``pywb/static/vue/vueui.js`` which is loaded from the default templates when the Vue UI rendering is enabled.
|
||||
|
||||
Additional styles for the banner are loaded from ``pywb/static/vue_banner.css``.
|
@ -5,3 +5,5 @@ uwsgi
|
||||
ujson
|
||||
pysocks
|
||||
lxml
|
||||
babel
|
||||
translate_toolkit
|
||||
|
@ -2,6 +2,13 @@ from gevent.monkey import patch_all; patch_all()
|
||||
from argparse import ArgumentParser
|
||||
|
||||
import logging
|
||||
import pkg_resources
|
||||
|
||||
|
||||
#=============================================================================
|
||||
def get_version():
|
||||
"""Get version of the pywb"""
|
||||
return "pywb " + pkg_resources.get_distribution("pywb").version
|
||||
|
||||
|
||||
#=============================================================================
|
||||
@ -40,6 +47,8 @@ class BaseCli(object):
|
||||
:param str desc: The description for the application to be started
|
||||
"""
|
||||
parser = ArgumentParser(description=desc)
|
||||
parser.add_argument("-V", "--version", action="version", version=get_version())
|
||||
|
||||
parser.add_argument('-p', '--port', type=int, default=default_port,
|
||||
help='Port to listen on (default %s)' % default_port)
|
||||
parser.add_argument('-b', '--bind', default='0.0.0.0',
|
||||
@ -110,7 +119,7 @@ class BaseCli(object):
|
||||
self.extra_config['debug'] = True
|
||||
|
||||
if self.r.record:
|
||||
self.extra_config['recorder'] = 'live'
|
||||
self.extra_config['recorder'] = {'source_coll': 'live'}
|
||||
|
||||
def run(self):
|
||||
"""Start the application"""
|
||||
|
@ -1,15 +1,17 @@
|
||||
from gevent.monkey import patch_all; patch_all()
|
||||
|
||||
from werkzeug.routing import Map, Rule, RequestRedirect, Submount
|
||||
from werkzeug.wsgi import pop_path_info
|
||||
from six.moves.urllib.parse import urljoin
|
||||
from wsgiref.util import shift_path_info
|
||||
from six.moves.urllib.parse import urljoin, parse_qsl
|
||||
from six import iteritems
|
||||
from warcio.utils import to_native_str
|
||||
from warcio.timeutils import iso_date_to_timestamp
|
||||
from warcio.timeutils import iso_date_to_timestamp, timestamp_to_iso_date
|
||||
from wsgiprox.wsgiprox import WSGIProxMiddleware
|
||||
|
||||
from pywb.recorder.multifilewarcwriter import MultiFileWARCWriter
|
||||
from pywb.recorder.recorderapp import RecorderApp
|
||||
from pywb.recorder.filters import SkipDupePolicy, WriteDupePolicy, WriteRevisitDupePolicy
|
||||
from pywb.recorder.redisindexer import WritableRedisIndexer, RedisPendingCounterTempBuffer
|
||||
|
||||
from pywb.utils.loaders import load_yaml_config
|
||||
from pywb.utils.geventserver import GeventServer
|
||||
@ -72,6 +74,7 @@ class FrontEndApp(object):
|
||||
custom_config=custom_config)
|
||||
self.recorder = None
|
||||
self.recorder_path = None
|
||||
self.put_custom_record_path = None
|
||||
self.proxy_default_timestamp = None
|
||||
|
||||
config = self.warcserver.config
|
||||
@ -104,6 +107,8 @@ class FrontEndApp(object):
|
||||
|
||||
self.templates_dir = config.get('templates_dir', 'templates')
|
||||
self.static_dir = config.get('static_dir', 'static')
|
||||
self.static_prefix = config.get('static_prefix', 'static')
|
||||
self.default_locale = config.get('default_locale', '')
|
||||
|
||||
metadata_templ = os.path.join(self.warcserver.root_dir, '{coll}', 'metadata.yaml')
|
||||
self.metadata_cache = MetadataCache(metadata_templ)
|
||||
@ -115,8 +120,8 @@ class FrontEndApp(object):
|
||||
specific routes (proxy mode, record)
|
||||
"""
|
||||
self.url_map = Map()
|
||||
self.url_map.add(Rule('/static/_/<coll>/<path:filepath>', endpoint=self.serve_static))
|
||||
self.url_map.add(Rule('/static/<path:filepath>', endpoint=self.serve_static))
|
||||
self.url_map.add(Rule('/{0}/_/<coll>/<path:filepath>'.format(self.static_prefix), endpoint=self.serve_static))
|
||||
self.url_map.add(Rule('/{0}/<path:filepath>'.format(self.static_prefix), endpoint=self.serve_static))
|
||||
self.url_map.add(Rule('/collinfo.json', endpoint=self.serve_listing))
|
||||
|
||||
if self.is_valid_coll('$root'):
|
||||
@ -171,6 +176,10 @@ class FrontEndApp(object):
|
||||
if self.recorder_path:
|
||||
routes.append(Rule(coll_prefix + self.RECORD_ROUTE + '/<path:url>', endpoint=self.serve_record))
|
||||
|
||||
# enable PUT of custom data as 'resource' records
|
||||
if self.put_custom_record_path:
|
||||
routes.append(Rule(coll_prefix + self.RECORD_ROUTE, endpoint=self.put_custom_record, methods=["PUT"]))
|
||||
|
||||
return routes
|
||||
|
||||
def get_upstream_paths(self, port):
|
||||
@ -207,21 +216,60 @@ class FrontEndApp(object):
|
||||
else:
|
||||
recorder_coll = recorder_config['source_coll']
|
||||
|
||||
# TODO: support dedup
|
||||
# cache mode
|
||||
self.rec_cache_mode = recorder_config.get('cache', 'default')
|
||||
|
||||
dedup_policy = recorder_config.get('dedup_policy')
|
||||
dedup_by_url = False
|
||||
|
||||
if dedup_policy == 'none':
|
||||
dedup_policy = ''
|
||||
|
||||
if dedup_policy == 'keep':
|
||||
dedup_policy = WriteDupePolicy()
|
||||
elif dedup_policy == 'revisit':
|
||||
dedup_policy = WriteRevisitDupePolicy()
|
||||
elif dedup_policy == 'skip':
|
||||
dedup_policy = SkipDupePolicy()
|
||||
dedup_by_url = True
|
||||
elif dedup_policy:
|
||||
msg = 'Invalid option for dedup_policy: {0}'
|
||||
raise Exception(msg.format(dedup_policy))
|
||||
|
||||
if dedup_policy:
|
||||
dedup_index = WritableRedisIndexer(redis_url=self.warcserver.dedup_index_url,
|
||||
dupe_policy=dedup_policy,
|
||||
rel_path_template=self.warcserver.root_dir + '/{coll}/archive')
|
||||
else:
|
||||
dedup_index = None
|
||||
|
||||
|
||||
warc_writer = MultiFileWARCWriter(self.warcserver.archive_paths,
|
||||
max_size=int(recorder_config.get('rollover_size', 1000000000)),
|
||||
max_idle_secs=int(recorder_config.get('rollover_idle_secs', 600)),
|
||||
filename_template=recorder_config.get('filename_template'),
|
||||
dedup_index=dedup_index)
|
||||
dedup_index=dedup_index,
|
||||
dedup_by_url=dedup_by_url)
|
||||
|
||||
if dedup_policy:
|
||||
pending_counter = self.warcserver.dedup_index_url.replace(':cdxj', ':pending')
|
||||
pending_timeout = recorder_config.get('pending_timeout', 30)
|
||||
create_buff_func = lambda params, name: RedisPendingCounterTempBuffer(512 * 1024, pending_counter, params, name, pending_timeout)
|
||||
else:
|
||||
create_buff_func = None
|
||||
|
||||
self.recorder = RecorderApp(self.RECORD_SERVER % str(self.warcserver_server.port), warc_writer,
|
||||
accept_colls=recorder_config.get('source_filter'))
|
||||
accept_colls=recorder_config.get('source_filter'),
|
||||
create_buff_func=create_buff_func)
|
||||
|
||||
recorder_server = GeventServer(self.recorder, port=0)
|
||||
|
||||
self.recorder_path = self.RECORD_API % (recorder_server.port, recorder_coll)
|
||||
|
||||
# enable PUT of custom data as 'resource' records
|
||||
if recorder_config.get('enable_put_custom_record'):
|
||||
self.put_custom_record_path = self.recorder_path + '&put_record={rec_type}&url={url}'
|
||||
|
||||
def init_autoindex(self, auto_interval):
|
||||
"""Initialize and start the auto-indexing of the collections. If auto_interval is None this is a no op.
|
||||
|
||||
@ -317,6 +365,9 @@ class FrontEndApp(object):
|
||||
else:
|
||||
coll_config['metadata'] = self.metadata_cache.load(coll) or {}
|
||||
|
||||
if 'ui' in self.warcserver.config:
|
||||
coll_config['ui'] = self.warcserver.config['ui']
|
||||
|
||||
return coll_config
|
||||
|
||||
def serve_coll_page(self, environ, coll='$root'):
|
||||
@ -334,6 +385,7 @@ class FrontEndApp(object):
|
||||
|
||||
coll_config = self.get_coll_config(coll)
|
||||
metadata = coll_config.get('metadata')
|
||||
ui = coll_config.get('ui', {})
|
||||
|
||||
view = BaseInsertView(self.rewriterapp.jinja_env, 'search.html')
|
||||
|
||||
@ -345,7 +397,8 @@ class FrontEndApp(object):
|
||||
wb_prefix=wb_prefix,
|
||||
coll=coll,
|
||||
coll_config=coll_config,
|
||||
metadata=metadata)
|
||||
metadata=metadata,
|
||||
ui=ui)
|
||||
|
||||
return WbResponse.text_response(content, content_type='text/html; charset="utf-8"')
|
||||
|
||||
@ -362,6 +415,14 @@ class FrontEndApp(object):
|
||||
# if coll == self.all_coll:
|
||||
# coll = '*'
|
||||
|
||||
config = self.warcserver.get_coll_config(coll)
|
||||
is_live = config.get("index") == "$live"
|
||||
|
||||
if is_live:
|
||||
cache_control = "no-store, no-cache"
|
||||
else:
|
||||
cache_control = "max-age=86400, must-revalidate"
|
||||
|
||||
cdx_url = base_url.format(coll=coll)
|
||||
|
||||
if environ.get('QUERY_STRING'):
|
||||
@ -373,12 +434,19 @@ class FrontEndApp(object):
|
||||
cdx_url += 'limit=' + str(self.query_limit)
|
||||
|
||||
try:
|
||||
res = requests.get(cdx_url, stream=True)
|
||||
headers = {}
|
||||
for key in environ.keys():
|
||||
if key.startswith("HTTP_X_"):
|
||||
headers[key[5:].replace("_", "-")] = environ[key]
|
||||
res = requests.get(cdx_url, stream=True, headers=headers)
|
||||
|
||||
status_line = '{} {}'.format(res.status_code, res.reason)
|
||||
content_type = res.headers.get('Content-Type')
|
||||
|
||||
return WbResponse.bin_stream(StreamIter(res.raw),
|
||||
content_type=content_type)
|
||||
content_type=content_type,
|
||||
status=status_line,
|
||||
headers=[("Cache-Control", cache_control)])
|
||||
|
||||
except Exception as e:
|
||||
return WbResponse.text_response('Error: ' + str(e), status='400 Bad Request')
|
||||
@ -428,6 +496,7 @@ class FrontEndApp(object):
|
||||
coll_config = self.get_coll_config(coll)
|
||||
if record:
|
||||
coll_config['type'] = 'record'
|
||||
coll_config['cache'] = self.rec_cache_mode
|
||||
|
||||
if timemap_output:
|
||||
coll_config['output'] = timemap_output
|
||||
@ -436,6 +505,47 @@ class FrontEndApp(object):
|
||||
|
||||
return self.rewriterapp.render_content(wb_url_str, coll_config, environ)
|
||||
|
||||
def put_custom_record(self, environ, coll="$root"):
    """ When recording, PUT a custom WARC record to the specified collection
    (Available only when recording)

    The request body is forwarded to the recorder as a ``resource`` record
    for the URL given in the ``url`` query parameter. An optional
    ``timestamp`` query parameter is converted to an ISO date and sent as
    the ``WARC-Date`` header.

    :param dict environ: The WSGI environment dictionary for the request
    :param str coll: The name of the collection the record is written to
    :return: A JSON response with the recorder's reply, or a
        ``400 Bad Request`` JSON error when no ``url`` parameter is given
    """
    # function-scope import: ``quote`` is not among the module-level imports
    from six.moves.urllib.parse import quote

    # drain the request body; an empty read marks the end of the input
    chunks = []
    while True:
        buff = environ["wsgi.input"].read()
        if not buff:
            break

        chunks.append(buff)

    data = b"".join(chunks)

    # QUERY_STRING may be missing from the environ entirely
    params = dict(parse_qsl(environ.get("QUERY_STRING") or ""))

    target_uri = params.get("url")
    if not target_uri:
        return WbResponse.json_response({"error": "no url"}, status="400 Bad Request")

    headers = {"Content-Type": environ.get("CONTENT_TYPE", "text/plain")}

    timestamp = params.get("timestamp")
    if timestamp:
        headers["WARC-Date"] = timestamp_to_iso_date(timestamp)

    # parse_qsl already percent-decoded the target url, so it has to be
    # encoded again before being placed into the recorder query string.
    # Otherwise any '&', '#' or '=' in the target url would cut it short
    # or be read as additional recorder parameters.
    put_url = self.put_custom_record_path.format(
        url=quote(target_uri, safe=""), coll=coll, rec_type="resource"
    )
    res = requests.put(put_url, headers=headers, data=data)

    return WbResponse.json_response(res.json())
|
||||
|
||||
def setup_paths(self, environ, coll, record=False):
|
||||
"""Populates the WSGI environment dictionary with the path information necessary to perform a response for
|
||||
content or record.
|
||||
@ -448,9 +558,9 @@ class FrontEndApp(object):
|
||||
return
|
||||
|
||||
if coll != '$root':
|
||||
pop_path_info(environ)
|
||||
shift_path_info(environ)
|
||||
if record:
|
||||
pop_path_info(environ)
|
||||
shift_path_info(environ)
|
||||
|
||||
paths = [self.warcserver.root_dir]
|
||||
|
||||
@ -493,7 +603,7 @@ class FrontEndApp(object):
|
||||
and message.
|
||||
|
||||
:param dict environ: The WSGI environment dictionary for the request
|
||||
:param str err_type: The identifier for type of error that occured
|
||||
:param str err_type: The identifier for type of error that occurred
|
||||
:param str url: The url of the archived page that was requested
|
||||
"""
|
||||
raise AppPageNotFound(err_type, url)
|
||||
@ -551,16 +661,20 @@ class FrontEndApp(object):
|
||||
urls = self.url_map.bind_to_environ(environ)
|
||||
try:
|
||||
endpoint, args = urls.match()
|
||||
# store original script_name (original prefix) before modifications are made
|
||||
environ['pywb.app_prefix'] = environ.get('SCRIPT_NAME', '')
|
||||
|
||||
self.rewriterapp.prepare_env(environ)
|
||||
|
||||
# store original script_name (original prefix) before modifications are made
|
||||
environ['ORIG_SCRIPT_NAME'] = environ.get('SCRIPT_NAME')
|
||||
|
||||
lang = args.pop('lang', '')
|
||||
if lang:
|
||||
pop_path_info(environ)
|
||||
shift_path_info(environ)
|
||||
|
||||
if lang:
|
||||
environ['pywb_lang'] = lang
|
||||
elif self.default_locale:
|
||||
environ['pywb_lang'] = self.default_locale
|
||||
|
||||
response = endpoint(environ, **args)
|
||||
|
||||
|
@ -2,7 +2,7 @@ from io import BytesIO
|
||||
|
||||
import requests
|
||||
from fakeredis import FakeStrictRedis
|
||||
from six.moves.urllib.parse import unquote, urlencode, urlsplit, urlunsplit
|
||||
from six.moves.urllib.parse import unquote, urlencode, urlsplit, urlunsplit, parse_qsl
|
||||
from warcio.bufferedreaders import BufferedReader
|
||||
from warcio.recordloader import ArcWarcRecordLoader
|
||||
from warcio.timeutils import http_date_to_timestamp, timestamp_to_http_date
|
||||
@ -64,7 +64,7 @@ class RewriterApp(object):
|
||||
|
||||
if not jinja_env:
|
||||
jinja_env = JinjaEnv(globals={'static_path': 'static'},
|
||||
extensions=['jinja2.ext.i18n', 'jinja2.ext.with_'])
|
||||
extensions=['jinja2.ext.i18n'])
|
||||
jinja_env.jinja_env.install_null_translations()
|
||||
|
||||
self.jinja_env = jinja_env
|
||||
@ -72,15 +72,17 @@ class RewriterApp(object):
|
||||
|
||||
self.jinja_env.init_loc(self.config.get('locales_root_dir'),
|
||||
self.config.get('locales'),
|
||||
self.loc_map)
|
||||
self.loc_map,
|
||||
self.config.get('default_locale'))
|
||||
|
||||
self.redirect_to_exact = config.get('redirect_to_exact')
|
||||
|
||||
self.banner_view = BaseInsertView(self.jinja_env, self._html_templ('banner_html'))
|
||||
self.custom_banner_view = BaseInsertView(self.jinja_env, self._html_templ('custom_banner_html'))
|
||||
|
||||
self.head_insert_view = HeadInsertView(self.jinja_env,
|
||||
self._html_templ('head_insert_html'),
|
||||
self.banner_view)
|
||||
self.custom_banner_view)
|
||||
|
||||
self.frame_insert_view = TopFrameView(self.jinja_env,
|
||||
self._html_templ('frame_insert_html'),
|
||||
@ -96,6 +98,8 @@ class RewriterApp(object):
|
||||
|
||||
self.enable_memento = self.config.get('enable_memento')
|
||||
|
||||
self.static_prefix = self.config.get('static_prefix', 'static')
|
||||
|
||||
csp_header = self.config.get('csp-header', self.DEFAULT_CSP)
|
||||
if csp_header:
|
||||
self.csp_header = ('Content-Security-Policy', csp_header)
|
||||
@ -300,15 +304,24 @@ class RewriterApp(object):
|
||||
|
||||
return resp
|
||||
|
||||
def render_content(self, wb_url, kwargs, environ):
|
||||
wb_url = wb_url.replace('#', '%23')
|
||||
wb_url = WbUrl(wb_url)
|
||||
def prepare_env(self, environ):
    """Fill in the url scheme and the pywb path prefixes on ``environ``.

    Does nothing when ``pywb.host_prefix`` is already present. Otherwise
    the scheme is taken from ``X-Forwarded-Proto`` (falling back to
    ``self.force_scheme``) and ``pywb.host_prefix``, ``pywb.app_prefix``
    and ``pywb.static_prefix`` are stored in the environ.

    :param dict environ: The WSGI environment dictionary for the request
    """
    already_prepared = 'pywb.host_prefix' in environ
    if not already_prepared:
        scheme = environ.get('HTTP_X_FORWARDED_PROTO', self.force_scheme)
        if scheme:
            environ['wsgi.url_scheme'] = scheme

        # host prefix is computed after the scheme override above
        host_prefix = self.get_host_prefix(environ)
        app_prefix = environ.get('SCRIPT_NAME', '')

        environ['pywb.host_prefix'] = host_prefix
        environ['pywb.app_prefix'] = app_prefix
        environ['pywb.static_prefix'] = host_prefix + app_prefix + '/' + self.static_prefix
|
||||
|
||||
def render_content(self, wb_url, kwargs, environ):
|
||||
wb_url = wb_url.replace('#', '%23')
|
||||
wb_url = WbUrl(wb_url)
|
||||
|
||||
history_page = environ.pop('HTTP_X_WOMBAT_HISTORY_PAGE', '')
|
||||
if history_page:
|
||||
wb_url.url = history_page
|
||||
@ -318,20 +331,19 @@ class RewriterApp(object):
|
||||
|
||||
is_timegate = self._check_accept_dt(wb_url, environ)
|
||||
|
||||
host_prefix = self.get_host_prefix(environ)
|
||||
self.prepare_env(environ)
|
||||
|
||||
host_prefix = environ['pywb.host_prefix']
|
||||
rel_prefix = self.get_rel_prefix(environ)
|
||||
full_prefix = host_prefix + rel_prefix
|
||||
environ['pywb.host_prefix'] = host_prefix
|
||||
pywb_static_prefix = host_prefix + environ.get('pywb.app_prefix', '') + environ.get(
|
||||
'pywb.static_prefix', '/static/')
|
||||
|
||||
pywb_static_prefix = environ['pywb.static_prefix'] + '/'
|
||||
is_proxy = ('wsgiprox.proxy_host' in environ)
|
||||
|
||||
# if OPTIONS in proxy mode, just generate the proxy response
|
||||
if is_proxy and self.is_preflight(environ):
|
||||
return WbResponse.options_response(environ)
|
||||
|
||||
environ['pywb.host_prefix'] = host_prefix
|
||||
|
||||
if self.use_js_obj_proxy:
|
||||
content_rw = self.js_proxy_rw
|
||||
else:
|
||||
@ -368,13 +380,12 @@ class RewriterApp(object):
|
||||
response = self.handle_query(environ, wb_url, kwargs, full_prefix)
|
||||
|
||||
else:
|
||||
# don't return top-frame response for timegate with exact redirects
|
||||
if not (is_timegate and redirect_to_exact):
|
||||
keep_frame_response = is_timegate and not redirect_to_exact and not is_proxy
|
||||
response = self.handle_custom_response(environ, wb_url,
|
||||
full_prefix, host_prefix,
|
||||
kwargs)
|
||||
|
||||
keep_frame_response = (not kwargs.get('no_timegate_check') and is_timegate and not is_proxy) or redirect_to_exact
|
||||
|
||||
|
||||
if response and not keep_frame_response:
|
||||
return self.format_response(response, wb_url, full_prefix, is_timegate, is_proxy)
|
||||
@ -453,8 +464,12 @@ class RewriterApp(object):
|
||||
|
||||
return self.send_redirect(new_path, url_parts, urlrewriter)
|
||||
|
||||
|
||||
# only redirect to exact if not live, otherwise set to false
|
||||
redirect_to_exact = redirect_to_exact and not cdx.get('is_live')
|
||||
|
||||
# return top-frame timegate response, with timestamp from cdx
|
||||
if response and keep_frame_response:
|
||||
if response and keep_frame_response and (not redirect_to_exact or not is_timegate):
|
||||
no_except_close(r.raw)
|
||||
return self.format_response(response, wb_url, full_prefix, is_timegate, is_proxy, cdx['timestamp'])
|
||||
|
||||
@ -475,8 +490,8 @@ class RewriterApp(object):
|
||||
if target_uri != wb_url.url and cdx.get('is_fuzzy') == '1':
|
||||
set_content_loc = True
|
||||
|
||||
# if redirect to exact timestamp, bit only if not live
|
||||
if redirect_to_exact and not cdx.get('is_live'):
|
||||
# if redirect to exact timestamp (only set if not live)
|
||||
if redirect_to_exact:
|
||||
if set_content_loc or is_timegate or wb_url.timestamp != cdx.get('timestamp'):
|
||||
new_url = urlrewriter.get_new_url(url=target_uri,
|
||||
timestamp=cdx['timestamp'],
|
||||
@ -518,6 +533,7 @@ class RewriterApp(object):
|
||||
coll=kwargs.get('coll', ''),
|
||||
replay_mod=self.replay_mod,
|
||||
metadata=kwargs.get('metadata', {}),
|
||||
ui=kwargs.get('ui', {}),
|
||||
config=self.config))
|
||||
|
||||
cookie_rewriter = None
|
||||
@ -573,6 +589,9 @@ class RewriterApp(object):
|
||||
if is_proxy and environ.get('HTTP_ORIGIN'):
|
||||
response.add_access_control_headers(environ)
|
||||
|
||||
if r.status_code == 200 and kwargs.get('cache') == 'always' and environ.get('HTTP_REFERER'):
|
||||
response.status_headers['Cache-Control'] = 'public, max-age=31536000, immutable'
|
||||
|
||||
return response
|
||||
|
||||
def format_response(self, response, wb_url, full_prefix, is_timegate, is_proxy, timegate_closest_ts=None):
|
||||
@ -680,7 +699,7 @@ class RewriterApp(object):
|
||||
return self._error_response(environ, wbe)
|
||||
|
||||
def _not_found_response(self, environ, url):
    """Build the ``404 Not Found`` HTML response for ``url``.

    The not-found template is rendered once, with both the requested url
    and the ``Not Found`` error message.

    :param dict environ: The WSGI environment dictionary for the request
    :param str url: The url that was requested
    :return: A text/html WbResponse with status ``404 Not Found``
    """
    resp = self.not_found_view.render_to_string(environ, url=url, err_msg="Not Found")

    return WbResponse.text_response(resp, status='404 Not Found', content_type='text/html')
|
||||
|
||||
@ -700,6 +719,8 @@ class RewriterApp(object):
|
||||
headers = {'Content-Length': str(len(req_data)),
|
||||
'Content-Type': 'application/request'}
|
||||
|
||||
headers.update(inputreq.warcserver_headers)
|
||||
|
||||
if skip_record:
|
||||
headers['Recorder-Skip'] = '1'
|
||||
|
||||
@ -782,8 +803,17 @@ class RewriterApp(object):
|
||||
def handle_query(self, environ, wb_url, kwargs, full_prefix):
    """Render the query view for ``wb_url``.

    A request counts as an advanced search when its ``matchType`` query
    parameter is anything other than ``exact`` or its ``url`` query
    parameter ends with ``*``. For those requests ``vue_calendar_ui`` is
    switched off in the ``ui`` settings handed to the template.

    :param dict environ: The WSGI environment dictionary for the request
    :param wb_url: The parsed url object; only its ``url`` attribute is read
    :param dict kwargs: Per-request config; the ``ui`` entry is read
    :param str full_prefix: Unused here, kept for interface compatibility
    :return: The rendered query page
    """
    prefix = self.get_full_prefix(environ)

    # QUERY_STRING may be missing from the environ entirely
    query = dict(parse_qsl(environ.get("QUERY_STRING") or ""))
    is_advanced = (query.get("matchType", "exact") != "exact"
                   or query.get("url", "").endswith("*"))

    # Copy before modifying: the dict passed in may be shared between
    # requests, and changing it in place would keep the vue calendar
    # switched off for every later request as well.
    ui = dict(kwargs.get("ui") or {})

    # vue ui not supported for advanced search for now
    if is_advanced:
        ui["vue_calendar_ui"] = False

    params = dict(url=wb_url.url,
                  prefix=prefix,
                  ui=ui)

    return self.query_view.render_to_string(environ, **params)
|
||||
|
||||
@ -813,7 +843,7 @@ class RewriterApp(object):
|
||||
|
||||
def get_rel_prefix(self, environ):
    """Return the request's ``SCRIPT_NAME`` followed by a trailing slash.

    A missing ``SCRIPT_NAME`` is treated as an empty string, so the result
    is ``'/'`` instead of a ``TypeError`` from adding ``None`` and ``str``.

    :param dict environ: The WSGI environment dictionary for the request
    :return: The relative prefix, always ending in ``/``
    :rtype: str
    """
    # return request.script_name
    return environ.get('SCRIPT_NAME', '') + '/'
|
||||
|
||||
def get_full_prefix(self, environ):
|
||||
return self.get_host_prefix(environ) + self.get_rel_prefix(environ)
|
||||
@ -890,7 +920,9 @@ class RewriterApp(object):
|
||||
pass
|
||||
|
||||
def get_top_frame_params(self, wb_url, kwargs):
    """Collect the extra template parameters for the top frame.

    :param wb_url: Not read by this implementation
    :param dict kwargs: Per-request config; ``metadata`` and ``ui`` are read
    :return: A dict with ``metadata`` and ``ui``, each defaulting to ``{}``
    :rtype: dict
    """
    return {'metadata': kwargs.get('metadata', {}),
            'ui': kwargs.get('ui', {})
           }
|
||||
|
||||
def handle_custom_response(self, environ, wb_url, full_prefix, host_prefix, kwargs):
|
||||
if self.is_framed_replay(wb_url):
|
||||
|
@ -164,7 +164,7 @@ class WbResponse(object):
|
||||
try:
|
||||
start_response(self.status_headers.statusline,
|
||||
self.status_headers.headers)
|
||||
except UnicodeError:
|
||||
except (UnicodeError, TypeError):
|
||||
self.try_fix_errors()
|
||||
start_response(self.status_headers.statusline,
|
||||
self.status_headers.headers)
|
||||
@ -212,6 +212,7 @@ class WbResponse(object):
|
||||
self.status_headers.replace_header('Access-Control-Allow-Methods', allowed_methods)
|
||||
self.status_headers.replace_header('Access-Control-Allow-Credentials', 'true')
|
||||
self.status_headers.replace_header('Access-Control-Max-Age', '1800')
|
||||
self.status_headers.replace_header('Cross-Origin-Resource-Policy', 'cross-origin')
|
||||
return self
|
||||
|
||||
def __repr__(self):
|
||||
|
@ -12,9 +12,15 @@ templates_dir: templates
|
||||
|
||||
# Template HTML
|
||||
banner_html: banner.html
|
||||
custom_banner_html: custom_banner.html
|
||||
head_insert_html: head_insert.html
|
||||
frame_insert_html: frame_insert.html
|
||||
|
||||
base_html: base.html
|
||||
header_html: header.html
|
||||
footer_html: footer.html
|
||||
head_html: head.html
|
||||
|
||||
query_html: query.html
|
||||
search_html: search.html
|
||||
not_found_html: not_found.html
|
||||
@ -31,6 +37,7 @@ info_json: collinfo.json
|
||||
# HTML Templates List
|
||||
html_templates:
|
||||
- banner_html
|
||||
- custom_banner_html
|
||||
- head_insert_html
|
||||
- frame_insert_html
|
||||
|
||||
@ -39,6 +46,12 @@ html_templates:
|
||||
- not_found_html
|
||||
|
||||
- home_html
|
||||
|
||||
- base_html
|
||||
- header_html
|
||||
- head_html
|
||||
- footer_html
|
||||
|
||||
- error_html
|
||||
- proxy_cert_download_html
|
||||
- proxy_select_html
|
||||
|
@ -75,6 +75,9 @@ class ArchiveIndexEntryMixin(object):
|
||||
self['urlkey'] = canonicalize(new_url, surt_ordered)
|
||||
other['urlkey'] = self['urlkey']
|
||||
|
||||
self['method'] = post_query.method
|
||||
self['requestBody'] = post_query.query
|
||||
|
||||
referer = other.record.http_headers.get_header('referer')
|
||||
if referer:
|
||||
self['_referer'] = referer
|
||||
|
@ -1,5 +1,9 @@
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import traceback
|
||||
|
||||
import warcio
|
||||
|
||||
# Use ujson if available
|
||||
try:
|
||||
@ -27,7 +31,6 @@ except ImportError: # pragma: no cover
|
||||
|
||||
|
||||
from argparse import ArgumentParser, RawTextHelpFormatter
|
||||
from bisect import insort
|
||||
|
||||
from six import StringIO
|
||||
|
||||
@ -167,9 +170,10 @@ class SortedCDXWriter(BaseCDXWriter):
|
||||
super(SortedCDXWriter, self).write(entry, filename)
|
||||
line = self.out.getvalue()
|
||||
if line:
|
||||
insort(self.sortlist, line)
|
||||
self.sortlist.append(line)
|
||||
|
||||
def __exit__(self, *args):
|
||||
self.sortlist.sort()
|
||||
self.actual_out.write(''.join(self.sortlist))
|
||||
return False
|
||||
|
||||
@ -298,8 +302,11 @@ def write_multi_cdx_index(output, inputs, **options):
|
||||
with open(fullpath, 'rb') as infile:
|
||||
entry_iter = record_iter(infile)
|
||||
|
||||
try:
|
||||
for entry in entry_iter:
|
||||
writer.write(entry, filename)
|
||||
except warcio.exceptions.ArchiveLoadFailed:
|
||||
logging.error('Error while indexing file %s, %s',filename,traceback.format_exc())
|
||||
|
||||
return writer
|
||||
|
||||
@ -331,13 +338,13 @@ are supported.
|
||||
Some examples:
|
||||
|
||||
* Create "example.cdx" index from example.warc.gz
|
||||
{0} ./cdx/example.cdx ./warcs/example.warc.gz
|
||||
{0} --output ./cdx/example.cdx ./warcs/example.warc.gz
|
||||
|
||||
* Create "combined.cdx", a combined, sorted index of all warcs in ./warcs/
|
||||
{0} --sort combined.cdx ./warcs/
|
||||
{0} --sort --output combined.cdx ./warcs/
|
||||
|
||||
* Create a sorted cdx per file in ./cdx/ for each archive file in ./warcs/
|
||||
{0} --sort ./cdx/ ./warcs/
|
||||
{0} --sort --output ./cdx/ ./warcs/
|
||||
""".format(os.path.basename(sys.argv[0]))
|
||||
|
||||
sort_help = """
|
||||
@ -377,7 +384,7 @@ url timestamp { ... }
|
||||
|
||||
output_help = """
|
||||
Output file or directory.
|
||||
- If directory, each input file is written to a seperate output file
|
||||
- If directory, each input file is written to a separate output file
|
||||
with a .cdx extension
|
||||
- If output is '-', output is written to stdout
|
||||
"""
|
||||
@ -451,7 +458,9 @@ instead of current working directory
|
||||
action='store_true',
|
||||
help=minimal_json_help)
|
||||
|
||||
parser.add_argument('output', nargs='?', default='-', help=output_help)
|
||||
parser.add_argument('-o', '--output',
|
||||
default='-', help=output_help)
|
||||
|
||||
parser.add_argument('inputs', nargs='+', help=input_help)
|
||||
|
||||
cmd = parser.parse_args(args=args)
|
||||
|
@ -101,9 +101,9 @@ org,httpbin)/post?foo=bar 20140610001255 http://httpbin.org/post?foo=bar applica
|
||||
# post append
|
||||
>>> print_cdx_index('post-test.warc.gz', append_post=True)
|
||||
CDX N b a m s k r M S V g
|
||||
org,httpbin)/post?foo=bar&test=abc 20140610000859 http://httpbin.org/post application/json 200 M532K5WS4GY2H4OVZO6HRPOP47A7KDWU - - 720 0 post-test.warc.gz
|
||||
org,httpbin)/post?a=1&b=[]&c=3 20140610001151 http://httpbin.org/post application/json 200 M7YCTM7HS3YKYQTAWQVMQSQZBNEOXGU2 - - 723 1196 post-test.warc.gz
|
||||
org,httpbin)/post?data=^&foo=bar 20140610001255 http://httpbin.org/post?foo=bar application/json 200 B6E5P6JUZI6UPDTNO4L2BCHMGLTNCUAJ - - 723 2395 post-test.warc.gz
|
||||
org,httpbin)/post?__wb_method=post&foo=bar&test=abc 20140610000859 http://httpbin.org/post application/json 200 M532K5WS4GY2H4OVZO6HRPOP47A7KDWU - - 720 0 post-test.warc.gz
|
||||
org,httpbin)/post?__wb_method=post&a=1&b=[]&c=3 20140610001151 http://httpbin.org/post application/json 200 M7YCTM7HS3YKYQTAWQVMQSQZBNEOXGU2 - - 723 1196 post-test.warc.gz
|
||||
org,httpbin)/post?__wb_method=post&data=^&foo=bar 20140610001255 http://httpbin.org/post?foo=bar application/json 200 B6E5P6JUZI6UPDTNO4L2BCHMGLTNCUAJ - - 723 2395 post-test.warc.gz
|
||||
|
||||
# no post append, requests included
|
||||
>>> print_cdx_index('post-test.warc.gz', include_all=True)
|
||||
@ -118,12 +118,12 @@ org,httpbin)/post?foo=bar 20140610001255 http://httpbin.org/post?foo=bar applica
|
||||
# post append + requests included
|
||||
>>> print_cdx_index('post-test.warc.gz', include_all=True, append_post=True)
|
||||
CDX N b a m s k r M S V g
|
||||
org,httpbin)/post?foo=bar&test=abc 20140610000859 http://httpbin.org/post application/json 200 M532K5WS4GY2H4OVZO6HRPOP47A7KDWU - - 720 0 post-test.warc.gz
|
||||
org,httpbin)/post?foo=bar&test=abc 20140610000859 http://httpbin.org/post application/x-www-form-urlencoded - - - - 476 720 post-test.warc.gz
|
||||
org,httpbin)/post?a=1&b=[]&c=3 20140610001151 http://httpbin.org/post application/json 200 M7YCTM7HS3YKYQTAWQVMQSQZBNEOXGU2 - - 723 1196 post-test.warc.gz
|
||||
org,httpbin)/post?a=1&b=[]&c=3 20140610001151 http://httpbin.org/post application/x-www-form-urlencoded - - - - 476 1919 post-test.warc.gz
|
||||
org,httpbin)/post?data=^&foo=bar 20140610001255 http://httpbin.org/post?foo=bar application/json 200 B6E5P6JUZI6UPDTNO4L2BCHMGLTNCUAJ - - 723 2395 post-test.warc.gz
|
||||
org,httpbin)/post?data=^&foo=bar 20140610001255 http://httpbin.org/post?foo=bar application/x-www-form-urlencoded - - - - 475 3118 post-test.warc.gz
|
||||
org,httpbin)/post?__wb_method=post&foo=bar&test=abc 20140610000859 http://httpbin.org/post application/json 200 M532K5WS4GY2H4OVZO6HRPOP47A7KDWU - - 720 0 post-test.warc.gz
|
||||
org,httpbin)/post?__wb_method=post&foo=bar&test=abc 20140610000859 http://httpbin.org/post application/x-www-form-urlencoded - - - - 476 720 post-test.warc.gz
|
||||
org,httpbin)/post?__wb_method=post&a=1&b=[]&c=3 20140610001151 http://httpbin.org/post application/json 200 M7YCTM7HS3YKYQTAWQVMQSQZBNEOXGU2 - - 723 1196 post-test.warc.gz
|
||||
org,httpbin)/post?__wb_method=post&a=1&b=[]&c=3 20140610001151 http://httpbin.org/post application/x-www-form-urlencoded - - - - 476 1919 post-test.warc.gz
|
||||
org,httpbin)/post?__wb_method=post&data=^&foo=bar 20140610001255 http://httpbin.org/post?foo=bar application/json 200 B6E5P6JUZI6UPDTNO4L2BCHMGLTNCUAJ - - 723 2395 post-test.warc.gz
|
||||
org,httpbin)/post?__wb_method=post&data=^&foo=bar 20140610001255 http://httpbin.org/post?foo=bar application/x-www-form-urlencoded - - - - 475 3118 post-test.warc.gz
|
||||
|
||||
# post append + minimal = error
|
||||
>>> print_cdx_index('example.arc.gz', append_post=True, minimal=True)
|
||||
@ -149,13 +149,13 @@ StatusAndHeadersParserException: Expected Status Line starting with ['HTTP/1.0',
|
||||
#=================================================================
|
||||
|
||||
# test sort, multiple inputs
|
||||
>>> cli_lines(['--sort', '-', TEST_WARC_DIR])
|
||||
>>> cli_lines(['--sort', '-o', '-', TEST_WARC_DIR])
|
||||
com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 example-url-agnostic-revisit.warc.gz
|
||||
urn:x-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 example-wpull.warc.gz
|
||||
Total: 213
|
||||
|
||||
# test sort, multiple inputs, recursive, from base test dir
|
||||
>>> cli_lines(['--sort', '-r', '-', get_test_dir()])
|
||||
>>> cli_lines(['--sort', '-r', '-o', '-', get_test_dir()])
|
||||
com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 warcs/example-url-agnostic-revisit.warc.gz
|
||||
urn:x-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 warcs/example-wpull.warc.gz
|
||||
Total: 213
|
||||
@ -167,7 +167,7 @@ urn:x-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX
|
||||
Total: 408
|
||||
|
||||
# test writing to stdout
|
||||
>>> cli_lines(['-', TEST_WARC_DIR + 'example.warc.gz'])
|
||||
>>> cli_lines([TEST_WARC_DIR + 'example.warc.gz'])
|
||||
com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1043 333 example.warc.gz
|
||||
org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 example.warc.gz
|
||||
Total: 4
|
||||
@ -178,7 +178,7 @@ com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 20
|
||||
org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 example.warc.gz
|
||||
Total: 4
|
||||
|
||||
# test custom root dir for cdx filenames, singlw warc
|
||||
# test custom root dir for cdx filenames, single warc
|
||||
>>> cli_lines(['--dir-root', get_test_dir() + 'other/', TEST_WARC_DIR + 'example.warc.gz'])
|
||||
com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1043 333 ../warcs/example.warc.gz
|
||||
org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 ../warcs/example.warc.gz
|
||||
@ -265,7 +265,7 @@ def cli_lines_with_dir(input_):
|
||||
tmp_dir = None
|
||||
tmp_dir = tempfile.mkdtemp()
|
||||
|
||||
main([tmp_dir, input_])
|
||||
main(['-o', tmp_dir, input_])
|
||||
|
||||
filename = cdx_filename(os.path.basename(input_))
|
||||
|
||||
@ -463,6 +463,104 @@ com,example)/xyz.pdf 20140401052011 http://example.com/xyz.pdf application/http
|
||||
"""
|
||||
|
||||
|
||||
def test_multipart_form():
|
||||
test_data = b'\
|
||||
WARC/1.0\r\n\
|
||||
WARC-Type: response\r\n\
|
||||
WARC-Record-ID: <urn:uuid:073fac44-c383-4a2b-980d-76fec83bd20d>\r\n\
|
||||
WARC-Date: 2020-11-19T19:54:34Z\r\n\
|
||||
WARC-Target-URI: https://example.com/ajax/bz?foo=bar\r\n\
|
||||
Content-Type: application/http;msgtype=response\r\n\
|
||||
WARC-Payload-Digest: sha1:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ\r\n\
|
||||
Content-Length: 48\r\n\
|
||||
WARC-Block-Digest: sha1:XN45YTSBLG5PLJ4HA7DRDYGJBM5VW4UO\r\n\
|
||||
\r\n\
|
||||
Content-Type: text/html; charset="utf-8"\r\n\
|
||||
\r\n\
|
||||
ABCD\r\n\
|
||||
\r\n\
|
||||
\r\n\
|
||||
\r\n\
|
||||
WARC/1.0\r\n\
|
||||
WARC-Type: request\r\n\
|
||||
WARC-Record-ID: <urn:uuid:3084e79c-ae58-4bfd-8590-fcf2830fe896>\r\n\
|
||||
WARC-Date: 2020-11-19T19:54:34Z\r\n\
|
||||
WARC-Target-URI: https://example.com/ajax/bz?foo=bar\r\n\
|
||||
WARC-Concurrent-To: <urn:uuid:073fac44-c383-4a2b-980d-76fec83bd20d>\r\n\
|
||||
WARC-Block-Digest: sha1:LNYP3X3NWXQLUGDI745P4L4FK27XGP24\r\n\
|
||||
Content-Type: application/http;msgtype=request\r\n\
|
||||
Content-Length: 321\r\n\
|
||||
\r\n\
|
||||
POST /ajax/bz?foo=bar HTTP/1.1\r\n\
|
||||
Content-Type: multipart/form-data; boundary=----WebKitFormBoundaryWUBf9liofZK0nuJd\r\n\
|
||||
content-Length: 199\r\n\
|
||||
\r\n\
|
||||
------WebKitFormBoundaryWUBf9liofZK0nuJd\r\n\
|
||||
Content-Disposition: form-data; name="q"\r\n\
|
||||
\r\n\
|
||||
[{"webSessionId":"pb2tr7:vx83uz:fdi8ta","user":"0"}]\r\n\
|
||||
------WebKitFormBoundaryWUBf9liofZK0nuJd--\r\n\
|
||||
\r\n\
|
||||
'
|
||||
options = dict(include_all=True, append_post=True)
|
||||
buff = BytesIO()
|
||||
test_record = BytesIO(test_data)
|
||||
write_cdx_index(buff, test_record, 'test.warc.gz', **options)
|
||||
print(buff.getvalue())
|
||||
assert buff.getvalue() == b"""\
|
||||
CDX N b a m s k r M S V g
|
||||
com,example)/ajax/bz?__wb_method=post&foo=bar&q=[{"websessionid":"pb2tr7:vx83uz:fdi8ta","user":"0"}] 20201119195434 https://example.com/ajax/bz?foo=bar unk text/html; 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 420 0 test.warc.gz
|
||||
com,example)/ajax/bz?__wb_method=post&foo=bar&q=[{"websessionid":"pb2tr7:vx83uz:fdi8ta","user":"0"}] 20201119195434 https://example.com/ajax/bz?foo=bar multipart/form-data - - - - 701 428 test.warc.gz
|
||||
"""
|
||||
|
||||
|
||||
def test_multipart_form_no_boundary():
|
||||
test_data = b'\
|
||||
WARC/1.0\r\n\
|
||||
WARC-Type: response\r\n\
|
||||
WARC-Record-ID: <urn:uuid:3bc1606a-d517-487e-a6d5-bfeaebda2ec3>\r\n\
|
||||
WARC-Date: 2020-11-19T14:02:52Z\r\n\
|
||||
WARC-Target-URI: https://capi.connatix.com/core/story?v=77797\r\n\
|
||||
WARC-IP-Address: 18.221.6.219\r\n\
|
||||
Content-Type: application/http;msgtype=response\r\n\
|
||||
WARC-Payload-Digest: sha1:SIGZ3RJW5J7DUKEZ4R7RSYUZNGLETIS5\r\n\
|
||||
Content-Length: 41\r\n\
|
||||
WARC-Block-Digest: sha1:JXKKZNALIPOW7J2FX5XUTGQZXKBSGZLU\r\n\
|
||||
\r\n\
|
||||
Content-Type: multipart/form-data\r\n\
|
||||
\r\n\
|
||||
ABCD\r\n\
|
||||
\r\n\
|
||||
\r\n\
|
||||
\r\n\
|
||||
WARC/1.0\r\n\
|
||||
WARC-Type: request\r\n\
|
||||
WARC-Record-ID: <urn:uuid:d5e7186f-5725-4ed1-b199-56fbdf4bd805>\r\n\
|
||||
WARC-Date: 2020-11-19T14:02:52Z\r\n\
|
||||
WARC-Target-URI: https://capi.connatix.com/core/story?v=77797\r\n\
|
||||
WARC-Concurrent-To: <urn:uuid:3bc1606a-d517-487e-a6d5-bfeaebda2ec3>\r\n\
|
||||
WARC-Block-Digest: sha1:QJ2YUIKEWDSCLK5A2DHGLQ7WWEKYMO3W\r\n\
|
||||
Content-Type: application/http;msgtype=request\r\n\
|
||||
Content-Length: 111\r\n\
|
||||
\r\n\
|
||||
POST /core/story?v=77797 HTTP/1.1\r\n\
|
||||
Content-Length: 19\r\n\
|
||||
Content-Type: multipart/form-data\r\n\
|
||||
\r\n\
|
||||
{"text": "default"}\r\n\
|
||||
\r\n\
|
||||
'
|
||||
options = dict(include_all=True, append_post=True)
|
||||
buff = BytesIO()
|
||||
test_record = BytesIO(test_data)
|
||||
write_cdx_index(buff, test_record, 'test.warc.gz', **options)
|
||||
assert buff.getvalue() == b"""\
|
||||
CDX N b a m s k r M S V g
|
||||
com,connatix,capi)/core/story?__wb_method=post&__wb_post_data=eyj0zxh0ijogimrlzmf1bhqifq==&v=77797 20201119140252 https://capi.connatix.com/core/story?v=77797 unk multipart/form-data SIGZ3RJW5J7DUKEZ4R7RSYUZNGLETIS5 - - 453 0 test.warc.gz
|
||||
com,connatix,capi)/core/story?__wb_method=post&__wb_post_data=eyj0zxh0ijogimrlzmf1bhqifq==&v=77797 20201119140252 https://capi.connatix.com/core/story?v=77797 multipart/form-data - - - - 500 461 test.warc.gz
|
||||
"""
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import doctest
|
||||
doctest.testmod()
|
||||
|
@ -12,7 +12,7 @@ from pywb.warcserver.index.cdxobject import CDXObject
|
||||
class ACLManager(CollectionsManager):
|
||||
SURT_RX = re.compile('([^:.]+[,)])+')
|
||||
|
||||
VALID_ACCESS = ('allow', 'block', 'exclude')
|
||||
VALID_ACCESS = ('allow', 'block', 'exclude', 'allow_ignore_embargo')
|
||||
|
||||
DEFAULT_FILE = 'access-rules.aclj'
|
||||
|
||||
@ -102,11 +102,11 @@ class ACLManager(CollectionsManager):
|
||||
|
||||
except IOError as io:
|
||||
if must_exist:
|
||||
print('Error Occured: ' + str(io))
|
||||
print('Error Occurred: ' + str(io))
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
print('Error Occured: ' + str(e))
|
||||
print('Error Occurred: ' + str(e))
|
||||
return False
|
||||
|
||||
def save_acl(self, r=None):
|
||||
@ -167,9 +167,9 @@ class ACLManager(CollectionsManager):
|
||||
:param argparse.Namespace r: The argparse namespace representing the rule to be added
|
||||
:rtype: None
|
||||
"""
|
||||
return self._add_rule(r.url, r.access, r.exact_match)
|
||||
return self._add_rule(r.url, r.access, r.exact_match, r.user)
|
||||
|
||||
def _add_rule(self, url, access, exact_match=False):
|
||||
def _add_rule(self, url, access, exact_match=False, user=None):
|
||||
"""Adds an rule to the acl file
|
||||
|
||||
:param str url: The URL for the rule
|
||||
@ -185,12 +185,14 @@ class ACLManager(CollectionsManager):
|
||||
acl['timestamp'] = '-'
|
||||
acl['access'] = access
|
||||
acl['url'] = url
|
||||
if user:
|
||||
acl['user'] = user
|
||||
|
||||
i = 0
|
||||
replace = False
|
||||
|
||||
for rule in self.rules:
|
||||
if acl['urlkey'] == rule['urlkey'] and acl['timestamp'] == rule['timestamp']:
|
||||
if acl['urlkey'] == rule['urlkey'] and acl['timestamp'] == rule['timestamp'] and acl.get('user') == rule.get('user'):
|
||||
replace = True
|
||||
break
|
||||
|
||||
@ -255,7 +257,7 @@ class ACLManager(CollectionsManager):
|
||||
i = 0
|
||||
urlkey = self.to_key(r.url, r.exact_match)
|
||||
for rule in self.rules:
|
||||
if urlkey == rule['urlkey']:
|
||||
if urlkey == rule['urlkey'] and r.user == rule.get('user'):
|
||||
acl = self.rules.pop(i)
|
||||
print('Removed Rule:')
|
||||
self.print_rule(acl)
|
||||
@ -285,7 +287,7 @@ class ACLManager(CollectionsManager):
|
||||
:rtype: None
|
||||
"""
|
||||
access_checker = AccessChecker(self.acl_file, '<default>')
|
||||
rule = access_checker.find_access_rule(r.url)
|
||||
rule = access_checker.find_access_rule(r.url, acl_user=r.user)
|
||||
|
||||
print('Matched rule:')
|
||||
print('')
|
||||
@ -344,15 +346,18 @@ class ACLManager(CollectionsManager):
|
||||
else:
|
||||
op.add_argument(arg)
|
||||
|
||||
if kwargs.get('user_opt'):
|
||||
op.add_argument('-u', '--user')
|
||||
|
||||
if kwargs.get('exact_opt'):
|
||||
op.add_argument('-e', '--exact-match', action='store_true', default=False)
|
||||
|
||||
op.set_defaults(acl_func=kwargs['func'])
|
||||
|
||||
command('add', 'coll_name', 'url', 'access', func=cls.add_rule, exact_opt=True)
|
||||
command('remove', 'coll_name', 'url', func=cls.remove_rule, exact_opt=True)
|
||||
command('add', 'coll_name', 'url', 'access', func=cls.add_rule, exact_opt=True, user_opt=True)
|
||||
command('remove', 'coll_name', 'url', func=cls.remove_rule, exact_opt=True, user_opt=True)
|
||||
command('list', 'coll_name', func=cls.list_rules)
|
||||
command('validate', 'coll_name', func=cls.validate_save)
|
||||
command('match', 'coll_name', 'url', 'default_access', func=cls.find_match)
|
||||
command('match', 'coll_name', 'url', 'default_access', func=cls.find_match, user_opt=True)
|
||||
command('importtxt', 'coll_name', 'filename', 'access', func=cls.add_excludes)
|
||||
|
||||
|
115
pywb/manager/locmanager.py
Normal file
115
pywb/manager/locmanager.py
Normal file
@ -0,0 +1,115 @@
|
||||
import os
|
||||
import os.path
|
||||
import shutil
|
||||
|
||||
try:
|
||||
from babel.messages.frontend import CommandLineInterface
|
||||
|
||||
from translate.convert.po2csv import main as po2csv
|
||||
from translate.convert.csv2po import main as csv2po
|
||||
loc_avail = True
|
||||
except:
|
||||
loc_avail = False
|
||||
|
||||
|
||||
ROOT_DIR = 'i18n'
|
||||
|
||||
TRANSLATIONS = os.path.join(ROOT_DIR, 'translations')
|
||||
|
||||
MESSAGES = os.path.join(ROOT_DIR, 'messages.pot')
|
||||
|
||||
# ============================================================================
|
||||
class LocManager:
|
||||
def process(self, r):
|
||||
if r.name == 'list':
|
||||
r.loc_func(self)
|
||||
elif r.name == 'remove':
|
||||
r.loc_func(self, r.locale)
|
||||
else:
|
||||
r.loc_func(self, r.locale, r.no_csv)
|
||||
|
||||
def extract_loc(self, locale, no_csv):
|
||||
self.extract_text()
|
||||
|
||||
for loc in locale:
|
||||
loc_dir = os.path.join(TRANSLATIONS, loc)
|
||||
if os.path.isdir(loc_dir):
|
||||
self.update_catalog(loc)
|
||||
else:
|
||||
os.makedirs(loc_dir)
|
||||
self.init_catalog(loc)
|
||||
|
||||
if not no_csv:
|
||||
base = os.path.join(TRANSLATIONS, loc, 'LC_MESSAGES')
|
||||
po = os.path.join(base, 'messages.po')
|
||||
csv = os.path.join(base, 'messages.csv')
|
||||
po2csv([po, csv])
|
||||
|
||||
self.compile_catalog()
|
||||
|
||||
def update_loc(self, locale, no_csv):
|
||||
for loc in locale:
|
||||
if not no_csv:
|
||||
loc_dir = os.path.join(TRANSLATIONS, loc)
|
||||
base = os.path.join(TRANSLATIONS, loc, 'LC_MESSAGES')
|
||||
po = os.path.join(base, 'messages.po')
|
||||
csv = os.path.join(base, 'messages.csv')
|
||||
|
||||
if os.path.isfile(csv):
|
||||
csv2po([csv, po])
|
||||
|
||||
self.compile_catalog()
|
||||
|
||||
def remove_loc(self, locale):
|
||||
for loc in locale:
|
||||
loc_dir = os.path.join(TRANSLATIONS, loc)
|
||||
if not os.path.isdir(loc_dir):
|
||||
print('Locale "{0}" does not exist'.format(loc))
|
||||
return
|
||||
|
||||
shutil.rmtree(loc_dir)
|
||||
print('Removed locale "{0}"'.format(loc))
|
||||
|
||||
def list_loc(self):
|
||||
print('Current locales:')
|
||||
print('\n'.join(' - ' + x for x in os.listdir(TRANSLATIONS)))
|
||||
print('')
|
||||
|
||||
def extract_text(self):
|
||||
os.makedirs(ROOT_DIR, exist_ok=True)
|
||||
|
||||
CommandLineInterface().run(['pybabel', 'extract', '-F', 'babel.ini', '-k', '_ _Q gettext ngettext', '-o', MESSAGES, './', '--omit-header'])
|
||||
|
||||
def init_catalog(self, loc):
|
||||
CommandLineInterface().run(['pybabel', 'init', '-l', loc, '-i', MESSAGES, '-d', TRANSLATIONS])
|
||||
|
||||
def update_catalog(self, loc):
|
||||
CommandLineInterface().run(['pybabel', 'update', '-l', loc, '-i', MESSAGES, '-d', TRANSLATIONS, '--previous'])
|
||||
|
||||
def compile_catalog(self):
|
||||
CommandLineInterface().run(['pybabel', 'compile', '-d', TRANSLATIONS])
|
||||
|
||||
|
||||
@classmethod
|
||||
def init_parser(cls, parser):
|
||||
"""Initializes an argument parser for acl commands
|
||||
|
||||
:param argparse.ArgumentParser parser: The parser to be initialized
|
||||
:rtype: None
|
||||
"""
|
||||
subparsers = parser.add_subparsers(dest='op')
|
||||
subparsers.required = True
|
||||
|
||||
def command(name, func):
|
||||
op = subparsers.add_parser(name)
|
||||
if name != 'list':
|
||||
op.add_argument('locale', nargs='+')
|
||||
if name != 'remove':
|
||||
op.add_argument('--no-csv', action='store_true')
|
||||
|
||||
op.set_defaults(loc_func=func, name=name)
|
||||
|
||||
command('extract', cls.extract_loc)
|
||||
command('update', cls.update_loc)
|
||||
command('remove', cls.remove_loc)
|
||||
command('list', cls.list_loc)
|
@ -5,12 +5,16 @@ import logging
|
||||
import heapq
|
||||
import yaml
|
||||
import re
|
||||
import gzip
|
||||
import six
|
||||
import pathlib
|
||||
|
||||
from distutils.util import strtobool
|
||||
from pkg_resources import resource_string
|
||||
from pkg_resources import resource_string, get_distribution
|
||||
|
||||
from argparse import ArgumentParser, RawTextHelpFormatter
|
||||
from tempfile import mkdtemp, TemporaryDirectory
|
||||
from zipfile import ZipFile
|
||||
|
||||
from pywb.utils.loaders import load_yaml_config
|
||||
from warcio.timeutils import timestamp20_now
|
||||
@ -28,8 +32,12 @@ def get_input(msg): # pragma: no cover
|
||||
return input(msg)
|
||||
|
||||
#=============================================================================
|
||||
def get_version():
|
||||
"""Get version of the pywb"""
|
||||
return "wb-manager " + get_distribution("pywb").version
|
||||
|
||||
|
||||
#=============================================================================
|
||||
class CollectionsManager(object):
|
||||
""" This utility is designed to
|
||||
simplify the creation and management of web archive collections
|
||||
@ -43,6 +51,9 @@ directory structure expected by pywb
|
||||
|
||||
COLLS_DIR = 'collections'
|
||||
|
||||
WARC_RX = re.compile(r'.*\.w?arc(\.gz)?$')
|
||||
WACZ_RX = re.compile(r'.*\.wacz$')
|
||||
|
||||
def __init__(self, coll_name, colls_dir=None, must_exist=True):
|
||||
colls_dir = colls_dir or self.COLLS_DIR
|
||||
self.default_config = load_yaml_config(DEFAULT_CONFIG)
|
||||
@ -111,19 +122,142 @@ directory structure expected by pywb
|
||||
'To create a new collection, run\n\n{1} init {0}')
|
||||
raise IOError(msg.format(self.coll_name, sys.argv[0]))
|
||||
|
||||
def add_warcs(self, warcs):
|
||||
def add_archives(self, archives, unpack_wacz=False):
|
||||
if not os.path.isdir(self.archive_dir):
|
||||
raise IOError('Directory {0} does not exist'.
|
||||
format(self.archive_dir))
|
||||
|
||||
full_paths = []
|
||||
for filename in warcs:
|
||||
filename = os.path.abspath(filename)
|
||||
shutil.copy2(filename, self.archive_dir)
|
||||
full_paths.append(os.path.join(self.archive_dir, filename))
|
||||
logging.info('Copied ' + filename + ' to ' + self.archive_dir)
|
||||
invalid_archives = []
|
||||
warc_paths = []
|
||||
for archive in archives:
|
||||
if self.WARC_RX.match(archive):
|
||||
full_path = self._add_warc(archive)
|
||||
if full_path:
|
||||
warc_paths.append(full_path)
|
||||
elif self.WACZ_RX.match(archive):
|
||||
if unpack_wacz:
|
||||
self._add_wacz_unpacked(archive)
|
||||
else:
|
||||
raise NotImplementedError('Adding waczs without unpacking is not yet implemented. Use '
|
||||
'\'--unpack-wacz\' flag to add the wacz\'s content.')
|
||||
else:
|
||||
invalid_archives.append(archive)
|
||||
|
||||
self._index_merge_warcs(full_paths, self.DEF_INDEX_FILE)
|
||||
self._index_merge_warcs(warc_paths, self.DEF_INDEX_FILE)
|
||||
|
||||
if invalid_archives:
|
||||
logging.warning(f'Invalid archives weren\'t added: {", ".join(invalid_archives)}')
|
||||
|
||||
def _rename_warc(self, warc_basename):
|
||||
dupe_idx = 1
|
||||
ext = ''.join(pathlib.Path(warc_basename).suffixes)
|
||||
pre_ext_name = warc_basename.split(ext)[0]
|
||||
|
||||
while True:
|
||||
new_basename = f'{pre_ext_name}-{dupe_idx}{ext}'
|
||||
if not os.path.exists(os.path.join(self.archive_dir, new_basename)):
|
||||
break
|
||||
dupe_idx += 1
|
||||
|
||||
return new_basename
|
||||
|
||||
def _add_warc(self, warc):
|
||||
warc_source = os.path.abspath(warc)
|
||||
source_dir, warc_basename = os.path.split(warc_source)
|
||||
|
||||
# don't overwrite existing warcs with duplicate names
|
||||
if os.path.exists(os.path.join(self.archive_dir, warc_basename)):
|
||||
warc_basename = self._rename_warc(warc_basename)
|
||||
logging.info(f'Warc {os.path.basename(warc)} already exists - renamed to {warc_basename}.')
|
||||
|
||||
warc_dest = os.path.join(self.archive_dir, warc_basename)
|
||||
shutil.copy2(warc_source, warc_dest)
|
||||
logging.info(f'Copied {warc} to {self.archive_dir} as {warc_basename}')
|
||||
return warc_dest
|
||||
|
||||
def _add_wacz_unpacked(self, wacz):
|
||||
wacz = os.path.abspath(wacz)
|
||||
temp_dir = mkdtemp()
|
||||
warc_regex = re.compile(r'.+\.warc(\.gz)?$')
|
||||
cdx_regex = re.compile(r'.+\.cdx(\.gz)?$')
|
||||
with ZipFile(wacz, 'r') as wacz_zip_file:
|
||||
archive_members = wacz_zip_file.namelist()
|
||||
warc_files = [file for file in archive_members if warc_regex.match(file)]
|
||||
if not warc_files:
|
||||
logging.warning(f'WACZ {wacz} does not contain any warc files.')
|
||||
return
|
||||
|
||||
# extract warc files
|
||||
for warc_file in warc_files:
|
||||
wacz_zip_file.extract(warc_file, temp_dir)
|
||||
|
||||
cdx_files = [file for file in archive_members if cdx_regex.match(file)]
|
||||
if not cdx_files:
|
||||
logging.warning(f'WACZ {wacz} does not contain any indices.')
|
||||
return
|
||||
|
||||
for cdx_file in cdx_files:
|
||||
wacz_zip_file.extract(cdx_file, temp_dir)
|
||||
|
||||
# copy extracted warc files to collections archive dir, use wacz filename as filename with added index if
|
||||
# multiple warc files exist
|
||||
warc_filename_mapping = {}
|
||||
full_paths = []
|
||||
for idx, extracted_warc_file in enumerate(warc_files):
|
||||
_, warc_ext = os.path.splitext(extracted_warc_file)
|
||||
if warc_ext == '.gz':
|
||||
warc_ext = '.warc.gz'
|
||||
warc_filename = os.path.basename(wacz)
|
||||
warc_filename, _ = os.path.splitext(warc_filename)
|
||||
warc_filename = f'{warc_filename}-{idx}{warc_ext}'
|
||||
warc_destination_path = os.path.join(self.archive_dir, warc_filename)
|
||||
|
||||
if os.path.exists(warc_destination_path):
|
||||
warc_filename = self._rename_warc(warc_filename)
|
||||
logging.info(f'Warc {warc_destination_path} already exists - renamed to {warc_filename}.')
|
||||
warc_destination_path = os.path.join(self.archive_dir, warc_filename)
|
||||
|
||||
warc_filename_mapping[os.path.basename(extracted_warc_file)] = warc_filename
|
||||
shutil.copy2(os.path.join(temp_dir, extracted_warc_file), warc_destination_path)
|
||||
full_paths.append(warc_destination_path)
|
||||
|
||||
# rewrite filenames in wacz indices and merge them with collection index file
|
||||
for cdx_file in cdx_files:
|
||||
self._add_wacz_index(os.path.join(self.indexes_dir, self.DEF_INDEX_FILE), os.path.join(temp_dir, cdx_file),
|
||||
warc_filename_mapping)
|
||||
|
||||
# delete temporary files
|
||||
shutil.rmtree(temp_dir)
|
||||
|
||||
def _add_wacz_index(self, collection_index_path, wacz_index_path, filename_mapping):
|
||||
from pywb.warcserver.index.cdxobject import CDXObject
|
||||
|
||||
# rewrite wacz index to temporary index file
|
||||
tempdir = TemporaryDirectory()
|
||||
wacz_index_name = os.path.basename(wacz_index_path)
|
||||
rewritten_index_path = os.path.join(tempdir.name, wacz_index_name)
|
||||
|
||||
with open(rewritten_index_path, 'w') as rewritten_index:
|
||||
if wacz_index_path.endswith('.gz'):
|
||||
wacz_index = gzip.open(wacz_index_path, 'rb')
|
||||
else:
|
||||
wacz_index = open(wacz_index_path, 'rb')
|
||||
|
||||
for line in wacz_index:
|
||||
cdx_object = CDXObject(cdxline=line)
|
||||
if cdx_object['filename'] in filename_mapping:
|
||||
cdx_object['filename'] = filename_mapping[cdx_object['filename']]
|
||||
rewritten_index.write(cdx_object.to_cdxj())
|
||||
|
||||
if not os.path.isfile(collection_index_path):
|
||||
shutil.move(rewritten_index_path, collection_index_path)
|
||||
return
|
||||
|
||||
temp_coll_index_path = collection_index_path + '.tmp.' + timestamp20_now()
|
||||
self._merge_indices(collection_index_path, rewritten_index_path, temp_coll_index_path)
|
||||
shutil.move(temp_coll_index_path, collection_index_path)
|
||||
|
||||
tempdir.cleanup()
|
||||
|
||||
def reindex(self):
|
||||
cdx_file = os.path.join(self.indexes_dir, self.DEF_INDEX_FILE)
|
||||
@ -176,20 +310,24 @@ directory structure expected by pywb
|
||||
|
||||
merged_file = temp_file + '.merged'
|
||||
|
||||
last_line = None
|
||||
|
||||
with open(cdx_file, 'rb') as orig_index:
|
||||
with open(temp_file, 'rb') as new_index:
|
||||
with open(merged_file, 'w+b') as merged:
|
||||
for line in heapq.merge(orig_index, new_index):
|
||||
if last_line != line:
|
||||
merged.write(line)
|
||||
last_line = line
|
||||
self._merge_indices(cdx_file, temp_file, merged_file)
|
||||
|
||||
shutil.move(merged_file, cdx_file)
|
||||
#os.rename(merged_file, cdx_file)
|
||||
os.remove(temp_file)
|
||||
|
||||
@staticmethod
|
||||
def _merge_indices(index1, index2, dest):
|
||||
last_line = None
|
||||
|
||||
with open(index1, 'rb') as index1_f:
|
||||
with open(index2, 'rb') as index2_f:
|
||||
with open(dest, 'wb') as dest_f:
|
||||
for line in heapq.merge(index1_f, index2_f):
|
||||
if last_line != line:
|
||||
dest_f.write(line)
|
||||
last_line = line
|
||||
|
||||
def set_metadata(self, namevalue_pairs):
|
||||
metadata_yaml = os.path.join(self.curr_coll_dir, 'metadata.yaml')
|
||||
metadata = None
|
||||
@ -233,17 +371,20 @@ directory structure expected by pywb
|
||||
v = defaults[n]
|
||||
print('- {0}: (pywb/{1})'.format(n, v))
|
||||
|
||||
def _confirm_overwrite(self, full_path, msg):
|
||||
def _confirm_overwrite(self, full_path, msg, ignore=False):
|
||||
if not os.path.isfile(full_path):
|
||||
return True
|
||||
|
||||
if ignore:
|
||||
return False
|
||||
|
||||
res = get_input(msg)
|
||||
try:
|
||||
res = strtobool(res)
|
||||
except ValueError:
|
||||
res = False
|
||||
|
||||
if not res:
|
||||
if not res and not ignore:
|
||||
raise IOError('Skipping, {0} already exists'.format(full_path))
|
||||
|
||||
def _get_template_path(self, template_name, verb):
|
||||
@ -264,7 +405,7 @@ directory structure expected by pywb
|
||||
|
||||
return full_path, filename
|
||||
|
||||
def add_template(self, template_name, force=False):
|
||||
def add_template(self, template_name, force=False, ignore=False):
|
||||
full_path, filename = self._get_template_path(template_name, 'add')
|
||||
|
||||
msg = ('Template file "{0}" ({1}) already exists. ' +
|
||||
@ -272,7 +413,11 @@ directory structure expected by pywb
|
||||
msg = msg.format(full_path, template_name)
|
||||
|
||||
if not force:
|
||||
self._confirm_overwrite(full_path, msg)
|
||||
res = self._confirm_overwrite(full_path, msg, ignore)
|
||||
if ignore and not res:
|
||||
return
|
||||
|
||||
os.makedirs(os.path.dirname(full_path), exist_ok=True)
|
||||
|
||||
data = resource_string('pywb', filename)
|
||||
with open(full_path, 'w+b') as fh:
|
||||
@ -282,6 +427,9 @@ directory structure expected by pywb
|
||||
msg = 'Copied default template "{0}" to "{1}"'
|
||||
print(msg.format(filename, full_path))
|
||||
|
||||
if template_name != "base_html":
|
||||
self.add_template("base_html", force=False, ignore=True)
|
||||
|
||||
def remove_template(self, template_name, force=False):
|
||||
full_path, filename = self._get_template_path(template_name, 'remove')
|
||||
|
||||
@ -335,6 +483,8 @@ Create manage file based web archive collections
|
||||
# epilog=epilog,
|
||||
formatter_class=RawTextHelpFormatter)
|
||||
|
||||
parser.add_argument("-V", "--version", action="version", version=get_version())
|
||||
|
||||
subparsers = parser.add_subparsers(dest='type')
|
||||
subparsers.required = True
|
||||
|
||||
@ -357,16 +507,23 @@ Create manage file based web archive collections
|
||||
listcmd = subparsers.add_parser('list', help=list_help)
|
||||
listcmd.set_defaults(func=do_list)
|
||||
|
||||
# Add Warcs
|
||||
# Add Warcs or Waczs
|
||||
def do_add(r):
|
||||
m = CollectionsManager(r.coll_name)
|
||||
m.add_warcs(r.files)
|
||||
m.add_archives(r.files, r.unpack_wacz)
|
||||
|
||||
addwarc_help = 'Copy ARCS/WARCS to collection directory and reindex'
|
||||
addwarc = subparsers.add_parser('add', help=addwarc_help)
|
||||
addwarc.add_argument('coll_name')
|
||||
addwarc.add_argument('files', nargs='+')
|
||||
addwarc.set_defaults(func=do_add)
|
||||
add_archives_help = 'Copy ARCs/WARCs to collection directory and reindex'
|
||||
add_unpack_wacz_help = 'Copy WARCs from WACZ to collection directory and reindex'
|
||||
add_archives = subparsers.add_parser('add', help=add_archives_help)
|
||||
add_archives.add_argument(
|
||||
'--unpack-wacz',
|
||||
dest='unpack_wacz',
|
||||
action='store_true',
|
||||
help=add_unpack_wacz_help
|
||||
)
|
||||
add_archives.add_argument('coll_name')
|
||||
add_archives.add_argument('files', nargs='+')
|
||||
add_archives.set_defaults(func=do_add)
|
||||
|
||||
# Reindex All
|
||||
def do_reindex(r):
|
||||
@ -441,6 +598,23 @@ Create manage file based web archive collections
|
||||
ACLManager.init_parser(acl)
|
||||
acl.set_defaults(func=do_acl)
|
||||
|
||||
# LOC
|
||||
from pywb.manager.locmanager import LocManager, loc_avail
|
||||
|
||||
def do_loc(r):
|
||||
if not loc_avail:
|
||||
print("You must install i18n extensions with 'pip install pywb[i18n]' to use localization features")
|
||||
return
|
||||
|
||||
loc = LocManager()
|
||||
loc.process(r)
|
||||
|
||||
loc_help = 'Generate strings for i18n/localization'
|
||||
loc = subparsers.add_parser('i18n', help=loc_help)
|
||||
if loc_avail:
|
||||
LocManager.init_parser(loc)
|
||||
loc.set_defaults(func=do_loc)
|
||||
|
||||
# Parse
|
||||
r = parser.parse_args(args=args)
|
||||
r.func(r)
|
||||
|
@ -30,6 +30,7 @@ class MultiFileWARCWriter(BaseWARCWriter):
|
||||
self.dir_template = dir_template
|
||||
self.key_template = kwargs.get('key_template', self.dir_template)
|
||||
self.dedup_index = kwargs.get('dedup_index')
|
||||
self.dedup_by_url = kwargs.get('dedup_by_url')
|
||||
self.filename_template = filename_template
|
||||
self.max_size = max_size
|
||||
if max_idle_secs > 0:
|
||||
@ -48,7 +49,7 @@ class MultiFileWARCWriter(BaseWARCWriter):
|
||||
|
||||
try:
|
||||
url = record.rec_headers.get_header('WARC-Target-URI')
|
||||
digest = record.rec_headers.get_header('WARC-Payload-Digest')
|
||||
digest = record.rec_headers.get_header('WARC-Payload-Digest') if not self.dedup_by_url else None
|
||||
iso_dt = record.rec_headers.get_header('WARC-Date')
|
||||
result = self.dedup_index.lookup_revisit(params, digest, url, iso_dt)
|
||||
except Exception as e:
|
||||
|
@ -24,8 +24,7 @@ class RecorderApp(object):
|
||||
|
||||
self.rec_source_name = kwargs.get('name', 'recorder')
|
||||
|
||||
self.create_buff_func = kwargs.get('create_buff_func',
|
||||
self.default_create_buffer)
|
||||
self.create_buff_func = kwargs.get('create_buff_func') or self.default_create_buffer
|
||||
|
||||
self.write_queue = gevent.queue.Queue()
|
||||
gevent.spawn(self._write_loop)
|
||||
|
@ -2,6 +2,7 @@ from warcio.timeutils import iso_date_to_timestamp
|
||||
|
||||
from io import BytesIO
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
from pywb.utils.canonicalize import calc_search_range
|
||||
from pywb.utils.format import res_template
|
||||
@ -48,9 +49,11 @@ class WritableRedisIndexer(RedisIndexSource):
|
||||
return base_name
|
||||
|
||||
def add_warc_file(self, full_filename, params):
|
||||
base_filename = self._get_rel_or_base_name(full_filename, params)
|
||||
file_key = res_template(self.file_key_template, params)
|
||||
if not file_key:
|
||||
return
|
||||
|
||||
base_filename = self._get_rel_or_base_name(full_filename, params)
|
||||
full_load_path = self.full_warc_prefix + full_filename
|
||||
|
||||
self.redis.hset(file_key, base_filename, full_load_path)
|
||||
@ -99,3 +102,29 @@ class WritableRedisIndexer(RedisIndexSource):
|
||||
return res
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class RedisPendingCounterTempBuffer(tempfile.SpooledTemporaryFile):
|
||||
def __init__(self, max_size, redis_url, params, name, timeout=30):
|
||||
redis_url = res_template(redis_url, params)
|
||||
super(RedisPendingCounterTempBuffer, self).__init__(max_size=max_size)
|
||||
self.redis, self.key = RedisIndexSource.parse_redis_url(redis_url)
|
||||
self.timeout = timeout
|
||||
|
||||
self.redis.incrby(self.key, 1)
|
||||
self.redis.expire(self.key, self.timeout)
|
||||
|
||||
def write(self, buf):
|
||||
super(RedisPendingCounterTempBuffer, self).write(buf)
|
||||
self.redis.expire(self.key, self.timeout)
|
||||
|
||||
def close(self):
|
||||
try:
|
||||
super(RedisPendingCounterTempBuffer, self).close()
|
||||
except:
|
||||
traceback.print_exc()
|
||||
|
||||
self.redis.incrby(self.key, -1)
|
||||
self.redis.expire(self.key, self.timeout)
|
||||
|
||||
|
@ -607,7 +607,8 @@ class TestRecorder(LiveServerTests, HttpBinLiveTests, FakeRedisTests, TempDirTes
|
||||
writer.close()
|
||||
assert len(writer.fh_cache) == 0
|
||||
|
||||
@pytest.mark.skipif(os.environ.get('CI') is not None, reason='Skip Test on CI')
|
||||
#@pytest.mark.skipif(os.environ.get('CI') is not None, reason='Skip Test on CI')
|
||||
@pytest.mark.skip
|
||||
def test_record_video_metadata(self):
|
||||
pytest.importorskip('youtube_dl')
|
||||
warc_path = to_path(self.root_dir + '/warcs/{user}/{coll}/')
|
||||
|
@ -391,7 +391,7 @@ class StreamingRewriter(object):
|
||||
# ============================================================================
|
||||
class RewriteInfo(object):
|
||||
TAG_REGEX = re.compile(b'^(\xef\xbb\xbf)?\s*\<')
|
||||
TAG_REGEX2 = re.compile(b'^.*<\w+[\s>]')
|
||||
TAG_REGEX2 = re.compile(b'^.*<[!]?\w+[\s>]')
|
||||
JSON_REGEX = re.compile(b'^\s*[{[][{"]') # if it starts with this then highly likely not HTML
|
||||
|
||||
JSONP_CONTAINS = ['callback=jQuery',
|
||||
@ -524,7 +524,7 @@ class RewriteInfo(object):
|
||||
if not self.text_type:
|
||||
return False
|
||||
|
||||
if self.url_rewriter.wburl.mod == 'id_':
|
||||
if self.is_identity():
|
||||
return False
|
||||
|
||||
if self.url_rewriter.rewrite_opts.get('is_ajax'):
|
||||
@ -537,9 +537,11 @@ class RewriteInfo(object):
|
||||
|
||||
return True
|
||||
|
||||
def is_identity(self):
|
||||
return self.url_rewriter.wburl.mod in ('id_', 'ir_')
|
||||
|
||||
def is_url_rw(self):
|
||||
if self.url_rewriter.wburl.mod in ('id_', 'bn_', 'wkrf_'):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
@ -20,7 +20,7 @@ from pywb.rewrite.rewrite_js_workers import JSWorkerRewriter
|
||||
from pywb import DEFAULT_RULES_FILE
|
||||
|
||||
import copy
|
||||
from werkzeug.useragents import UserAgent
|
||||
from ua_parser import user_agent_parser
|
||||
|
||||
|
||||
# ============================================================================
|
||||
@ -34,7 +34,7 @@ class DefaultRewriter(BaseContentRewriter):
|
||||
|
||||
'css': CSSRewriter,
|
||||
|
||||
'js': JSLocationOnlyRewriter,
|
||||
'js': JSWombatProxyRewriter,
|
||||
'js-proxy': JSNoneRewriter,
|
||||
'js-worker': JSWorkerRewriter,
|
||||
|
||||
@ -102,6 +102,7 @@ class DefaultRewriter(BaseContentRewriter):
|
||||
super(DefaultRewriter, self).__init__(rules_file, replay_mod)
|
||||
self.all_rewriters = copy.copy(self.DEFAULT_REWRITERS)
|
||||
|
||||
self.add_prefer_mod('raw', 'ir_')
|
||||
self.add_prefer_mod('raw', 'id_')
|
||||
self.add_prefer_mod('banner-only', 'bn_')
|
||||
self.add_prefer_mod('rewritten', replay_mod)
|
||||
@ -119,33 +120,44 @@ class RewriterWithJSProxy(DefaultRewriter):
|
||||
super(RewriterWithJSProxy, self).__init__(*args, **kwargs)
|
||||
|
||||
def get_rewriter(self, rw_type, rwinfo=None):
|
||||
if rw_type == 'js' and rwinfo:
|
||||
# check if UA allows this
|
||||
if self.ua_allows_obj_proxy(rwinfo.url_rewriter.rewrite_opts):
|
||||
return JSWombatProxyRewriter
|
||||
|
||||
# otherwise, return default rewriter
|
||||
if rw_type != 'js' or not rwinfo:
|
||||
return super(RewriterWithJSProxy, self).get_rewriter(rw_type, rwinfo)
|
||||
|
||||
def ua_allows_obj_proxy(self, opts):
|
||||
# check if should use old non-proxy rewriter
|
||||
if self.ua_no_obj_proxy(rwinfo.url_rewriter.rewrite_opts):
|
||||
print("loc only")
|
||||
return JSLocationOnlyRewriter
|
||||
else:
|
||||
# otherwise, return default, js proxy-capable rewriter
|
||||
return JSWombatProxyRewriter
|
||||
|
||||
def ua_no_obj_proxy(self, opts):
|
||||
ua = opts.get('ua')
|
||||
if not ua:
|
||||
ua_string = opts.get('ua_string')
|
||||
if ua_string:
|
||||
ua = UserAgent(ua_string)
|
||||
ua = user_agent_parser.ParseUserAgent(ua_string)
|
||||
|
||||
if ua is None:
|
||||
return True
|
||||
return False
|
||||
|
||||
supported = {
|
||||
'chrome': '49.0',
|
||||
'firefox': '44.0',
|
||||
'safari': '10.0',
|
||||
'opera': '36.0',
|
||||
'edge': '12.0',
|
||||
'msie': None,
|
||||
'chrome': 49,
|
||||
'firefox': 4,
|
||||
'safari': 10,
|
||||
'opera': 36,
|
||||
'edge': 12,
|
||||
'ie': 1000,
|
||||
}
|
||||
|
||||
min_vers = supported.get(ua.browser)
|
||||
min_vers = supported.get(ua.get("family", "").lower())
|
||||
if not min_vers:
|
||||
return False
|
||||
|
||||
try:
|
||||
ua_version = int(ua.get("major", 0))
|
||||
except:
|
||||
return False
|
||||
|
||||
return ua_version < min_vers
|
||||
|
||||
return (min_vers and ua.version >= min_vers)
|
||||
|
@ -177,7 +177,7 @@ class HTMLRewriterMixin(StreamingRewriter):
|
||||
return ''
|
||||
|
||||
values = (url.strip() for url in re.split(self.SRCSET_REGEX, value) if url)
|
||||
values = [self._rewrite_url(v.strip()) for v in values]
|
||||
values = [self._rewrite_url(v.split(' ')[0].strip()) + (' ' + ' '.join(v.split(' ')[1:])).rstrip() for v in values if v]
|
||||
return ', '.join(values)
|
||||
|
||||
def _rewrite_meta_refresh(self, meta_refresh):
|
||||
@ -268,7 +268,7 @@ class HTMLRewriterMixin(StreamingRewriter):
|
||||
unesc_value = self.try_unescape(value)
|
||||
rewritten_value = self.url_rewriter.rewrite(unesc_value, mod, force_abs)
|
||||
|
||||
# if no rewriting has occured, ensure we return original, not reencoded value
|
||||
# if no rewriting has occurred, ensure we return original, not reencoded value
|
||||
if rewritten_value == value:
|
||||
return orig_value
|
||||
|
||||
@ -416,12 +416,6 @@ class HTMLRewriterMixin(StreamingRewriter):
|
||||
rw_mod = handler.get(attr_name)
|
||||
attr_value = self._rewrite_url(attr_value, rw_mod)
|
||||
|
||||
# special case: data- attrs, conditional rewrite
|
||||
elif attr_name and attr_value and attr_name.startswith('data-'):
|
||||
if attr_value.startswith(self.DATA_RW_PROTOCOLS):
|
||||
rw_mod = 'oe_'
|
||||
attr_value = self._rewrite_url(attr_value, rw_mod)
|
||||
|
||||
# special case: base tag
|
||||
elif (tag == 'base') and (attr_name == 'href') and attr_value:
|
||||
rw_mod = handler.get(attr_name)
|
||||
@ -439,6 +433,12 @@ class HTMLRewriterMixin(StreamingRewriter):
|
||||
# URL not skipped, likely src='js/....', forcing abs to make sure, cause PHP MIME(JS) === HTML
|
||||
attr_value = self._rewrite_url(attr_value, rw_mod, True)
|
||||
self._write_attr('__wb_orig_src', ov, empty_attr=None)
|
||||
|
||||
elif attr_name == 'target':
|
||||
target = attr_value
|
||||
if target in ('_blank', '_parent', '_top'):
|
||||
attr_value = '___wb_replay_top_frame'
|
||||
|
||||
else:
|
||||
# rewrite url using tag handler
|
||||
rw_mod = handler.get(attr_name)
|
||||
@ -469,7 +469,7 @@ class HTMLRewriterMixin(StreamingRewriter):
|
||||
rw_mod = self.PRELOAD_TYPES.get(preload, rw_mod)
|
||||
|
||||
# for html imports with an optional as (google exclusive)
|
||||
elif rel == 'import':
|
||||
elif rel == 'import' or rel == 'alternate':
|
||||
rw_mod = 'mp_'
|
||||
|
||||
elif rel == 'stylesheet':
|
||||
@ -668,7 +668,7 @@ class HTMLRewriter(HTMLRewriterMixin, HTMLParser):
|
||||
if self.parse_comments:
|
||||
#data = self._rewrite_script(data)
|
||||
|
||||
# Rewrite with seperate HTMLRewriter
|
||||
# Rewrite with separate HTMLRewriter
|
||||
comment_rewriter = HTMLRewriter(self.url_rewriter,
|
||||
defmod=self.defmod)
|
||||
|
||||
|
@ -13,8 +13,21 @@ class RxRules(object):
|
||||
return string.replace("https", "http")
|
||||
|
||||
@staticmethod
|
||||
def replace_str(replacer):
|
||||
return lambda x, _: x.replace('this', replacer)
|
||||
def replace_str(replacer, match='this'):
|
||||
return lambda x, _: x.replace(match, replacer)
|
||||
|
||||
@staticmethod
|
||||
def replace_prefix_from(prefix, match):
|
||||
def do_replace(x, _):
|
||||
start = x.find(match)
|
||||
if start == 0:
|
||||
return prefix
|
||||
if start > 0:
|
||||
return x[:start] + prefix
|
||||
return x
|
||||
|
||||
return do_replace
|
||||
|
||||
|
||||
@staticmethod
|
||||
def format(template):
|
||||
@ -42,7 +55,7 @@ class RxRules(object):
|
||||
regex_str = '|'.join(['(' + rx + ')' for rx, op, count in rules])
|
||||
|
||||
# ensure it's not middle of a word, wrap in non-capture group
|
||||
regex_str = '(?<!\w)(?:' + regex_str + ')'
|
||||
regex_str = '(?:' + regex_str + ')'
|
||||
|
||||
return re.compile(regex_str, re.M)
|
||||
|
||||
@ -82,7 +95,9 @@ if (!self.__WB_pmw) {{ self.__WB_pmw = function(obj) {{ this.__WB_source = obj;
|
||||
# By using a function, the injected expression is a call expression that plays nicely in those cases
|
||||
this_rw = '_____WB$wombat$check$this$function_____(this)'
|
||||
|
||||
check_loc = '((self.__WB_check_loc && self.__WB_check_loc(location)) || {}).href = '
|
||||
check_loc = '((self.__WB_check_loc && self.__WB_check_loc(location, arguments)) || {}).href = '
|
||||
|
||||
eval_str = 'WB_wombat_runEval2((_______eval_arg, isGlobal) => { var ge = eval; return isGlobal ? ge(_______eval_arg) : eval(_______eval_arg); }).eval(this, (function() { return arguments })(),'
|
||||
|
||||
self.local_objs = [
|
||||
'window',
|
||||
@ -96,21 +111,20 @@ if (!self.__WB_pmw) {{ self.__WB_pmw = function(obj) {{ this.__WB_source = obj;
|
||||
]
|
||||
|
||||
local_declares = '\n'.join([local_var_line.format(obj, local_init_func_name) for obj in self.local_objs])
|
||||
local_declares += "\nlet arguments;"
|
||||
|
||||
prop_str = '|'.join(self.local_objs)
|
||||
|
||||
rules = [
|
||||
# rewriting 'eval(....)' - invocation
|
||||
(r'(?<![$])\beval\s*\(', self.add_prefix('WB_wombat_runEval(function _____evalIsEvil(_______eval_arg$$) { return eval(_______eval_arg$$); }.bind(this)).'), 0),
|
||||
# rewriting 'eval(...)' - invocation
|
||||
(r'(?<!function)(?:\s|^)\beval\s*\(', self.replace_prefix_from(eval_str, 'eval'), 0),
|
||||
# rewriting 'x = eval' - no invocation
|
||||
(r'(?<![$])\beval\b', self.add_prefix('WB_wombat_'), 0),
|
||||
(r'(?<=[=,])\s*\beval\b\s*(?![(:.$])', self.replace_str('self.eval', 'eval'), 0),
|
||||
(r'(?<=\.)postMessage\b\(', self.add_prefix('__WB_pmw(self).'), 0),
|
||||
(r'(?<![$.])\s*location\b\s*[=]\s*(?![=])', self.add_suffix(check_loc), 0),
|
||||
(r'(?<![$.])\s*\blocation\b\s*[=]\s*(?![=])', self.add_suffix(check_loc), 0),
|
||||
# rewriting 'return this'
|
||||
(r'\breturn\s+this\b\s*(?![.$])', self.replace_str(this_rw), 0),
|
||||
# rewriting 'this.' special properties access on new line, with ; prepended
|
||||
(r'(?<=[\n])\s*this\b(?=(?:\.(?:{0})\b))'.format(prop_str), self.replace_str(';' + this_rw), 0),
|
||||
# rewriting 'this.' special properties access, not on new line (no ;)
|
||||
# rewriting 'this.' special properties access
|
||||
(r'(?<![$.])\s*this\b(?=(?:\.(?:{0})\b))'.format(prop_str), self.replace_str(this_rw), 0),
|
||||
# rewrite '= this' or ', this'
|
||||
(r'(?<=[=,])\s*this\b\s*(?![:.$])', self.replace_str(this_rw), 0),
|
||||
@ -122,9 +136,9 @@ if (!self.__WB_pmw) {{ self.__WB_pmw = function(obj) {{ this.__WB_source = obj;
|
||||
|
||||
super(JSWombatProxyRules, self).__init__(rules)
|
||||
|
||||
self.first_buff = local_init_func + local_declares + '\n\n'
|
||||
self.first_buff = local_init_func + local_declares + '\n\n{'
|
||||
|
||||
self.last_buff = '\n\n}'
|
||||
self.last_buff = '\n\n}}'
|
||||
|
||||
|
||||
# =================================================================
|
||||
@ -343,7 +357,7 @@ class CSSRewriter(RegexRewriter):
|
||||
class XMLRules(RxRules):
|
||||
def __init__(self):
|
||||
rules = [
|
||||
('([A-Za-z:]+[\s=]+)?["\'\s]*(' +
|
||||
('(?<![\w])([A-Za-z:]+[\s=]+)?["\'\s]*(' +
|
||||
self.HTTPX_MATCH_STR + ')',
|
||||
self.archival_rewrite(), 2),
|
||||
]
|
||||
|
@ -86,3 +86,37 @@ def rewrite_fb_dash(string, *args):
|
||||
string += json.dumps(best_ids)
|
||||
return string
|
||||
|
||||
def rewrite_tw_dash(string, *args):
|
||||
try:
|
||||
best_variant = None
|
||||
best_bitrate = 0
|
||||
best_src = ""
|
||||
max_bitrate = 5000000
|
||||
|
||||
data = json.loads(string)
|
||||
for variant in data["variants"]:
|
||||
if (("content_type" in variant and variant["content_type"] != "video/mp4") or
|
||||
("type" in variant and variant["type"] != "video/mp4")):
|
||||
continue
|
||||
|
||||
bitrate = variant.get("bitrate")
|
||||
src = variant.get("src")
|
||||
|
||||
if bitrate and bitrate > best_bitrate and bitrate <= max_bitrate:
|
||||
best_variant = variant
|
||||
best_bitrate = bitrate
|
||||
# just compare src strings with dimensions
|
||||
elif src and src > best_src:
|
||||
best_variant = variant
|
||||
best_src = src
|
||||
|
||||
if best_variant:
|
||||
data["variants"] = [best_variant]
|
||||
|
||||
string = json.dumps(data)
|
||||
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
||||
return string
|
||||
|
||||
|
@ -26,6 +26,7 @@ class RewriteInputRequest(DirectWSGIInputRequest):
|
||||
self.url = url
|
||||
self.rewriter = rewriter
|
||||
self.extra_cookie = None
|
||||
self.warcserver_headers = {}
|
||||
|
||||
is_proxy = ('wsgiprox.proxy_host' in env)
|
||||
|
||||
@ -82,6 +83,11 @@ class RewriteInputRequest(DirectWSGIInputRequest):
|
||||
elif name in ('HTTP_IF_MODIFIED_SINCE', 'HTTP_IF_UNMODIFIED_SINCE'):
|
||||
continue
|
||||
|
||||
elif name == 'HTTP_X_PYWB_ACL_USER':
|
||||
name = name[5:].title().replace('_', '-')
|
||||
self.warcserver_headers[name] = value
|
||||
continue
|
||||
|
||||
elif name == 'HTTP_X_FORWARDED_PROTO':
|
||||
name = 'X-Forwarded-Proto'
|
||||
if self.splits:
|
||||
|
@ -5,11 +5,9 @@ from pywb.utils.loaders import load
|
||||
|
||||
from six.moves.urllib.parse import urlsplit, quote
|
||||
|
||||
from jinja2 import Environment, TemplateNotFound, contextfunction
|
||||
from jinja2 import Environment, TemplateNotFound, pass_context, select_autoescape
|
||||
from jinja2 import FileSystemLoader, PackageLoader, ChoiceLoader
|
||||
|
||||
from babel.support import Translations
|
||||
|
||||
from webassets.ext.jinja2 import AssetsExtension
|
||||
from webassets.loaders import YAMLLoader
|
||||
from webassets.env import Resolver
|
||||
@ -17,6 +15,7 @@ from webassets.env import Resolver
|
||||
from pkg_resources import resource_filename
|
||||
|
||||
import os
|
||||
import logging
|
||||
|
||||
try:
|
||||
import ujson as json
|
||||
@ -77,10 +76,12 @@ class JinjaEnv(object):
|
||||
|
||||
if overlay:
|
||||
jinja_env = overlay.jinja_env.overlay(loader=loader,
|
||||
autoescape=select_autoescape(),
|
||||
trim_blocks=True,
|
||||
extensions=extensions)
|
||||
else:
|
||||
jinja_env = RelEnvironment(loader=loader,
|
||||
autoescape=select_autoescape(),
|
||||
trim_blocks=True,
|
||||
extensions=extensions)
|
||||
|
||||
@ -98,6 +99,8 @@ class JinjaEnv(object):
|
||||
assets_env.resolver = PkgResResolver()
|
||||
jinja_env.assets_environment = assets_env
|
||||
|
||||
self.default_locale = ''
|
||||
|
||||
def _make_loaders(self, paths, packages):
|
||||
"""Initialize the template loaders based on the supplied paths and packages.
|
||||
|
||||
@ -117,20 +120,26 @@ class JinjaEnv(object):
|
||||
|
||||
return loaders
|
||||
|
||||
def init_loc(self, locales_root_dir, locales, loc_map):
|
||||
def init_loc(self, locales_root_dir, locales, loc_map, default_locale):
|
||||
locales = locales or []
|
||||
locales_root_dir = locales_root_dir or os.path.join('i18n', 'translations')
|
||||
default_locale = default_locale or 'en'
|
||||
self.default_locale = default_locale
|
||||
|
||||
if locales_root_dir:
|
||||
if locales:
|
||||
try:
|
||||
from babel.support import Translations
|
||||
for loc in locales:
|
||||
loc_map[loc] = Translations.load(locales_root_dir, [loc, 'en'])
|
||||
#jinja_env.jinja_env.install_gettext_translations(translations)
|
||||
loc_map[loc] = Translations.load(locales_root_dir, [loc, default_locale])
|
||||
except:
|
||||
logging.warn("Ignoring Locales. You must install i18n extensions with 'pip install pywb[i18n]' to use localization features")
|
||||
|
||||
def get_translate(context):
|
||||
loc = context.get('env', {}).get('pywb_lang')
|
||||
loc = context.get('env', {}).get('pywb_lang', default_locale)
|
||||
return loc_map.get(loc)
|
||||
|
||||
def override_func(jinja_env, name):
|
||||
@contextfunction
|
||||
@pass_context
|
||||
def get_override(context, text):
|
||||
translate = get_translate(context)
|
||||
if not translate:
|
||||
@ -149,7 +158,7 @@ class JinjaEnv(object):
|
||||
|
||||
# Special _Q() function to return %-encoded text, necessary for use
|
||||
# with text in banner
|
||||
@contextfunction
|
||||
@pass_context
|
||||
def quote_gettext(context, text):
|
||||
translate = get_translate(context)
|
||||
if not translate:
|
||||
@ -160,15 +169,16 @@ class JinjaEnv(object):
|
||||
|
||||
self.jinja_env.globals['locales'] = list(loc_map.keys())
|
||||
self.jinja_env.globals['_Q'] = quote_gettext
|
||||
self.jinja_env.globals['default_locale'] = default_locale
|
||||
|
||||
@contextfunction
|
||||
@pass_context
|
||||
def switch_locale(context, locale):
|
||||
environ = context.get('env')
|
||||
curr_loc = environ.get('pywb_lang', '')
|
||||
|
||||
request_uri = environ.get('REQUEST_URI', environ.get('PATH_INFO'))
|
||||
|
||||
if curr_loc:
|
||||
if curr_loc and request_uri.startswith('/' + curr_loc + '/'):
|
||||
return request_uri.replace(curr_loc, locale, 1)
|
||||
|
||||
app_prefix = environ.get('pywb.app_prefix', '')
|
||||
@ -178,7 +188,7 @@ class JinjaEnv(object):
|
||||
|
||||
return app_prefix + '/' + locale + request_uri
|
||||
|
||||
@contextfunction
|
||||
@pass_context
|
||||
def get_locale_prefixes(context):
|
||||
environ = context.get('env')
|
||||
locale_prefixes = {}
|
||||
@ -186,11 +196,11 @@ class JinjaEnv(object):
|
||||
orig_prefix = environ.get('pywb.app_prefix', '')
|
||||
coll = environ.get('SCRIPT_NAME', '')
|
||||
|
||||
if orig_prefix:
|
||||
if orig_prefix and coll.startswith(orig_prefix):
|
||||
coll = coll[len(orig_prefix):]
|
||||
|
||||
curr_loc = environ.get('pywb_lang', '')
|
||||
if curr_loc:
|
||||
if curr_loc and coll.startswith('/' + curr_loc):
|
||||
coll = coll[len(curr_loc) + 1:]
|
||||
|
||||
for locale in loc_map.keys():
|
||||
@ -312,7 +322,7 @@ class BaseInsertView(object):
|
||||
kwargs.update(params)
|
||||
|
||||
kwargs['env'] = env
|
||||
kwargs['static_prefix'] = env.get('pywb.host_prefix', '') + env.get('pywb.app_prefix', '') + '/static'
|
||||
kwargs['static_prefix'] = env.get('pywb.static_prefix', '/static')
|
||||
|
||||
|
||||
return template.render(**kwargs)
|
||||
@ -361,7 +371,7 @@ class HeadInsertView(BaseInsertView):
|
||||
|
||||
if self.banner_view:
|
||||
banner_html = self.banner_view.render_to_string(env, cdx=cdx, **params)
|
||||
params['banner_html'] = banner_html
|
||||
params['custom_banner_html'] = banner_html
|
||||
|
||||
return self.render_to_string(env, cdx=cdx, **params)
|
||||
|
||||
@ -395,10 +405,11 @@ class TopFrameView(BaseInsertView):
|
||||
|
||||
embed_url = wb_url.to_str(mod=replay_mod)
|
||||
|
||||
timestamp = ''
|
||||
if wb_url.timestamp:
|
||||
timestamp = wb_url.timestamp
|
||||
else:
|
||||
timestamp = timestamp_now()
|
||||
#else:
|
||||
# timestamp = timestamp_now()
|
||||
|
||||
is_proxy = 'wsgiprox.proxy_host' in env
|
||||
|
||||
|
@ -13,7 +13,7 @@ from pywb.utils.io import chunk_encode_iter
|
||||
|
||||
from pywb.rewrite.wburl import WbUrl
|
||||
from pywb.rewrite.url_rewriter import UrlRewriter
|
||||
from pywb.rewrite.default_rewriter import DefaultRewriter, RewriterWithJSProxy
|
||||
from pywb.rewrite.default_rewriter import RewriterWithJSProxy
|
||||
|
||||
from pywb import get_test_dir
|
||||
|
||||
@ -39,8 +39,7 @@ def headers(request):
|
||||
class TestContentRewriter(object):
|
||||
@classmethod
|
||||
def setup_class(self):
|
||||
self.content_rewriter = DefaultRewriter()
|
||||
self.js_proxy_content_rewriter = RewriterWithJSProxy()
|
||||
self.content_rewriter = RewriterWithJSProxy()
|
||||
|
||||
def _create_response_record(self, url, headers, payload, warc_headers):
|
||||
writer = BufferWARCWriter()
|
||||
@ -65,7 +64,6 @@ class TestContentRewriter(object):
|
||||
record = self._create_response_record(url, headers, content, warc_headers)
|
||||
|
||||
wburl = WbUrl(ts + '/' + (request_url or url))
|
||||
url_rewriter = UrlRewriter(wburl, prefix)
|
||||
|
||||
cdx = CDXObject()
|
||||
cdx['url'] = url
|
||||
@ -79,11 +77,13 @@ class TestContentRewriter(object):
|
||||
return ''
|
||||
|
||||
if use_js_proxy:
|
||||
rewriter = self.js_proxy_content_rewriter
|
||||
rewrite_opts = {}
|
||||
else:
|
||||
rewriter = self.content_rewriter
|
||||
rewrite_opts = {'ua_string': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/10.0 Safari/537.36'}
|
||||
|
||||
return rewriter(record, url_rewriter, cookie_rewriter=None,
|
||||
url_rewriter = UrlRewriter(wburl, prefix, rewrite_opts=rewrite_opts)
|
||||
|
||||
return self.content_rewriter(record, url_rewriter, cookie_rewriter=None,
|
||||
head_insert_func=insert_func,
|
||||
cdx=cdx,
|
||||
environ=environ)
|
||||
@ -141,6 +141,17 @@ class TestContentRewriter(object):
|
||||
assert ('Content-Type', 'text/html; charset=utf-8') in headers.headers
|
||||
assert b''.join(gen).decode('utf-8') == exp
|
||||
|
||||
def test_rewrite_html_ignore_bom(self):
|
||||
headers = {'Content-Type': 'text/html'}
|
||||
content = u'\ufeff\ufeff\ufeff<!DOCTYPE html>\n<head>\n<a href="http://example.com"></a></body></html>'
|
||||
|
||||
headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701mp_')
|
||||
|
||||
exp = '\ufeff\ufeff\ufeff<!DOCTYPE html>\n<head>\n<a href="http://localhost:8080/prefix/201701/http://example.com"></a></body></html>'
|
||||
assert is_rw
|
||||
assert ('Content-Type', 'text/html') in headers.headers
|
||||
assert b''.join(gen).decode('utf-8') == exp
|
||||
|
||||
def test_rewrite_html_utf_8_anchor(self):
|
||||
headers = {'Content-Type': 'text/html; charset=utf-8'}
|
||||
content = u'<html><body><a href="#éxample-tésté"></a></body></html>'
|
||||
|
@ -138,9 +138,9 @@ r"""
|
||||
>>> parse('<meta http-equiv="Content-Security-Policy" content="default-src http://example.com" />')
|
||||
<meta http-equiv="Content-Security-Policy" _content="default-src http://example.com"/>
|
||||
|
||||
# Custom -data attribs
|
||||
# Don't rewrite custom data-* attributes
|
||||
>>> parse('<div data-url="http://example.com/a/b/c.html" data-some-other-value="http://example.com/img.gif">')
|
||||
<div data-url="/web/20131226101010oe_/http://example.com/a/b/c.html" data-some-other-value="/web/20131226101010oe_/http://example.com/img.gif">
|
||||
<div data-url="http://example.com/a/b/c.html" data-some-other-value="http://example.com/img.gif">
|
||||
|
||||
# param tag -- rewrite conditionally if url
|
||||
>>> parse('<param value="http://example.com/"/>')
|
||||
@ -185,6 +185,10 @@ r"""
|
||||
>>> parse('<img srcset="//example.com/1x,1x 2w, //example1.com/foo 2x, http://example.com/bar,bar 4x">')
|
||||
<img srcset="/web/20131226101010///example.com/1x,1x 2w, /web/20131226101010///example1.com/foo 2x, /web/20131226101010/http://example.com/bar,bar 4x">
|
||||
|
||||
# complex srcset attrib
|
||||
>>> parse('<img srcset="http://test.com/yaşar-kunduz.jpg 320w, http://test.com/yaşar-konçalves-273x300.jpg 273w">')
|
||||
<img srcset="/web/20131226101010/http://test.com/ya%C5%9Far-kunduz.jpg 320w, /web/20131226101010/http://test.com/ya%C5%9Far-konc%CC%A7alves-273x300.jpg 273w">
|
||||
|
||||
# empty srcset attrib
|
||||
>>> parse('<img srcset="">')
|
||||
<img srcset="">
|
||||
@ -394,7 +398,7 @@ r"""
|
||||
|
||||
# parse attr with js proxy, rewrite location assignment
|
||||
>>> parse('<html><a href="javascript:location=\'foo.html\'"></a></html>', js_proxy=True)
|
||||
<html><a href="javascript:{ location=((self.__WB_check_loc && self.__WB_check_loc(location)) || {}).href = 'foo.html' }"></a></html>
|
||||
<html><a href="javascript:{ location=((self.__WB_check_loc && self.__WB_check_loc(location, arguments)) || {}).href = 'foo.html' }"></a></html>
|
||||
|
||||
# parse attr with js proxy, assigning to location.href, no location assignment rewrite needed
|
||||
>>> parse('<html><a href="javascript:location.href=\'foo.html\'"></a></html>', js_proxy=True)
|
||||
@ -412,6 +416,9 @@ r"""
|
||||
>>> parse('<!--[if !IE]> --><html><![endif]--><a href="http://example.com/"><!--[if IE]><![endif]--><a href="http://example.com/"></html>')
|
||||
<!--[if !IE]> --><html><![endif]><a href="/web/20131226101010/http://example.com/"><!--[if IE]><![endif]--><a href="/web/20131226101010/http://example.com/"></html>
|
||||
|
||||
# Test tag with a target
|
||||
>>> parse('<HTML><A Href=\"page.html\" target=\"_blank\">Text</a></hTmL>')
|
||||
<html><a href="page.html" target="___wb_replay_top_frame">Text</a></html>
|
||||
|
||||
# Test blank
|
||||
>>> parse('')
|
||||
|
@ -131,19 +131,19 @@ r"""
|
||||
#=================================================================
|
||||
|
||||
>>> _test_js_obj_proxy('var foo = this; location = bar')
|
||||
'var foo = _____WB$wombat$check$this$function_____(this); location = ((self.__WB_check_loc && self.__WB_check_loc(location)) || {}).href = bar'
|
||||
'var foo = _____WB$wombat$check$this$function_____(this); location = ((self.__WB_check_loc && self.__WB_check_loc(location, arguments)) || {}).href = bar'
|
||||
|
||||
>>> _test_js_obj_proxy('var that = this\n location = bar')
|
||||
'var that = _____WB$wombat$check$this$function_____(this)\n location = ((self.__WB_check_loc && self.__WB_check_loc(location)) || {}).href = bar'
|
||||
'var that = _____WB$wombat$check$this$function_____(this)\n location = ((self.__WB_check_loc && self.__WB_check_loc(location, arguments)) || {}).href = bar'
|
||||
|
||||
>>> _test_js_obj_proxy('location = "xyz"')
|
||||
'location = ((self.__WB_check_loc && self.__WB_check_loc(location)) || {}).href = "xyz"'
|
||||
'location = ((self.__WB_check_loc && self.__WB_check_loc(location, arguments)) || {}).href = "xyz"'
|
||||
|
||||
>>> _test_js_obj_proxy('var foo = this.location')
|
||||
'var foo = _____WB$wombat$check$this$function_____(this).location'
|
||||
|
||||
>>> _test_js_obj_proxy('A = B\nthis.location = "foo"')
|
||||
'A = B\n;_____WB$wombat$check$this$function_____(this).location = "foo"'
|
||||
'A = B\n_____WB$wombat$check$this$function_____(this).location = "foo"'
|
||||
|
||||
>>> _test_js_obj_proxy('var foo = this.location2')
|
||||
'var foo = this.location2'
|
||||
@ -213,10 +213,18 @@ r"""
|
||||
'this. alocation = http://example.com/'
|
||||
|
||||
>>> _test_js_obj_proxy(r'this. location = http://example.com/')
|
||||
'this. location = ((self.__WB_check_loc && self.__WB_check_loc(location)) || {}).href = http://example.com/'
|
||||
'this. location = ((self.__WB_check_loc && self.__WB_check_loc(location, arguments)) || {}).href = http://example.com/'
|
||||
|
||||
>>> _test_js_obj_proxy('eval(a)')
|
||||
'WB_wombat_runEval(function _____evalIsEvil(_______eval_arg$$) { return eval(_______eval_arg$$); }.bind(this)).eval(a)'
|
||||
'WB_wombat_runEval2((_______eval_arg, isGlobal) => { var ge = eval; return isGlobal ? ge(_______eval_arg) : eval(_______eval_arg); }).eval(this, (function() { return arguments })(),a)'
|
||||
|
||||
>>> _test_js_obj_proxy('abc eval(a)')
|
||||
'abc WB_wombat_runEval2((_______eval_arg, isGlobal) => { var ge = eval; return isGlobal ? ge(_______eval_arg) : eval(_______eval_arg); }).eval(this, (function() { return arguments })(),a)'
|
||||
|
||||
|
||||
|
||||
>>> _test_js_obj_proxy(',eval(a)')
|
||||
',eval(a)'
|
||||
|
||||
>>> _test_js_obj_proxy('this.$eval(a)')
|
||||
'this.$eval(a)'
|
||||
@ -225,13 +233,38 @@ r"""
|
||||
'x = this.$eval; x(a);'
|
||||
|
||||
>>> _test_js_obj_proxy('x = eval; x(a);')
|
||||
'x = WB_wombat_eval; x(a);'
|
||||
'x = self.eval; x(a);'
|
||||
|
||||
>>> _test_js_obj_proxy('$eval = eval; $eval(a);')
|
||||
'$eval = WB_wombat_eval; $eval(a);'
|
||||
'$eval = self.eval; $eval(a);'
|
||||
|
||||
>>> _test_js_obj_proxy('foo(a, eval(data));')
|
||||
'foo(a, WB_wombat_runEval2((_______eval_arg, isGlobal) => { var ge = eval; return isGlobal ? ge(_______eval_arg) : eval(_______eval_arg); }).eval(this, (function() { return arguments })(),data));'
|
||||
|
||||
>>> _test_js_obj_proxy('function eval() {}')
|
||||
'function eval() {}'
|
||||
|
||||
>>> _test_js_obj_proxy('window.eval(a);')
|
||||
'window.WB_wombat_runEval(function _____evalIsEvil(_______eval_arg$$) { return eval(_______eval_arg$$); }.bind(this)).eval(a);'
|
||||
'window.eval(a);'
|
||||
|
||||
>>> _test_js_obj_proxy('x = window.eval; x(a);')
|
||||
'x = window.eval; x(a);'
|
||||
|
||||
>>> _test_js_obj_proxy('obj = { eval : 1 }')
|
||||
'obj = { eval : 1 }'
|
||||
|
||||
>>> _test_js_obj_proxy('x = obj.eval')
|
||||
'x = obj.eval'
|
||||
|
||||
>>> _test_js_obj_proxy('x = obj.eval(a)')
|
||||
'x = obj.eval(a)'
|
||||
|
||||
>>> _test_js_obj_proxy('x = obj._eval(a)')
|
||||
'x = obj._eval(a)'
|
||||
|
||||
>>> _test_js_obj_proxy('x = obj.$eval(a)')
|
||||
'x = obj.$eval(a)'
|
||||
|
||||
|
||||
#=================================================================
|
||||
# XML Rewriting
|
||||
@ -334,7 +367,6 @@ def _test_xml(string):
|
||||
def _test_css(string):
|
||||
return CSSRewriter(urlrewriter).rewrite(string)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import doctest
|
||||
doctest.testmod()
|
||||
|
@ -50,10 +50,18 @@ default_filters:
|
||||
- match: '[?&](\w*(bust|ts)\w*=1[\d]{12,15})(?=&|$)'
|
||||
replace: ''
|
||||
|
||||
# remove Facebook link ID when pywb URLs are shared on Facebook
|
||||
- match: '[?&](fbclid)=(.*)+(?=&|$)'
|
||||
replace: ''
|
||||
|
||||
|
||||
|
||||
|
||||
rules:
|
||||
|
||||
# twitter rules
|
||||
#=================================================================
|
||||
|
||||
- url_prefix: 'com,twitter)/i/profiles/show/'
|
||||
|
||||
fuzzy_lookup: '/profiles/show/.*with_replies\?.*(max_id=[^&]+)'
|
||||
@ -68,6 +76,24 @@ rules:
|
||||
|
||||
fuzzy_lookup: '()'
|
||||
|
||||
- url_prefix: ['com,twitter,api)/2/', 'com,twitter)/i/api/2/', 'com,twitter)/i/api/graphql/']
|
||||
|
||||
rewrite:
|
||||
js_regexs:
|
||||
- match: 'video_info":(.*?}]})'
|
||||
group: 1
|
||||
function: 'pywb.rewrite.rewrite_dash:rewrite_tw_dash'
|
||||
|
||||
|
||||
- url_prefix: ['com,twimg,syndication,cdn)/tweet-result']
|
||||
|
||||
rewrite:
|
||||
js_regexs:
|
||||
- match: 'video":(.*?viewCount":\d+})'
|
||||
group: 1
|
||||
function: 'pywb.rewrite.rewrite_dash:rewrite_tw_dash'
|
||||
|
||||
|
||||
|
||||
# facebook rules
|
||||
#=================================================================
|
||||
@ -84,7 +110,7 @@ rules:
|
||||
|
||||
fuzzy_lookup:
|
||||
match: '("(?:cursor|cursorindex)":["\d\w]+)'
|
||||
find_all: true
|
||||
re_type: findall
|
||||
|
||||
- url_prefix: 'com,facebook)/ajax/pagelet/generic.php/profiletimeline'
|
||||
fuzzy_lookup: 'com,facebook\)/.*[?&](__adt=[^&]+).*[&]data=(?:.*?(?:[&]|(profile_id|pagelet_token)[^,]+))'
|
||||
@ -149,7 +175,7 @@ rules:
|
||||
|
||||
fuzzy_lookup:
|
||||
match: '("q[\d]+":|after:\\"[^"]+)'
|
||||
find_all: true
|
||||
re_type: findall
|
||||
|
||||
- url_prefix: 'com,facebook)/pages_reaction_units/more'
|
||||
|
||||
@ -170,6 +196,9 @@ rules:
|
||||
group: 1
|
||||
function: 'pywb.rewrite.rewrite_dash:rewrite_fb_dash'
|
||||
|
||||
- match: '"debugNoBatching\s?":(?:false|0)'
|
||||
replace: '"debugNoBatching":true'
|
||||
|
||||
parse_comments: true
|
||||
|
||||
- url_prefix: 'com,facebook'
|
||||
@ -196,6 +225,14 @@ rules:
|
||||
|
||||
- url_prefix: 'com,instagram)/'
|
||||
|
||||
rewrite:
|
||||
js_regexs:
|
||||
- match: '"is_dash_eligible":true'
|
||||
replace: '"is_dash_eligible":false'
|
||||
|
||||
- match: '"debugNoBatching\s?":(?:false|0)'
|
||||
replace: '"debugNoBatching":true'
|
||||
|
||||
fuzzy_lookup: '()'
|
||||
|
||||
|
||||
@ -344,7 +381,7 @@ rules:
|
||||
- videoFileId
|
||||
- signature
|
||||
|
||||
- url_prefix: ['net,akamaized,gcs-vimeo)/', 'net,akamaized,vod)/']
|
||||
- url_prefix: ['net,akamaized,gcs-vimeo)/', 'net,akamaized,vod)/', 'net,akamaized,vod-progressive)/']
|
||||
|
||||
fuzzy_lookup:
|
||||
match: '([/\d]+\.mp4)$'
|
||||
@ -410,6 +447,15 @@ rules:
|
||||
- action_load_comments
|
||||
- filter
|
||||
|
||||
- url_prefix: ['com,youtube)/embed', 'com,youtube-nocookie)/embed']
|
||||
|
||||
fuzzy_lookup:
|
||||
match: '()'
|
||||
|
||||
- url_prefix: ['com,youtube)/youtubei/v1', 'com,youtube-nocookie)/youtubei/v1']
|
||||
|
||||
fuzzy_lookup:
|
||||
- videoid
|
||||
|
||||
- url_prefix: 'com,googlevideo,'
|
||||
|
||||
@ -456,9 +502,15 @@ rules:
|
||||
- match: 'yt\.setConfig.*PLAYER_CONFIG.*args":\s*{'
|
||||
replace: '{0} "dash": "0", dashmpd: "", '
|
||||
|
||||
- match: 'yt\.setConfig.*PLAYER_VARS.*?{'
|
||||
replace: '{0}"dash":"0","dashmpd":"",'
|
||||
|
||||
- match: '(?:"player":|ytplayer\.config).*"args":\s*{'
|
||||
replace: '{0}"dash":"0","dashmpd":"",'
|
||||
|
||||
- match: '"0"\s*?==\s*?\w+\.dash\&\&'
|
||||
replace: '1&&'
|
||||
|
||||
|
||||
# testing rules -- not for valid domain
|
||||
#=================================================================
|
||||
@ -492,6 +544,12 @@ rules:
|
||||
rewrite:
|
||||
js_rewrite_location: urls
|
||||
|
||||
- url_prefix: 'com,example)/matched'
|
||||
fuzzy_lookup:
|
||||
re_type: sub
|
||||
match: 'matched'
|
||||
replace: 'replaced'
|
||||
|
||||
# all domain rules -- fallback to this dataset
|
||||
#=================================================================
|
||||
# Applies to all urls -- should be last
|
||||
|
@ -107,7 +107,7 @@ function fetchDone() {
|
||||
}
|
||||
|
||||
function fetchErrored(err) {
|
||||
console.warn("Fetch Failed: " + err);
|
||||
console.warn('Fetch Failed: ' + err);
|
||||
fetchDone();
|
||||
}
|
||||
|
||||
|
45
pywb/static/css/base.css
Normal file
45
pywb/static/css/base.css
Normal file
@ -0,0 +1,45 @@
|
||||
header {
|
||||
display: flex;
|
||||
display: -webkit-box;
|
||||
display: -moz-box;
|
||||
display: -webkit-flex;
|
||||
display: -ms-flexbox;
|
||||
|
||||
justify-content: space-between;
|
||||
-webkit-box-pack: justify;
|
||||
-moz-box-pack: justify;
|
||||
-ms-flex-pack: justify;
|
||||
}
|
||||
|
||||
header .language-select {
|
||||
position: absolute;
|
||||
top: 10px;
|
||||
right: 10px;
|
||||
}
|
||||
header .language-select ul {
|
||||
display: inline-block;
|
||||
list-style-type: none;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
header .language-select ul li {
|
||||
display: inline-block;
|
||||
}
|
||||
header .language-select ul li:not(:last-child):after {
|
||||
content: ' / ';
|
||||
}
|
||||
|
||||
header .language-select a:link,
|
||||
header .language-select a:visited,
|
||||
header .language-select a:active {
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
header .language-select a:hover {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
.error pre {
|
||||
white-space: pre-wrap;
|
||||
text-align: left;
|
||||
}
|
@ -1,191 +0,0 @@
|
||||
|
||||
#_wb_frame_top_banner
|
||||
{
|
||||
display: block !important;
|
||||
top: 0px !important;
|
||||
left: 0px !important;
|
||||
font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif !important;
|
||||
width: 100% !important;
|
||||
font-size: 18px !important;
|
||||
background-color: #444 !important;
|
||||
color: white !important;
|
||||
z-index: 2147483643 !important;
|
||||
line-height: normal !important;
|
||||
|
||||
position: absolute !important;
|
||||
border: 0px;
|
||||
height: 44px !important;
|
||||
|
||||
display: flex !important;
|
||||
display: -webkit-box !important;
|
||||
display: -moz-box !important;
|
||||
display: -webkit-flex !important;
|
||||
display: -ms-flexbox !important;
|
||||
|
||||
justify-content: space-between;
|
||||
-webkit-box-pack: justify;
|
||||
-moz-box-pack: justify;
|
||||
-ms-flex-pack: justify;
|
||||
align-items: center;
|
||||
-webkit-box-align: center;
|
||||
-moz-box-align: center;
|
||||
-ms-flex-align: center;
|
||||
}
|
||||
|
||||
#title_or_url
|
||||
{
|
||||
display: block !important;
|
||||
white-space: nowrap;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
max-width: 100%;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Logo link inside the banner: fixed 71x26 box that must not shrink. */
#_wb_frame_top_banner ._wb_linked_logo
{
  display: block;
  height: 26px;
  width: 71px;
  margin-left: 15px;
  /* flex-shrink takes a single number; the prefixed fallbacks now carry the
     same value as the standard property, which comes last so it wins */
  -webkit-flex-shrink: 0;
  -moz-flex-shrink: 0;
  -ms-flex: 0 0 71px;
  flex-shrink: 0;
}
|
||||
|
||||
#_wb_frame_top_banner ._wb_linked_logo img
|
||||
{
|
||||
width: auto;
|
||||
height: 100%;
|
||||
border: none;
|
||||
}
|
||||
|
||||
#_wb_capture_info
|
||||
{
|
||||
flex-grow: 1;
|
||||
-webkit-box-flex: 1;
|
||||
-moz-box-flex: 1;
|
||||
-webkit-flex-grow: 1;
|
||||
-ms-flex: 1;
|
||||
|
||||
min-width: 0;
|
||||
margin: 0 15px;
|
||||
|
||||
display: flex !important;
|
||||
display: -webkit-box !important;
|
||||
display: -moz-box !important;
|
||||
display: -webkit-flex !important;
|
||||
display: -ms-flexbox !important;
|
||||
|
||||
flex-direction: column;
|
||||
-webkit-box-direction: normal;
|
||||
-webkit-box-orient: vertical;
|
||||
-moz-box-direction: normal;
|
||||
-moz-box-orient: vertical;
|
||||
-ms-flex-direction: column;
|
||||
|
||||
justify-content: center;
|
||||
-webkit-box-pack: center;
|
||||
-moz-box-pack: center;
|
||||
-ms-flex-pack: center;
|
||||
|
||||
align-items: center;
|
||||
-webkit-box-align: center;
|
||||
-moz-box-align: center;
|
||||
-ms-flex-align: center;
|
||||
|
||||
height: 100%;
|
||||
-webkit-font-smoothing: antialiased;
|
||||
}
|
||||
|
||||
._wb_capture_date
|
||||
{
|
||||
font-size: 13px;
|
||||
}
|
||||
|
||||
#_wb_frame_top_banner #_wb_ancillary_links
|
||||
{
|
||||
font-size: 12px;
|
||||
color: #FFF;
|
||||
text-align: right;
|
||||
margin: 0px 15px 0px 0px;
|
||||
padding: inherit;
|
||||
background-color: inherit;
|
||||
width: initial;
|
||||
flex-shrink: 1;
|
||||
-webkit-flex-shrink: 1;
|
||||
-moz-flex-shrink: 1;
|
||||
-ms-flex: 0 0 115px;
|
||||
}
|
||||
#_wb_frame_top_banner #_wb_ancillary_links a:link,
|
||||
#_wb_frame_top_banner #_wb_ancillary_links a:visited,
|
||||
#_wb_frame_top_banner #_wb_ancillary_links a:active
|
||||
{
|
||||
color: #FFF;
|
||||
text-decoration: none;
|
||||
}
|
||||
#_wb_frame_top_banner #_wb_ancillary_links a:hover
|
||||
{
|
||||
text-decoration: underline;
|
||||
}
|
||||
#_wb_frame_top_banner #_wb_ancillary_links a img
|
||||
{
|
||||
width: 10px;
|
||||
height: 10px;
|
||||
}
|
||||
|
||||
#wb_iframe_div
|
||||
{
|
||||
position: absolute;
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
padding: 44px 0px 0px 0px;
|
||||
border: none;
|
||||
box-sizing: border-box;
|
||||
-moz-box-sizing: border-box;
|
||||
-webkit-box-sizing: border-box;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.wb_iframe
|
||||
{
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
border: 2px solid #FFF;
|
||||
border-width: 2px 0 0 0;
|
||||
padding: 0px 0px 0px 0px;
|
||||
overflow: scroll;
|
||||
}
|
||||
|
||||
.mobile {
|
||||
display: none;
|
||||
}
|
||||
|
||||
@media screen and (max-width: 500px) {
|
||||
#_wb_frame_top_banner ._wb_linked_logo
|
||||
{
|
||||
width: 26px;
|
||||
height: 26px;
|
||||
margin-left: 10px;
|
||||
}
|
||||
#_wb_frame_top_banner ._wb_linked_logo img:not(.mobile)
|
||||
{
|
||||
display: none;
|
||||
}
|
||||
#_wb_frame_top_banner .mobile
|
||||
{
|
||||
display: block;
|
||||
}
|
||||
|
||||
#_wb_capture_info
|
||||
{
|
||||
margin: 0 5px;
|
||||
}
|
||||
|
||||
#_wb_frame_top_banner .no-mobile
|
||||
{
|
||||
display: none;
|
||||
}
|
||||
}
|
@ -1,320 +0,0 @@
|
||||
/*
|
||||
Copyright(c) 2013-2018 Rhizome and Ilya Kreymer. Released under the GNU General Public License.
|
||||
|
||||
This file is part of pywb, https://github.com/webrecorder/pywb
|
||||
|
||||
pywb is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
pywb is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with pywb. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
*/
|
||||
|
||||
// Creates the default pywb banner.
|
||||
|
||||
(function() {
|
||||
if (window.top !== window) {
|
||||
return;
|
||||
}
|
||||
|
||||
/**
|
||||
* The default banner class
|
||||
*/
|
||||
function DefaultBanner() {
|
||||
if (!(this instanceof DefaultBanner)) return new DefaultBanner();
|
||||
this.banner = null;
|
||||
this.captureInfo = null;
|
||||
this.last_state = {};
|
||||
this.state = null;
|
||||
this.title = '';
|
||||
this.bannerUrlSet = false;
|
||||
this.onMessage = this.onMessage.bind(this);
|
||||
}
|
||||
|
||||
// Functions required to be exposed by all banners
|
||||
|
||||
/**
|
||||
* @desc Initialize (display) the banner
|
||||
*/
|
||||
DefaultBanner.prototype.init = function() {
|
||||
this.createBanner('_wb_frame_top_banner');
|
||||
|
||||
if (window.wbinfo) {
|
||||
this.set_banner(
|
||||
window.wbinfo.url,
|
||||
window.wbinfo.timestamp,
|
||||
window.wbinfo.is_live,
|
||||
window.wbinfo.is_framed ? '' : document.title
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @desc Called by ContentFrame to detect if the banner is still showing
|
||||
* that the page is loading
|
||||
* @returns {boolean}
|
||||
*/
|
||||
DefaultBanner.prototype.stillIndicatesLoading = function() {
|
||||
return !this.bannerUrlSet;
|
||||
};
|
||||
|
||||
/**
|
||||
* @param {string} url - The URL of the replayed page
|
||||
* @param {?string} ts - The timestamp of the replayed page.
|
||||
* If we are in live mode this is undefined/empty string
|
||||
* @param {boolean} is_live - A bool indicating if we are operating in live mode
|
||||
*/
|
||||
DefaultBanner.prototype.updateCaptureInfo = function(url, ts, is_live) {
|
||||
if (is_live && !ts) {
|
||||
ts = new Date().toISOString().replace(/[-T:.Z]/g, '');
|
||||
}
|
||||
this.set_banner(url, ts, is_live, null);
|
||||
};
|
||||
|
||||
/**
 * @desc Called by ContentFrame when a message is received from the replay iframe.
 * Messages whose wb_type is 'load' or 'replace-url' replace the remembered
 * state; 'title' messages only update the title and reuse the last state.
 * Any other message is ignored. On 'load' the favicon links of this document
 * are replaced by the icons listed in the message, and in every handled case
 * the banner is re-rendered through set_banner.
 * @param {MessageEvent} event - The message event containing the message received
 * from the replayed page
 */
DefaultBanner.prototype.onMessage = function(event) {
  // A message may carry no payload at all; there is nothing to read then
  var data = event.data;
  if (!data) {
    return;
  }

  var type = data.wb_type;

  if (type === 'load' || type === 'replace-url') {
    this.state = data;
    this.last_state = this.state;
    this.title = data.title || this.title;
  } else if (type === 'title') {
    this.state = this.last_state;
    this.title = data.title;
  } else {
    return;
  }

  // favicon update
  if (type === 'load') {
    var head = document.querySelector('head');
    var oldLink = document.querySelectorAll("link[rel*='icon']");
    var i = 0;
    for (; i < oldLink.length; i++) {
      // the selector matches links anywhere in the document, so detach each
      // one from its actual parent instead of assuming it sits in <head>
      var parent = oldLink[i].parentNode;
      if (parent) {
        parent.removeChild(oldLink[i]);
      }
    }

    if (this.state.icons) {
      for (i = 0; i < this.state.icons.length; i++) {
        var icon = this.state.icons[i];
        var link = document.createElement('link');
        link.rel = icon.rel;
        link.href = icon.href;
        head.appendChild(link);
      }
    }
  }

  this.set_banner(
    this.state.url,
    this.state.ts,
    this.state.is_live,
    this.title
  );
};
|
||||
|
||||
// Functions internal to the default banner
|
||||
|
||||
/**
 * @desc Navigate to different language, if available.
 * Nothing happens unless the current location starts with
 * window.banner_info.prefix and a prefix is registered for the language in
 * window.banner_info.locale_prefixes.
 * @param {string} lang - Key looked up in window.banner_info.locale_prefixes
 * @param {Event} evt - The triggering event; its default action is cancelled
 */
DefaultBanner.prototype.changeLanguage = function(lang, evt) {
  evt.preventDefault();

  var path = window.location.href;
  if (path.indexOf(window.banner_info.prefix) !== 0) {
    return;
  }

  // keep only the part of the location that follows the current prefix
  path = path.substring(window.banner_info.prefix.length);

  var localePrefixes = window.banner_info.locale_prefixes;
  if (localePrefixes && localePrefixes[lang]) {
    window.location.pathname = localePrefixes[lang] + path;
  }
};
|
||||
|
||||
/**
|
||||
* @desc Creates the underlying HTML elements comprising the banner
|
||||
* @param {string} bid - The id for the banner
|
||||
*/
|
||||
DefaultBanner.prototype.createBanner = function(bid) {
|
||||
this.banner = document.createElement('wb_div', true);
|
||||
this.banner.setAttribute('id', bid);
|
||||
this.banner.setAttribute('lang', 'en');
|
||||
|
||||
if (window.banner_info.logoImg) {
|
||||
var logo = document.createElement("a");
|
||||
logo.setAttribute("href", "/" + (window.banner_info.locale ? window.banner_info.locale + "/" : ""));
|
||||
logo.setAttribute("class", "_wb_linked_logo");
|
||||
|
||||
var logoContents = "";
|
||||
logoContents += "<img src='" + window.banner_info.logoImg + "' alt='" + window.banner_info.logoAlt + "'>";
|
||||
logoContents += "<img src='" + window.banner_info.logoImg + "' class='mobile' alt='" + window.banner_info.logoAlt + "'>";
|
||||
|
||||
logo.innerHTML = logoContents;
|
||||
this.banner.appendChild(logo);
|
||||
}
|
||||
|
||||
this.captureInfo = document.createElement("span");
|
||||
this.captureInfo.setAttribute("id", "_wb_capture_info");
|
||||
this.captureInfo.innerHTML = window.banner_info.loadingLabel;
|
||||
this.banner.appendChild(this.captureInfo);
|
||||
|
||||
var ancillaryLinks = document.createElement("div");
|
||||
ancillaryLinks.setAttribute("id", "_wb_ancillary_links");
|
||||
|
||||
var calendarImg = window.banner_info.calendarImg || window.banner_info.staticPrefix + "/calendar.svg";
|
||||
|
||||
var calendarLink = document.createElement("a");
|
||||
calendarLink.setAttribute("id", "calendarLink");
|
||||
calendarLink.setAttribute("href", "#");
|
||||
calendarLink.innerHTML = "<img src='" + calendarImg + "' alt='" + window.banner_info.calendarAlt + "'><span class='no-mobile'> " +window.banner_info.calendarLabel + "</span>";
|
||||
ancillaryLinks.appendChild(calendarLink);
|
||||
this.calendarLink = calendarLink;
|
||||
|
||||
if (typeof window.banner_info.locales !== "undefined" && window.banner_info.locales.length) {
|
||||
var locales = window.banner_info.locales;
|
||||
var languages = document.createElement("div");
|
||||
|
||||
var label = document.createElement("span");
|
||||
label.setAttribute("class", "no-mobile");
|
||||
label.appendChild(document.createTextNode(window.banner_info.choiceLabel + " "));
|
||||
languages.appendChild(label);
|
||||
|
||||
for(var i = 0; i < locales.length; i++) {
|
||||
var locale = locales[i];
|
||||
var langLink = document.createElement("a");
|
||||
langLink.setAttribute("href", "#");
|
||||
langLink.addEventListener("click", this.changeLanguage.bind(this, locale));
|
||||
langLink.appendChild(document.createTextNode(locale));
|
||||
|
||||
languages.appendChild(langLink);
|
||||
if (i !== locales.length - 1) {
|
||||
languages.appendChild(document.createTextNode(" / "));
|
||||
}
|
||||
}
|
||||
|
||||
ancillaryLinks.appendChild(languages);
|
||||
}
|
||||
|
||||
this.banner.appendChild(ancillaryLinks);
|
||||
|
||||
document.body.insertBefore(this.banner, document.body.firstChild);
|
||||
};
|
||||
|
||||
/**
 * @desc Converts a timestamp to a date string. If is_gmt is truthy the
 * returned date string is the result of date.toUTCString(), otherwise it is
 * date.toLocaleString() for window.banner_info.locale
 * @param {?string} ts - The timestamp to receive the correct date string for,
 * read as YYYYMMDDhhmmss; shorter values are padded to 14 digits
 * @param {boolean} is_gmt - Is the returned date string to be in GMT time
 * @returns {string} The formatted date, or '' when no timestamp is given
 */
DefaultBanner.prototype.ts_to_date = function(ts, is_gmt) {
  if (!ts) {
    return '';
  }

  if (ts.length < 14) {
    // Pad with the earliest valid value of each missing field. Month and day
    // start at 01: padding them with 00 would produce an Invalid Date.
    ts += '00000101000000'.slice(ts.length);
  }

  var datestr =
    ts.substring(0, 4) +
    '-' +
    ts.substring(4, 6) +
    '-' +
    ts.substring(6, 8) +
    'T' +
    ts.substring(8, 10) +
    ':' +
    ts.substring(10, 12) +
    ':' +
    ts.substring(12, 14) +
    '-00:00';

  var date = new Date(datestr);

  if (is_gmt) {
    // toUTCString is the standard name of the deprecated toGMTString
    return date.toUTCString();
  }
  return date.toLocaleString(window.banner_info.locale);
};
|
||||
|
||||
/**
 * @desc Updates the contents displayed by the banner.
 * Without a url the loading label is shown and the banner is marked as not
 * set; without a timestamp the banner is left untouched. Otherwise the
 * title (or, failing that, the url) and the capture date are rendered, the
 * calendar link is pointed at the url, and the document title is updated.
 * @param {?string} url - The URL of the replayed page to be displayed in the banner
 * @param {?string} ts - A timestamp to be displayed in the banner
 * @param {boolean} is_live - Are we in live mode
 * @param {?string} title - The title of the replayed page to be displayed in the banner
 */
DefaultBanner.prototype.set_banner = function(url, ts, is_live, title) {
  if (!url) {
    this.captureInfo.innerHTML = window.banner_info.loadingLabel;
    this.bannerUrlSet = false;
    return;
  }

  if (!ts) {
    return;
  }

  var capture_str = title ? title : url;
  var title_str = capture_str;

  // The title and url originate from the replayed page, so they are inserted
  // as text / attribute values through the DOM API rather than being
  // concatenated into markup, where they could inject HTML.
  var titleElem = document.createElement('b');
  titleElem.setAttribute('id', 'title_or_url');
  titleElem.setAttribute('title', capture_str);
  titleElem.appendChild(document.createTextNode(capture_str));

  var dateElem = document.createElement('span');
  dateElem.setAttribute('class', '_wb_capture_date');

  if (is_live) {
    title_str = window.banner_info.liveMsg + ' ' + title_str;
    var liveElem = document.createElement('b');
    // liveMsg comes from the banner configuration and is still rendered as
    // markup, exactly as before
    liveElem.innerHTML = window.banner_info.liveMsg + ' ';
    dateElem.appendChild(liveElem);
  }

  dateElem.appendChild(
    document.createTextNode(this.ts_to_date(ts, window.banner_info.is_gmt))
  );

  this.calendarLink.setAttribute('href', window.banner_info.prefix + '*/' + url);
  this.calendarLink.style.display = is_live ? 'none' : '';

  // replace whatever the banner showed before
  while (this.captureInfo.firstChild) {
    this.captureInfo.removeChild(this.captureInfo.firstChild);
  }
  this.captureInfo.appendChild(titleElem);
  this.captureInfo.appendChild(dateElem);

  window.document.title = title_str;

  this.bannerUrlSet = true;
};
|
||||
|
||||
// all banners will expose themselves by adding themselves as WBBanner on window
|
||||
window.WBBanner = new DefaultBanner();
|
||||
|
||||
// if wbinfo.url is set and not-framed, init banner in content frame
|
||||
if (window.wbinfo && window.wbinfo.url && !window.wbinfo.is_framed) {
|
||||
if (document.readyState === "loading") {
|
||||
document.addEventListener("DOMContentLoaded", function() {
|
||||
window.WBBanner.init();
|
||||
});
|
||||
} else {
|
||||
window.WBBanner.init();
|
||||
}
|
||||
}
|
||||
|
||||
})();
|
160
pywb/static/loading-spinner/loading-spinner.js
Normal file
160
pywb/static/loading-spinner/loading-spinner.js
Normal file
@ -0,0 +1,160 @@
|
||||
// Edge length of the spinner (and height of its mask) when config.isSmall is set
const smallSize = "75px";

/**
 * A loading indicator made of four rotating rings and a text label, shown
 * on a translucent mask. Only one instance may be created per page; the
 * constructor injects the required styles and DOM immediately.
 *
 * Recognised config keys (all optional):
 *   initialState      - whether the spinner starts visible (default true)
 *   animationDuration - fade duration in milliseconds (default 500)
 *   text              - label shown inside the spinner (default 'Loading...')
 *   isSmall           - use the small layout instead of the full viewport
 */
class LoadingSpinner {
  static #instanceCount = 0;

  /**
   * @param {Object} [config] - overrides for the defaults listed above
   * @throws {Error} if a spinner has already been created
   */
  constructor(config={}) {
    this.config = {initialState:true, animationDuration:500, text:'Loading...', ...config};

    if (LoadingSpinner.#instanceCount > 0) {
      throw new Error('Cannot make a second loading spinner (aka progress indicator)');
    }
    LoadingSpinner.#instanceCount++;

    // random suffix for the generated class names
    const uuid = Math.floor(Math.random()*1000);
    this.classes = {
      el: `loading-spinner-${uuid}`,
      mask: `loading-spinner-mask-${uuid}`,
      hidden: `hidden-${uuid}`,
      spinning: `spinning-${uuid}`
    };

    // read from the merged config so the default applies when the caller
    // did not pass initialState
    this.state = this.config.initialState;
    this.addStyles();
    this.addDom();
  }

  /** Switches the spinner off when it is on, and on when it is off. */
  toggle() {
    if (this.state) {
      this.setOff();
    } else {
      this.setOn();
    }
  }

  /** Shows the mask, then starts the fade-in shortly afterwards. */
  setOn() {
    this.state = true;
    this.el.classList.remove(this.classes.hidden);
    setTimeout(() => {
      // skip if the spinner was switched off again in the meantime
      if (this.state) {
        this.el.classList.add(this.classes.spinning);
      }
    }, 10);
  }

  /** Starts the fade-out, then hides the mask once the animation is over. */
  setOff() {
    this.state = false;
    this.el.classList.remove(this.classes.spinning);
    setTimeout(() => {
      // skip if the spinner was switched on again during the fade-out
      if (!this.state) {
        this.el.classList.add(this.classes.hidden);
      }
    }, this.config.animationDuration);
  }

  /** Builds the mask element and appends it to the document body. */
  addDom() {
    const text = this.config.text;
    const dom = `
      <div class="${this.classes.mask} ${this.classes[this.config.initialState ? 'spinning':'hidden']}">
        <div class="${this.classes.el}">
          <div data-loading-spinner="circle1"></div>
          <div data-loading-spinner="circle2"></div>
          <div data-loading-spinner="circle3"></div>
          <div data-loading-spinner="circle4"></div>
          <span data-loading-spinner="text">${text}</span>
        </div>
      </div>`;
    const wrapEl = document.createElement('div');
    wrapEl.innerHTML = dom;
    // this.el is the mask: the outermost element of the markup above
    this.el = wrapEl.firstElementChild;
    document.getElementsByTagName('body')[0].appendChild(this.el);
  }

  /** Appends a <style> element to the head and fills it with the spinner rules. */
  addStyles() {
    const duration = this.config.animationDuration;
    const stylesheetEl = document.createElement('style');
    document.head.appendChild(stylesheetEl);

    const rules = [`
      .${this.classes.mask} {
        position: fixed;
        top: 0;
        left: 0;
        width: 100vw;
        height: ${this.config.isSmall ? smallSize : "100vh"};
        z-index: 900;

        display: flex;
        justify-content: center;
        align-items: center;

        background-color: rgba(255,255,255, .85);

        opacity: 0;
        transition: opacity ${duration}ms ease-in;
      }`,`
      .${this.classes.mask}.${this.classes.spinning} {
        opacity: 1;
      }`,`
      .${this.classes.mask}.${this.classes.hidden} {
        display: none;
      }`,`
      .${this.classes.el} {
        position: relative;
        display: flex;
        justify-content: center;
        align-items: center;
        width: ${this.config.isSmall ? smallSize : "200px"};
        height: ${this.config.isSmall ? smallSize : "200px"};
      }`,`
      [data-loading-spinner^=circle] {
        position: absolute;
        margin: 0;
        border-radius: 50%;
        border-left-color: transparent;
        border-right-color: transparent;
      }`,`
      [data-loading-spinner=circle1] {
        border: 3px solid #444444;/* #0D4B9F; */
        width: 70%;
        height: 70%;
        animation: rotate 2s cubic-bezier(0.26, 1.36, 0.74, -0.29) infinite;
      }`,`
      [data-loading-spinner=circle2] {
        border: 3px solid #ddd;/* #E0EDFF; */
        width: 80%;
        height: 80%;
        animation: rotateReverse 2s cubic-bezier(0.26, 1.36, 0.74, -0.29) infinite;
      }`,`
      [data-loading-spinner=circle3] {
        border: 3px solid #656565;/* #005CDC; */
        width: 90%;
        height: 90%;
        animation: rotate 2s cubic-bezier(0.26, 1.36, 0.74, -0.29) infinite;
      }`,`
      [data-loading-spinner=circle4] {
        border: 3px solid #aaa; /* #94B6E5; */
        width: 100%;
        height: 100%;
        animation: rotateReverse 2s cubic-bezier(0.26, 1.36, 0.74, -0.29) infinite;
      }`,`
      @keyframes rotate {
        from {
          transform: rotateZ(-360deg)
        }
        to {
          transform: rotateZ(0deg)
        }
      }`,`
      @keyframes rotateReverse {
        from {
          transform: rotateZ(360deg)
        }
        to {
          transform: rotateZ(0deg)
        }
      }`,`
      [data-loading-spinner=text] {
        font-size: 15px;
      }`];
    rules.forEach(rule => stylesheetEl.sheet.insertRule(rule));
  }
}
|
12
pywb/static/loading-spinner/test.html
Normal file
12
pywb/static/loading-spinner/test.html
Normal file
@ -0,0 +1,12 @@
|
||||
<html>
|
||||
<head>
|
||||
<script src="loading-spinner.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
<button onclick="loadingSpinner.setOn()">load...</button>
|
||||
<script>
|
||||
const loadingSpinner = new LoadingSpinner();
|
||||
loadingSpinner.el.addEventListener('click', e => loadingSpinner.setOff());
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
BIN
pywb/static/pywb-logo-sm.png
Normal file
BIN
pywb/static/pywb-logo-sm.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 4.5 KiB |
BIN
pywb/static/pywb-logo.png
Normal file
BIN
pywb/static/pywb-logo.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 10 KiB |
@ -57,14 +57,6 @@ function RenderCalendar(init) {
|
||||
};
|
||||
// regex for extracting the filter constraints and filter mods to human explanation
|
||||
this.filterRE = /filter([^a-z]+)([a-z]+):(.+)/i;
|
||||
this.filterMods = {
|
||||
'=': 'Contains',
|
||||
'==': 'Matches Exactly',
|
||||
'=~': 'Matches Regex',
|
||||
'=!': 'Does Not Contains',
|
||||
'=!=': 'Is Not',
|
||||
'=!~': 'Does Not Begins With'
|
||||
};
|
||||
this.text = init.text;
|
||||
this.versionString = null;
|
||||
}
|
||||
@ -371,16 +363,13 @@ RenderCalendar.prototype.createContainers = function() {
|
||||
},
|
||||
{ tag: 'textNode', value: ' ' },
|
||||
{
|
||||
tag: 'b',
|
||||
child: {
|
||||
tag: 'textNode',
|
||||
value: '',
|
||||
ref: function(refToElem) {
|
||||
renderCal.containers.versionsTextNode = refToElem;
|
||||
}
|
||||
}
|
||||
},
|
||||
{ tag: 'textNode', value: ' of ' + this.queryInfo.url }
|
||||
{ tag: 'b', innerText: ' ' + this.queryInfo.url }
|
||||
]
|
||||
});
|
||||
// create the row that will hold the results of the regular query
|
||||
@ -436,15 +425,14 @@ RenderCalendar.prototype.createContainers = function() {
|
||||
return;
|
||||
}
|
||||
// create the advanced results query info DOM structure
|
||||
var forString = ' for ';
|
||||
var forElems;
|
||||
|
||||
if (this.queryInfo.searchParams.matchType) {
|
||||
forString = ' for matching ';
|
||||
forString = ' ' + this.text.matching + ' ';
|
||||
forElems = [
|
||||
{ tag: 'b', innerText: this.queryInfo.url },
|
||||
{ tag: 'textNode', value: ' by ' },
|
||||
{ tag: 'b', innerText: this.queryInfo.searchParams.matchType }
|
||||
{ tag: 'textNode', value: ' ' + this.text.by + ' ' },
|
||||
{ tag: 'b', innerText: this.text.types[this.queryInfo.searchParams.matchType] }
|
||||
];
|
||||
} else {
|
||||
forElems = [{ tag: 'b', innerText: this.queryInfo.url }];
|
||||
@ -463,13 +451,13 @@ RenderCalendar.prototype.createContainers = function() {
|
||||
},
|
||||
{
|
||||
tag: 'b',
|
||||
children: [
|
||||
{
|
||||
child: {
|
||||
tag: 'textNode',
|
||||
value: '',
|
||||
ref: function(refToElem) {
|
||||
renderCal.containers.countTextNode = refToElem;
|
||||
}
|
||||
}
|
||||
},
|
||||
{ tag: 'textNode', value: ' ' },
|
||||
{
|
||||
@ -478,8 +466,6 @@ RenderCalendar.prototype.createContainers = function() {
|
||||
ref: function(refToElem) {
|
||||
renderCal.containers.versionsTextNode = refToElem;
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{ tag: 'textNode', value: forString }
|
||||
].concat(forElems)
|
||||
@ -508,7 +494,7 @@ RenderCalendar.prototype.createContainers = function() {
|
||||
{
|
||||
tag: 'p',
|
||||
className: 'text-center mb-0 mt-1',
|
||||
innerText: 'Filtering by'
|
||||
innerText: filteringBy
|
||||
},
|
||||
{
|
||||
tag: 'ul',
|
||||
@ -614,13 +600,13 @@ RenderCalendar.prototype.renderAdvancedSearchPart = function(cdxObj) {
|
||||
if (cdxObj.mime) {
|
||||
displayedInfo.push({
|
||||
tag: 'small',
|
||||
innerText: 'Mime Type: ' + cdxObj.mime
|
||||
innerText: this.text.mimeType + cdxObj.mime
|
||||
});
|
||||
}
|
||||
if (cdxObj.status) {
|
||||
displayedInfo.push({
|
||||
tag: 'small',
|
||||
innerText: 'HTTP Status: ' + cdxObj.status
|
||||
innerText: this.text.httpStatus + cdxObj.status
|
||||
});
|
||||
}
|
||||
displayedInfo.push({
|
||||
@ -785,6 +771,11 @@ RenderCalendar.prototype.addRegYearMonthDayListItem = function(
|
||||
a[href="replay url"]
|
||||
span[id=count_ts].badge.badge-info.badge-pill.float-right
|
||||
*/
|
||||
const options = {
|
||||
dateStyle: 'long',
|
||||
timeStyle: 'medium',
|
||||
};
|
||||
var dateTimeString = this.tsToDate(cdxObj.timestamp, false, options);
|
||||
this.createAndAddElementTo(ymlDL, {
|
||||
tag: 'li',
|
||||
className: 'list-group-item',
|
||||
@ -795,17 +786,7 @@ RenderCalendar.prototype.addRegYearMonthDayListItem = function(
|
||||
href: this.prefix + cdxObj.timestamp + '/' + cdxObj.url,
|
||||
target: '_blank'
|
||||
},
|
||||
innerText:
|
||||
timeInfo.month +
|
||||
' ' +
|
||||
timeInfo.day +
|
||||
this.dateOrdinal(timeInfo.day) +
|
||||
', ' +
|
||||
timeInfo.year +
|
||||
' ' +
|
||||
' at ' +
|
||||
timeInfo.time +
|
||||
' '
|
||||
innerText: dateTimeString
|
||||
},
|
||||
{
|
||||
tag: 'span',
|
||||
@ -960,7 +941,7 @@ RenderCalendar.prototype.niceFilterDisplay = function() {
|
||||
filterList.push({
|
||||
tag: 'li',
|
||||
className: 'list-group-item',
|
||||
innerText: match[2] + ' ' + this.filterMods[match[1]] + ' ' + match[3]
|
||||
innerText: match[2] + ' ' + filterMods[match[1]] + ' "' + match[3] + '"'
|
||||
});
|
||||
}
|
||||
}
|
||||
@ -975,11 +956,11 @@ RenderCalendar.prototype.niceDateRange = function() {
|
||||
var from = this.queryInfo.searchParams.from;
|
||||
var to = this.queryInfo.searchParams.to;
|
||||
if (from && to) {
|
||||
return 'From ' + from + ' to ' + to;
|
||||
return [text.from, from, text.until, to].join(' ');
|
||||
} else if (from) {
|
||||
return 'From ' + from + ' until ' + 'present';
|
||||
return [text.from, from, text.until, text.present].join(' ');
|
||||
}
|
||||
return 'From earliest until ' + to;
|
||||
return [text.from, text.earliest, text.until, to].join(' ');
|
||||
};
|
||||
|
||||
/**
|
||||
@ -1020,32 +1001,14 @@ RenderCalendar.prototype.displayYearMonthDaysListId = function(year, month) {
|
||||
return '_' + year + '-' + month + '-Display-Days-List';
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns a numbers ordinal string
|
||||
* @param {number} d - The number to receive the ordinal string for
|
||||
* @returns {string}
|
||||
*/
|
||||
RenderCalendar.prototype.dateOrdinal = function(d) {
|
||||
if (d > 3 && d < 21) return 'th';
|
||||
switch (d % 10) {
|
||||
case 1:
|
||||
return 'st';
|
||||
case 2:
|
||||
return 'nd';
|
||||
case 3:
|
||||
return 'rd';
|
||||
default:
|
||||
return 'th';
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Converts the supplied timestamp to either a local date string or a GMT string (if is_gmt is true)
|
||||
* @param {string} ts - The timestamp to be converted to a string
|
||||
* @param {boolean} [is_gmt] - Should the timestamp be converted to a gmt string
|
||||
* @param {Object} [options] - String formatting options
|
||||
* @returns {string}
|
||||
*/
|
||||
RenderCalendar.prototype.tsToDate = function(ts, is_gmt) {
|
||||
RenderCalendar.prototype.tsToDate = function(ts, is_gmt, options) {
|
||||
if (ts.length < 14) return ts;
|
||||
var datestr =
|
||||
ts.substring(0, 4) +
|
||||
@ -1062,7 +1025,7 @@ RenderCalendar.prototype.tsToDate = function(ts, is_gmt) {
|
||||
'-00:00';
|
||||
|
||||
var date = new Date(datestr);
|
||||
return is_gmt ? date.toGMTString() : date.toLocaleString();
|
||||
return is_gmt ? date.toUTCString() : date.toLocaleString(document.documentElement.lang, options);
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -1,14 +1,6 @@
|
||||
var dtRE = /^\d{4,14}$/;
|
||||
var didSetWasValidated = false;
|
||||
var showBadDateTimeClass = 'show-optional-bad-input';
|
||||
// Human-readable label for each filter modifier, looked up as
// filterMods[modifier] when a filter is rendered in the list
var filterMods = {
  '=': 'Contains',
  '==': 'Matches Exactly',
  '=~': 'Matches Regex',
  '=!': 'Does Not Contain',
  '=!=': 'Is Not',
  // negation of '=~' (regex match)
  '=!~': 'Does Not Match Regex'
};
|
||||
|
||||
var elemIds = {
|
||||
filtering: {
|
||||
@ -22,16 +14,34 @@ var elemIds = {
|
||||
},
|
||||
dateTime: {
|
||||
from: 'dt-from',
|
||||
fromTime: 'ts-from',
|
||||
fromBad: 'dt-from-bad',
|
||||
to: 'dt-to',
|
||||
toTime: 'ts-to',
|
||||
toBad: 'dt-to-bad'
|
||||
},
|
||||
match: 'match-type-select',
|
||||
url: 'search-url',
|
||||
form: 'search-form',
|
||||
resultsNewWindow: 'open-results-new-window'
|
||||
resultsNewWindow: 'open-results-new-window',
|
||||
advancedOptions: 'advanced-options',
|
||||
resetSearchForm: 'reset-search-form',
|
||||
};
|
||||
|
||||
/**
 * Empties the URL, match type and from/to date and time inputs of the
 * search form, then hands the event to clearFilters.
 * @param {Event} event - forwarded unchanged to clearFilters
 */
function resetSearchForm(event) {
  const fieldIds = [
    elemIds.url,
    elemIds.match,
    elemIds.dateTime.from,
    elemIds.dateTime.fromTime,
    elemIds.dateTime.to,
    elemIds.dateTime.toTime,
  ];
  fieldIds.forEach(function(fieldId) {
    document.getElementById(fieldId).value = '';
  });
  clearFilters(event);
}
|
||||
|
||||
function makeCheckDateRangeChecker(dtInputId, dtBadNotice) {
|
||||
var dtInput = document.getElementById(dtInputId);
|
||||
dtInput.onblur = function() {
|
||||
@ -64,7 +74,7 @@ function makeCheckDateRangeChecker(dtInputId, dtBadNotice) {
|
||||
|
||||
function createAndAddNoFilter(filterList) {
|
||||
var nothing = document.createElement('li');
|
||||
nothing.innerText = 'No Filter';
|
||||
nothing.innerText = noFilter;
|
||||
nothing.id = elemIds.filtering.nothing;
|
||||
filterList.appendChild(nothing);
|
||||
}
|
||||
@ -77,19 +87,24 @@ function addFilter(event) {
|
||||
if (!expr) return;
|
||||
var filterExpr = 'filter' + modifier + by + ':' + expr;
|
||||
var filterList = document.getElementById(elemIds.filtering.list);
|
||||
var previousFilters = filterList.children;
|
||||
for (var i = 0; i < previousFilters.length; ++i) {
|
||||
var filterData = previousFilters[i].dataset;
|
||||
if (filterData && filterData.filter && filterData.filter == filterExpr) return;
|
||||
}
|
||||
var filterNothing = document.getElementById(elemIds.filtering.nothing);
|
||||
if (filterNothing) {
|
||||
filterList.removeChild(filterNothing);
|
||||
}
|
||||
var li = document.createElement('li');
|
||||
li.innerText =
|
||||
'By ' +
|
||||
by[0].toUpperCase() +
|
||||
by.substr(1) +
|
||||
' ' +
|
||||
filterMods[modifier] +
|
||||
' ' +
|
||||
expr;
|
||||
' "' +
|
||||
expr +
|
||||
'"';
|
||||
li.dataset.filter = filterExpr;
|
||||
var nukeButton = document.createElement('button');
|
||||
nukeButton.type = 'button';
|
||||
@ -109,6 +124,7 @@ function addFilter(event) {
|
||||
};
|
||||
li.appendChild(nukeButton);
|
||||
filterList.appendChild(li);
|
||||
return true;
|
||||
}
|
||||
|
||||
function clearFilters(event) {
|
||||
@ -122,7 +138,7 @@ function clearFilters(event) {
|
||||
}
|
||||
|
||||
function performQuery(url) {
|
||||
var query = [window.wb_prefix + '*?url=' + url];
|
||||
var query = [window.wb_prefix + '*?url=' + encodeURIComponent(url)];
|
||||
var filterExpressions = document.getElementById(elemIds.filtering.list)
|
||||
.children;
|
||||
if (filterExpressions.length) {
|
||||
@ -139,11 +155,13 @@ function performQuery(url) {
|
||||
}
|
||||
var fromT = document.getElementById(elemIds.dateTime.from).value;
|
||||
if (fromT) {
|
||||
query.push('from=' + fromT.trim());
|
||||
fromT += document.getElementById(elemIds.dateTime.fromTime).value;
|
||||
query.push('from=' + fromT.replace(/[^0-9]/g, ''));
|
||||
}
|
||||
var toT = document.getElementById(elemIds.dateTime.to).value;
|
||||
if (toT) {
|
||||
query.push('to=' + toT.trim());
|
||||
toT += document.getElementById(elemIds.dateTime.toTime).value;
|
||||
query.push('to=' + toT.replace(/[^0-9]/g, ''));
|
||||
}
|
||||
var builtQuery = query.join('&');
|
||||
if (document.getElementById(elemIds.resultsNewWindow).checked) {
|
||||
@ -158,6 +176,24 @@ function performQuery(url) {
|
||||
}
|
||||
}
|
||||
|
||||
function validateFields(form) {
|
||||
if (!didSetWasValidated) {
|
||||
form.classList.add('was-validated');
|
||||
didSetWasValidated = true;
|
||||
}
|
||||
}
|
||||
|
||||
function submitForm(event, form, searchURLInput) {
|
||||
event.preventDefault();
|
||||
event.stopPropagation();
|
||||
var url = searchURLInput.value;
|
||||
if (!url) {
|
||||
validateFields(form);
|
||||
return;
|
||||
}
|
||||
performQuery(url);
|
||||
}
|
||||
|
||||
$(document).ready(function() {
|
||||
$('[data-toggle="tooltip"]').tooltip({
|
||||
container: 'body',
|
||||
@ -171,21 +207,21 @@ $(document).ready(function() {
|
||||
elemIds.dateTime.to,
|
||||
document.getElementById(elemIds.dateTime.toBad)
|
||||
);
|
||||
document.getElementById(elemIds.resetSearchForm).onclick = resetSearchForm;
|
||||
document.getElementById(elemIds.filtering.add).onclick = addFilter;
|
||||
document.getElementById(elemIds.filtering.clear).onclick = clearFilters;
|
||||
var searchURLInput = document.getElementById(elemIds.url);
|
||||
var form = document.getElementById(elemIds.form);
|
||||
form.addEventListener('submit', function(event) {
|
||||
submitForm(event, form, searchURLInput);
|
||||
});
|
||||
var filteringExpression = document.getElementById(elemIds.filtering.expression);
|
||||
filteringExpression.addEventListener("keypress", function(event) {
|
||||
if (event.key === "Enter") {
|
||||
event.preventDefault();
|
||||
event.stopPropagation();
|
||||
var url = searchURLInput.value;
|
||||
if (!url) {
|
||||
if (!didSetWasValidated) {
|
||||
form.classList.add('was-validated');
|
||||
didSetWasValidated = true;
|
||||
if (! addFilter()) {
|
||||
submitForm(event, form, searchURLInput);
|
||||
}
|
||||
return;
|
||||
}
|
||||
performQuery(url);
|
||||
});
|
||||
});
|
||||
|
15043
pywb/static/vue/vueui.js
Normal file
15043
pywb/static/vue/vueui.js
Normal file
File diff suppressed because one or more lines are too long
4
pywb/static/vue_banner.css
Normal file
4
pywb/static/vue_banner.css
Normal file
@ -0,0 +1,4 @@
|
||||
#wb_iframe_div, #replay_iframe {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
}
|
@ -65,6 +65,8 @@ ContentFrame.prototype.init_iframe = function() {
|
||||
return;
|
||||
}
|
||||
|
||||
this.iframe.setAttribute("name", "___wb_replay_top_frame");
|
||||
|
||||
this.extract_prefix();
|
||||
if (window.WBBanner) {
|
||||
this.wbBanner = window.WBBanner;
|
||||
@ -229,7 +231,13 @@ ContentFrame.prototype.initBannerUpdateCheck = function(newUrl, newTs) {
|
||||
* operating in live mode
|
||||
*/
|
||||
ContentFrame.prototype.load_url = function(newUrl, newTs) {
|
||||
this.iframe.src = this.make_url(newUrl, newTs, true);
|
||||
var newUrl = this.make_url(newUrl, newTs, true);
|
||||
if (this.iframe.src === newUrl) {
|
||||
return;
|
||||
}
|
||||
|
||||
this.iframe.src = newUrl;
|
||||
|
||||
if (this.wbBanner) {
|
||||
this.initBannerUpdateCheck(newUrl, newTs);
|
||||
}
|
||||
|
File diff suppressed because one or more lines are too long
@ -1,6 +1,6 @@
|
||||
/*
|
||||
Wombat.js client-side rewriting engine for web archive replay
|
||||
Copyright (C) 2014-2020 Webrecorder Software, Rhizome, and Contributors. Released under the GNU Affero General Public License.
|
||||
Copyright (C) 2014-2024 Webrecorder Software, Rhizome, and Contributors. Released under the GNU Affero General Public License.
|
||||
|
||||
This file is part of wombat.js, see https://github.com/webrecorder/wombat.js for the full source
|
||||
Wombat.js is part of the Webrecorder project (https://github.com/webrecorder)
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
Wombat.js client-side rewriting engine for web archive replay
|
||||
Copyright (C) 2014-2020 Webrecorder Software, Rhizome, and Contributors. Released under the GNU Affero General Public License.
|
||||
Copyright (C) 2014-2024 Webrecorder Software, Rhizome, and Contributors. Released under the GNU Affero General Public License.
|
||||
|
||||
This file is part of wombat.js, see https://github.com/webrecorder/wombat.js for the full source
|
||||
Wombat.js is part of the Webrecorder project (https://github.com/webrecorder)
|
||||
|
@ -1,4 +1,5 @@
|
||||
{% if not env.pywb_proxy_magic or config.proxy.enable_banner | default(true) %}
|
||||
{% autoescape false %}
|
||||
<script>
|
||||
window.banner_info = {
|
||||
is_gmt: true,
|
||||
@ -16,13 +17,16 @@ window.banner_info = {
|
||||
locales: {{ locales }},
|
||||
locale_prefixes: {{ get_locale_prefixes() | tojson }},
|
||||
prefix: "{{ wb_prefix }}",
|
||||
staticPrefix: "{{ static_prefix }}"
|
||||
staticPrefix: "{{ static_prefix }}",
|
||||
|
||||
logoImg: "{{ ui.logo }}"
|
||||
};
|
||||
</script>
|
||||
<script src="{{ static_prefix }}/loading-spinner/loading-spinner.js"></script>
|
||||
<script src="{{ static_prefix }}/vue/vueui.js"></script>
|
||||
<link rel="stylesheet" href='{{ static_prefix }}/vue_banner.css'/>
|
||||
|
||||
<!-- default banner, create through js -->
|
||||
<script src='{{ static_prefix }}/default_banner.js'> </script>
|
||||
<link rel='stylesheet' href='{{ static_prefix }}/default_banner.css'/>
|
||||
|
||||
{% include 'bootstrap_jquery.html' ignore missing %}
|
||||
|
||||
{% endautoescape %}
|
||||
{% endif %}
|
||||
|
@ -1,17 +1,12 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="{{ env.pywb_lang | default('en') }}">
|
||||
<html lang="{{ env.pywb_lang | default(default_locale) }}">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=UTF-8;charset=utf-8"/>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
|
||||
<title>{% block title %}{% endblock %}</title>
|
||||
|
||||
<!-- jquery and bootstrap dependencies query view -->
|
||||
<link rel="stylesheet" href="{{ static_prefix }}/css/bootstrap.min.css"/>
|
||||
<link rel="stylesheet" href="{{ static_prefix }}/css/font-awesome.min.css">
|
||||
|
||||
<script src="{{ static_prefix }}/js/jquery-latest.min.js"></script>
|
||||
<script src="{{ static_prefix }}/js/bootstrap.min.js"></script>
|
||||
{% include 'bootstrap_jquery.html' ignore missing %}
|
||||
|
||||
{% block head %}
|
||||
{% include 'head.html' ignore missing %}
|
||||
|
6
pywb/templates/bootstrap_jquery.html
Normal file
6
pywb/templates/bootstrap_jquery.html
Normal file
@ -0,0 +1,6 @@
|
||||
<link rel="stylesheet" href="{{ static_prefix }}/css/bootstrap.min.css"/>
|
||||
<link rel="stylesheet" href="{{ static_prefix }}/css/font-awesome.min.css">
|
||||
<link rel="stylesheet" href="{{ static_prefix }}/css/base.css">
|
||||
|
||||
<script src="{{ static_prefix }}/js/jquery-latest.min.js"></script>
|
||||
<script src="{{ static_prefix }}/js/bootstrap.min.js"></script>
|
1
pywb/templates/custom_banner.html
Normal file
1
pywb/templates/custom_banner.html
Normal file
@ -0,0 +1 @@
|
||||
<!-- Add custom banner here. Used only in non-framed replay. -->
|
@ -1,29 +1,29 @@
|
||||
{% extends "base.html" %}
|
||||
{% block title %}Pywb Error{% endblock %}
|
||||
{% block title %}{{ _('Pywb Error') }}{% endblock %}
|
||||
{% block body %}
|
||||
<div class="container text-danger">
|
||||
<div class="container text-danger error">
|
||||
<div class="row justify-content-center">
|
||||
<h2 class="display-2">Pywb Error</h2>
|
||||
<h2 class="display-2">{{ _('Pywb Error') }}</h2>
|
||||
</div>
|
||||
<div class="row">
|
||||
<div class="col-12 text-center">
|
||||
{% if err_status == 451 %}
|
||||
<p class="lead">Access Blocked to {{ err_msg }}</p>
|
||||
<p class="lead">{% trans %}Access Blocked to {{ err_msg }}{% endtrans %}</p>
|
||||
|
||||
{% elif err_status == 404 and err_details == 'coll_not_found' %}
|
||||
<p>Collection not found: <b>{{ err_msg }}</b></p>
|
||||
<p>{% trans %}Collection not found: <b>{{ err_msg }}{% endtrans %}</b></p>
|
||||
|
||||
<p><a href="/">See list of valid collections</a></p>
|
||||
<p><a href="/{{ env.pywb_lang | default('') }}">{{ _('See list of valid collections') }}</a></p>
|
||||
|
||||
{% elif err_status == 404 and err_details == 'static_file_not_found' %}
|
||||
<p>Static file not found: <b>{{ err_msg }}</b></p>
|
||||
<p>{% trans %}Static file not found: <b>{{ err_msg }}{% endtrans %}</b></p>
|
||||
|
||||
{% else %}
|
||||
|
||||
<p class="lead">{{ err_msg }}</p>
|
||||
|
||||
{% if err_details %}
|
||||
<p class="lead">Error Details:</p>
|
||||
<p class="lead">{% trans %}Error Details:{% endtrans %}</p>
|
||||
<pre>{{ err_details }}</pre>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
|
2
pywb/templates/footer.html
Normal file
2
pywb/templates/footer.html
Normal file
@ -0,0 +1,2 @@
|
||||
{# place content to be added at the very end of the <body> tag in this file below #}
|
||||
|
@ -14,11 +14,34 @@ html, body
|
||||
</style>
|
||||
<script src='{{ static_prefix }}/wb_frame.js'> </script>
|
||||
|
||||
{% autoescape false %}
|
||||
|
||||
{{ banner_html }}
|
||||
|
||||
{% include 'vue_loc.html' %}
|
||||
|
||||
</head>
|
||||
<body style="margin: 0px; padding: 0px;">
|
||||
|
||||
<div id="app" style="width: 100%; height: 200px"></div>
|
||||
<script>
|
||||
VueUI.main({
|
||||
staticPrefix: "{{ static_prefix }}",
|
||||
url: "{{ url }}",
|
||||
prefix: "{{ wb_prefix }}",
|
||||
timestamp: "{{ timestamp }}",
|
||||
logoUrl: "{{ ui.logo }}",
|
||||
navbarBackground: "{{ ui.navbar_background_hex | default('f8f9fa') }}",
|
||||
navbarColor: "{{ ui.navbar_color_hex | default('212529') }}",
|
||||
navbarLightButtons: "{{ ui.navbar_light_buttons }}",
|
||||
logoHomeUrl: "{{ ui.logo_home_url }}",
|
||||
disablePrinting: "{{ ui.disable_printing }}",
|
||||
allLocales: allLocales
|
||||
},
|
||||
"{{ env.pywb_lang | default('en') }}",
|
||||
i18nStrings);
|
||||
</script>
|
||||
|
||||
<div id="wb_iframe_div">
|
||||
<iframe id="replay_iframe" frameborder="0" seamless="seamless" scrolling="yes" class="wb_iframe" allow="autoplay; fullscreen"></iframe>
|
||||
</div>
|
||||
@ -31,3 +54,5 @@ html, body
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
{% endautoescape %}
|
||||
|
||||
|
1
pywb/templates/head.html
Normal file
1
pywb/templates/head.html
Normal file
@ -0,0 +1 @@
|
||||
{# place optional content to be injected into the <head> of every page in this file below #}
|
@ -1,3 +1,5 @@
|
||||
{% autoescape false %}
|
||||
|
||||
<!-- WB Insert -->
|
||||
<script>
|
||||
{% set urlsplit = cdx.url | urlsplit %}
|
||||
@ -25,6 +27,7 @@
|
||||
wbinfo.proxy_magic = "{{ env.pywb_proxy_magic }}";
|
||||
wbinfo.static_prefix = "{{ static_prefix }}/";
|
||||
wbinfo.enable_auto_fetch = {{ config.enable_auto_fetch | tobool }};
|
||||
wbinfo.target_frame = "___wb_replay_top_frame";
|
||||
</script>
|
||||
{% if env.pywb_proxy_magic %}
|
||||
{% set whichWombat = 'wombatProxyMode.js' %}
|
||||
@ -59,7 +62,13 @@
|
||||
|
||||
{% endif %}
|
||||
|
||||
{{ banner_html }}
|
||||
{% if not is_framed %}
|
||||
|
||||
{{ custom_banner_html }}
|
||||
|
||||
{% endif %}
|
||||
|
||||
{% endautoescape %}
|
||||
|
||||
<!-- End WB Insert -->
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user