mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 08:04:49 +01:00
Compare commits
53 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
7b0f8b5860 | ||
|
b44c93bf6e | ||
|
97fffe3a34 | ||
|
6205646b9b | ||
|
23891be2f1 | ||
|
b190dddee9 | ||
|
b9f1609df9 | ||
|
e89924bd39 | ||
|
b4c91c6633 | ||
|
1e2665af13 | ||
|
fee14d7fe8 | ||
|
5712945991 | ||
|
2fd6190b72 | ||
|
791a8d1033 | ||
|
86ee3bd752 | ||
|
d1e1636ae3 | ||
|
b4955cca66 | ||
|
f40e7ef18c | ||
|
6b4f9b323e | ||
|
7879dd0222 | ||
|
013746c10a | ||
|
79140441df | ||
|
af92a9726e | ||
|
83b2113be2 | ||
|
ed36830dc5 | ||
|
81b6a57dfb | ||
|
5c427b9ff2 | ||
|
454486bf75 | ||
|
b8693307d1 | ||
|
98be48d6e4 | ||
|
c441d83435 | ||
|
4a3e7ddff7 | ||
|
02288db81c | ||
|
4fc2b451d7 | ||
|
c8e78fd7c1 | ||
|
d44d640b93 | ||
|
03f9708d8d | ||
|
406fad95c2 | ||
|
d207c76bae | ||
|
131732d238 | ||
|
59d9beac05 | ||
|
0758e81b62 | ||
|
d392a8d908 | ||
|
9bc8a2e1ef | ||
|
43e5c8bac0 | ||
|
cdab280669 | ||
|
e6ec8b4aeb | ||
|
1790fd006a | ||
|
3d0673e32a | ||
|
3050fd2b2b | ||
|
3c94da04a2 | ||
|
2d19b6b18d | ||
|
6cc9cdc3ad |
2
.github/workflows/ci.yaml
vendored
2
.github/workflows/ci.yaml
vendored
@ -8,7 +8,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
max-parallel: 3
|
max-parallel: 3
|
||||||
matrix:
|
matrix:
|
||||||
python-version: [3.6, 3.7, 3.8, 3.9]
|
python-version: ['3.7', '3.8', '3.9', '3.10', '3.11']
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: checkout
|
- name: checkout
|
||||||
|
4
.gitignore
vendored
4
.gitignore
vendored
@ -53,3 +53,7 @@ git_hash.py
|
|||||||
|
|
||||||
# Sphinx documentation
|
# Sphinx documentation
|
||||||
docs/_build/*
|
docs/_build/*
|
||||||
|
|
||||||
|
# virtualenvs
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
44
CHANGES.rst
44
CHANGES.rst
@ -1,3 +1,41 @@
|
|||||||
|
pywb 2.7.3 changelist
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
* issue_792 catch warcio exception by @oskarhek in https://github.com/webrecorder/pywb/pull/793
|
||||||
|
* Add ui.logo_home_url as config.yaml option by @tw4l in https://github.com/webrecorder/pywb/pull/791
|
||||||
|
* [#795] Show error when adding duplicate warc file by @kuechensofa in https://github.com/webrecorder/pywb/pull/797
|
||||||
|
* Make search page more intuitive by @krakan in https://github.com/webrecorder/pywb/pull/794
|
||||||
|
* Modify search template buttons by @tw4l in https://github.com/webrecorder/pywb/pull/801
|
||||||
|
* [#804] Use default_locale when lang not set in the request by @krakan in https://github.com/webrecorder/pywb/pull/805
|
||||||
|
* feat: regex substitution on surt rules match by @mijho in https://github.com/webrecorder/pywb/pull/780
|
||||||
|
* Bump minimatch from 3.0.4 to 3.1.2 in /pywb/vueui by @dependabot in https://github.com/webrecorder/pywb/pull/777
|
||||||
|
* Bump decode-uri-component from 0.2.0 to 0.2.2 in /pywb/vueui by @dependabot in https://github.com/webrecorder/pywb/pull/786
|
||||||
|
* rules: add 'debugNoBatch' rewrite for fb and insta by @ikreymer in https://github.com/webrecorder/pywb/pull/806
|
||||||
|
* Vue main order by @tw4l in https://github.com/webrecorder/pywb/pull/809
|
||||||
|
* wombat: bump to 3.4.4 https://github.com/webrecorder/pywb/pull/808
|
||||||
|
|
||||||
|
pywb 2.7.2 changelist
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
* Fix regression introduced by improper wombat update in 2.7.1
|
||||||
|
* Fix `redirect_to_exact: false` functionality: if not set, UI will stay on current timestamp, but will display info on actual capture.
|
||||||
|
* Location bar nav now keeps current timestamp instead of defaulting to calendar view.
|
||||||
|
* 'Live' mode fixes, no longer cache live cdx entry, don't add timestamp when navigating in live mode without timestamp
|
||||||
|
* Calendar dropdown on replay now scrollable.
|
||||||
|
* Timeline toggle on replay is 'sticky', will stay on if toggled on replay.
|
||||||
|
* Capture text: use '|' as in 'Current Capture: [title] | [capture date]'
|
||||||
|
* Document title: Add 'Archived Page: ' prefix to avoid confusion with live pages.
|
||||||
|
|
||||||
|
pywb 2.7.1 changelist
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
* Add locale-dependent handling of first day of week by @krakan in https://github.com/webrecorder/pywb/pull/781
|
||||||
|
* Make filter expressions translatable by @krakan in https://github.com/webrecorder/pywb/pull/783
|
||||||
|
* Add title to top frame in framed replay
|
||||||
|
* Add missing tooltip translation strings
|
||||||
|
* Fix calendar and timeline rendering for replay URLs without a timestamp
|
||||||
|
* Update template documentation
|
||||||
|
|
||||||
pywb 2.7.0 changelist
|
pywb 2.7.0 changelist
|
||||||
~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
@ -1143,7 +1181,7 @@ pywb 0.9.6 changelist
|
|||||||
pywb 0.9.5 changelist
|
pywb 0.9.5 changelist
|
||||||
~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
* s3 loading: support ``s3://`` scheme in block loader, allowing for loading index and archive files from s3. ``boto`` library must be installed seperately
|
* s3 loading: support ``s3://`` scheme in block loader, allowing for loading index and archive files from s3. ``boto`` library must be installed separately
|
||||||
via ``pip install boto``. Attempt default boto auth path, and if that fails, attempt anonymous s3 connection.
|
via ``pip install boto``. Attempt default boto auth path, and if that fails, attempt anonymous s3 connection.
|
||||||
|
|
||||||
* Wombat/Client-Side Rewrite Customizations: New ``rewrite_opts.client`` settings from ``config.yaml`` are passed directly to wombat as json.
|
* Wombat/Client-Side Rewrite Customizations: New ``rewrite_opts.client`` settings from ``config.yaml`` are passed directly to wombat as json.
|
||||||
@ -1239,7 +1277,7 @@ pywb 0.9.1 changelist
|
|||||||
|
|
||||||
* cdx server query: add support for ``url=*.host`` and ``url=host/*`` as shortcuts for ``matchType=domain`` and ``matchType=prefix``
|
* cdx server query: add support for ``url=*.host`` and ``url=host/*`` as shortcuts for ``matchType=domain`` and ``matchType=prefix``
|
||||||
|
|
||||||
* zipnum cdx cluster: support loading index shared from prefix path instead of seperate location file.
|
* zipnum cdx cluster: support loading index shared from prefix path instead of separate location file.
|
||||||
|
|
||||||
The ``shard_index_loc`` config property may contain match and replace properties.
|
The ``shard_index_loc`` config property may contain match and replace properties.
|
||||||
Regex replacement is then used to obtain path prefix from the shard prefix path.
|
Regex replacement is then used to obtain path prefix from the shard prefix path.
|
||||||
@ -1605,7 +1643,7 @@ pywb 0.4.7 changelist
|
|||||||
|
|
||||||
* Rewrite: Parsing of html as raw bytes instead of decode/encode, detection still needed for non-ascii compatible encoding.
|
* Rewrite: Parsing of html as raw bytes instead of decode/encode, detection still needed for non-ascii compatible encoding.
|
||||||
|
|
||||||
* Indexing: Refactoring of cdx-indexer using a seperate 'archive record iterator' and pluggable cdx writer classes. Groundwork for creating custom indexers.
|
* Indexing: Refactoring of cdx-indexer using a separate 'archive record iterator' and pluggable cdx writer classes. Groundwork for creating custom indexers.
|
||||||
|
|
||||||
* Indexing: Support for 9 field cdx formats with -9 flag.
|
* Indexing: Support for 9 field cdx formats with -9 flag.
|
||||||
|
|
||||||
|
@ -51,13 +51,11 @@ The first time you run this command, it make take some time to build.
|
|||||||
Changes to the [Vue](https://vuejs.org/) frontend components require rebuilding the Vue bundle (`pywb/static/vue/vueui.js`) to take effect. After making changes to one or more Vue components, you can rebuild the static bundle and view the changes in your development environment like so:
|
Changes to the [Vue](https://vuejs.org/) frontend components require rebuilding the Vue bundle (`pywb/static/vue/vueui.js`) to take effect. After making changes to one or more Vue components, you can rebuild the static bundle and view the changes in your development environment like so:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cd pywb/vueui
|
./build-vue-ui.sh
|
||||||
yarn run build
|
docker compose up -d --build --force-recreate
|
||||||
cd ../..
|
|
||||||
docker compose up -d --force-recreate
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Changes that modify pywb's Python dependencies or the operating system may require rebuilding the container:
|
Changes that modify pywb's Python dependencies or the operating system also require rebuilding the container:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker compose up -d --build --force-recreate
|
docker compose up -d --build --force-recreate
|
||||||
|
12
README.rst
12
README.rst
@ -1,4 +1,4 @@
|
|||||||
Webrecorder pywb 2.7
|
Webrecorder pywb 2.8
|
||||||
====================
|
====================
|
||||||
|
|
||||||
.. image:: https://raw.githubusercontent.com/webrecorder/pywb/main/pywb/static/pywb-logo.png
|
.. image:: https://raw.githubusercontent.com/webrecorder/pywb/main/pywb/static/pywb-logo.png
|
||||||
@ -13,7 +13,7 @@ Web Archiving Tools for All
|
|||||||
|
|
||||||
`View the full pywb documentation <https://pywb.readthedocs.org>`_
|
`View the full pywb documentation <https://pywb.readthedocs.org>`_
|
||||||
|
|
||||||
**pywb** is a Python (2 and 3) web archiving toolkit for replaying web archives large and small as accurately as possible.
|
**pywb** is a Python 3 web archiving toolkit for replaying web archives large and small as accurately as possible.
|
||||||
The toolkit now also includes new features for creating high-fidelity web archives.
|
The toolkit now also includes new features for creating high-fidelity web archives.
|
||||||
|
|
||||||
This toolset forms the foundation of Webrecorder project, but also provides a generic web archiving toolkit
|
This toolset forms the foundation of Webrecorder project, but also provides a generic web archiving toolkit
|
||||||
@ -60,9 +60,7 @@ Installation for Deployment
|
|||||||
|
|
||||||
To install pywb for usage, you can use:
|
To install pywb for usage, you can use:
|
||||||
|
|
||||||
```shell
|
``pip install pywb``
|
||||||
pip install pywb
|
|
||||||
```
|
|
||||||
|
|
||||||
Note: depending on your Python installation, you may have to use `pip3` instead of `pip`.
|
Note: depending on your Python installation, you may have to use `pip3` instead of `pip`.
|
||||||
|
|
||||||
@ -70,9 +68,7 @@ Note: depending on your Python installation, you may have to use `pip3` instead
|
|||||||
Installation from local copy
|
Installation from local copy
|
||||||
----------------------------
|
----------------------------
|
||||||
|
|
||||||
```shell
|
``git clone https://github.com/webrecorder/pywb``
|
||||||
git clone https://github.com/webrecorder/pywb
|
|
||||||
```
|
|
||||||
|
|
||||||
To install from a locally cloned copy, install with ``pip install -e .`` or ``python setup.py install``.
|
To install from a locally cloned copy, install with ``pip install -e .`` or ``python setup.py install``.
|
||||||
|
|
||||||
|
7
build-vue-ui.sh
Executable file
7
build-vue-ui.sh
Executable file
@ -0,0 +1,7 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
CURR_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
|
||||||
|
|
||||||
|
cd $CURR_DIR/pywb/vueui/
|
||||||
|
yarn install
|
||||||
|
yarn run build
|
@ -6,9 +6,11 @@ debug: true
|
|||||||
# Uncomment to set banner colors and logo
|
# Uncomment to set banner colors and logo
|
||||||
# ui:
|
# ui:
|
||||||
# logo: path/relative/from/static/logo.png
|
# logo: path/relative/from/static/logo.png
|
||||||
|
# logo_home_url: https://example.com
|
||||||
# navbar_background_hex: 0c49b0
|
# navbar_background_hex: 0c49b0
|
||||||
# navbar_color_hex: fff
|
# navbar_color_hex: fff
|
||||||
# navbar_light_buttons: true
|
# navbar_light_buttons: true
|
||||||
|
# disable_printing: true
|
||||||
|
|
||||||
collections:
|
collections:
|
||||||
all: $all
|
all: $all
|
||||||
@ -16,9 +18,6 @@ collections:
|
|||||||
index_paths: ./sample_archive/cdx/
|
index_paths: ./sample_archive/cdx/
|
||||||
archive_paths: ./sample_archive/warcs/
|
archive_paths: ./sample_archive/warcs/
|
||||||
|
|
||||||
ukwa: cdx+https://www.webarchive.org.uk/wayback/archive/cdx
|
|
||||||
is: cdx+http://beta.vefsafn.is/is/cdx
|
|
||||||
|
|
||||||
# Settings for each collection
|
# Settings for each collection
|
||||||
use_js_obj_proxy: true
|
use_js_obj_proxy: true
|
||||||
|
|
||||||
|
@ -105,6 +105,12 @@ Given these rules, a user would:
|
|||||||
* but would receive an 'access blocked' error message when viewing ``http://httpbin.org/`` (block)
|
* but would receive an 'access blocked' error message when viewing ``http://httpbin.org/`` (block)
|
||||||
* would receive a 404 not found error when viewing ``http://httpbin.org/anything`` (exclude)
|
* would receive a 404 not found error when viewing ``http://httpbin.org/anything`` (exclude)
|
||||||
|
|
||||||
|
To match any possible URL in an .aclj file, set ``*,`` as the leading SURT, for example::
|
||||||
|
|
||||||
|
*, - {"access": "allow"}
|
||||||
|
|
||||||
|
Lines starting with ``*,`` should generally be at the end of the file, respecting the reverse alphabetical order.
|
||||||
|
|
||||||
|
|
||||||
Access Types: allow, block, exclude, allow_ignore_embargo
|
Access Types: allow, block, exclude, allow_ignore_embargo
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
@ -149,6 +155,10 @@ To make this work, pywb must be running behind an Apache or Nginx system that is
|
|||||||
|
|
||||||
For example, this header may be set based on IP range, or based on password authentication.
|
For example, this header may be set based on IP range, or based on password authentication.
|
||||||
|
|
||||||
|
To allow a user access to all URLs, overriding more specific rules and the ``default_access`` configuration setting, use the ``*,`` SURT::
|
||||||
|
|
||||||
|
*, - {"access": "allow", "user": "staff"}
|
||||||
|
|
||||||
Further examples of how to set this header will be provided in the deployments section.
|
Further examples of how to set this header will be provided in the deployments section.
|
||||||
|
|
||||||
**Note: Do not use the user-based rules without configuring proper authentication on an Apache or Nginx frontend to set or remove this header, otherwise the 'X-Pywb-ACL-User' can easily be faked.**
|
**Note: Do not use the user-based rules without configuring proper authentication on an Apache or Nginx frontend to set or remove this header, otherwise the 'X-Pywb-ACL-User' can easily be faked.**
|
||||||
|
@ -46,6 +46,7 @@ It can be used to:
|
|||||||
|
|
||||||
* Create a new collection -- ``wb-manager init <coll>``
|
* Create a new collection -- ``wb-manager init <coll>``
|
||||||
* Add WARCs to collection -- ``wb-manager add <coll> <warc>``
|
* Add WARCs to collection -- ``wb-manager add <coll> <warc>``
|
||||||
|
* Unpack WACZs to add their WARCs and indices to collection -- ``wb-manager add --unpack-wacz <coll> <wacz>``
|
||||||
* Add override templates
|
* Add override templates
|
||||||
* Add and remove metadata to a collection's ``metadata.yaml``
|
* Add and remove metadata to a collection's ``metadata.yaml``
|
||||||
* List all collections
|
* List all collections
|
||||||
|
@ -48,8 +48,7 @@ Base Templates (and supporting templates)
|
|||||||
|
|
||||||
File: ``base.html``
|
File: ``base.html``
|
||||||
|
|
||||||
This template includes the HTML added to all other pages, replay and non-replay. Shared JS and CSS includes can be added here.
|
This template includes the HTML added to all pages other than framed replay. Shared JS and CSS includes meant for pages other than framed replay can be added here.
|
||||||
For theming all pywb UI, it may be useful to modify this template.
|
|
||||||
|
|
||||||
To customize the default pywb UI across multiple pages, the following additional templates
|
To customize the default pywb UI across multiple pages, the following additional templates
|
||||||
can also be overridden:
|
can also be overridden:
|
||||||
@ -61,7 +60,7 @@ can also be overriden:
|
|||||||
* ``footer.html`` -- Template for adding content as the "footer" of the ``<body>`` tag of the ``base`` template
|
* ``footer.html`` -- Template for adding content as the "footer" of the ``<body>`` tag of the ``base`` template
|
||||||
|
|
||||||
|
|
||||||
Note: The default pywb ``head.html`` and ``footer.html`` are currently blank. They can be populated to customize the rendering, add analytics, etc... as needed.
|
Note: The default pywb ``head.html`` and ``footer.html`` are currently blank. They can be populated to customize the rendering, add analytics, etc... as needed. Content such as styles or JS code (for example for analytics) must be added to the ``frame_insert.html`` template as well (details on that template below) to also be included in framed replay.
|
||||||
|
|
||||||
|
|
||||||
The ``base.html`` template also provides five blocks that can be supplied by templates that extend it.
|
The ``base.html`` template also provides five blocks that can be supplied by templates that extend it.
|
||||||
@ -172,9 +171,7 @@ Banner Template
|
|||||||
|
|
||||||
File: ``banner.html``
|
File: ``banner.html``
|
||||||
|
|
||||||
This template is used to render the banner and is used both in framed replay and frameless replay.
|
This template is used to render the banner for framed replay. It is rendered only in the top/outer frame.
|
||||||
|
|
||||||
In framed replay, the template is only rendered in the top/outer frame, while in frameless replay, it is added to every page.
|
|
||||||
|
|
||||||
Template variables:
|
Template variables:
|
||||||
|
|
||||||
@ -192,7 +189,17 @@ Template variables:
|
|||||||
|
|
||||||
* ``{{ ui }}`` - an optional ``ui`` dictionary from ``config.yaml``, if any.
|
* ``{{ ui }}`` - an optional ``ui`` dictionary from ``config.yaml``, if any.
|
||||||
|
|
||||||
The default banner creates the UI dynamically in JavaScript using Vue.
|
The default banner creates the UI dynamically in JavaScript using Vue in the ``frame_insert.html`` template.
|
||||||
|
|
||||||
|
|
||||||
|
Custom Banner Template
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
File: ``custom_banner.html``
|
||||||
|
|
||||||
|
This template can be used to render a custom banner for frameless replay. It is blank by default.
|
||||||
|
|
||||||
|
In frameless replay, the content of this template is injected into the ``head_insert.html`` template to render the banner.
|
||||||
|
|
||||||
|
|
||||||
Head Insert Template
|
Head Insert Template
|
||||||
@ -204,7 +211,7 @@ This template represents the HTML injected into every replay page to add support
|
|||||||
|
|
||||||
This template is part of the core pywb replay, and modifying this template is not recommended.
|
This template is part of the core pywb replay, and modifying this template is not recommended.
|
||||||
|
|
||||||
For customizing the banner, modify the ``banner.html`` template instead.
|
For customizing the banner, modify the ``banner.html`` (framed replay) or ``custom_banner.html`` (frameless replay) template instead.
|
||||||
|
|
||||||
|
|
||||||
Top Frame Template
|
Top Frame Template
|
||||||
@ -221,16 +228,21 @@ This template is responsible for creating the iframe that will render the conten
|
|||||||
This template only renders the banner and is designed *not* to set the encoding to allow the browser to 'detect' the encoding for the containing iframe.
|
This template only renders the banner and is designed *not* to set the encoding to allow the browser to 'detect' the encoding for the containing iframe.
|
||||||
For this reason, the template should only contain ASCII text, and %-encode any non-ASCII characters.
|
For this reason, the template should only contain ASCII text, and %-encode any non-ASCII characters.
|
||||||
|
|
||||||
|
Content such as analytics code that is desired in the top frame of framed replay pages should be added to this template.
|
||||||
|
|
||||||
Template variables:
|
Template variables:
|
||||||
|
|
||||||
* ``{{ url }}`` - the URL being replayed.
|
* ``{{ url }}`` - the URL being replayed.
|
||||||
|
|
||||||
|
* ``{{ timestamp }}`` - the timestamp being replayed, e.g. ``20211226`` in ``http://localhost:8080/pywb/20211226/mp_/https://example.com/``
|
||||||
|
|
||||||
* ``{{ wb_url }}`` - A complete ``WbUrl`` object, which contains the ``url``, ``timestamp`` and ``mod`` properties, representing the replay url.
|
* ``{{ wb_url }}`` - A complete ``WbUrl`` object, which contains the ``url``, ``timestamp`` and ``mod`` properties, representing the replay url.
|
||||||
|
|
||||||
* ``{{ wb_prefix }}`` - the collection prefix, e.g. ``http://localhost:8080/pywb/``
|
* ``{{ wb_prefix }}`` - the collection prefix, e.g. ``http://localhost:8080/pywb/``
|
||||||
|
|
||||||
* ``{{ is_proxy }}`` - set to true if page is being loaded via an HTTP/S proxy (checks if WSGI env has ``wsgiprox.proxy_host`` set)
|
* ``{{ is_proxy }}`` - set to true if page is being loaded via an HTTP/S proxy (checks if WSGI env has ``wsgiprox.proxy_host`` set)
|
||||||
|
|
||||||
|
* ``{{ ui }}`` - an optional ``ui`` dictionary from ``config.yaml``, if any.
|
||||||
|
|
||||||
|
|
||||||
.. _custom-top-frame:
|
.. _custom-top-frame:
|
||||||
|
@ -95,8 +95,8 @@ add the WARC to a new collection and start pywb:
|
|||||||
|
|
||||||
docker pull webrecorder/pywb
|
docker pull webrecorder/pywb
|
||||||
docker run -e INIT_COLLECTION=my-web-archive -v /pywb-data:/webarchive \
|
docker run -e INIT_COLLECTION=my-web-archive -v /pywb-data:/webarchive \
|
||||||
-v /path/to:/source webrecorder/pywb wb-manager add default /path/to/my_warc.warc.gz
|
-v /path/to:/source webrecorder/pywb wb-manager add my-web-archive /source/my_warc.warc.gz
|
||||||
docker run -p 8080:8080 -v /pywb-data/:/webarchive wayback
|
docker run -p 8080:8080 -v /pywb-data/:/webarchive webrecorder/pywb wayback
|
||||||
|
|
||||||
This example is equivalent to the non-Docker example above.
|
This example is equivalent to the non-Docker example above.
|
||||||
|
|
||||||
@ -114,6 +114,8 @@ Using Existing Web Archive Collections
|
|||||||
Existing archives of WARCs/ARCs files can be used with pywb with minimal amount of setup. By using ``wb-manager add``,
|
Existing archives of WARCs/ARCs files can be used with pywb with minimal amount of setup. By using ``wb-manager add``,
|
||||||
WARC/ARC files will automatically be placed in the collection archive directory and indexed.
|
WARC/ARC files will automatically be placed in the collection archive directory and indexed.
|
||||||
|
|
||||||
|
In pywb 2.8.0 and later, preliminary support for WACZ files is also added with ``wb-manager add --unpack-wacz``. This will unpack the provided WACZ file, adding its WARCs and indices to the collection.
|
||||||
|
|
||||||
By default, ``wb-manager`` places new collections in the ``collections/<coll name>`` subdirectory of the current working directory. To specify a different root directory, use ``wb-manager -d <dir>``. Other options can be set in the config file.
|
By default, ``wb-manager`` places new collections in the ``collections/<coll name>`` subdirectory of the current working directory. To specify a different root directory, use ``wb-manager -d <dir>``. Other options can be set in the config file.
|
||||||
|
|
||||||
If you have a large number of existing CDX index files, pywb will be able to read them as well after running through a simple conversion process.
|
If you have a large number of existing CDX index files, pywb will be able to read them as well after running through a simple conversion process.
|
||||||
@ -154,20 +156,20 @@ To enable auto-indexing, run with ``wayback -a`` or ``wayback -a --auto-interval
|
|||||||
Creating a Web Archive
|
Creating a Web Archive
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
Using Webrecorder
|
Using ArchiveWeb.page
|
||||||
^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
If you do not have a web archive to test, one easy way to create one is to use `Webrecorder <https://webrecorder.io>`_
|
If you do not have a web archive to test, one easy way to create one is to use the `ArchiveWeb.page <https://archiveweb.page>`_ browser extension for Chrome and other Chromium-based browsers such as Brave Browser. ArchiveWeb.page records pages visited during an archiving session in the browser, and provides means of both replaying and downloading the archived items created.
|
||||||
|
|
||||||
After recording, you can click **Stop** and then click `Download Collection` to receive a WARC (`.warc.gz`) file.
|
Follow the instructions in `How To Create Web Archives with ArchiveWeb.page <https://archiveweb.page/en/usage/>`_. After recording, press **Stop** and then `download your collection <https://archiveweb.page/en/download/>`_ to receive a WARC (`.warc.gz`) file. If you choose to download your collection in the WACZ format, the WARC files can be found inside the zipped WACZ in the ``archive/`` directory.
|
||||||
|
|
||||||
You can then use this with work with pywb.
|
You can then use your WARCs to work with pywb.
|
||||||
|
|
||||||
|
|
||||||
Using pywb Recorder
|
Using pywb Recorder
|
||||||
^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
The core recording functionality in Webrecorder is also part of :mod:`pywb`. If you want to create a WARC locally, this can be
|
Recording functionality is also part of :mod:`pywb`. If you want to create a WARC locally, this can be
|
||||||
done by directly recording into your pywb collection:
|
done by directly recording into your pywb collection:
|
||||||
|
|
||||||
1. Create a collection: ``wb-manager init my-web-archive`` (if you haven't already created a web archive collection)
|
1. Create a collection: ``wb-manager init my-web-archive`` (if you haven't already created a web archive collection)
|
||||||
@ -180,6 +182,14 @@ In this configuration, the indexing happens every 10 seconds.. After 10 seconds,
|
|||||||
``http://localhost:8080/my-web-archive/http://example.com/``
|
``http://localhost:8080/my-web-archive/http://example.com/``
|
||||||
|
|
||||||
|
|
||||||
|
Using Browsertrix
|
||||||
|
^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
For a more automated browser-based web archiving experience, `Browsertrix <https://browsertrix.com/>`_ provides a web interface for configuring, scheduling, running, reviewing, and curating crawls of web content. Crawl activity is shown in a live screencast of the browsers used for crawling and all web archives created in Browsertrix can be easily downloaded from the application in the WACZ format.
|
||||||
|
|
||||||
|
`Browsertrix Crawler <https://crawler.docs.browsertrix.com/>`_, which provides the underlying crawling functionality of Browsertrix, can also be run standalone in a Docker container on your local computer.
|
||||||
|
|
||||||
|
|
||||||
HTTP/S Proxy Mode Access
|
HTTP/S Proxy Mode Access
|
||||||
------------------------
|
------------------------
|
||||||
|
|
||||||
|
@ -53,6 +53,36 @@ For example, to use the file ``./static/my-logo.png`` as the logo, set:
|
|||||||
logo: my-logo.png
|
logo: my-logo.png
|
||||||
|
|
||||||
|
|
||||||
|
Logo URL
|
||||||
|
^^^^^^^^
|
||||||
|
|
||||||
|
It is possible to configure the logo to link to any URL by setting ``ui.logo_home_url`` in ``config.yaml`` to the URL of your choice.
|
||||||
|
|
||||||
|
If omitted, the logo will not link to any page.
|
||||||
|
|
||||||
|
For example, to have the logo redirect to ``https://example.com/web-archive-landing-page``, set:
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
ui:
|
||||||
|
logo_home_url: https://example.com/web-archive-landing-page
|
||||||
|
|
||||||
|
|
||||||
|
Printing
|
||||||
|
^^^^^^^^
|
||||||
|
|
||||||
|
As of pywb 2.8, the replay header includes a print button that prints the contents of the replay iframe.
|
||||||
|
|
||||||
|
This button can be disabled by setting ``ui.disable_printing`` in ``config.yaml`` to any value.
|
||||||
|
|
||||||
|
For example:
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
ui:
|
||||||
|
disable_printing: true
|
||||||
|
|
||||||
|
|
||||||
Banner Colors
|
Banner Colors
|
||||||
^^^^^^^^^^^^^
|
^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
from gevent.monkey import patch_all; patch_all()
|
from gevent.monkey import patch_all; patch_all()
|
||||||
|
|
||||||
from werkzeug.routing import Map, Rule, RequestRedirect, Submount
|
from werkzeug.routing import Map, Rule, RequestRedirect, Submount
|
||||||
from werkzeug.wsgi import pop_path_info
|
from wsgiref.util import shift_path_info
|
||||||
from six.moves.urllib.parse import urljoin, parse_qsl
|
from six.moves.urllib.parse import urljoin, parse_qsl
|
||||||
from six import iteritems
|
from six import iteritems
|
||||||
from warcio.utils import to_native_str
|
from warcio.utils import to_native_str
|
||||||
@ -108,6 +108,7 @@ class FrontEndApp(object):
|
|||||||
self.templates_dir = config.get('templates_dir', 'templates')
|
self.templates_dir = config.get('templates_dir', 'templates')
|
||||||
self.static_dir = config.get('static_dir', 'static')
|
self.static_dir = config.get('static_dir', 'static')
|
||||||
self.static_prefix = config.get('static_prefix', 'static')
|
self.static_prefix = config.get('static_prefix', 'static')
|
||||||
|
self.default_locale = config.get('default_locale', '')
|
||||||
|
|
||||||
metadata_templ = os.path.join(self.warcserver.root_dir, '{coll}', 'metadata.yaml')
|
metadata_templ = os.path.join(self.warcserver.root_dir, '{coll}', 'metadata.yaml')
|
||||||
self.metadata_cache = MetadataCache(metadata_templ)
|
self.metadata_cache = MetadataCache(metadata_templ)
|
||||||
@ -414,6 +415,14 @@ class FrontEndApp(object):
|
|||||||
# if coll == self.all_coll:
|
# if coll == self.all_coll:
|
||||||
# coll = '*'
|
# coll = '*'
|
||||||
|
|
||||||
|
config = self.warcserver.get_coll_config(coll)
|
||||||
|
is_live = config.get("index") == "$live"
|
||||||
|
|
||||||
|
if is_live:
|
||||||
|
cache_control = "no-store, no-cache"
|
||||||
|
else:
|
||||||
|
cache_control = "max-age=86400, must-revalidate"
|
||||||
|
|
||||||
cdx_url = base_url.format(coll=coll)
|
cdx_url = base_url.format(coll=coll)
|
||||||
|
|
||||||
if environ.get('QUERY_STRING'):
|
if environ.get('QUERY_STRING'):
|
||||||
@ -425,7 +434,11 @@ class FrontEndApp(object):
|
|||||||
cdx_url += 'limit=' + str(self.query_limit)
|
cdx_url += 'limit=' + str(self.query_limit)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
res = requests.get(cdx_url, stream=True)
|
headers = {}
|
||||||
|
for key in environ.keys():
|
||||||
|
if key.startswith("HTTP_X_"):
|
||||||
|
headers[key[5:].replace("_", "-")] = environ[key]
|
||||||
|
res = requests.get(cdx_url, stream=True, headers=headers)
|
||||||
|
|
||||||
status_line = '{} {}'.format(res.status_code, res.reason)
|
status_line = '{} {}'.format(res.status_code, res.reason)
|
||||||
content_type = res.headers.get('Content-Type')
|
content_type = res.headers.get('Content-Type')
|
||||||
@ -433,7 +446,7 @@ class FrontEndApp(object):
|
|||||||
return WbResponse.bin_stream(StreamIter(res.raw),
|
return WbResponse.bin_stream(StreamIter(res.raw),
|
||||||
content_type=content_type,
|
content_type=content_type,
|
||||||
status=status_line,
|
status=status_line,
|
||||||
headers=[("Cache-Control", "max-age=86400, must-revalidate")])
|
headers=[("Cache-Control", cache_control)])
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return WbResponse.text_response('Error: ' + str(e), status='400 Bad Request')
|
return WbResponse.text_response('Error: ' + str(e), status='400 Bad Request')
|
||||||
@ -545,9 +558,9 @@ class FrontEndApp(object):
|
|||||||
return
|
return
|
||||||
|
|
||||||
if coll != '$root':
|
if coll != '$root':
|
||||||
pop_path_info(environ)
|
shift_path_info(environ)
|
||||||
if record:
|
if record:
|
||||||
pop_path_info(environ)
|
shift_path_info(environ)
|
||||||
|
|
||||||
paths = [self.warcserver.root_dir]
|
paths = [self.warcserver.root_dir]
|
||||||
|
|
||||||
@ -590,7 +603,7 @@ class FrontEndApp(object):
|
|||||||
and message.
|
and message.
|
||||||
|
|
||||||
:param dict environ: The WSGI environment dictionary for the request
|
:param dict environ: The WSGI environment dictionary for the request
|
||||||
:param str err_type: The identifier for type of error that occured
|
:param str err_type: The identifier for type of error that occurred
|
||||||
:param str url: The url of the archived page that was requested
|
:param str url: The url of the archived page that was requested
|
||||||
"""
|
"""
|
||||||
raise AppPageNotFound(err_type, url)
|
raise AppPageNotFound(err_type, url)
|
||||||
@ -656,8 +669,12 @@ class FrontEndApp(object):
|
|||||||
|
|
||||||
lang = args.pop('lang', '')
|
lang = args.pop('lang', '')
|
||||||
if lang:
|
if lang:
|
||||||
pop_path_info(environ)
|
shift_path_info(environ)
|
||||||
|
|
||||||
|
if lang:
|
||||||
environ['pywb_lang'] = lang
|
environ['pywb_lang'] = lang
|
||||||
|
elif self.default_locale:
|
||||||
|
environ['pywb_lang'] = self.default_locale
|
||||||
|
|
||||||
response = endpoint(environ, **args)
|
response = endpoint(environ, **args)
|
||||||
|
|
||||||
|
@ -64,7 +64,7 @@ class RewriterApp(object):
|
|||||||
|
|
||||||
if not jinja_env:
|
if not jinja_env:
|
||||||
jinja_env = JinjaEnv(globals={'static_path': 'static'},
|
jinja_env = JinjaEnv(globals={'static_path': 'static'},
|
||||||
extensions=['jinja2.ext.i18n', 'jinja2.ext.with_'])
|
extensions=['jinja2.ext.i18n'])
|
||||||
jinja_env.jinja_env.install_null_translations()
|
jinja_env.jinja_env.install_null_translations()
|
||||||
|
|
||||||
self.jinja_env = jinja_env
|
self.jinja_env = jinja_env
|
||||||
|
@ -1,5 +1,9 @@
|
|||||||
|
import logging
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
import traceback
|
||||||
|
|
||||||
|
import warcio
|
||||||
|
|
||||||
# Use ujson if available
|
# Use ujson if available
|
||||||
try:
|
try:
|
||||||
@ -298,8 +302,11 @@ def write_multi_cdx_index(output, inputs, **options):
|
|||||||
with open(fullpath, 'rb') as infile:
|
with open(fullpath, 'rb') as infile:
|
||||||
entry_iter = record_iter(infile)
|
entry_iter = record_iter(infile)
|
||||||
|
|
||||||
|
try:
|
||||||
for entry in entry_iter:
|
for entry in entry_iter:
|
||||||
writer.write(entry, filename)
|
writer.write(entry, filename)
|
||||||
|
except warcio.exceptions.ArchiveLoadFailed:
|
||||||
|
logging.error('Error while indexing file %s, %s',filename,traceback.format_exc())
|
||||||
|
|
||||||
return writer
|
return writer
|
||||||
|
|
||||||
@ -377,7 +384,7 @@ url timestamp { ... }
|
|||||||
|
|
||||||
output_help = """
|
output_help = """
|
||||||
Output file or directory.
|
Output file or directory.
|
||||||
- If directory, each input file is written to a seperate output file
|
- If directory, each input file is written to a separate output file
|
||||||
with a .cdx extension
|
with a .cdx extension
|
||||||
- If output is '-', output is written to stdout
|
- If output is '-', output is written to stdout
|
||||||
"""
|
"""
|
||||||
|
@ -102,11 +102,11 @@ class ACLManager(CollectionsManager):
|
|||||||
|
|
||||||
except IOError as io:
|
except IOError as io:
|
||||||
if must_exist:
|
if must_exist:
|
||||||
print('Error Occured: ' + str(io))
|
print('Error Occurred: ' + str(io))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print('Error Occured: ' + str(e))
|
print('Error Occurred: ' + str(e))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def save_acl(self, r=None):
|
def save_acl(self, r=None):
|
||||||
|
@ -5,12 +5,16 @@ import logging
|
|||||||
import heapq
|
import heapq
|
||||||
import yaml
|
import yaml
|
||||||
import re
|
import re
|
||||||
|
import gzip
|
||||||
import six
|
import six
|
||||||
|
import pathlib
|
||||||
|
|
||||||
from distutils.util import strtobool
|
from distutils.util import strtobool
|
||||||
from pkg_resources import resource_string, get_distribution
|
from pkg_resources import resource_string, get_distribution
|
||||||
|
|
||||||
from argparse import ArgumentParser, RawTextHelpFormatter
|
from argparse import ArgumentParser, RawTextHelpFormatter
|
||||||
|
from tempfile import mkdtemp, TemporaryDirectory
|
||||||
|
from zipfile import ZipFile
|
||||||
|
|
||||||
from pywb.utils.loaders import load_yaml_config
|
from pywb.utils.loaders import load_yaml_config
|
||||||
from warcio.timeutils import timestamp20_now
|
from warcio.timeutils import timestamp20_now
|
||||||
@ -47,6 +51,9 @@ directory structure expected by pywb
|
|||||||
|
|
||||||
COLLS_DIR = 'collections'
|
COLLS_DIR = 'collections'
|
||||||
|
|
||||||
|
WARC_RX = re.compile(r'.*\.w?arc(\.gz)?$')
|
||||||
|
WACZ_RX = re.compile(r'.*\.wacz$')
|
||||||
|
|
||||||
def __init__(self, coll_name, colls_dir=None, must_exist=True):
|
def __init__(self, coll_name, colls_dir=None, must_exist=True):
|
||||||
colls_dir = colls_dir or self.COLLS_DIR
|
colls_dir = colls_dir or self.COLLS_DIR
|
||||||
self.default_config = load_yaml_config(DEFAULT_CONFIG)
|
self.default_config = load_yaml_config(DEFAULT_CONFIG)
|
||||||
@ -115,19 +122,142 @@ directory structure expected by pywb
|
|||||||
'To create a new collection, run\n\n{1} init {0}')
|
'To create a new collection, run\n\n{1} init {0}')
|
||||||
raise IOError(msg.format(self.coll_name, sys.argv[0]))
|
raise IOError(msg.format(self.coll_name, sys.argv[0]))
|
||||||
|
|
||||||
def add_warcs(self, warcs):
|
def add_archives(self, archives, unpack_wacz=False):
|
||||||
if not os.path.isdir(self.archive_dir):
|
if not os.path.isdir(self.archive_dir):
|
||||||
raise IOError('Directory {0} does not exist'.
|
raise IOError('Directory {0} does not exist'.
|
||||||
format(self.archive_dir))
|
format(self.archive_dir))
|
||||||
|
|
||||||
full_paths = []
|
invalid_archives = []
|
||||||
for filename in warcs:
|
warc_paths = []
|
||||||
filename = os.path.abspath(filename)
|
for archive in archives:
|
||||||
shutil.copy2(filename, self.archive_dir)
|
if self.WARC_RX.match(archive):
|
||||||
full_paths.append(os.path.join(self.archive_dir, filename))
|
full_path = self._add_warc(archive)
|
||||||
logging.info('Copied ' + filename + ' to ' + self.archive_dir)
|
if full_path:
|
||||||
|
warc_paths.append(full_path)
|
||||||
|
elif self.WACZ_RX.match(archive):
|
||||||
|
if unpack_wacz:
|
||||||
|
self._add_wacz_unpacked(archive)
|
||||||
|
else:
|
||||||
|
raise NotImplementedError('Adding waczs without unpacking is not yet implemented. Use '
|
||||||
|
'\'--unpack-wacz\' flag to add the wacz\'s content.')
|
||||||
|
else:
|
||||||
|
invalid_archives.append(archive)
|
||||||
|
|
||||||
self._index_merge_warcs(full_paths, self.DEF_INDEX_FILE)
|
self._index_merge_warcs(warc_paths, self.DEF_INDEX_FILE)
|
||||||
|
|
||||||
|
if invalid_archives:
|
||||||
|
logging.warning(f'Invalid archives weren\'t added: {", ".join(invalid_archives)}')
|
||||||
|
|
||||||
|
def _rename_warc(self, warc_basename):
|
||||||
|
dupe_idx = 1
|
||||||
|
ext = ''.join(pathlib.Path(warc_basename).suffixes)
|
||||||
|
pre_ext_name = warc_basename.split(ext)[0]
|
||||||
|
|
||||||
|
while True:
|
||||||
|
new_basename = f'{pre_ext_name}-{dupe_idx}{ext}'
|
||||||
|
if not os.path.exists(os.path.join(self.archive_dir, new_basename)):
|
||||||
|
break
|
||||||
|
dupe_idx += 1
|
||||||
|
|
||||||
|
return new_basename
|
||||||
|
|
||||||
|
def _add_warc(self, warc):
|
||||||
|
warc_source = os.path.abspath(warc)
|
||||||
|
source_dir, warc_basename = os.path.split(warc_source)
|
||||||
|
|
||||||
|
# don't overwrite existing warcs with duplicate names
|
||||||
|
if os.path.exists(os.path.join(self.archive_dir, warc_basename)):
|
||||||
|
warc_basename = self._rename_warc(warc_basename)
|
||||||
|
logging.info(f'Warc {os.path.basename(warc)} already exists - renamed to {warc_basename}.')
|
||||||
|
|
||||||
|
warc_dest = os.path.join(self.archive_dir, warc_basename)
|
||||||
|
shutil.copy2(warc_source, warc_dest)
|
||||||
|
logging.info(f'Copied {warc} to {self.archive_dir} as {warc_basename}')
|
||||||
|
return warc_dest
|
||||||
|
|
||||||
|
def _add_wacz_unpacked(self, wacz):
|
||||||
|
wacz = os.path.abspath(wacz)
|
||||||
|
temp_dir = mkdtemp()
|
||||||
|
warc_regex = re.compile(r'.+\.warc(\.gz)?$')
|
||||||
|
cdx_regex = re.compile(r'.+\.cdx(\.gz)?$')
|
||||||
|
with ZipFile(wacz, 'r') as wacz_zip_file:
|
||||||
|
archive_members = wacz_zip_file.namelist()
|
||||||
|
warc_files = [file for file in archive_members if warc_regex.match(file)]
|
||||||
|
if not warc_files:
|
||||||
|
logging.warning(f'WACZ {wacz} does not contain any warc files.')
|
||||||
|
return
|
||||||
|
|
||||||
|
# extract warc files
|
||||||
|
for warc_file in warc_files:
|
||||||
|
wacz_zip_file.extract(warc_file, temp_dir)
|
||||||
|
|
||||||
|
cdx_files = [file for file in archive_members if cdx_regex.match(file)]
|
||||||
|
if not cdx_files:
|
||||||
|
logging.warning(f'WACZ {wacz} does not contain any indices.')
|
||||||
|
return
|
||||||
|
|
||||||
|
for cdx_file in cdx_files:
|
||||||
|
wacz_zip_file.extract(cdx_file, temp_dir)
|
||||||
|
|
||||||
|
# copy extracted warc files to collections archive dir, use wacz filename as filename with added index if
|
||||||
|
# multiple warc files exist
|
||||||
|
warc_filename_mapping = {}
|
||||||
|
full_paths = []
|
||||||
|
for idx, extracted_warc_file in enumerate(warc_files):
|
||||||
|
_, warc_ext = os.path.splitext(extracted_warc_file)
|
||||||
|
if warc_ext == '.gz':
|
||||||
|
warc_ext = '.warc.gz'
|
||||||
|
warc_filename = os.path.basename(wacz)
|
||||||
|
warc_filename, _ = os.path.splitext(warc_filename)
|
||||||
|
warc_filename = f'{warc_filename}-{idx}{warc_ext}'
|
||||||
|
warc_destination_path = os.path.join(self.archive_dir, warc_filename)
|
||||||
|
|
||||||
|
if os.path.exists(warc_destination_path):
|
||||||
|
warc_filename = self._rename_warc(warc_filename)
|
||||||
|
logging.info(f'Warc {warc_destination_path} already exists - renamed to {warc_filename}.')
|
||||||
|
warc_destination_path = os.path.join(self.archive_dir, warc_filename)
|
||||||
|
|
||||||
|
warc_filename_mapping[os.path.basename(extracted_warc_file)] = warc_filename
|
||||||
|
shutil.copy2(os.path.join(temp_dir, extracted_warc_file), warc_destination_path)
|
||||||
|
full_paths.append(warc_destination_path)
|
||||||
|
|
||||||
|
# rewrite filenames in wacz indices and merge them with collection index file
|
||||||
|
for cdx_file in cdx_files:
|
||||||
|
self._add_wacz_index(os.path.join(self.indexes_dir, self.DEF_INDEX_FILE), os.path.join(temp_dir, cdx_file),
|
||||||
|
warc_filename_mapping)
|
||||||
|
|
||||||
|
# delete temporary files
|
||||||
|
shutil.rmtree(temp_dir)
|
||||||
|
|
||||||
|
def _add_wacz_index(self, collection_index_path, wacz_index_path, filename_mapping):
|
||||||
|
from pywb.warcserver.index.cdxobject import CDXObject
|
||||||
|
|
||||||
|
# rewrite wacz index to temporary index file
|
||||||
|
tempdir = TemporaryDirectory()
|
||||||
|
wacz_index_name = os.path.basename(wacz_index_path)
|
||||||
|
rewritten_index_path = os.path.join(tempdir.name, wacz_index_name)
|
||||||
|
|
||||||
|
with open(rewritten_index_path, 'w') as rewritten_index:
|
||||||
|
if wacz_index_path.endswith('.gz'):
|
||||||
|
wacz_index = gzip.open(wacz_index_path, 'rb')
|
||||||
|
else:
|
||||||
|
wacz_index = open(wacz_index_path, 'rb')
|
||||||
|
|
||||||
|
for line in wacz_index:
|
||||||
|
cdx_object = CDXObject(cdxline=line)
|
||||||
|
if cdx_object['filename'] in filename_mapping:
|
||||||
|
cdx_object['filename'] = filename_mapping[cdx_object['filename']]
|
||||||
|
rewritten_index.write(cdx_object.to_cdxj())
|
||||||
|
|
||||||
|
if not os.path.isfile(collection_index_path):
|
||||||
|
shutil.move(rewritten_index_path, collection_index_path)
|
||||||
|
return
|
||||||
|
|
||||||
|
temp_coll_index_path = collection_index_path + '.tmp.' + timestamp20_now()
|
||||||
|
self._merge_indices(collection_index_path, rewritten_index_path, temp_coll_index_path)
|
||||||
|
shutil.move(temp_coll_index_path, collection_index_path)
|
||||||
|
|
||||||
|
tempdir.cleanup()
|
||||||
|
|
||||||
def reindex(self):
|
def reindex(self):
|
||||||
cdx_file = os.path.join(self.indexes_dir, self.DEF_INDEX_FILE)
|
cdx_file = os.path.join(self.indexes_dir, self.DEF_INDEX_FILE)
|
||||||
@ -180,20 +310,24 @@ directory structure expected by pywb
|
|||||||
|
|
||||||
merged_file = temp_file + '.merged'
|
merged_file = temp_file + '.merged'
|
||||||
|
|
||||||
last_line = None
|
self._merge_indices(cdx_file, temp_file, merged_file)
|
||||||
|
|
||||||
with open(cdx_file, 'rb') as orig_index:
|
|
||||||
with open(temp_file, 'rb') as new_index:
|
|
||||||
with open(merged_file, 'w+b') as merged:
|
|
||||||
for line in heapq.merge(orig_index, new_index):
|
|
||||||
if last_line != line:
|
|
||||||
merged.write(line)
|
|
||||||
last_line = line
|
|
||||||
|
|
||||||
shutil.move(merged_file, cdx_file)
|
shutil.move(merged_file, cdx_file)
|
||||||
#os.rename(merged_file, cdx_file)
|
#os.rename(merged_file, cdx_file)
|
||||||
os.remove(temp_file)
|
os.remove(temp_file)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _merge_indices(index1, index2, dest):
|
||||||
|
last_line = None
|
||||||
|
|
||||||
|
with open(index1, 'rb') as index1_f:
|
||||||
|
with open(index2, 'rb') as index2_f:
|
||||||
|
with open(dest, 'wb') as dest_f:
|
||||||
|
for line in heapq.merge(index1_f, index2_f):
|
||||||
|
if last_line != line:
|
||||||
|
dest_f.write(line)
|
||||||
|
last_line = line
|
||||||
|
|
||||||
def set_metadata(self, namevalue_pairs):
|
def set_metadata(self, namevalue_pairs):
|
||||||
metadata_yaml = os.path.join(self.curr_coll_dir, 'metadata.yaml')
|
metadata_yaml = os.path.join(self.curr_coll_dir, 'metadata.yaml')
|
||||||
metadata = None
|
metadata = None
|
||||||
@ -373,16 +507,23 @@ Create manage file based web archive collections
|
|||||||
listcmd = subparsers.add_parser('list', help=list_help)
|
listcmd = subparsers.add_parser('list', help=list_help)
|
||||||
listcmd.set_defaults(func=do_list)
|
listcmd.set_defaults(func=do_list)
|
||||||
|
|
||||||
# Add Warcs
|
# Add Warcs or Waczs
|
||||||
def do_add(r):
|
def do_add(r):
|
||||||
m = CollectionsManager(r.coll_name)
|
m = CollectionsManager(r.coll_name)
|
||||||
m.add_warcs(r.files)
|
m.add_archives(r.files, r.unpack_wacz)
|
||||||
|
|
||||||
addwarc_help = 'Copy ARCS/WARCS to collection directory and reindex'
|
add_archives_help = 'Copy ARCs/WARCs to collection directory and reindex'
|
||||||
addwarc = subparsers.add_parser('add', help=addwarc_help)
|
add_unpack_wacz_help = 'Copy WARCs from WACZ to collection directory and reindex'
|
||||||
addwarc.add_argument('coll_name')
|
add_archives = subparsers.add_parser('add', help=add_archives_help)
|
||||||
addwarc.add_argument('files', nargs='+')
|
add_archives.add_argument(
|
||||||
addwarc.set_defaults(func=do_add)
|
'--unpack-wacz',
|
||||||
|
dest='unpack_wacz',
|
||||||
|
action='store_true',
|
||||||
|
help=add_unpack_wacz_help
|
||||||
|
)
|
||||||
|
add_archives.add_argument('coll_name')
|
||||||
|
add_archives.add_argument('files', nargs='+')
|
||||||
|
add_archives.set_defaults(func=do_add)
|
||||||
|
|
||||||
# Reindex All
|
# Reindex All
|
||||||
def do_reindex(r):
|
def do_reindex(r):
|
||||||
|
@ -268,7 +268,7 @@ class HTMLRewriterMixin(StreamingRewriter):
|
|||||||
unesc_value = self.try_unescape(value)
|
unesc_value = self.try_unescape(value)
|
||||||
rewritten_value = self.url_rewriter.rewrite(unesc_value, mod, force_abs)
|
rewritten_value = self.url_rewriter.rewrite(unesc_value, mod, force_abs)
|
||||||
|
|
||||||
# if no rewriting has occured, ensure we return original, not reencoded value
|
# if no rewriting has occurred, ensure we return original, not reencoded value
|
||||||
if rewritten_value == value:
|
if rewritten_value == value:
|
||||||
return orig_value
|
return orig_value
|
||||||
|
|
||||||
@ -668,7 +668,7 @@ class HTMLRewriter(HTMLRewriterMixin, HTMLParser):
|
|||||||
if self.parse_comments:
|
if self.parse_comments:
|
||||||
#data = self._rewrite_script(data)
|
#data = self._rewrite_script(data)
|
||||||
|
|
||||||
# Rewrite with seperate HTMLRewriter
|
# Rewrite with separate HTMLRewriter
|
||||||
comment_rewriter = HTMLRewriter(self.url_rewriter,
|
comment_rewriter = HTMLRewriter(self.url_rewriter,
|
||||||
defmod=self.defmod)
|
defmod=self.defmod)
|
||||||
|
|
||||||
|
@ -124,9 +124,7 @@ if (!self.__WB_pmw) {{ self.__WB_pmw = function(obj) {{ this.__WB_source = obj;
|
|||||||
(r'(?<![$.])\s*\blocation\b\s*[=]\s*(?![=])', self.add_suffix(check_loc), 0),
|
(r'(?<![$.])\s*\blocation\b\s*[=]\s*(?![=])', self.add_suffix(check_loc), 0),
|
||||||
# rewriting 'return this'
|
# rewriting 'return this'
|
||||||
(r'\breturn\s+this\b\s*(?![.$])', self.replace_str(this_rw), 0),
|
(r'\breturn\s+this\b\s*(?![.$])', self.replace_str(this_rw), 0),
|
||||||
# rewriting 'this.' special properties access on new line, with ; prepended
|
# rewriting 'this.' special properties access
|
||||||
(r'\n\s*this\b(?=(?:\.(?:{0})\b))'.format(prop_str), self.replace_str(';' + this_rw), 0),
|
|
||||||
# rewriting 'this.' special properties access, not on new line (no ;)
|
|
||||||
(r'(?<![$.])\s*this\b(?=(?:\.(?:{0})\b))'.format(prop_str), self.replace_str(this_rw), 0),
|
(r'(?<![$.])\s*this\b(?=(?:\.(?:{0})\b))'.format(prop_str), self.replace_str(this_rw), 0),
|
||||||
# rewrite '= this' or ', this'
|
# rewrite '= this' or ', this'
|
||||||
(r'(?<=[=,])\s*this\b\s*(?![:.$])', self.replace_str(this_rw), 0),
|
(r'(?<=[=,])\s*this\b\s*(?![:.$])', self.replace_str(this_rw), 0),
|
||||||
|
@ -5,7 +5,7 @@ from pywb.utils.loaders import load
|
|||||||
|
|
||||||
from six.moves.urllib.parse import urlsplit, quote
|
from six.moves.urllib.parse import urlsplit, quote
|
||||||
|
|
||||||
from jinja2 import Environment, TemplateNotFound, contextfunction, select_autoescape
|
from jinja2 import Environment, TemplateNotFound, pass_context, select_autoescape
|
||||||
from jinja2 import FileSystemLoader, PackageLoader, ChoiceLoader
|
from jinja2 import FileSystemLoader, PackageLoader, ChoiceLoader
|
||||||
|
|
||||||
from webassets.ext.jinja2 import AssetsExtension
|
from webassets.ext.jinja2 import AssetsExtension
|
||||||
@ -139,7 +139,7 @@ class JinjaEnv(object):
|
|||||||
return loc_map.get(loc)
|
return loc_map.get(loc)
|
||||||
|
|
||||||
def override_func(jinja_env, name):
|
def override_func(jinja_env, name):
|
||||||
@contextfunction
|
@pass_context
|
||||||
def get_override(context, text):
|
def get_override(context, text):
|
||||||
translate = get_translate(context)
|
translate = get_translate(context)
|
||||||
if not translate:
|
if not translate:
|
||||||
@ -158,7 +158,7 @@ class JinjaEnv(object):
|
|||||||
|
|
||||||
# Special _Q() function to return %-encoded text, necessary for use
|
# Special _Q() function to return %-encoded text, necessary for use
|
||||||
# with text in banner
|
# with text in banner
|
||||||
@contextfunction
|
@pass_context
|
||||||
def quote_gettext(context, text):
|
def quote_gettext(context, text):
|
||||||
translate = get_translate(context)
|
translate = get_translate(context)
|
||||||
if not translate:
|
if not translate:
|
||||||
@ -171,14 +171,14 @@ class JinjaEnv(object):
|
|||||||
self.jinja_env.globals['_Q'] = quote_gettext
|
self.jinja_env.globals['_Q'] = quote_gettext
|
||||||
self.jinja_env.globals['default_locale'] = default_locale
|
self.jinja_env.globals['default_locale'] = default_locale
|
||||||
|
|
||||||
@contextfunction
|
@pass_context
|
||||||
def switch_locale(context, locale):
|
def switch_locale(context, locale):
|
||||||
environ = context.get('env')
|
environ = context.get('env')
|
||||||
curr_loc = environ.get('pywb_lang', '')
|
curr_loc = environ.get('pywb_lang', '')
|
||||||
|
|
||||||
request_uri = environ.get('REQUEST_URI', environ.get('PATH_INFO'))
|
request_uri = environ.get('REQUEST_URI', environ.get('PATH_INFO'))
|
||||||
|
|
||||||
if curr_loc:
|
if curr_loc and request_uri.startswith('/' + curr_loc + '/'):
|
||||||
return request_uri.replace(curr_loc, locale, 1)
|
return request_uri.replace(curr_loc, locale, 1)
|
||||||
|
|
||||||
app_prefix = environ.get('pywb.app_prefix', '')
|
app_prefix = environ.get('pywb.app_prefix', '')
|
||||||
@ -188,7 +188,7 @@ class JinjaEnv(object):
|
|||||||
|
|
||||||
return app_prefix + '/' + locale + request_uri
|
return app_prefix + '/' + locale + request_uri
|
||||||
|
|
||||||
@contextfunction
|
@pass_context
|
||||||
def get_locale_prefixes(context):
|
def get_locale_prefixes(context):
|
||||||
environ = context.get('env')
|
environ = context.get('env')
|
||||||
locale_prefixes = {}
|
locale_prefixes = {}
|
||||||
@ -196,11 +196,11 @@ class JinjaEnv(object):
|
|||||||
orig_prefix = environ.get('pywb.app_prefix', '')
|
orig_prefix = environ.get('pywb.app_prefix', '')
|
||||||
coll = environ.get('SCRIPT_NAME', '')
|
coll = environ.get('SCRIPT_NAME', '')
|
||||||
|
|
||||||
if orig_prefix:
|
if orig_prefix and coll.startswith(orig_prefix):
|
||||||
coll = coll[len(orig_prefix):]
|
coll = coll[len(orig_prefix):]
|
||||||
|
|
||||||
curr_loc = environ.get('pywb_lang', '')
|
curr_loc = environ.get('pywb_lang', '')
|
||||||
if curr_loc:
|
if curr_loc and coll.startswith('/' + curr_loc):
|
||||||
coll = coll[len(curr_loc) + 1:]
|
coll = coll[len(curr_loc) + 1:]
|
||||||
|
|
||||||
for locale in loc_map.keys():
|
for locale in loc_map.keys():
|
||||||
@ -405,10 +405,11 @@ class TopFrameView(BaseInsertView):
|
|||||||
|
|
||||||
embed_url = wb_url.to_str(mod=replay_mod)
|
embed_url = wb_url.to_str(mod=replay_mod)
|
||||||
|
|
||||||
|
timestamp = ''
|
||||||
if wb_url.timestamp:
|
if wb_url.timestamp:
|
||||||
timestamp = wb_url.timestamp
|
timestamp = wb_url.timestamp
|
||||||
else:
|
#else:
|
||||||
timestamp = timestamp_now()
|
# timestamp = timestamp_now()
|
||||||
|
|
||||||
is_proxy = 'wsgiprox.proxy_host' in env
|
is_proxy = 'wsgiprox.proxy_host' in env
|
||||||
|
|
||||||
|
@ -143,7 +143,7 @@ r"""
|
|||||||
'var foo = _____WB$wombat$check$this$function_____(this).location'
|
'var foo = _____WB$wombat$check$this$function_____(this).location'
|
||||||
|
|
||||||
>>> _test_js_obj_proxy('A = B\nthis.location = "foo"')
|
>>> _test_js_obj_proxy('A = B\nthis.location = "foo"')
|
||||||
'A = B\n;_____WB$wombat$check$this$function_____(this).location = "foo"'
|
'A = B\n_____WB$wombat$check$this$function_____(this).location = "foo"'
|
||||||
|
|
||||||
>>> _test_js_obj_proxy('var foo = this.location2')
|
>>> _test_js_obj_proxy('var foo = this.location2')
|
||||||
'var foo = this.location2'
|
'var foo = this.location2'
|
||||||
|
@ -110,7 +110,7 @@ rules:
|
|||||||
|
|
||||||
fuzzy_lookup:
|
fuzzy_lookup:
|
||||||
match: '("(?:cursor|cursorindex)":["\d\w]+)'
|
match: '("(?:cursor|cursorindex)":["\d\w]+)'
|
||||||
find_all: true
|
re_type: findall
|
||||||
|
|
||||||
- url_prefix: 'com,facebook)/ajax/pagelet/generic.php/profiletimeline'
|
- url_prefix: 'com,facebook)/ajax/pagelet/generic.php/profiletimeline'
|
||||||
fuzzy_lookup: 'com,facebook\)/.*[?&](__adt=[^&]+).*[&]data=(?:.*?(?:[&]|(profile_id|pagelet_token)[^,]+))'
|
fuzzy_lookup: 'com,facebook\)/.*[?&](__adt=[^&]+).*[&]data=(?:.*?(?:[&]|(profile_id|pagelet_token)[^,]+))'
|
||||||
@ -175,7 +175,7 @@ rules:
|
|||||||
|
|
||||||
fuzzy_lookup:
|
fuzzy_lookup:
|
||||||
match: '("q[\d]+":|after:\\"[^"]+)'
|
match: '("q[\d]+":|after:\\"[^"]+)'
|
||||||
find_all: true
|
re_type: findall
|
||||||
|
|
||||||
- url_prefix: 'com,facebook)/pages_reaction_units/more'
|
- url_prefix: 'com,facebook)/pages_reaction_units/more'
|
||||||
|
|
||||||
@ -196,6 +196,9 @@ rules:
|
|||||||
group: 1
|
group: 1
|
||||||
function: 'pywb.rewrite.rewrite_dash:rewrite_fb_dash'
|
function: 'pywb.rewrite.rewrite_dash:rewrite_fb_dash'
|
||||||
|
|
||||||
|
- match: '"debugNoBatching\s?":(?:false|0)'
|
||||||
|
replace: '"debugNoBatching":true'
|
||||||
|
|
||||||
parse_comments: true
|
parse_comments: true
|
||||||
|
|
||||||
- url_prefix: 'com,facebook'
|
- url_prefix: 'com,facebook'
|
||||||
@ -227,6 +230,9 @@ rules:
|
|||||||
- match: '"is_dash_eligible":true'
|
- match: '"is_dash_eligible":true'
|
||||||
replace: '"is_dash_eligible":false'
|
replace: '"is_dash_eligible":false'
|
||||||
|
|
||||||
|
- match: '"debugNoBatching\s?":(?:false|0)'
|
||||||
|
replace: '"debugNoBatching":true'
|
||||||
|
|
||||||
fuzzy_lookup: '()'
|
fuzzy_lookup: '()'
|
||||||
|
|
||||||
|
|
||||||
@ -538,6 +544,12 @@ rules:
|
|||||||
rewrite:
|
rewrite:
|
||||||
js_rewrite_location: urls
|
js_rewrite_location: urls
|
||||||
|
|
||||||
|
- url_prefix: 'com,example)/matched'
|
||||||
|
fuzzy_lookup:
|
||||||
|
re_type: sub
|
||||||
|
match: 'matched'
|
||||||
|
replace: 'replaced'
|
||||||
|
|
||||||
# all domain rules -- fallback to this dataset
|
# all domain rules -- fallback to this dataset
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# Applies to all urls -- should be last
|
# Applies to all urls -- should be last
|
||||||
|
Binary file not shown.
Before Width: | Height: | Size: 20 KiB |
@ -57,14 +57,6 @@ function RenderCalendar(init) {
|
|||||||
};
|
};
|
||||||
// regex for extracting the filter constraints and filter mods to human explanation
|
// regex for extracting the filter constraints and filter mods to human explanation
|
||||||
this.filterRE = /filter([^a-z]+)([a-z]+):(.+)/i;
|
this.filterRE = /filter([^a-z]+)([a-z]+):(.+)/i;
|
||||||
this.filterMods = {
|
|
||||||
'=': 'Contains',
|
|
||||||
'==': 'Matches Exactly',
|
|
||||||
'=~': 'Matches Regex',
|
|
||||||
'=!': 'Does Not Contains',
|
|
||||||
'=!=': 'Is Not',
|
|
||||||
'=!~': 'Does Not Begins With'
|
|
||||||
};
|
|
||||||
this.text = init.text;
|
this.text = init.text;
|
||||||
this.versionString = null;
|
this.versionString = null;
|
||||||
}
|
}
|
||||||
@ -433,7 +425,6 @@ RenderCalendar.prototype.createContainers = function() {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// create the advanced results query info DOM structure
|
// create the advanced results query info DOM structure
|
||||||
var forString = ' for ';
|
|
||||||
var forElems;
|
var forElems;
|
||||||
|
|
||||||
if (this.queryInfo.searchParams.matchType) {
|
if (this.queryInfo.searchParams.matchType) {
|
||||||
@ -503,7 +494,7 @@ RenderCalendar.prototype.createContainers = function() {
|
|||||||
{
|
{
|
||||||
tag: 'p',
|
tag: 'p',
|
||||||
className: 'text-center mb-0 mt-1',
|
className: 'text-center mb-0 mt-1',
|
||||||
innerText: 'Filtering by'
|
innerText: filteringBy
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
tag: 'ul',
|
tag: 'ul',
|
||||||
@ -950,7 +941,7 @@ RenderCalendar.prototype.niceFilterDisplay = function() {
|
|||||||
filterList.push({
|
filterList.push({
|
||||||
tag: 'li',
|
tag: 'li',
|
||||||
className: 'list-group-item',
|
className: 'list-group-item',
|
||||||
innerText: match[2] + ' ' + this.filterMods[match[1]] + ' ' + match[3]
|
innerText: match[2] + ' ' + filterMods[match[1]] + ' "' + match[3] + '"'
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -965,11 +956,11 @@ RenderCalendar.prototype.niceDateRange = function() {
|
|||||||
var from = this.queryInfo.searchParams.from;
|
var from = this.queryInfo.searchParams.from;
|
||||||
var to = this.queryInfo.searchParams.to;
|
var to = this.queryInfo.searchParams.to;
|
||||||
if (from && to) {
|
if (from && to) {
|
||||||
return 'From ' + from + ' to ' + to;
|
return [text.from, from, text.until, to].join(' ');
|
||||||
} else if (from) {
|
} else if (from) {
|
||||||
return 'From ' + from + ' until ' + 'present';
|
return [text.from, from, text.until, text.present].join(' ');
|
||||||
}
|
}
|
||||||
return 'From earliest until ' + to;
|
return [text.from, text.earliest, text.until, to].join(' ');
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1,14 +1,6 @@
|
|||||||
var dtRE = /^\d{4,14}$/;
|
var dtRE = /^\d{4,14}$/;
|
||||||
var didSetWasValidated = false;
|
var didSetWasValidated = false;
|
||||||
var showBadDateTimeClass = 'show-optional-bad-input';
|
var showBadDateTimeClass = 'show-optional-bad-input';
|
||||||
var filterMods = {
|
|
||||||
'=': 'Contains',
|
|
||||||
'==': 'Matches Exactly',
|
|
||||||
'=~': 'Matches Regex',
|
|
||||||
'=!': 'Does Not Contains',
|
|
||||||
'=!=': 'Is Not',
|
|
||||||
'=!~': 'Does Not Begins With'
|
|
||||||
};
|
|
||||||
|
|
||||||
var elemIds = {
|
var elemIds = {
|
||||||
filtering: {
|
filtering: {
|
||||||
@ -22,17 +14,34 @@ var elemIds = {
|
|||||||
},
|
},
|
||||||
dateTime: {
|
dateTime: {
|
||||||
from: 'dt-from',
|
from: 'dt-from',
|
||||||
|
fromTime: 'ts-from',
|
||||||
fromBad: 'dt-from-bad',
|
fromBad: 'dt-from-bad',
|
||||||
to: 'dt-to',
|
to: 'dt-to',
|
||||||
|
toTime: 'ts-to',
|
||||||
toBad: 'dt-to-bad'
|
toBad: 'dt-to-bad'
|
||||||
},
|
},
|
||||||
match: 'match-type-select',
|
match: 'match-type-select',
|
||||||
url: 'search-url',
|
url: 'search-url',
|
||||||
form: 'search-form',
|
form: 'search-form',
|
||||||
resultsNewWindow: 'open-results-new-window',
|
resultsNewWindow: 'open-results-new-window',
|
||||||
advancedOptions: 'advanced-options'
|
advancedOptions: 'advanced-options',
|
||||||
|
resetSearchForm: 'reset-search-form',
|
||||||
};
|
};
|
||||||
|
|
||||||
|
function resetSearchForm(event) {
|
||||||
|
for (const field of [
|
||||||
|
elemIds.url,
|
||||||
|
elemIds.match,
|
||||||
|
elemIds.dateTime.from,
|
||||||
|
elemIds.dateTime.fromTime,
|
||||||
|
elemIds.dateTime.to,
|
||||||
|
elemIds.dateTime.toTime,
|
||||||
|
]) {
|
||||||
|
document.getElementById(field).value = '';
|
||||||
|
}
|
||||||
|
clearFilters(event);
|
||||||
|
}
|
||||||
|
|
||||||
function makeCheckDateRangeChecker(dtInputId, dtBadNotice) {
|
function makeCheckDateRangeChecker(dtInputId, dtBadNotice) {
|
||||||
var dtInput = document.getElementById(dtInputId);
|
var dtInput = document.getElementById(dtInputId);
|
||||||
dtInput.onblur = function() {
|
dtInput.onblur = function() {
|
||||||
@ -65,7 +74,7 @@ function makeCheckDateRangeChecker(dtInputId, dtBadNotice) {
|
|||||||
|
|
||||||
function createAndAddNoFilter(filterList) {
|
function createAndAddNoFilter(filterList) {
|
||||||
var nothing = document.createElement('li');
|
var nothing = document.createElement('li');
|
||||||
nothing.innerText = 'No Filter';
|
nothing.innerText = noFilter;
|
||||||
nothing.id = elemIds.filtering.nothing;
|
nothing.id = elemIds.filtering.nothing;
|
||||||
filterList.appendChild(nothing);
|
filterList.appendChild(nothing);
|
||||||
}
|
}
|
||||||
@ -78,19 +87,24 @@ function addFilter(event) {
|
|||||||
if (!expr) return;
|
if (!expr) return;
|
||||||
var filterExpr = 'filter' + modifier + by + ':' + expr;
|
var filterExpr = 'filter' + modifier + by + ':' + expr;
|
||||||
var filterList = document.getElementById(elemIds.filtering.list);
|
var filterList = document.getElementById(elemIds.filtering.list);
|
||||||
|
var previousFilters = filterList.children;
|
||||||
|
for (var i = 0; i < previousFilters.length; ++i) {
|
||||||
|
var filterData = previousFilters[i].dataset;
|
||||||
|
if (filterData && filterData.filter && filterData.filter == filterExpr) return;
|
||||||
|
}
|
||||||
var filterNothing = document.getElementById(elemIds.filtering.nothing);
|
var filterNothing = document.getElementById(elemIds.filtering.nothing);
|
||||||
if (filterNothing) {
|
if (filterNothing) {
|
||||||
filterList.removeChild(filterNothing);
|
filterList.removeChild(filterNothing);
|
||||||
}
|
}
|
||||||
var li = document.createElement('li');
|
var li = document.createElement('li');
|
||||||
li.innerText =
|
li.innerText =
|
||||||
'By ' +
|
|
||||||
by[0].toUpperCase() +
|
by[0].toUpperCase() +
|
||||||
by.substr(1) +
|
by.substr(1) +
|
||||||
' ' +
|
' ' +
|
||||||
filterMods[modifier] +
|
filterMods[modifier] +
|
||||||
' ' +
|
' "' +
|
||||||
expr;
|
expr +
|
||||||
|
'"';
|
||||||
li.dataset.filter = filterExpr;
|
li.dataset.filter = filterExpr;
|
||||||
var nukeButton = document.createElement('button');
|
var nukeButton = document.createElement('button');
|
||||||
nukeButton.type = 'button';
|
nukeButton.type = 'button';
|
||||||
@ -110,6 +124,7 @@ function addFilter(event) {
|
|||||||
};
|
};
|
||||||
li.appendChild(nukeButton);
|
li.appendChild(nukeButton);
|
||||||
filterList.appendChild(li);
|
filterList.appendChild(li);
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
function clearFilters(event) {
|
function clearFilters(event) {
|
||||||
@ -140,11 +155,13 @@ function performQuery(url) {
|
|||||||
}
|
}
|
||||||
var fromT = document.getElementById(elemIds.dateTime.from).value;
|
var fromT = document.getElementById(elemIds.dateTime.from).value;
|
||||||
if (fromT) {
|
if (fromT) {
|
||||||
query.push('from=' + fromT.trim());
|
fromT += document.getElementById(elemIds.dateTime.fromTime).value;
|
||||||
|
query.push('from=' + fromT.replace(/[^0-9]/g, ''));
|
||||||
}
|
}
|
||||||
var toT = document.getElementById(elemIds.dateTime.to).value;
|
var toT = document.getElementById(elemIds.dateTime.to).value;
|
||||||
if (toT) {
|
if (toT) {
|
||||||
query.push('to=' + toT.trim());
|
toT += document.getElementById(elemIds.dateTime.toTime).value;
|
||||||
|
query.push('to=' + toT.replace(/[^0-9]/g, ''));
|
||||||
}
|
}
|
||||||
var builtQuery = query.join('&');
|
var builtQuery = query.join('&');
|
||||||
if (document.getElementById(elemIds.resultsNewWindow).checked) {
|
if (document.getElementById(elemIds.resultsNewWindow).checked) {
|
||||||
@ -166,6 +183,17 @@ function validateFields(form) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function submitForm(event, form, searchURLInput) {
|
||||||
|
event.preventDefault();
|
||||||
|
event.stopPropagation();
|
||||||
|
var url = searchURLInput.value;
|
||||||
|
if (!url) {
|
||||||
|
validateFields(form);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
performQuery(url);
|
||||||
|
}
|
||||||
|
|
||||||
$(document).ready(function() {
|
$(document).ready(function() {
|
||||||
$('[data-toggle="tooltip"]').tooltip({
|
$('[data-toggle="tooltip"]').tooltip({
|
||||||
container: 'body',
|
container: 'body',
|
||||||
@ -179,21 +207,21 @@ $(document).ready(function() {
|
|||||||
elemIds.dateTime.to,
|
elemIds.dateTime.to,
|
||||||
document.getElementById(elemIds.dateTime.toBad)
|
document.getElementById(elemIds.dateTime.toBad)
|
||||||
);
|
);
|
||||||
|
document.getElementById(elemIds.resetSearchForm).onclick = resetSearchForm;
|
||||||
document.getElementById(elemIds.filtering.add).onclick = addFilter;
|
document.getElementById(elemIds.filtering.add).onclick = addFilter;
|
||||||
document.getElementById(elemIds.filtering.clear).onclick = clearFilters;
|
document.getElementById(elemIds.filtering.clear).onclick = clearFilters;
|
||||||
var searchURLInput = document.getElementById(elemIds.url);
|
var searchURLInput = document.getElementById(elemIds.url);
|
||||||
var form = document.getElementById(elemIds.form);
|
var form = document.getElementById(elemIds.form);
|
||||||
form.addEventListener('submit', function(event) {
|
form.addEventListener('submit', function(event) {
|
||||||
|
submitForm(event, form, searchURLInput);
|
||||||
|
});
|
||||||
|
var filteringExpression = document.getElementById(elemIds.filtering.expression);
|
||||||
|
filteringExpression.addEventListener("keypress", function(event) {
|
||||||
|
if (event.key === "Enter") {
|
||||||
event.preventDefault();
|
event.preventDefault();
|
||||||
event.stopPropagation();
|
if (! addFilter()) {
|
||||||
var url = searchURLInput.value;
|
submitForm(event, form, searchURLInput);
|
||||||
if (!url) {
|
|
||||||
validateFields(form);
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
performQuery(url);
|
|
||||||
});
|
|
||||||
document.getElementById(elemIds.advancedOptions).onclick = function() {
|
|
||||||
validateFields(form);
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
});
|
||||||
|
Binary file not shown.
Before Width: | Height: | Size: 2.5 KiB |
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
Wombat.js client-side rewriting engine for web archive replay
|
Wombat.js client-side rewriting engine for web archive replay
|
||||||
Copyright (C) 2014-2020 Webrecorder Software, Rhizome, and Contributors. Released under the GNU Affero General Public License.
|
Copyright (C) 2014-2024 Webrecorder Software, Rhizome, and Contributors. Released under the GNU Affero General Public License.
|
||||||
|
|
||||||
This file is part of wombat.js, see https://github.com/webrecorder/wombat.js for the full source
|
This file is part of wombat.js, see https://github.com/webrecorder/wombat.js for the full source
|
||||||
Wombat.js is part of the Webrecorder project (https://github.com/webrecorder)
|
Wombat.js is part of the Webrecorder project (https://github.com/webrecorder)
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
Wombat.js client-side rewriting engine for web archive replay
|
Wombat.js client-side rewriting engine for web archive replay
|
||||||
Copyright (C) 2014-2020 Webrecorder Software, Rhizome, and Contributors. Released under the GNU Affero General Public License.
|
Copyright (C) 2014-2024 Webrecorder Software, Rhizome, and Contributors. Released under the GNU Affero General Public License.
|
||||||
|
|
||||||
This file is part of wombat.js, see https://github.com/webrecorder/wombat.js for the full source
|
This file is part of wombat.js, see https://github.com/webrecorder/wombat.js for the full source
|
||||||
Wombat.js is part of the Webrecorder project (https://github.com/webrecorder)
|
Wombat.js is part of the Webrecorder project (https://github.com/webrecorder)
|
||||||
|
Binary file not shown.
Before Width: | Height: | Size: 1.6 KiB |
@ -3,7 +3,7 @@
|
|||||||
{% block body %}
|
{% block body %}
|
||||||
<div class="container text-danger error">
|
<div class="container text-danger error">
|
||||||
<div class="row justify-content-center">
|
<div class="row justify-content-center">
|
||||||
<h2 class="display-2">Pywb Error</h2>
|
<h2 class="display-2">{{ _('Pywb Error') }}</h2>
|
||||||
</div>
|
</div>
|
||||||
<div class="row">
|
<div class="row">
|
||||||
<div class="col-12 text-center">
|
<div class="col-12 text-center">
|
||||||
|
@ -25,8 +25,21 @@ html, body
|
|||||||
|
|
||||||
<div id="app" style="width: 100%; height: 200px"></div>
|
<div id="app" style="width: 100%; height: 200px"></div>
|
||||||
<script>
|
<script>
|
||||||
VueUI.main("{{ static_prefix }}", "{{ url }}", "{{ wb_prefix }}", "{{ timestamp }}", "{{ ui.logo }}", "{{ ui.navbar_background_hex | default('f8f9fa') }}", "{{ ui.navbar_color_hex | default('212529') }}", "{{ ui.navbar_light_buttons }}", "{{ env.pywb_lang | default('en') }}",
|
VueUI.main({
|
||||||
allLocales, i18nStrings);
|
staticPrefix: "{{ static_prefix }}",
|
||||||
|
url: "{{ url }}",
|
||||||
|
prefix: "{{ wb_prefix }}",
|
||||||
|
timestamp: "{{ timestamp }}",
|
||||||
|
logoUrl: "{{ ui.logo }}",
|
||||||
|
navbarBackground: "{{ ui.navbar_background_hex | default('f8f9fa') }}",
|
||||||
|
navbarColor: "{{ ui.navbar_color_hex | default('212529') }}",
|
||||||
|
navbarLightButtons: "{{ ui.navbar_light_buttons }}",
|
||||||
|
logoHomeUrl: "{{ ui.logo_home_url }}",
|
||||||
|
disablePrinting: "{{ ui.disable_printing }}",
|
||||||
|
allLocales: allLocales
|
||||||
|
},
|
||||||
|
"{{ env.pywb_lang | default('en') }}",
|
||||||
|
i18nStrings);
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<div id="wb_iframe_div">
|
<div id="wb_iframe_div">
|
||||||
|
216
pywb/templates/instructions.html
Normal file
216
pywb/templates/instructions.html
Normal file
@ -0,0 +1,216 @@
|
|||||||
|
<div class="modal fade" id="searchInstructions" tabindex="-1" role="dialog" aria-labelledby="searchInstructionsTitle" aria-hidden="true">
|
||||||
|
<div class="modal-dialog modal-lg" role="document">
|
||||||
|
<div class="modal-content">
|
||||||
|
<div class="modal-header">
|
||||||
|
<h6 class="modal-title text-muted" id="searchInstructionsTitle">{{ _("Search instructions") }}</h6>
|
||||||
|
<button type="button" class="close" data-dismiss="modal" aria-label="{{ _('Close') }}">
|
||||||
|
<span aria-hidden="true">×</span>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
<div class="modal-body">
|
||||||
|
<h5>{{ _("URL") }}</h5>
|
||||||
|
<table class="table table-hover table-condensed">
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<p>
|
||||||
|
{%trans%}A URL consists of several parts:{%endtrans%}
|
||||||
|
{%trans%}<code>protocol</code>://<code>host</code>:<code>port</code>/<code>path</code>?<code>query</code>{%endtrans%}
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
{%trans%}The <code>protocol://</code> prefix is ignored when searching as it's not part of the searchable data.{%endtrans%}
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
{%trans%}A leading <kbd>www.</kbd> in the <code>host</code> will also be ignored for the same reason.{%endtrans%}
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
{%trans%}The <code>host</code> contains one or more parts separated by periods (<kbd>.</kbd>).{%endtrans%}
|
||||||
|
{%trans%}The part before the first period is called the <code>hostname</code>.{%endtrans%}
|
||||||
|
{%trans%}The part after the last period is the <code>top level domain</code>.{%endtrans%}
|
||||||
|
{%trans%}Every part added to the left of the top level domain is a <code>sub-domain</code>.{%endtrans%}
|
||||||
|
{%trans%}I.e. <code>x.y.z</code> is a <code>sub-domain</code> of <code>y.z</code>{%endtrans%}
|
||||||
|
{%trans%}which in turn is a <code>sub-domain</code> of the <code>top level domain</code> <code>z</code>{%endtrans%}
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
{%trans%}See <em>Match Type</em> below for interpretations of the search string.{%endtrans%}
|
||||||
|
</p>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
<h5>{{ _("Results Display") }}</h5>
|
||||||
|
<table class="table table-hover table-condensed">
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<p>
|
||||||
|
{%trans%}For the <em>Default</em> search mode, the results are shown in a calendar view unless a filter is also added.{%endtrans%}
|
||||||
|
{%trans%}For all other cases the results will be displayed in a list.{%endtrans%}
|
||||||
|
</p>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
<h5>{{ _("Search Options") }}</h5>
|
||||||
|
<h6>{{ _("Match Type") }}</h6>
|
||||||
|
<p> {{ _("There are four different search modes:") }}</p>
|
||||||
|
|
||||||
|
<table class="table table-hover table-condensed">
|
||||||
|
<tr>
|
||||||
|
<td><em>{{ _("Default") }}</em></td>
|
||||||
|
<td>
|
||||||
|
<p>
|
||||||
|
{%trans%}In the default mode the exact URL (minus the ignored prefixes mentioned above) is searched for.{%endtrans%}
|
||||||
|
{%trans%}If one leading or trailing wildcard asterisk (<kbd>*</kbd>) is added, see <em>Prefix</em> and <em>Domain</em> below.{%endtrans%}
|
||||||
|
</p>
|
||||||
|
<p class="text-muted">
|
||||||
|
{%trans%}Any other asterisks will be considered literal parts of the search string.{%endtrans%}
|
||||||
|
{%trans%}Hence, adding both a leading and a trailing wildcard asterisk is not possible.{%endtrans%}
|
||||||
|
</p>
|
||||||
|
|
||||||
|
{%trans%}Example:{%endtrans%}
|
||||||
|
<p class="ml-5 text-lowercase">
|
||||||
|
<em>{{ _("URL") }}: <strong>https://http.cat/206</strong></em> & <em>{{ _("Match Type") }}: <strong>{{ _("Default") }}</strong></em>
|
||||||
|
<span class="float-right">
|
||||||
|
<button onclick="fillForm('search-url=https://http.cat/206&match-type-select=');" class="btn btn-outline-info" role="button" aria-label="{{ _('Fill') }}">{{ _('Fill') }}</button>
|
||||||
|
<button onclick="fillForm('search-url=https://http.cat/206&match-type-select=', true);" class="btn btn-outline-primary" role="button" aria-label="{{ _('Search') }}">{{ _('Search') }}</button>
|
||||||
|
</span>
|
||||||
|
</p>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td><em>{{ _("Prefix") }}</em></td>
|
||||||
|
<td>
|
||||||
|
<p>
|
||||||
|
{%trans%}This will return all URLs that begin with the given string.{%endtrans%}
|
||||||
|
{%trans%}It returns the same results as <em>Default</em> with a trailing wildcard asterisk.{%endtrans%}
|
||||||
|
</p>
|
||||||
|
|
||||||
|
{%trans%}Examples:{%endtrans%}
|
||||||
|
<p class="ml-5 text-lowercase">
|
||||||
|
<em>{{ _("URL") }}: <strong>https://http.cat/2</strong></em> & <em>{{ _("Match Type") }}: <strong>{{ _("Prefix") }}</strong></em>
|
||||||
|
<span class="float-right">
|
||||||
|
<button onclick="fillForm('search-url=https://http.cat/2&match-type-select=prefix');" class="btn btn-outline-info" role="button" aria-label="{{ _('Fill') }}">{{ _('Fill') }}</button>
|
||||||
|
<button onclick="fillForm('search-url=https://http.cat/2&match-type-select=prefix', true);" class="btn btn-outline-primary" role="button" aria-label="{{ _('Search') }}">{{ _('Search') }}</button>
|
||||||
|
</span>
|
||||||
|
</p>
|
||||||
|
<p class="ml-5 text-lowercase">
|
||||||
|
<em>{{ _("URL") }}: <strong>https://http.cat/2*</strong></em> & <em>{{ _("Match Type") }}: <strong>{{ _("Default") }}</strong></em>
|
||||||
|
<span class="float-right">
|
||||||
|
<button onclick="fillForm('search-url=https://http.cat/2*&match-type-select=');" class="btn btn-outline-info" role="button" aria-label="{{ _('Fill') }}">{{ _('Fill') }}</button>
|
||||||
|
<button onclick="fillForm('search-url=https://http.cat/2*&match-type-select=', true);" class="btn btn-outline-primary" role="button" aria-label="{{ _('Search') }}">{{ _('Search') }}</button>
|
||||||
|
</span>
|
||||||
|
</p>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td><em>{{ _("Host") }}</em></td>
|
||||||
|
<td>
|
||||||
|
<p>
|
||||||
|
{%trans%}This will ignore any path and query parts of the URL and return all URLs with the specified <code>host</code> part.{%endtrans%}
|
||||||
|
</p>
|
||||||
|
|
||||||
|
{%trans%}Example:{%endtrans%}
|
||||||
|
<p class="ml-5 text-lowercase">
|
||||||
|
<em>{{ _("URL") }}: <strong>https://http.cat/</strong></em> & <em>{{ _("Match Type") }}: <strong>{{ _("Host") }}</strong></em>
|
||||||
|
<span class="float-right">
|
||||||
|
<button onclick="fillForm('search-url=https://http.cat/&match-type-select=host');" class="btn btn-outline-info" role="button" aria-label="{{ _('Fill') }}">{{ _('Fill') }}</button>
|
||||||
|
<button onclick="fillForm('search-url=https://http.cat/&match-type-select=host', true);" class="btn btn-outline-primary" role="button" aria-label="{{ _('Search') }}">{{ _('Search') }}</button>
|
||||||
|
</span>
|
||||||
|
</p>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td><em>{{ _("Domain") }}</em></td>
|
||||||
|
<td>
|
||||||
|
<p>
|
||||||
|
{%trans%}This is similar to the previous but doesn't require the whole <code>host</code>.{%endtrans%}
|
||||||
|
{%trans%}It returns the same results as <em>Default</em> with a leading wildcard asterisk and a period (i.e. <kbd>*.</kbd>).{%endtrans%}
|
||||||
|
{%trans%}The leading wildcard matches zero or more <code>sub-domains</code> as well as zero or one <code>hostname</code>.{%endtrans%}
|
||||||
|
</p>
|
||||||
|
|
||||||
|
{%trans%}Examples:{%endtrans%}
|
||||||
|
<p class="ml-5 text-lowercase">
|
||||||
|
<em>{{ _("URL") }}: <strong>cat/</strong></em> & <em>{{ _("Match Type") }}: <strong>{{ _("Domain") }}</strong></em>
|
||||||
|
<span class="float-right">
|
||||||
|
<button onclick="fillForm('search-url=cat/&match-type-select=domain');" class="btn btn-outline-info" role="button" aria-label="{{ _('Fill') }}">{{ _('Fill') }}</button>
|
||||||
|
<button onclick="fillForm('search-url=cat/&match-type-select=domain', true);" class="btn btn-outline-primary" role="button" aria-label="{{ _('Search') }}">{{ _('Search') }}</button>
|
||||||
|
</span>
|
||||||
|
</p>
|
||||||
|
<p class="ml-5 text-lowercase">
|
||||||
|
<em>{{ _("URL") }}: <strong>*.cat/</strong></em> & <em>{{ _("Match Type") }}: <strong>{{ _("Default") }}</strong></em>
|
||||||
|
<span class="float-right">
|
||||||
|
<button onclick="fillForm('search-url=*.cat/&match-type-select=');" class="btn btn-outline-info" role="button" aria-label="{{ _('Fill') }}">{{ _('Fill') }}</button>
|
||||||
|
<button onclick="fillForm('search-url=*.cat/&match-type-select=', true);" class="btn btn-outline-primary" role="button" aria-label="{{ _('Search') }}">{{ _('Search') }}</button>
|
||||||
|
</span>
|
||||||
|
</p>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
<h6>{{ _("Date/Time Range") }}</h6>
|
||||||
|
<table class="table table-hover table-condensed">
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<p>
|
||||||
|
{%trans%}One may specify a start and/or an end timestamp to further restrict the search - both are inclusive.{%endtrans%}
|
||||||
|
{%trans%}The timestamps consist of a date and an optional time of day.{%endtrans%}
|
||||||
|
{%trans%}The layout of these input fields depends on which browser is used.{%endtrans%}
|
||||||
|
</p>
|
||||||
|
|
||||||
|
{%trans%}Example:{%endtrans%}
|
||||||
|
<p class="ml-5 text-lowercase">
|
||||||
|
<em>{{ _("URL") }}: <strong>https://http.cat/2</strong></em> & <em>{{ _("Match Type") }}: <strong>{{ _("Prefix") }}</strong></em> & <em>{{ _("From") }}: <strong>2022-02-02 09:00</strong></em>
|
||||||
|
<span class="float-right">
|
||||||
|
<button onclick="fillForm('search-url=https://http.cat/2&match-type-select=prefix&dt-from=2022-02-02&ts-from=09:00');" class="btn btn-outline-info" role="button" aria-label="{{ _('Fill') }}">{{ _('Fill') }}</button>
|
||||||
|
<button onclick="fillForm('search-url=https://http.cat/2&match-type-select=prefix&dt-from=2022-02-02&ts-from=09:00', true);" class="btn btn-outline-primary" role="button" aria-label="{{ _('Search') }}">{{ _('Search') }}</button>
|
||||||
|
</span>
|
||||||
|
</p>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
<h6>{{ _("Filtering") }}</h6>
|
||||||
|
<table class="table table-hover table-condensed">
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<p>
|
||||||
|
{%trans%}Finally one may add extra filters for Mime Type, Status and URL.{%endtrans%}
|
||||||
|
{%trans%}For each filter one needs to specify one of the three attributes, one of a set of relations and a string.{%endtrans%}
|
||||||
|
{%trans%}If more than one filter is added, they will all be applied to the list of results.{%endtrans%}
|
||||||
|
</p>
|
||||||
|
<p class="text-muted">{%trans%}Remember to actually add the filter before submitting the search.{%endtrans%}</p>
|
||||||
|
|
||||||
|
{%trans%}Example:{%endtrans%}
|
||||||
|
<p class="ml-5 text-lowercase">
|
||||||
|
<em>{{ _("URL") }}: <strong>https://http.cat/2</strong></em> & <em>{{ _("Match Type") }}: <strong>{{ _("Prefix") }}</strong></em> & <em>{{ _("Filtering") }}: <strong>{{ _("HTTP Status") }} {{ _("Is Not") }} "301"</strong></em>
|
||||||
|
<span class="float-right">
|
||||||
|
<button onclick="fillForm('search-url=https://http.cat/2&match-type-select=prefix&filter-by=status&filter-modifier==!=&filter-expression=301');" class="btn btn-outline-info" role="button" aria-label="{{ _('Fill') }}">{{ _('Fill') }}</button>
|
||||||
|
<button onclick="fillForm('search-url=https://http.cat/2&match-type-select=prefix&filter-by=status&filter-modifier==!=&filter-expression=301', true);" class="btn btn-outline-primary" role="button" aria-label="{{ _('Search') }}">{{ _('Search') }}</button>
|
||||||
|
</span>
|
||||||
|
</p>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
function fillForm(query, search = false) {
|
||||||
|
$('#searchInstructions').modal('hide');
|
||||||
|
$('#advancedOptions').collapse('show');
|
||||||
|
for (const item of query.split('&')) {
|
||||||
|
var pair = item.split('=');
|
||||||
|
var field = document.getElementById(pair[0]);
|
||||||
|
if (field) field.value = pair.slice(1).join('=');
|
||||||
|
if (pair[0] == "filter-expression") addFilter(event);
|
||||||
|
}
|
||||||
|
if (search) $('#search-button').click();
|
||||||
|
}
|
||||||
|
</script>
|
@ -69,8 +69,22 @@
|
|||||||
'host': "{{ _('host') }}",
|
'host': "{{ _('host') }}",
|
||||||
'domain': "{{ _('domain') }}",
|
'domain': "{{ _('domain') }}",
|
||||||
},
|
},
|
||||||
|
from: "{{ _('From') }}",
|
||||||
|
until: "{{ _('until') }}",
|
||||||
|
present: "{{ _('present') }}",
|
||||||
|
earliest: "{{ _('earliest') }}",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
var filterMods = {
|
||||||
|
'=': "{{ _('Contains') }}",
|
||||||
|
'==': "{{ _('Matches Exactly') }}",
|
||||||
|
'=~': "{{ _('Matches Regex') }}",
|
||||||
|
'=!': "{{ _('Does Not Contain') }}",
|
||||||
|
'=!=': "{{ _('Is Not') }}",
|
||||||
|
'=!~': "{{ _('Does Not Begin With') }}"
|
||||||
|
};
|
||||||
|
var filteringBy = "{{ _('Filtering by') }}";
|
||||||
|
var forString = " {{ _('for') }} ";
|
||||||
var renderCal = new RenderCalendar({ prefix: "{{ prefix }}", staticPrefix: "{{ static_prefix }}", text: text });
|
var renderCal = new RenderCalendar({ prefix: "{{ prefix }}", staticPrefix: "{{ static_prefix }}", text: text });
|
||||||
renderCal.init();
|
renderCal.init();
|
||||||
</script>
|
</script>
|
||||||
@ -80,8 +94,21 @@
|
|||||||
<div id="app" style="width: 100%; height: 100%"></div>
|
<div id="app" style="width: 100%; height: 100%"></div>
|
||||||
|
|
||||||
<script>
|
<script>
|
||||||
VueUI.main("{{ static_prefix }}", "{{ url }}", "{{ prefix }}", undefined, "{{ ui.logo }}", "{{ ui.navbar_background_hex | default('f8f9fa') }}", "{{ ui.navbar_color_hex | default('212529') }}", "{{ ui.navbar_light_buttons }}", "{{ env.pywb_lang | default('en') }}",
|
VueUI.main({
|
||||||
allLocales, i18nStrings);
|
staticPrefix: "{{ static_prefix }}",
|
||||||
|
url: "{{ url }}",
|
||||||
|
prefix: "{{ prefix }}",
|
||||||
|
timestamp: undefined,
|
||||||
|
logoUrl: "{{ ui.logo }}",
|
||||||
|
navbarBackground: "{{ ui.navbar_background_hex | default('f8f9fa') }}",
|
||||||
|
navbarColor: "{{ ui.navbar_color_hex | default('212529') }}",
|
||||||
|
navbarLightButtons: "{{ ui.navbar_light_buttons }}",
|
||||||
|
logoHomeUrl: "{{ ui.logo_home_url }}",
|
||||||
|
disablePrinting: "{{ ui.disable_printing }}",
|
||||||
|
allLocales: allLocales
|
||||||
|
},
|
||||||
|
"{{ env.pywb_lang | default('en') }}",
|
||||||
|
i18nStrings);
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
@ -3,6 +3,16 @@
|
|||||||
{% block head %}
|
{% block head %}
|
||||||
{{ super() }}
|
{{ super() }}
|
||||||
<script>
|
<script>
|
||||||
|
var filterMods = {
|
||||||
|
'=': "{{ _('Contains') }}",
|
||||||
|
'==': "{{ _('Matches Exactly') }}",
|
||||||
|
'=~': "{{ _('Matches Regex') }}",
|
||||||
|
'=!': "{{ _('Does Not Contain') }}",
|
||||||
|
'=!=': "{{ _('Is Not') }}",
|
||||||
|
'=!~': "{{ _('Does Not Begin With') }}"
|
||||||
|
};
|
||||||
|
var noFilter = "{{ _('No Filter') }}";
|
||||||
|
|
||||||
// TODO: cleanup
|
// TODO: cleanup
|
||||||
window.wb_prefix = "{{ wb_prefix }}";
|
window.wb_prefix = "{{ wb_prefix }}";
|
||||||
</script>
|
</script>
|
||||||
@ -21,17 +31,22 @@ window.wb_prefix = "{{ wb_prefix }}";
|
|||||||
<form class="needs-validation" id="search-form" novalidate>
|
<form class="needs-validation" id="search-form" novalidate>
|
||||||
<div class="form-row">
|
<div class="form-row">
|
||||||
<div class="col-12">
|
<div class="col-12">
|
||||||
<label for="search-url" class="lead" aria-label="Search For Col">
|
<label for="search-url" class="lead" aria-label="{{ _('Search Collection') }}">
|
||||||
{% set coll_title = metadata.title if metadata and metadata.title else coll %}
|
{% set coll_title = metadata.title if metadata and metadata.title else coll %}
|
||||||
{% autoescape false %}
|
{% autoescape false %}
|
||||||
{% trans %}Search the {{ coll_title }} collection by url:{% endtrans %}
|
{% trans %}Search the {{ coll_title }} collection by url:{% endtrans %}
|
||||||
{% endautoescape %}
|
{% endautoescape %}
|
||||||
</label>
|
</label>
|
||||||
<input aria-label="url" aria-required="true" class="form-control form-control-lg" id="search-url"
|
<a tabindex="0" class="btn btn-sm float-right btn-light" role="button" data-toggle="modal" data-target="#searchInstructions">{{ _('Help') }}</a>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="form-row">
|
||||||
|
<div class="col-12">
|
||||||
|
<input aria-label="{{ _('URL') }}" aria-required="true" class="form-control form-control-lg" id="search-url"
|
||||||
name="search" placeholder="{{ _('Enter a URL to search for') }}"
|
name="search" placeholder="{{ _('Enter a URL to search for') }}"
|
||||||
title="{{ _('Enter a URL to search for') }}" type="search" required/>
|
title="{{ _('Enter a URL to search for') }}" type="search" required autofocus />
|
||||||
<div class="invalid-feedback">
|
<div class="invalid-feedback">
|
||||||
{% trans %}'Please enter a URL{% endtrans %}
|
{% trans %}Please enter a URL{% endtrans %}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@ -43,23 +58,26 @@ window.wb_prefix = "{{ wb_prefix }}";
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="col-7">
|
<div class="col-7">
|
||||||
<button type="submit" class="btn btn-outline-primary float-right" role="button" aria-label="Search">
|
<button type="submit" id="search-button" class="btn btn-primary float-right" role="button" aria-label="{{ _('Search') }}">
|
||||||
{% trans %}Search{% endtrans %}
|
{% trans %}Search{% endtrans %}
|
||||||
</button>
|
</button>
|
||||||
<button class="btn btn-outline-info float-right mr-3" type="button" role="button"
|
<button class="btn btn-outline-secondary float-right mr-3" type="button" role="button"
|
||||||
data-toggle="collapse" data-target="#advancedOptions" id="advanced-options"
|
data-toggle="collapse" data-target="#advancedOptions" id="advanced-options"
|
||||||
aria-expanded="false" aria-controls="advancedOptions" aria-label="Advanced Search Options">
|
aria-expanded="false" aria-controls="advancedOptions" aria-label="{{ _('Search Options') }}">
|
||||||
{{ _('Advanced Search Options') }}
|
{{ _('Search Options') }}
|
||||||
|
</button>
|
||||||
|
<button id="reset-search-form" class="btn btn-outline-danger float-right mr-3" type="button" role="button" aria-label="{{ _('Reset Options') }}">
|
||||||
|
{{ _('Reset') }}
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="collapse mt-3" id="advancedOptions">
|
<div class="collapse mt-3" id="advancedOptions">
|
||||||
<div class="form-group form-row">
|
<div class="form-group form-row">
|
||||||
<label for="match-type-select" class="col-sm-2 col-form-label" aria-label="Match Type">
|
<label for="match-type-select" class="col-sm-2 col-form-label" aria-label="{{ _('Match Type') }}">
|
||||||
{{ _('Match Type:') }}
|
{{ _('Match Type:') }}
|
||||||
</label>
|
</label>
|
||||||
<select id="match-type-select" class="form-control form-control col-sm-6">
|
<select id="match-type-select" class="form-control form-control col-sm-6">
|
||||||
<option value=""></option>
|
<option value="">{% trans %}Default{% endtrans %}</option>
|
||||||
<option value="prefix">{% trans %}Prefix{% endtrans %}</option>
|
<option value="prefix">{% trans %}Prefix{% endtrans %}</option>
|
||||||
<option value="host">{% trans %}Host{% endtrans %}</option>
|
<option value="host">{% trans %}Host{% endtrans %}</option>
|
||||||
<option value="domain">{% trans %}Domain{% endtrans %}</option>
|
<option value="domain">{% trans %}Domain{% endtrans %}</option>
|
||||||
@ -67,57 +85,43 @@ window.wb_prefix = "{{ wb_prefix }}";
|
|||||||
</div>
|
</div>
|
||||||
<p style="cursor: help;">
|
<p style="cursor: help;">
|
||||||
<span data-toggle="tooltip" data-placement="right"
|
<span data-toggle="tooltip" data-placement="right"
|
||||||
title="Restricts the results to the given date/time range (inclusive)">
|
title="{{ _('Restricts the results to the given date/time range (inclusive)') }}">
|
||||||
{{ _('Date/Time Range') }}
|
{{ _('Date/Time Range') }}
|
||||||
</span>
|
</span>
|
||||||
</p>
|
</p>
|
||||||
<div class="form-row">
|
<div class="form-row">
|
||||||
<div class="col-6">
|
<div class="col-6">
|
||||||
<label class="sr-only" for="dt-from" aria-label="Date/Time Range From">{% trans %}From:{% endtrans %}</label>
|
<label class="sr-only" for="dt-from" aria-label="{{ _('Date/Time Range From') }}">{% trans %}From:{% endtrans %}</label>
|
||||||
<div class="input-group">
|
<div class="input-group">
|
||||||
<div class="input-group-prepend">
|
<div class="input-group-prepend">
|
||||||
<div class="input-group-text">{% trans %}From:{% endtrans %}</div>
|
<div class="input-group-text">{% trans %}From:{% endtrans %}</div>
|
||||||
</div>
|
</div>
|
||||||
<input id="dt-from" type="number" name="date-range-from" class="form-control"
|
<input id="dt-from" type="date" placeholder="yyyy-mm-dd" name="date-range-from" class="form-control">
|
||||||
pattern="^\d{4,14}$">
|
<input id="ts-from" type="time" placeholder="hh:mm:ss" name="date-range-from-ts" class="form-control">
|
||||||
<div class="invalid-feedback" id="dt-from-bad">
|
|
||||||
{% trans %}Please enter a valid <b>From</b> timestamp. Timestamps may be 4 <= ts <=14 digits{% endtrans %}
|
|
||||||
</div>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="col-6">
|
<div class="col-6">
|
||||||
<label class="sr-only" for="dt-to" aria-label="Date/Time Range To">{% trans %}To:{% endtrans %}</label>
|
<label class="sr-only" for="dt-to" aria-label="{{ _('Date/Time Range To') }}">{% trans %}To:{% endtrans %}</label>
|
||||||
<div class="input-group">
|
<div class="input-group">
|
||||||
<div class="input-group-prepend">
|
<div class="input-group-prepend">
|
||||||
<div class="input-group-text">{% trans %}To:{% endtrans %}</div>
|
<div class="input-group-text">{% trans %}To:{% endtrans %}</div>
|
||||||
</div>
|
</div>
|
||||||
<input id="dt-to" type="number" name="date-range-to" class="form-control" pattern="^\d{4,14}$">
|
<input id="dt-to" type="date" placeholder="yyyy-mm-dd" name="date-range-to" class="form-control">
|
||||||
<div class="invalid-feedback" id="dt-to-bad">
|
<input id="ts-to" type="time" placeholder="hh:mm:ss" name="date-range-to-ts" class="form-control">
|
||||||
{% trans %}Please enter a valid <b>To</b> timestamp. Timestamps may be 4 <= ts <=14 digits{% endtrans %}
|
|
||||||
</div>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="form-group mt-3">
|
<div class="form-group mt-3">
|
||||||
<div class="form-row">
|
<div class="form-row">
|
||||||
<div class="col-6">
|
<div class="col-12">
|
||||||
<p>{% trans %}Filtering{% endtrans %}</p>
|
<p>{% trans %}Filtering{% endtrans %}</p>
|
||||||
</div>
|
</div>
|
||||||
<div class="col-6">
|
|
||||||
<button id="clear-filters" class="btn btn-outline-warning float-right" type="button">
|
|
||||||
{% trans %}Clear Filters{% endtrans %}
|
|
||||||
</button>
|
|
||||||
<button id="add-filter" class="btn btn-outline-secondary float-right mr-2" type="button">
|
|
||||||
{% trans %}Add Filter{% endtrans %}
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
</div>
|
</div>
|
||||||
<div class="form-row">
|
<div class="form-row">
|
||||||
<div class="col-6">
|
<div class="col-6">
|
||||||
<div class="row pb-1">
|
<div class="row pb-1">
|
||||||
<label for="filter-by" class="col-form-label col-3">{% trans %}By:{% endtrans %}</label>
|
<label for="filter-by" class="col-form-label col-3">{% trans %}By:{% endtrans %}</label>
|
||||||
<select id="filter-by" class="form-control col-7">
|
<select id="filter-by" class="form-control col-7">
|
||||||
<option value="" selected></option>
|
|
||||||
<option value="mime">{% trans %}Mime Type{% endtrans %}</option>
|
<option value="mime">{% trans %}Mime Type{% endtrans %}</option>
|
||||||
<option value="status">{% trans %}Status{% endtrans %}</option>
|
<option value="status">{% trans %}Status{% endtrans %}</option>
|
||||||
<option value="url">{% trans %}URL{% endtrans %}</option>
|
<option value="url">{% trans %}URL{% endtrans %}</option>
|
||||||
@ -131,20 +135,27 @@ window.wb_prefix = "{{ wb_prefix }}";
|
|||||||
<option value="=~">{% trans %}Matches Regex{% endtrans %}</option>
|
<option value="=~">{% trans %}Matches Regex{% endtrans %}</option>
|
||||||
<option value="=!">{% trans %}Does Not Contain{% endtrans %}</option>
|
<option value="=!">{% trans %}Does Not Contain{% endtrans %}</option>
|
||||||
<option value="=!=">{% trans %}Is Not{% endtrans %}</option>
|
<option value="=!=">{% trans %}Is Not{% endtrans %}</option>
|
||||||
<option value="=!~">{% trans %}Does Not Begins With{% endtrans %}</option>
|
<option value="=!~">{% trans %}Does Not Begin With{% endtrans %}</option>
|
||||||
</select>
|
</select>
|
||||||
</div>
|
</div>
|
||||||
<div class="row">
|
<div class="row pb-1">
|
||||||
<label for="filter-expression" class="col-form-label col-3">{% trans %}Expr:{% endtrans %}</label>
|
<label for="filter-expression" class="col-form-label col-3">{% trans %}Expr:{% endtrans %}</label>
|
||||||
<input type="text" id="filter-expression" class="form-control col-7"
|
<input type="text" id="filter-expression" class="form-control col-7"
|
||||||
placeholder="{% trans %}Enter an expression to filter by{% endtrans %}"
|
placeholder="{% trans %}Enter an expression to filter by{% endtrans %}"
|
||||||
>
|
>
|
||||||
</div>
|
</div>
|
||||||
|
<button id="add-filter" class="btn btn-outline-secondary mt-2" type="button">
|
||||||
|
{% trans %}Add Filter{% endtrans %}
|
||||||
|
</button>
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
<div class="col-6">
|
<div class="col-6">
|
||||||
<ul id="filter-list" class="filter-list">
|
<ul id="filter-list" class="filter-list">
|
||||||
<li id="filtering-nothing">{% trans %}No Filter{% endtrans %}</li>
|
<li id="filtering-nothing">{% trans %}No Filter{% endtrans %}</li>
|
||||||
</ul>
|
</ul>
|
||||||
|
<button id="clear-filters" class="btn btn-outline-danger float-right mr-2" type="button">
|
||||||
|
{% trans %}Clear Filters{% endtrans %}
|
||||||
|
</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@ -182,4 +193,5 @@ window.wb_prefix = "{{ wb_prefix }}";
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
{% include "instructions.html" %}
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
@ -43,10 +43,14 @@
|
|||||||
sat_long: "{{ _Q('Saturday') }}",
|
sat_long: "{{ _Q('Saturday') }}",
|
||||||
sun_long: "{{ _Q('Sunday') }}",
|
sun_long: "{{ _Q('Sunday') }}",
|
||||||
"All-time": "{{ _Q('All-time') }}",
|
"All-time": "{{ _Q('All-time') }}",
|
||||||
"show timeline":"{{ _Q('show timeline') }}",
|
"Show timeline":"{{ _Q('Show timeline') }}",
|
||||||
"hide timeline":"{{ _Q('hide timeline') }}",
|
"Hide timeline":"{{ _Q('Hide timeline') }}",
|
||||||
"show calendar":"{{ _Q('show calendar') }}",
|
"Show calendar":"{{ _Q('Show calendar') }}",
|
||||||
"hide calendar":"{{ _Q('hide calendar') }}",
|
"Hide calendar":"{{ _Q('Hide calendar') }}",
|
||||||
|
"Previous capture":"{{ _Q('Previous capture') }}",
|
||||||
|
"Next capture":"{{ _Q('Next capture') }}",
|
||||||
|
"Print":"{{ _Q('Print') }}",
|
||||||
|
"Select language":"{{ _Q('Select language') }}",
|
||||||
"View capture on {date}":"{{ _Q('View capture on {date}') }}",
|
"View capture on {date}":"{{ _Q('View capture on {date}') }}",
|
||||||
"{count} capture":"{{ _Q('{count} capture') }}",
|
"{count} capture":"{{ _Q('{count} capture') }}",
|
||||||
"{count} captures":"{{ _Q('{count} captures') }}",
|
"{count} captures":"{{ _Q('{count} captures') }}",
|
||||||
@ -58,6 +62,7 @@
|
|||||||
"capture": "{{ _Q('capture') }}",
|
"capture": "{{ _Q('capture') }}",
|
||||||
"captures": "{{ _Q('captures') }}",
|
"captures": "{{ _Q('captures') }}",
|
||||||
"from {hour1} to {hour2}": "{{ _Q('from {hour1} to {hour2}') }}",
|
"from {hour1} to {hour2}": "{{ _Q('from {hour1} to {hour2}') }}",
|
||||||
"no captures": "{{ _Q('no captures') }}"
|
"no captures": "{{ _Q('no captures') }}",
|
||||||
|
"Archived Page: ": "{{ _Q('Archived Page: ') }}"
|
||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
|
@ -150,7 +150,7 @@ def iter_exact(reader, key, token=b' '):
|
|||||||
"""
|
"""
|
||||||
Create an iterator which iterates over lines where the first field matches
|
Create an iterator which iterates over lines where the first field matches
|
||||||
the 'key', equivalent to token + sep prefix.
|
the 'key', equivalent to token + sep prefix.
|
||||||
Default field termin_ator/seperator is ' '
|
Default field termin_ator/separator is ' '
|
||||||
"""
|
"""
|
||||||
|
|
||||||
return iter_prefix(reader, key + token)
|
return iter_prefix(reader, key + token)
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
__version__ = '2.7.0'
|
__version__ = '2.8.3'
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print(__version__)
|
print(__version__)
|
||||||
|
@ -4,9 +4,12 @@
|
|||||||
<nav
|
<nav
|
||||||
class="navbar navbar-light navbar-expand-lg fixed-top top-navbar justify-content-center"
|
class="navbar navbar-light navbar-expand-lg fixed-top top-navbar justify-content-center"
|
||||||
:style="navbarStyle">
|
:style="navbarStyle">
|
||||||
<a class="navbar-brand flex-grow-1 my-1" href="/">
|
<a class="navbar-brand flex-grow-1 my-1" :href="config.logoHomeUrl" v-if="config.logoHomeUrl">
|
||||||
<img :src="config.logoImg" id="logo-img" alt="_('pywb logo')">
|
<img :src="config.logoImg" id="logo-img" alt="_('pywb logo')">
|
||||||
</a>
|
</a>
|
||||||
|
<div class="navbar-brand flex-grow-1 my-1" v-else>
|
||||||
|
<img :src="config.logoImg" id="logo-img" alt="_('pywb logo')">
|
||||||
|
</div>
|
||||||
<div class="flex-grow-1 d-flex" id="searchdiv">
|
<div class="flex-grow-1 d-flex" id="searchdiv">
|
||||||
<form
|
<form
|
||||||
class="form-inline my-2 my-md-0 mx-lg-auto"
|
class="form-inline my-2 my-md-0 mx-lg-auto"
|
||||||
@ -69,11 +72,22 @@
|
|||||||
class="btn btn-sm"
|
class="btn btn-sm"
|
||||||
:class="{active: showTimelineView, 'btn-outline-light': lightButtons, 'btn-outline-dark': !lightButtons}"
|
:class="{active: showTimelineView, 'btn-outline-light': lightButtons, 'btn-outline-dark': !lightButtons}"
|
||||||
:aria-pressed="showTimelineView"
|
:aria-pressed="showTimelineView"
|
||||||
@click="showTimelineView = !showTimelineView"
|
@click="toggleTimelineView"
|
||||||
:title="(showTimelineView ? _('Hide timeline') : _('Show timeline'))">
|
:title="(showTimelineView ? _('Hide timeline') : _('Show timeline'))">
|
||||||
<i class="far fa-chart-bar"></i>
|
<i class="far fa-chart-bar"></i>
|
||||||
</button>
|
</button>
|
||||||
</li>
|
</li>
|
||||||
|
<li class="nav-item">
|
||||||
|
<button
|
||||||
|
class="btn btn-sm"
|
||||||
|
:class="{'btn-outline-light': lightButtons, 'btn-outline-dark': !lightButtons}"
|
||||||
|
:aria-pressed="printReplayFrame"
|
||||||
|
@click="printReplayFrame"
|
||||||
|
v-if="printingEnabled && hasReplayFrame()"
|
||||||
|
:title="_('Print')">
|
||||||
|
<i class="fas fa-print"></i>
|
||||||
|
</button>
|
||||||
|
</li>
|
||||||
<li class="nav-item dropdown" v-if="localesAreSet">
|
<li class="nav-item dropdown" v-if="localesAreSet">
|
||||||
<button
|
<button
|
||||||
class="btn btn-sm dropdown-toggle"
|
class="btn btn-sm dropdown-toggle"
|
||||||
@ -113,7 +127,7 @@
|
|||||||
{{ config.title }}
|
{{ config.title }}
|
||||||
</span>
|
</span>
|
||||||
</span>
|
</span>
|
||||||
<span class="mr-1" v-if="config.title">,</span>
|
<span class="mr-1" v-if="config.title">|</span>
|
||||||
{{currentSnapshot.getTimeDateFormatted()}}
|
{{currentSnapshot.getTimeDateFormatted()}}
|
||||||
</span>
|
</span>
|
||||||
</nav>
|
</nav>
|
||||||
@ -142,7 +156,7 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Calendar -->
|
<!-- Calendar -->
|
||||||
<div class="card" v-if="currentPeriod && showFullView && currentPeriod.children.length">
|
<div class="card" id="calendar-card" v-if="currentPeriod && showFullView && currentPeriod.children.length">
|
||||||
<div class="card-body" id="calendar-card-body">
|
<div class="card-body" id="calendar-card-body">
|
||||||
<CalendarYear
|
<CalendarYear
|
||||||
:period="currentPeriod"
|
:period="currentPeriod"
|
||||||
@ -173,8 +187,8 @@ export default {
|
|||||||
currentSnapshot: null,
|
currentSnapshot: null,
|
||||||
currentSnapshotIndex: null,
|
currentSnapshotIndex: null,
|
||||||
msgs: [],
|
msgs: [],
|
||||||
showFullView: true,
|
showFullView: false,
|
||||||
showTimelineView: true,
|
showTimelineView: false,
|
||||||
maxTimelineZoomLevel: PywbPeriod.Type.day,
|
maxTimelineZoomLevel: PywbPeriod.Type.day,
|
||||||
config: {
|
config: {
|
||||||
title: "",
|
title: "",
|
||||||
@ -192,6 +206,10 @@ export default {
|
|||||||
// when the user navigates there via browser back/forward buttons
|
// when the user navigates there via browser back/forward buttons
|
||||||
addEventListener('unload', (event) => { });
|
addEventListener('unload', (event) => { });
|
||||||
},
|
},
|
||||||
|
updated: function() {
|
||||||
|
// set top frame title equal to value pulled from replay frame
|
||||||
|
document.title = this._("Archived Page: ") + this.config.title;
|
||||||
|
},
|
||||||
computed: {
|
computed: {
|
||||||
sessionStorageUrlKey() {
|
sessionStorageUrlKey() {
|
||||||
// remove http(s), www and trailing slash
|
// remove http(s), www and trailing slash
|
||||||
@ -209,6 +227,9 @@ export default {
|
|||||||
lightButtons() {
|
lightButtons() {
|
||||||
return !!this.config.navbarLightButtons;
|
return !!this.config.navbarLightButtons;
|
||||||
},
|
},
|
||||||
|
printingEnabled() {
|
||||||
|
return !this.config.disablePrinting;
|
||||||
|
},
|
||||||
previousSnapshot() {
|
previousSnapshot() {
|
||||||
if (!this.currentSnapshotIndex) {
|
if (!this.currentSnapshotIndex) {
|
||||||
return null;
|
return null;
|
||||||
@ -277,7 +298,7 @@ export default {
|
|||||||
if (reloadIFrame !== false) {
|
if (reloadIFrame !== false) {
|
||||||
this.$emit("show-snapshot", snapshot);
|
this.$emit("show-snapshot", snapshot);
|
||||||
}
|
}
|
||||||
this.hideBannerUtilities();
|
this.initBannerState(true);
|
||||||
},
|
},
|
||||||
gotoPreviousSnapshot() {
|
gotoPreviousSnapshot() {
|
||||||
let periodToChangeTo = this.currentPeriod.findByFullId(this.previousSnapshot.getFullId());
|
let periodToChangeTo = this.currentPeriod.findByFullId(this.previousSnapshot.getFullId());
|
||||||
@ -290,10 +311,23 @@ export default {
|
|||||||
gotoUrl(event) {
|
gotoUrl(event) {
|
||||||
event.preventDefault();
|
event.preventDefault();
|
||||||
const newUrl = document.querySelector("#theurl").value;
|
const newUrl = document.querySelector("#theurl").value;
|
||||||
if (newUrl !== this.url) {
|
if (newUrl !== this.config.url) {
|
||||||
window.location.href = this.config.prefix + "*/" + newUrl;
|
const ts = this.config.timestamp === undefined ? "*" : this.config.timestamp;
|
||||||
|
window.location.href = this.config.prefix + ts + (ts ? "/" : "") + newUrl;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
toggleTimelineView() {
|
||||||
|
this.showTimelineView = !this.showTimelineView;
|
||||||
|
window.localStorage.setItem("showTimelineView", this.showTimelineView ? "1" : "0");
|
||||||
|
},
|
||||||
|
hasReplayFrame() {
|
||||||
|
return !! window.frames.replay_iframe;
|
||||||
|
},
|
||||||
|
printReplayFrame() {
|
||||||
|
window.frames.replay_iframe.contentWindow.focus();
|
||||||
|
window.frames.replay_iframe.contentWindow.print();
|
||||||
|
return false;
|
||||||
|
},
|
||||||
setData(/** @type {PywbData} data */ data) {
|
setData(/** @type {PywbData} data */ data) {
|
||||||
|
|
||||||
// data-set will usually happen at App INIT (from parent caller)
|
// data-set will usually happen at App INIT (from parent caller)
|
||||||
@ -317,6 +351,10 @@ export default {
|
|||||||
}.bind(this));
|
}.bind(this));
|
||||||
},
|
},
|
||||||
setSnapshot(view) {
|
setSnapshot(view) {
|
||||||
|
if (!this.currentPeriod) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// turn off calendar (aka full) view
|
// turn off calendar (aka full) view
|
||||||
this.showFullView = false;
|
this.showFullView = false;
|
||||||
|
|
||||||
@ -326,17 +364,21 @@ export default {
|
|||||||
this.config.url = view.url;
|
this.config.url = view.url;
|
||||||
|
|
||||||
let periodToChangeTo = this.currentPeriod.findByFullId(snapshot.getFullId());
|
let periodToChangeTo = this.currentPeriod.findByFullId(snapshot.getFullId());
|
||||||
|
if (periodToChangeTo) {
|
||||||
this.gotoPeriod(periodToChangeTo, false /* onlyZoomToPeriod */);
|
this.gotoPeriod(periodToChangeTo, false /* onlyZoomToPeriod */);
|
||||||
},
|
return true;
|
||||||
setTimelineView() {
|
|
||||||
this.showTimelineView = !this.showTimelineView;
|
|
||||||
if (this.showTimelineView === true) {
|
|
||||||
this.showFullView = false;
|
|
||||||
}
|
}
|
||||||
|
return false;
|
||||||
},
|
},
|
||||||
hideBannerUtilities() {
|
initBannerState(isReplay) {
|
||||||
|
// if not replay, always show both
|
||||||
|
if (!isReplay) {
|
||||||
|
this.showFullView = true;
|
||||||
|
this.showTimelineView = true;
|
||||||
|
} else {
|
||||||
this.showFullView = false;
|
this.showFullView = false;
|
||||||
this.showTimelineView = false;
|
this.showTimelineView = window.localStorage.getItem("showTimelineView") === "1";
|
||||||
|
}
|
||||||
},
|
},
|
||||||
updateTitle(title) {
|
updateTitle(title) {
|
||||||
this.config.title = title;
|
this.config.title = title;
|
||||||
@ -355,7 +397,10 @@ export default {
|
|||||||
width: 100%;
|
width: 100%;
|
||||||
}
|
}
|
||||||
.app.expanded {
|
.app.expanded {
|
||||||
height: 130px;
|
/*height: 130px;*/
|
||||||
|
max-height: calc(100vh - 90px);
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
}
|
}
|
||||||
.full-view {
|
.full-view {
|
||||||
/*position: fixed;*/
|
/*position: fixed;*/
|
||||||
@ -443,6 +488,10 @@ export default {
|
|||||||
div.timeline-wrap div.card {
|
div.timeline-wrap div.card {
|
||||||
margin-top: 55px;
|
margin-top: 55px;
|
||||||
}
|
}
|
||||||
|
#calendar-card {
|
||||||
|
overflow-y: auto;
|
||||||
|
max-height: 100%;
|
||||||
|
}
|
||||||
div.timeline-wrap div.card-body {
|
div.timeline-wrap div.card-body {
|
||||||
display: flex;
|
display: flex;
|
||||||
align-items: center;
|
align-items: center;
|
||||||
@ -453,6 +502,7 @@ export default {
|
|||||||
align-items: center;
|
align-items: center;
|
||||||
justify-content: center;
|
justify-content: center;
|
||||||
}
|
}
|
||||||
|
|
||||||
#calendar-card-body {
|
#calendar-card-body {
|
||||||
padding: 0;
|
padding: 0;
|
||||||
}
|
}
|
||||||
|
@ -126,8 +126,8 @@ export default {
|
|||||||
const days = [];
|
const days = [];
|
||||||
// Get days in month, and days in the complete weeks before first day and after last day
|
// Get days in month, and days in the complete weeks before first day and after last day
|
||||||
const [firstDay, lastDay] = this.month.getChildrenRange();
|
const [firstDay, lastDay] = this.month.getChildrenRange();
|
||||||
const daysBeforeFirst = (new Date(this.year.id, this.month.id-1, firstDay)).getDay();
|
const daysBeforeFirst = (7 + (new Date(this.year.id, this.month.id-1, firstDay)).getDay() - PywbI18N.firstDayOfWeek) % 7;
|
||||||
const daysAfterLastDay = (6 - (new Date(this.year.id, this.month.id-1, lastDay)).getDay());
|
const daysAfterLastDay = (6 - (new Date(this.year.id, this.month.id-1, lastDay)).getDay() + PywbI18N.firstDayOfWeek) % 7;
|
||||||
for(let i=0; i<daysBeforeFirst; i++) {
|
for(let i=0; i<daysBeforeFirst; i++) {
|
||||||
days.push(null);
|
days.push(null);
|
||||||
}
|
}
|
||||||
|
@ -163,7 +163,7 @@ export default {
|
|||||||
flex: 1;
|
flex: 1;
|
||||||
flex-wrap: wrap;
|
flex-wrap: wrap;
|
||||||
z-index: 10;
|
z-index: 10;
|
||||||
overflow-y: scroll;
|
overflow-y: auto;
|
||||||
width: 100%;
|
width: 100%;
|
||||||
background-color: white;
|
background-color: white;
|
||||||
padding-bottom: 1em;
|
padding-bottom: 1em;
|
||||||
|
@ -39,7 +39,7 @@
|
|||||||
@keyup.enter="changePeriod(histoPeriod, $event)"
|
@keyup.enter="changePeriod(histoPeriod, $event)"
|
||||||
@mouseover="setTooltipPeriod(histoPeriod, $event)"
|
@mouseover="setTooltipPeriod(histoPeriod, $event)"
|
||||||
@mouseout="setTooltipPeriod(null, $event)"
|
@mouseout="setTooltipPeriod(null, $event)"
|
||||||
tabindex="0"
|
:tabindex="histoPeriod.snapshotCount > 0 ? 0 : -1"
|
||||||
>
|
>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@ -49,7 +49,6 @@
|
|||||||
@keyup.enter="changePeriod(histoPeriod, $event)"
|
@keyup.enter="changePeriod(histoPeriod, $event)"
|
||||||
@mouseover="setTooltipPeriod(subPeriod, $event)"
|
@mouseover="setTooltipPeriod(subPeriod, $event)"
|
||||||
@mouseout="setTooltipPeriod(null, $event)"
|
@mouseout="setTooltipPeriod(null, $event)"
|
||||||
tabindex="0"
|
|
||||||
>
|
>
|
||||||
<div class="label">
|
<div class="label">
|
||||||
{{subPeriod.getReadableId()}}
|
{{subPeriod.getReadableId()}}
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
@keyup.enter="changePeriod(parents[0])"
|
@keyup.enter="changePeriod(parents[0])"
|
||||||
:title="getPeriodZoomOutText(parents[0])"
|
:title="getPeriodZoomOutText(parents[0])"
|
||||||
tabindex="1">
|
tabindex="1">
|
||||||
<img src="/static/zoom-out-icon-333316.png" /> {{parents[0].getReadableId(true)}}
|
<i class="fa fa-search-minus"></i> {{parents[0].getReadableId(true)}}
|
||||||
</span>
|
</span>
|
||||||
</span>
|
</span>
|
||||||
>
|
>
|
||||||
|
@ -3,12 +3,15 @@ export class PywbI18N {
|
|||||||
static getLocale() { // get via public static method
|
static getLocale() { // get via public static method
|
||||||
return PywbI18N.#locale;
|
return PywbI18N.#locale;
|
||||||
}
|
}
|
||||||
|
static firstDayOfWeek = 1;
|
||||||
static init = (locale, config) => {
|
static init = (locale, config) => {
|
||||||
if (PywbI18N.instance) {
|
if (PywbI18N.instance) {
|
||||||
throw new Error('cannot instantiate PywbI18N twice');
|
throw new Error('cannot instantiate PywbI18N twice');
|
||||||
}
|
}
|
||||||
PywbI18N.#locale = locale;
|
PywbI18N.#locale = locale;
|
||||||
PywbI18N.instance = new PywbI18N(config);
|
PywbI18N.instance = new PywbI18N(config);
|
||||||
|
let intlLocale = new Intl.Locale(PywbI18N.getLocale());
|
||||||
|
if ('weekInfo' in intlLocale) PywbI18N.firstDayOfWeek = intlLocale.weekInfo.firstDay % 7;
|
||||||
}
|
}
|
||||||
|
|
||||||
// PywbI18N expects from the i18n string source to receive months SHORT and LONG names in the config like this:
|
// PywbI18N expects from the i18n string source to receive months SHORT and LONG names in the config like this:
|
||||||
@ -29,14 +32,15 @@ export class PywbI18N {
|
|||||||
getMonth(id, type='long') {
|
getMonth(id, type='long') {
|
||||||
return decodeURIComponent(this.config[PywbI18N.monthIdPrefix[id]+'_'+type]);
|
return decodeURIComponent(this.config[PywbI18N.monthIdPrefix[id]+'_'+type]);
|
||||||
}
|
}
|
||||||
// can get long (default) or short day string or intial
|
// can get long (default) or short day string or initial
|
||||||
// PywbI18N expects to receive day's initials like:
|
// PywbI18N expects to receive day's initials like:
|
||||||
// config.mon_short, config.tue_long, ...., config.<mmm>_short, config.<mmm>_long
|
// config.mon_short, config.tue_long, ...., config.<mmm>_short, config.<mmm>_long
|
||||||
getWeekDay(id, type='long') {
|
getWeekDay(id, type='long') {
|
||||||
return decodeURIComponent(this.config[id+'_'+type])
|
return decodeURIComponent(this.config[id+'_'+type])
|
||||||
}
|
}
|
||||||
getWeekDays(type='long') {
|
getWeekDays(type='long') {
|
||||||
return ['sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'].map(d => this.getWeekDay(d, type));
|
let weekDays = ['sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'];
|
||||||
|
return weekDays.concat(weekDays).slice(PywbI18N.firstDayOfWeek, PywbI18N.firstDayOfWeek + 7).map(d => this.getWeekDay(d, type));
|
||||||
}
|
}
|
||||||
getText(id, embeddedVariableStrings=null) {
|
getText(id, embeddedVariableStrings=null) {
|
||||||
const translated = decodeURIComponent(this.config[id] || id);
|
const translated = decodeURIComponent(this.config[id] || id);
|
||||||
|
@ -7,38 +7,44 @@ import Vue from "vue/dist/vue.esm.browser";
|
|||||||
|
|
||||||
|
|
||||||
// ===========================================================================
|
// ===========================================================================
|
||||||
export function main(staticPrefix, url, prefix, timestamp, logoUrl, navbarBackground, navbarColor, navbarLightButtons, locale, allLocales, i18nStrings) {
|
export function main(config, locale, i18nStrings) {
|
||||||
PywbI18N.init(locale, i18nStrings);
|
PywbI18N.init(locale, i18nStrings);
|
||||||
new CDXLoader(staticPrefix, url, prefix, timestamp, logoUrl, navbarBackground, navbarColor, navbarLightButtons, allLocales);
|
new CDXLoader(config);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ===========================================================================
|
// ===========================================================================
|
||||||
class CDXLoader {
|
class CDXLoader {
|
||||||
constructor(staticPrefix, url, prefix, timestamp, logoUrl, navbarBackground, navbarColor, navbarLightButtons, allLocales) {
|
constructor(config) {
|
||||||
this.loadingSpinner = null;
|
this.loadingSpinner = null;
|
||||||
this.loaded = false;
|
this.loaded = false;
|
||||||
this.opts = {};
|
this.opts = {};
|
||||||
this.prefix = prefix;
|
this.url = config.url;
|
||||||
this.staticPrefix = staticPrefix;
|
this.prefix = config.prefix;
|
||||||
this.logoUrl = logoUrl;
|
this.staticPrefix = config.staticPrefix;
|
||||||
this.navbarBackground = navbarBackground;
|
this.logoUrl = config.logoUrl;
|
||||||
this.navbarColor = navbarColor;
|
this.logoHomeUrl = config.logoHomeUrl;
|
||||||
this.navbarLightButtons = navbarLightButtons
|
this.navbarBackground = config.navbarBackground;
|
||||||
|
this.navbarColor = config.navbarColor;
|
||||||
|
this.navbarLightButtons = config.navbarLightButtons;
|
||||||
|
this.disablePrinting = config.disablePrinting;
|
||||||
|
|
||||||
this.isReplay = (timestamp !== undefined);
|
this.timestamp = config.timestamp;
|
||||||
|
|
||||||
|
this.isReplay = (config.timestamp !== undefined);
|
||||||
|
|
||||||
setTimeout(() => {
|
setTimeout(() => {
|
||||||
if (!this.loaded) {
|
if (!this.loaded) {
|
||||||
this.loadingSpinner = new LoadingSpinner({text: PywbI18N.instance?.getText('Loading...'), isSmall: !!timestamp}); // bootstrap loading-spinner EARLY ON
|
this.loadingSpinner = new LoadingSpinner({text: PywbI18N.instance?.getText('Loading...'), isSmall: !!this.timestamp}); // bootstrap loading-spinner EARLY ON
|
||||||
this.loadingSpinner.setOn();
|
this.loadingSpinner.setOn();
|
||||||
}
|
}
|
||||||
}, 500);
|
}, 500);
|
||||||
|
|
||||||
if (this.isReplay) {
|
if (this.isReplay) {
|
||||||
window.WBBanner = new VueBannerWrapper(this, url);
|
window.WBBanner = new VueBannerWrapper(this, this.url, this.timestamp);
|
||||||
}
|
}
|
||||||
|
|
||||||
let queryURL;
|
let queryURL;
|
||||||
|
let url;
|
||||||
|
|
||||||
// query form *?=url...
|
// query form *?=url...
|
||||||
if (window.location.href.indexOf("*?") > 0) {
|
if (window.location.href.indexOf("*?") > 0) {
|
||||||
@ -46,22 +52,24 @@ class CDXLoader {
|
|||||||
url = new URL(queryURL).searchParams.get("url");
|
url = new URL(queryURL).searchParams.get("url");
|
||||||
|
|
||||||
// otherwise, traditional calendar form /*/<url>
|
// otherwise, traditional calendar form /*/<url>
|
||||||
} else if (url) {
|
} else if (this.url) {
|
||||||
|
url = this.url
|
||||||
const params = new URLSearchParams();
|
const params = new URLSearchParams();
|
||||||
params.set("url", url);
|
params.set("url", url);
|
||||||
params.set("output", "json");
|
params.set("output", "json");
|
||||||
queryURL = prefix + "cdx?" + params.toString();
|
queryURL = this.prefix + "cdx?" + params.toString();
|
||||||
|
|
||||||
// otherwise, an error since no URL
|
// otherwise, an error since no URL
|
||||||
} else {
|
} else {
|
||||||
throw new Error("No query URL specified");
|
throw new Error("No query URL specified");
|
||||||
}
|
}
|
||||||
|
|
||||||
const logoImg = this.staticPrefix + "/" + (this.logoUrl ? this.logoUrl : "pywb-logo-sm.png");
|
config.logoImg = this.staticPrefix + "/" + (!!this.logoUrl ? this.logoUrl : "pywb-logo-sm.png");
|
||||||
|
|
||||||
|
this.app = this.initApp(config);
|
||||||
|
|
||||||
this.app = this.initApp({logoImg, navbarBackground, navbarColor, navbarLightButtons, url, allLocales});
|
|
||||||
this.loadCDX(queryURL).then((cdxList) => {
|
this.loadCDX(queryURL).then((cdxList) => {
|
||||||
this.setAppData(cdxList, timestamp ? {url, timestamp}:null);
|
this.setAppData(cdxList, url, config.timestamp);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -72,19 +80,7 @@ class CDXLoader {
|
|||||||
|
|
||||||
app.$mount("#app");
|
app.$mount("#app");
|
||||||
|
|
||||||
// TODO (Ilya): make this work with in-page snapshot/capture/replay updates!
|
app.$on("show-snapshot", (snapshot) => this.loadSnapshot(snapshot));
|
||||||
// app.$on("show-snapshot", snapshot => {
|
|
||||||
// const replayUrl = app.config.url;
|
|
||||||
// const url = location.href.replace('/'+replayUrl, '').replace(/\d+$/, '') + snapshot.id + '/' + replayUrl;
|
|
||||||
// window.history.pushState({url: replayUrl, timestamp: snapshot.id}, document.title, url);
|
|
||||||
// if (!window.onpopstate) {
|
|
||||||
// window.onpopstate = (ev) => {
|
|
||||||
// updateSnapshot(ev.state.url, ev.state.timestamp);
|
|
||||||
// };
|
|
||||||
// }
|
|
||||||
// });
|
|
||||||
|
|
||||||
app.$on("show-snapshot", this.loadSnapshot.bind(this));
|
|
||||||
app.$on("data-set-and-render-completed", () => {
|
app.$on("data-set-and-render-completed", () => {
|
||||||
if (this.loadingSpinner) {
|
if (this.loadingSpinner) {
|
||||||
this.loadingSpinner.setOff(); // only turn off loading-spinner AFTER app has told us it is DONE DONE
|
this.loadingSpinner.setOff(); // only turn off loading-spinner AFTER app has told us it is DONE DONE
|
||||||
@ -103,20 +99,34 @@ class CDXLoader {
|
|||||||
|
|
||||||
const cdxList = await this.loadCDX(queryURL);
|
const cdxList = await this.loadCDX(queryURL);
|
||||||
|
|
||||||
this.setAppData(cdxList, {url, timestamp});
|
this.setAppData(cdxList, url, timestamp);
|
||||||
}
|
}
|
||||||
|
|
||||||
setAppData(cdxList, snapshot=null) {
|
async updateTimestamp(url, timestamp) {
|
||||||
|
this.timestamp = timestamp;
|
||||||
|
|
||||||
|
if (this.cdxLoading) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
this.app.setSnapshot({url, timestamp});
|
||||||
|
}
|
||||||
|
|
||||||
|
setAppData(cdxList, url, timestamp) {
|
||||||
this.app.setData(new PywbData(cdxList));
|
this.app.setData(new PywbData(cdxList));
|
||||||
|
|
||||||
if (snapshot) {
|
this.app.initBannerState(this.isReplay);
|
||||||
this.app.hideBannerUtilities();
|
|
||||||
this.app.setSnapshot(snapshot);
|
// if set on initial load, may not have timestamp yet
|
||||||
|
// will be updated later
|
||||||
|
if (timestamp) {
|
||||||
|
this.updateTimestamp(url, timestamp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async loadCDX(queryURL) {
|
async loadCDX(queryURL) {
|
||||||
// this.loadingSpinner.setOn(); // start loading-spinner when CDX loading begins
|
// this.loadingSpinner.setOn(); // start loading-spinner when CDX loading begins
|
||||||
|
this.cdxLoading = true;
|
||||||
const queryWorker = new Worker(this.staticPrefix + "/queryWorker.js");
|
const queryWorker = new Worker(this.staticPrefix + "/queryWorker.js");
|
||||||
|
|
||||||
const p = new Promise((resolve) => {
|
const p = new Promise((resolve) => {
|
||||||
@ -130,6 +140,7 @@ class CDXLoader {
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case "finished":
|
case "finished":
|
||||||
|
this.cdxLoading = false;
|
||||||
resolve(cdxList);
|
resolve(cdxList);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -153,7 +164,10 @@ class CDXLoader {
|
|||||||
if (!this.isReplay) {
|
if (!this.isReplay) {
|
||||||
window.location.href = this.prefix + snapshot.id + "/" + snapshot.url;
|
window.location.href = this.prefix + snapshot.id + "/" + snapshot.url;
|
||||||
} else if (window.cframe) {
|
} else if (window.cframe) {
|
||||||
window.cframe.load_url(snapshot.url, snapshot.id + "", reloadIFrame);
|
const ts = snapshot.id + "";
|
||||||
|
if (ts !== this.timestamp) {
|
||||||
|
window.cframe.load_url(snapshot.url, ts, reloadIFrame);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -162,9 +176,10 @@ class CDXLoader {
|
|||||||
// ===========================================================================
|
// ===========================================================================
|
||||||
class VueBannerWrapper
|
class VueBannerWrapper
|
||||||
{
|
{
|
||||||
constructor(loader, url) {
|
constructor(loader, url, ts) {
|
||||||
this.loading = true;
|
this.loading = true;
|
||||||
this.lastSurt = this.getSurt(url);
|
this.lastSurt = this.getSurt(url);
|
||||||
|
this.lastTs = ts;
|
||||||
this.loader = loader;
|
this.loader = loader;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -191,6 +206,9 @@ class VueBannerWrapper
|
|||||||
if (surt !== this.lastSurt) {
|
if (surt !== this.lastSurt) {
|
||||||
this.loader.updateSnapshot(event.data.url, event.data.ts);
|
this.loader.updateSnapshot(event.data.url, event.data.ts);
|
||||||
this.lastSurt = surt;
|
this.lastSurt = surt;
|
||||||
|
} else if (event.data.ts !== this.lastTs) {
|
||||||
|
this.loader.updateTimestamp(event.data.url, event.data.ts);
|
||||||
|
this.lastTs = event.data.ts;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -386,7 +386,7 @@ color-name@~1.1.4:
|
|||||||
concat-map@0.0.1:
|
concat-map@0.0.1:
|
||||||
version "0.0.1"
|
version "0.0.1"
|
||||||
resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b"
|
resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b"
|
||||||
integrity sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=
|
integrity sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==
|
||||||
|
|
||||||
consolidate@^0.15.1:
|
consolidate@^0.15.1:
|
||||||
version "0.15.1"
|
version "0.15.1"
|
||||||
@ -469,9 +469,9 @@ debug@~3.1.0:
|
|||||||
ms "2.0.0"
|
ms "2.0.0"
|
||||||
|
|
||||||
decode-uri-component@^0.2.0:
|
decode-uri-component@^0.2.0:
|
||||||
version "0.2.0"
|
version "0.2.2"
|
||||||
resolved "https://registry.yarnpkg.com/decode-uri-component/-/decode-uri-component-0.2.0.tgz#eb3913333458775cb84cd1a1fae062106bb87545"
|
resolved "https://registry.yarnpkg.com/decode-uri-component/-/decode-uri-component-0.2.2.tgz#e69dbe25d37941171dd540e024c444cd5188e1e9"
|
||||||
integrity sha1-6zkTMzRYd1y4TNGh+uBiEGu4dUU=
|
integrity sha512-FqUYQ+8o158GyGTrMFJms9qh3CqTKvAqgqsTnkLI8sKu0028orqBhxNMFkFen0zGyg6epACD32pjVk58ngIErQ==
|
||||||
|
|
||||||
deep-is@^0.1.3:
|
deep-is@^0.1.3:
|
||||||
version "0.1.3"
|
version "0.1.3"
|
||||||
@ -1103,9 +1103,9 @@ mime@^1.4.1:
|
|||||||
integrity sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==
|
integrity sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==
|
||||||
|
|
||||||
minimatch@^3.0.4:
|
minimatch@^3.0.4:
|
||||||
version "3.0.4"
|
version "3.1.2"
|
||||||
resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.0.4.tgz#5166e286457f03306064be5497e8dbb0c3d32083"
|
resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.1.2.tgz#19cd194bfd3e428f049a70817c038d89ab4be35b"
|
||||||
integrity sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==
|
integrity sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==
|
||||||
dependencies:
|
dependencies:
|
||||||
brace-expansion "^1.1.7"
|
brace-expansion "^1.1.7"
|
||||||
|
|
||||||
|
@ -260,6 +260,10 @@ class AccessChecker(object):
|
|||||||
if key.startswith(acl_key):
|
if key.startswith(acl_key):
|
||||||
acl_obj = CDXObject(acl)
|
acl_obj = CDXObject(acl)
|
||||||
|
|
||||||
|
# Check for "*," in ACL, which matches any URL
|
||||||
|
if acl_key == b"*,":
|
||||||
|
acl_obj = CDXObject(acl)
|
||||||
|
|
||||||
if acl_obj:
|
if acl_obj:
|
||||||
user = acl_obj.get('user')
|
user = acl_obj.get('user')
|
||||||
if user == acl_user:
|
if user == acl_user:
|
||||||
|
@ -15,7 +15,7 @@ from collections import namedtuple
|
|||||||
# ============================================================================
|
# ============================================================================
|
||||||
FuzzyRule = namedtuple('FuzzyRule',
|
FuzzyRule = namedtuple('FuzzyRule',
|
||||||
'url_prefix, regex, replace_after, filter_str, ' +
|
'url_prefix, regex, replace_after, filter_str, ' +
|
||||||
'match_type, find_all')
|
'match_type, re_type')
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
@ -23,6 +23,7 @@ class FuzzyMatcher(object):
|
|||||||
DEFAULT_FILTER = ['urlkey:{0}']
|
DEFAULT_FILTER = ['urlkey:{0}']
|
||||||
DEFAULT_MATCH_TYPE = 'prefix'
|
DEFAULT_MATCH_TYPE = 'prefix'
|
||||||
DEFAULT_REPLACE_AFTER = '?'
|
DEFAULT_REPLACE_AFTER = '?'
|
||||||
|
DEFAULT_RE_TYPE = 'search'
|
||||||
|
|
||||||
FUZZY_SKIP_PARAMS = ('alt_url', 'reverse', 'closest', 'end_key',
|
FUZZY_SKIP_PARAMS = ('alt_url', 'reverse', 'closest', 'end_key',
|
||||||
'url', 'matchType', 'filter')
|
'url', 'matchType', 'filter')
|
||||||
@ -58,16 +59,16 @@ class FuzzyMatcher(object):
|
|||||||
replace_after = self.DEFAULT_REPLACE_AFTER
|
replace_after = self.DEFAULT_REPLACE_AFTER
|
||||||
filter_str = self.DEFAULT_FILTER
|
filter_str = self.DEFAULT_FILTER
|
||||||
match_type = self.DEFAULT_MATCH_TYPE
|
match_type = self.DEFAULT_MATCH_TYPE
|
||||||
find_all = False
|
re_type = self.DEFAULT_RE_TYPE
|
||||||
|
|
||||||
else:
|
else:
|
||||||
regex = self.make_regex(config.get('match'))
|
regex = self.make_regex(config.get('match'))
|
||||||
replace_after = config.get('replace', self.DEFAULT_REPLACE_AFTER)
|
replace_after = config.get('replace', self.DEFAULT_REPLACE_AFTER)
|
||||||
filter_str = config.get('filter', self.DEFAULT_FILTER)
|
filter_str = config.get('filter', self.DEFAULT_FILTER)
|
||||||
match_type = config.get('type', self.DEFAULT_MATCH_TYPE)
|
match_type = config.get('type', self.DEFAULT_MATCH_TYPE)
|
||||||
find_all = config.get('find_all', False)
|
re_type = config.get('re_type', self.DEFAULT_RE_TYPE)
|
||||||
|
|
||||||
return FuzzyRule(url_prefix, regex, replace_after, filter_str, match_type, find_all)
|
return FuzzyRule(url_prefix, regex, replace_after, filter_str, match_type, re_type)
|
||||||
|
|
||||||
def get_fuzzy_match(self, urlkey, url, params):
|
def get_fuzzy_match(self, urlkey, url, params):
|
||||||
filters = set()
|
filters = set()
|
||||||
@ -78,9 +79,12 @@ class FuzzyMatcher(object):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
groups = None
|
groups = None
|
||||||
if rule.find_all:
|
if rule.re_type == 'findall':
|
||||||
groups = rule.regex.findall(urlkey)
|
groups = rule.regex.findall(urlkey)
|
||||||
else:
|
if rule.re_type == 'sub':
|
||||||
|
matched_rule = rule
|
||||||
|
break
|
||||||
|
elif rule.re_type == 'search':
|
||||||
m = rule.regex.search(urlkey)
|
m = rule.regex.search(urlkey)
|
||||||
groups = m and m.groups()
|
groups = m and m.groups()
|
||||||
|
|
||||||
@ -102,7 +106,7 @@ class FuzzyMatcher(object):
|
|||||||
no_filters = (not filters or filters == {'urlkey:'}) and (matched_rule.replace_after == '?')
|
no_filters = (not filters or filters == {'urlkey:'}) and (matched_rule.replace_after == '?')
|
||||||
|
|
||||||
inx = url.find(matched_rule.replace_after)
|
inx = url.find(matched_rule.replace_after)
|
||||||
if inx > 0:
|
if inx > 0 and matched_rule.re_type != 'sub':
|
||||||
length = inx + len(matched_rule.replace_after)
|
length = inx + len(matched_rule.replace_after)
|
||||||
# don't include trailing '?' for default filter
|
# don't include trailing '?' for default filter
|
||||||
if no_filters:
|
if no_filters:
|
||||||
@ -111,13 +115,17 @@ class FuzzyMatcher(object):
|
|||||||
if url[length - 1] == '/':
|
if url[length - 1] == '/':
|
||||||
length -= 1
|
length -= 1
|
||||||
url = url[:length]
|
url = url[:length]
|
||||||
elif not no_filters:
|
elif not no_filters and matched_rule.re_type != 'sub':
|
||||||
url += matched_rule.replace_after[0]
|
url += matched_rule.replace_after[0]
|
||||||
|
|
||||||
if matched_rule.match_type == 'domain':
|
if matched_rule.match_type == 'domain':
|
||||||
host = urlsplit(url).netloc
|
host = urlsplit(url).netloc
|
||||||
url = host.split('.', 1)[1]
|
url = host.split('.', 1)[1]
|
||||||
|
|
||||||
|
if matched_rule.re_type == 'sub':
|
||||||
|
filters = {'urlkey:'}
|
||||||
|
url = re.sub(rule.regex, rule.replace_after, url)
|
||||||
|
|
||||||
fuzzy_params = {'url': url,
|
fuzzy_params = {'url': url,
|
||||||
'matchType': matched_rule.match_type,
|
'matchType': matched_rule.match_type,
|
||||||
'filter': filters,
|
'filter': filters,
|
||||||
|
@ -234,3 +234,10 @@ class TestFuzzy(object):
|
|||||||
params = self.get_params(url, actual_url, mime='application/x-shockwave-flash')
|
params = self.get_params(url, actual_url, mime='application/x-shockwave-flash')
|
||||||
cdx_iter, errs = self.fuzzy(self.source, params)
|
cdx_iter, errs = self.fuzzy(self.source, params)
|
||||||
assert list(cdx_iter) == []
|
assert list(cdx_iter) == []
|
||||||
|
|
||||||
|
def test_fuzzy_sub_replacement(self):
|
||||||
|
url = 'https://example.com/matched'
|
||||||
|
actual_url = 'https://example.com/replaced'
|
||||||
|
params = self.get_params(url, actual_url)
|
||||||
|
cdx_iter, errs = self.fuzzy(self.source, params)
|
||||||
|
assert list(cdx_iter) == self.get_expected(actual_url)
|
||||||
|
@ -11,6 +11,7 @@ from io import BytesIO
|
|||||||
import base64
|
import base64
|
||||||
import cgi
|
import cgi
|
||||||
import json
|
import json
|
||||||
|
import math
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
|
||||||
@ -328,6 +329,21 @@ class MethodQueryCanonicalizer(object):
|
|||||||
_parser(v, name)
|
_parser(v, name)
|
||||||
|
|
||||||
elif name:
|
elif name:
|
||||||
|
if isinstance(json_obj, bool) and json_obj:
|
||||||
|
data[get_key(name)] = "true"
|
||||||
|
elif isinstance(json_obj, bool):
|
||||||
|
data[get_key(name)] = "false"
|
||||||
|
elif json_obj is None:
|
||||||
|
data[get_key(name)] = "null"
|
||||||
|
elif isinstance(json_obj, float):
|
||||||
|
# Treat floats like JavaScript's Number.prototype.toString(),
|
||||||
|
# drop decimal if float represents a whole number.
|
||||||
|
fraction, _ = math.modf(json_obj)
|
||||||
|
if fraction == 0.0:
|
||||||
|
data[get_key(name)] = str(int(json_obj))
|
||||||
|
else:
|
||||||
|
data[get_key(name)] = str(json_obj)
|
||||||
|
else:
|
||||||
data[get_key(name)] = str(json_obj)
|
data[get_key(name)] = str(json_obj)
|
||||||
|
|
||||||
_parser(json.loads(string))
|
_parser(json.loads(string))
|
||||||
|
@ -39,7 +39,7 @@ class InputReqApp(object):
|
|||||||
|
|
||||||
#=============================================================================
|
#=============================================================================
|
||||||
class TestInputReq(object):
|
class TestInputReq(object):
|
||||||
def setup(self):
|
def setup_method(self):
|
||||||
self.app = InputReqApp()
|
self.app = InputReqApp()
|
||||||
self.testapp = webtest.TestApp(self.app)
|
self.testapp = webtest.TestApp(self.app)
|
||||||
|
|
||||||
@ -82,44 +82,49 @@ Foo: Bar\r\n\
|
|||||||
class TestPostQueryExtract(object):
|
class TestPostQueryExtract(object):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setup_class(cls):
|
def setup_class(cls):
|
||||||
cls.post_data = b'foo=bar&dir=%2Fbaz'
|
cls.post_data = b'foo=bar&dir=%2Fbaz&do=true&re=false&re=null'
|
||||||
cls.binary_post_data = b'\x816l`L\xa04P\x0e\xe0r\x02\xb5\x89\x19\x00fP\xdb\x0e\xb0\x02,'
|
cls.binary_post_data = b'\x816l`L\xa04P\x0e\xe0r\x02\xb5\x89\x19\x00fP\xdb\x0e\xb0\x02,'
|
||||||
|
|
||||||
def test_post_extract_1(self):
|
def test_post_extract_1(self):
|
||||||
mq = MethodQueryCanonicalizer('POST', 'application/x-www-form-urlencoded',
|
mq = MethodQueryCanonicalizer('POST', 'application/x-www-form-urlencoded',
|
||||||
len(self.post_data), BytesIO(self.post_data))
|
len(self.post_data), BytesIO(self.post_data))
|
||||||
|
|
||||||
assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&foo=bar&dir=/baz'
|
assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&foo=bar&dir=/baz&do=true&re=false&re=null'
|
||||||
|
|
||||||
assert mq.append_query('http://example.com/?123=ABC') == 'http://example.com/?123=ABC&__wb_method=POST&foo=bar&dir=/baz'
|
assert mq.append_query('http://example.com/?123=ABC') == 'http://example.com/?123=ABC&__wb_method=POST&foo=bar&dir=/baz&do=true&re=false&re=null'
|
||||||
|
|
||||||
def test_post_extract_json(self):
|
def test_post_extract_json(self):
|
||||||
post_data = b'{"a": "b", "c": {"a": 2}, "d": "e"}'
|
post_data = b'{"a": "b", "c": {"a": 2}, "d": "e", "f": true, "g": [false, null]}'
|
||||||
mq = MethodQueryCanonicalizer('POST', 'application/json',
|
mq = MethodQueryCanonicalizer('POST', 'application/json',
|
||||||
len(post_data), BytesIO(post_data))
|
len(post_data), BytesIO(post_data))
|
||||||
|
|
||||||
assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&a=b&a.2_=2&d=e'
|
assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&a=b&a.2_=2&d=e&f=true&g=false&g.2_=null'
|
||||||
|
|
||||||
|
post_data = b'{"type": "event", "id": 44.0, "float": 35.7, "values": [true, false, null], "source": {"type": "component", "id": "a+b&c= d", "values": [3, 4]}}'
|
||||||
|
mq = MethodQueryCanonicalizer('POST', 'application/json',
|
||||||
|
len(post_data), BytesIO(post_data))
|
||||||
|
|
||||||
|
assert mq.append_query('http://example.com/events') == 'http://example.com/events?__wb_method=POST&type=event&id=44&float=35.7&values=true&values.2_=false&values.3_=null&type.2_=component&id.2_=a%2Bb%26c%3D+d&values.4_=3&values.5_=4'
|
||||||
|
|
||||||
def test_put_extract_method(self):
|
def test_put_extract_method(self):
|
||||||
mq = MethodQueryCanonicalizer('PUT', 'application/x-www-form-urlencoded',
|
mq = MethodQueryCanonicalizer('PUT', 'application/x-www-form-urlencoded',
|
||||||
len(self.post_data), BytesIO(self.post_data))
|
len(self.post_data), BytesIO(self.post_data))
|
||||||
|
|
||||||
assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=PUT&foo=bar&dir=/baz'
|
assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=PUT&foo=bar&dir=/baz&do=true&re=false&re=null'
|
||||||
|
|
||||||
def test_post_extract_non_form_data_1(self):
|
def test_post_extract_non_form_data_1(self):
|
||||||
mq = MethodQueryCanonicalizer('POST', 'application/octet-stream',
|
mq = MethodQueryCanonicalizer('POST', 'application/octet-stream',
|
||||||
len(self.post_data), BytesIO(self.post_data))
|
len(self.post_data), BytesIO(self.post_data))
|
||||||
|
|
||||||
#base64 encoded data
|
#base64 encoded data
|
||||||
assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&__wb_post_data=Zm9vPWJhciZkaXI9JTJGYmF6'
|
assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&__wb_post_data=Zm9vPWJhciZkaXI9JTJGYmF6JmRvPXRydWUmcmU9ZmFsc2UmcmU9bnVsbA=='
|
||||||
|
|
||||||
def test_post_extract_non_form_data_2(self):
|
def test_post_extract_non_form_data_2(self):
|
||||||
mq = MethodQueryCanonicalizer('POST', 'text/plain',
|
mq = MethodQueryCanonicalizer('POST', 'text/plain',
|
||||||
len(self.post_data), BytesIO(self.post_data))
|
len(self.post_data), BytesIO(self.post_data))
|
||||||
|
|
||||||
#base64 encoded data
|
#base64 encoded data
|
||||||
assert mq.append_query('http://example.com/pathbar?id=123') == 'http://example.com/pathbar?id=123&__wb_method=POST&__wb_post_data=Zm9vPWJhciZkaXI9JTJGYmF6'
|
assert mq.append_query('http://example.com/pathbar?id=123') == 'http://example.com/pathbar?id=123&__wb_method=POST&__wb_post_data=Zm9vPWJhciZkaXI9JTJGYmF6JmRvPXRydWUmcmU9ZmFsc2UmcmU9bnVsbA=='
|
||||||
|
|
||||||
def test_post_extract_length_invalid_ignore(self):
|
def test_post_extract_length_invalid_ignore(self):
|
||||||
mq = MethodQueryCanonicalizer('POST', 'application/x-www-form-urlencoded',
|
mq = MethodQueryCanonicalizer('POST', 'application/x-www-form-urlencoded',
|
||||||
@ -136,13 +141,13 @@ class TestPostQueryExtract(object):
|
|||||||
mq = MethodQueryCanonicalizer('POST', 'application/x-www-form-urlencoded',
|
mq = MethodQueryCanonicalizer('POST', 'application/x-www-form-urlencoded',
|
||||||
len(self.post_data) - 4, BytesIO(self.post_data))
|
len(self.post_data) - 4, BytesIO(self.post_data))
|
||||||
|
|
||||||
assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&foo=bar&dir=%2'
|
assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&foo=bar&dir=/baz&do=true&re=false&re='
|
||||||
|
|
||||||
def test_post_extract_length_too_long(self):
|
def test_post_extract_length_too_long(self):
|
||||||
mq = MethodQueryCanonicalizer('POST', 'application/x-www-form-urlencoded',
|
mq = MethodQueryCanonicalizer('POST', 'application/x-www-form-urlencoded',
|
||||||
len(self.post_data) + 4, BytesIO(self.post_data))
|
len(self.post_data) + 4, BytesIO(self.post_data))
|
||||||
|
|
||||||
assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&foo=bar&dir=/baz'
|
assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&foo=bar&dir=/baz&do=true&re=false&re=null'
|
||||||
|
|
||||||
def test_post_extract_malformed_form_data(self):
|
def test_post_extract_malformed_form_data(self):
|
||||||
mq = MethodQueryCanonicalizer('POST', 'application/x-www-form-urlencoded',
|
mq = MethodQueryCanonicalizer('POST', 'application/x-www-form-urlencoded',
|
||||||
@ -155,7 +160,7 @@ class TestPostQueryExtract(object):
|
|||||||
mq = MethodQueryCanonicalizer('POST', 'multipart/form-data',
|
mq = MethodQueryCanonicalizer('POST', 'multipart/form-data',
|
||||||
len(self.post_data), BytesIO(self.post_data))
|
len(self.post_data), BytesIO(self.post_data))
|
||||||
|
|
||||||
assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&__wb_post_data=Zm9vPWJhciZkaXI9JTJGYmF6'
|
assert mq.append_query('http://example.com/') == 'http://example.com/?__wb_method=POST&__wb_post_data=Zm9vPWJhciZkaXI9JTJGYmF6JmRvPXRydWUmcmU9ZmFsc2UmcmU9bnVsbA=='
|
||||||
|
|
||||||
|
|
||||||
def test_options(self):
|
def test_options(self):
|
||||||
|
@ -18,7 +18,7 @@ from .testutils import LiveServerTests, HttpBinLiveTests, BaseTestClass
|
|||||||
|
|
||||||
|
|
||||||
class TestUpstream(LiveServerTests, HttpBinLiveTests, BaseTestClass):
|
class TestUpstream(LiveServerTests, HttpBinLiveTests, BaseTestClass):
|
||||||
def setup(self):
|
def setup_method(self):
|
||||||
app = BaseWarcServer()
|
app = BaseWarcServer()
|
||||||
|
|
||||||
base_url = 'http://localhost:{0}'.format(self.server.port)
|
base_url = 'http://localhost:{0}'.format(self.server.port)
|
||||||
|
@ -1,19 +1,21 @@
|
|||||||
six
|
six
|
||||||
warcio>=1.7.1
|
warcio>=1.7.1
|
||||||
requests
|
requests
|
||||||
redis<3.0
|
redis==2.10.6
|
||||||
jinja2<3.0.0
|
jinja2>=3.1.2
|
||||||
surt>=0.3.1
|
surt>=0.3.1
|
||||||
brotlipy
|
brotlipy
|
||||||
pyyaml
|
pyyaml
|
||||||
werkzeug
|
werkzeug==2.2.3
|
||||||
webencodings
|
webencodings
|
||||||
gevent==21.12.0
|
gevent==22.10.2
|
||||||
|
greenlet>=2.0.2,<3.0
|
||||||
webassets==2.0
|
webassets==2.0
|
||||||
portalocker
|
portalocker
|
||||||
wsgiprox>=1.5.1
|
wsgiprox>=1.5.1
|
||||||
fakeredis<1.0
|
fakeredis<1.0
|
||||||
tldextract
|
tldextract
|
||||||
python-dateutil
|
python-dateutil
|
||||||
markupsafe<2.1.0
|
markupsafe>=2.1.1
|
||||||
ua_parser
|
ua_parser
|
||||||
|
py3AMF
|
||||||
|
1
sample_archive/access/allow_all.aclj
Normal file
1
sample_archive/access/allow_all.aclj
Normal file
@ -0,0 +1 @@
|
|||||||
|
*, - {"access": "allow", "user": "staff"}
|
@ -5,6 +5,8 @@ org,iana)/_css/2013.1/fonts/opensans-semibold.ttf - {"access": "allow"}
|
|||||||
org,iana)/_css - {"access": "exclude"}
|
org,iana)/_css - {"access": "exclude"}
|
||||||
org,iana)/### - {"access": "allow"}
|
org,iana)/### - {"access": "allow"}
|
||||||
org,iana)/ - {"access": "exclude"}
|
org,iana)/ - {"access": "exclude"}
|
||||||
|
com,example)/?example=3 - {"access": "block", "user": "staff"}
|
||||||
|
com,example)/?example=3 - {"access": "exclude", "user": "staff2"}
|
||||||
org,example)/?example=1 - {"access": "block"}
|
org,example)/?example=1 - {"access": "block"}
|
||||||
com,example)/?example=2 - {"access": "allow_ignore_embargo"}
|
com,example)/?example=2 - {"access": "allow_ignore_embargo"}
|
||||||
com,example)/?example=1 - {"access": "allow_ignore_embargo", "user": "staff2"}
|
com,example)/?example=1 - {"access": "allow_ignore_embargo", "user": "staff2"}
|
||||||
|
BIN
sample_archive/cdxj/example.cdx.gz
Normal file
BIN
sample_archive/cdxj/example.cdx.gz
Normal file
Binary file not shown.
BIN
sample_archive/waczs/invalid_example_1.wacz
Normal file
BIN
sample_archive/waczs/invalid_example_1.wacz
Normal file
Binary file not shown.
BIN
sample_archive/waczs/valid_example_1.wacz
Normal file
BIN
sample_archive/waczs/valid_example_1.wacz
Normal file
Binary file not shown.
13
setup.py
13
setup.py
@ -62,10 +62,6 @@ def generate_git_hash_py(pkg, filename='git_hash.py'):
|
|||||||
def load_requirements(filename):
|
def load_requirements(filename):
|
||||||
with open(filename, 'rt') as fh:
|
with open(filename, 'rt') as fh:
|
||||||
requirements = fh.read().rstrip().split('\n')
|
requirements = fh.read().rstrip().split('\n')
|
||||||
if sys.version_info > (3, 0):
|
|
||||||
requirements.append("py3AMF")
|
|
||||||
else:
|
|
||||||
requirements.append("pyAMF")
|
|
||||||
return requirements
|
return requirements
|
||||||
|
|
||||||
|
|
||||||
@ -113,6 +109,7 @@ setup(
|
|||||||
"translate_toolkit"
|
"translate_toolkit"
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
python_requires='>=3.7,<3.12',
|
||||||
tests_require=load_requirements("test_requirements.txt"),
|
tests_require=load_requirements("test_requirements.txt"),
|
||||||
cmdclass={'test': PyTest},
|
cmdclass={'test': PyTest},
|
||||||
test_suite='',
|
test_suite='',
|
||||||
@ -131,16 +128,12 @@ setup(
|
|||||||
'Environment :: Web Environment',
|
'Environment :: Web Environment',
|
||||||
'License :: OSI Approved :: GNU General Public License (GPL)',
|
'License :: OSI Approved :: GNU General Public License (GPL)',
|
||||||
'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
|
'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
|
||||||
'Programming Language :: Python :: 2',
|
|
||||||
'Programming Language :: Python :: 2.7',
|
|
||||||
'Programming Language :: Python :: 3',
|
'Programming Language :: Python :: 3',
|
||||||
'Programming Language :: Python :: 3.3',
|
|
||||||
'Programming Language :: Python :: 3.4',
|
|
||||||
'Programming Language :: Python :: 3.5',
|
|
||||||
'Programming Language :: Python :: 3.6',
|
|
||||||
'Programming Language :: Python :: 3.7',
|
'Programming Language :: Python :: 3.7',
|
||||||
'Programming Language :: Python :: 3.8',
|
'Programming Language :: Python :: 3.8',
|
||||||
'Programming Language :: Python :: 3.9',
|
'Programming Language :: Python :: 3.9',
|
||||||
|
'Programming Language :: Python :: 3.10',
|
||||||
|
'Programming Language :: Python :: 3.11',
|
||||||
'Topic :: Internet :: Proxy Servers',
|
'Topic :: Internet :: Proxy Servers',
|
||||||
'Topic :: Internet :: WWW/HTTP',
|
'Topic :: Internet :: WWW/HTTP',
|
||||||
'Topic :: Internet :: WWW/HTTP :: WSGI',
|
'Topic :: Internet :: WWW/HTTP :: WSGI',
|
||||||
|
@ -3,7 +3,6 @@ WebTest
|
|||||||
pytest-cov
|
pytest-cov
|
||||||
mock
|
mock
|
||||||
urllib3
|
urllib3
|
||||||
httpbin==0.5.0
|
|
||||||
flask<2.0
|
|
||||||
ujson
|
ujson
|
||||||
lxml
|
lxml
|
||||||
|
httpbin>=0.10.2
|
||||||
|
@ -62,6 +62,13 @@ collections:
|
|||||||
acl_paths:
|
acl_paths:
|
||||||
- ./sample_archive/access/pywb.aclj
|
- ./sample_archive/access/pywb.aclj
|
||||||
|
|
||||||
|
pywb-wildcard-surt:
|
||||||
|
index_paths: ./sample_archive/cdx/
|
||||||
|
archive_paths: ./sample_archive/warcs/
|
||||||
|
default_access: block
|
||||||
|
acl_paths:
|
||||||
|
- ./sample_archive/access/allow_all.aclj
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -41,12 +41,23 @@ class TestACLApp(BaseConfigTest):
|
|||||||
assert 'Access Blocked' in resp.text
|
assert 'Access Blocked' in resp.text
|
||||||
|
|
||||||
def test_allow_via_acl_header(self):
|
def test_allow_via_acl_header(self):
|
||||||
resp = self.query('http://www.iana.org/about/')
|
resp = self.testapp.get('/pywb/cdx?url=http://www.iana.org/about/', headers={"X-Pywb-Acl-User": "staff"})
|
||||||
|
|
||||||
assert len(resp.text.splitlines()) == 1
|
assert len(resp.text.splitlines()) == 1
|
||||||
|
|
||||||
resp = self.testapp.get('/pywb/mp_/http://www.iana.org/about/', headers={"X-Pywb-Acl-User": "staff"}, status=200)
|
resp = self.testapp.get('/pywb/mp_/http://www.iana.org/about/', headers={"X-Pywb-Acl-User": "staff"}, status=200)
|
||||||
|
|
||||||
|
def test_block_via_acl_header(self):
|
||||||
|
resp = self.testapp.get('/pywb/cdx?url=http://example.com/?example=3', headers={"X-Pywb-Acl-User": "staff"})
|
||||||
|
assert len(resp.text.splitlines()) > 0
|
||||||
|
|
||||||
|
resp = self.testapp.get('/pywb/mp_/http://example.com/?example=3', headers={"X-Pywb-Acl-User": "staff"}, status=451)
|
||||||
|
|
||||||
|
def test_exclude_via_acl_header(self):
|
||||||
|
resp = self.testapp.get('/pywb/cdx?url=http://example.com/?example=3', headers={"X-Pywb-Acl-User": "staff2"})
|
||||||
|
assert len(resp.text.splitlines()) == 0
|
||||||
|
|
||||||
|
resp = self.testapp.get('/pywb/mp_/http://example.com/?example=3', headers={"X-Pywb-Acl-User": "staff2"}, status=404)
|
||||||
|
|
||||||
def test_allowed_more_specific(self):
|
def test_allowed_more_specific(self):
|
||||||
resp = self.query('http://www.iana.org/_css/2013.1/fonts/opensans-semibold.ttf')
|
resp = self.query('http://www.iana.org/_css/2013.1/fonts/opensans-semibold.ttf')
|
||||||
|
|
||||||
@ -85,5 +96,9 @@ class TestACLApp(BaseConfigTest):
|
|||||||
|
|
||||||
assert '"http://httpbin.org/anything/resource.json"' in resp.text
|
assert '"http://httpbin.org/anything/resource.json"' in resp.text
|
||||||
|
|
||||||
|
def test_allow_all_acl_user_specific(self):
|
||||||
|
resp = self.testapp.get('/pywb-wildcard-surt/mp_/http://example.com/', status=451)
|
||||||
|
|
||||||
|
assert 'Access Blocked' in resp.text
|
||||||
|
|
||||||
|
resp = self.testapp.get('/pywb-wildcard-surt/mp_/http://example.com/', headers={"X-Pywb-Acl-User": "staff"}, status=200)
|
||||||
|
@ -203,7 +203,7 @@ class TestManagedColls(CollsDirMixin, BaseConfigTest):
|
|||||||
|
|
||||||
resp = self.testapp.get('/static/_/test/abc.js')
|
resp = self.testapp.get('/static/_/test/abc.js')
|
||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
assert resp.content_type == 'application/javascript'
|
assert resp.content_type in ('application/javascript', 'text/javascript')
|
||||||
resp.charset = 'utf-8'
|
resp.charset = 'utf-8'
|
||||||
assert '/* Some JS File */' in resp.text
|
assert '/* Some JS File */' in resp.text
|
||||||
|
|
||||||
@ -537,7 +537,7 @@ class TestManagedColls(CollsDirMixin, BaseConfigTest):
|
|||||||
main(['template', 'foo', '--remove', 'query_html'])
|
main(['template', 'foo', '--remove', 'query_html'])
|
||||||
|
|
||||||
def test_err_no_such_coll(self):
|
def test_err_no_such_coll(self):
|
||||||
""" Test error adding warc to non-existant collection
|
""" Test error adding warc to non-existent collection
|
||||||
"""
|
"""
|
||||||
warc1 = self._get_sample_warc('example.warc.gz')
|
warc1 = self._get_sample_warc('example.warc.gz')
|
||||||
|
|
||||||
|
@ -46,8 +46,12 @@ class TestEmbargoApp(BaseConfigTest):
|
|||||||
def test_embargo_ignore_acl_with_header_only(self):
|
def test_embargo_ignore_acl_with_header_only(self):
|
||||||
# ignore embargo with custom header only
|
# ignore embargo with custom header only
|
||||||
headers = {"X-Pywb-ACL-User": "staff2"}
|
headers = {"X-Pywb-ACL-User": "staff2"}
|
||||||
resp = self.testapp.get('/pywb-embargo-acl/20140126201054mp_/http://example.com/?example=1', status=200, headers=headers)
|
|
||||||
|
|
||||||
|
resp = self.testapp.get('/pywb-embargo-acl/cdx?url=http://example.com/?example=1', headers=headers)
|
||||||
|
assert len(resp.text.splitlines()) > 0
|
||||||
|
resp = self.testapp.get('/pywb-embargo-acl/20140126201054mp_/http://example.com/?example=1', status=200, headers=headers)
|
||||||
|
resp = self.testapp.get('/pywb-embargo-acl/cdx?url=http://example.com/?example=1')
|
||||||
|
assert len(resp.text.splitlines()) == 0
|
||||||
resp = self.testapp.get('/pywb-embargo-acl/20140126201054mp_/http://example.com/?example=1', status=404)
|
resp = self.testapp.get('/pywb-embargo-acl/20140126201054mp_/http://example.com/?example=1', status=404)
|
||||||
|
|
||||||
|
|
||||||
|
@ -56,6 +56,6 @@ class TestForceHttpsRoot(BaseConfigTest):
|
|||||||
resp = self.get('/20140128051539{0}/http://www.iana.org/domains/example', fmod,
|
resp = self.get('/20140128051539{0}/http://www.iana.org/domains/example', fmod,
|
||||||
headers={'X-Forwarded-Proto': 'https'})
|
headers={'X-Forwarded-Proto': 'https'})
|
||||||
|
|
||||||
assert resp.headers['Location'] == 'https://localhost:80/20140128051539{0}/http://www.iana.org/domains/reserved'.format(fmod)
|
assert resp.headers['Location'] == 'https://localhost:80/20140128051539{0}/http://www.iana.org/help/example-domains'.format(fmod)
|
||||||
|
|
||||||
|
|
||||||
|
@ -400,7 +400,7 @@ class TestWbIntegration(BaseConfigTest):
|
|||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
assert resp.headers['Content-Location'].endswith('/pywb/20140126200928{0}/http://www.iana.org/domains/root/db'.format(fmod))
|
assert resp.headers['Content-Location'].endswith('/pywb/20140126200928{0}/http://www.iana.org/domains/root/db'.format(fmod))
|
||||||
|
|
||||||
def test_not_existant_warc_other_capture(self, fmod):
|
def test_not_existent_warc_other_capture(self, fmod):
|
||||||
resp = self.get('/pywb/20140703030321{0}/http://example.com/?example=2', fmod)
|
resp = self.get('/pywb/20140703030321{0}/http://example.com/?example=2', fmod)
|
||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
assert resp.headers['Content-Location'].endswith('/pywb/20140603030341{0}/http://example.com?example=2'.format(fmod))
|
assert resp.headers['Content-Location'].endswith('/pywb/20140603030341{0}/http://example.com?example=2'.format(fmod))
|
||||||
@ -410,7 +410,7 @@ class TestWbIntegration(BaseConfigTest):
|
|||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
assert resp.headers['Content-Location'].endswith('/pywb/20140603030341{0}/http://example.com?example=2'.format(fmod))
|
assert resp.headers['Content-Location'].endswith('/pywb/20140603030341{0}/http://example.com?example=2'.format(fmod))
|
||||||
|
|
||||||
def test_not_existant_warc_no_other(self, fmod):
|
def test_not_existent_warc_no_other(self, fmod):
|
||||||
resp = self.get('/pywb/20140703030321{0}/http://example.com/?example=3', fmod, status=503)
|
resp = self.get('/pywb/20140703030321{0}/http://example.com/?example=3', fmod, status=503)
|
||||||
assert resp.status_int == 503
|
assert resp.status_int == 503
|
||||||
|
|
||||||
|
@ -91,25 +91,28 @@ class TestLiveRewriter(HttpBinLiveTests, BaseConfigTest):
|
|||||||
resp = self.head('/live/{0}httpbin.org/get?foo=bar', fmod_sl)
|
resp = self.head('/live/{0}httpbin.org/get?foo=bar', fmod_sl)
|
||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
|
|
||||||
@pytest.mark.skipif(sys.version_info < (3,0), reason='does not respond in 2.7')
|
# Following tests are temporarily commented out because latest version of PSF httpbin
|
||||||
def test_live_bad_content_length(self, fmod_sl):
|
# now returns 400 if content-length header isn't parsable as an int
|
||||||
resp = self.get('/live/{0}httpbin.org/response-headers?content-length=149,149', fmod_sl, status=200)
|
|
||||||
assert resp.headers['Content-Length'] == '149'
|
|
||||||
|
|
||||||
resp = self.get('/live/{0}httpbin.org/response-headers?Content-Length=xyz', fmod_sl, status=200)
|
# @pytest.mark.skipif(sys.version_info < (3,0), reason='does not respond in 2.7')
|
||||||
assert resp.headers['Content-Length'] == '90'
|
# def test_live_bad_content_length(self, fmod_sl):
|
||||||
|
# resp = self.get('/live/{0}httpbin.org/response-headers?content-length=149,149', fmod_sl, status=200)
|
||||||
|
# assert resp.headers['Content-Length'] == '149'
|
||||||
|
|
||||||
@pytest.mark.skipif(sys.version_info < (3,0), reason='does not respond in 2.7')
|
# resp = self.get('/live/{0}httpbin.org/response-headers?Content-Length=xyz', fmod_sl, status=200)
|
||||||
def test_live_bad_content_length_with_range(self, fmod_sl):
|
# assert resp.headers['Content-Length'] == '90'
|
||||||
resp = self.get('/live/{0}httpbin.org/response-headers?content-length=149,149', fmod_sl,
|
|
||||||
headers={'Range': 'bytes=0-'}, status=206)
|
|
||||||
assert resp.headers['Content-Length'] == '149'
|
|
||||||
assert resp.headers['Content-Range'] == 'bytes 0-148/149'
|
|
||||||
|
|
||||||
resp = self.get('/live/{0}httpbin.org/response-headers?Content-Length=xyz', fmod_sl,
|
# @pytest.mark.skipif(sys.version_info < (3,0), reason='does not respond in 2.7')
|
||||||
headers={'Range': 'bytes=0-'}, status=206)
|
# def test_live_bad_content_length_with_range(self, fmod_sl):
|
||||||
assert resp.headers['Content-Length'] == '90'
|
# resp = self.get('/live/{0}httpbin.org/response-headers?content-length=149,149', fmod_sl,
|
||||||
assert resp.headers['Content-Range'] == 'bytes 0-89/90'
|
# headers={'Range': 'bytes=0-'}, status=206)
|
||||||
|
# assert resp.headers['Content-Length'] == '149'
|
||||||
|
# assert resp.headers['Content-Range'] == 'bytes 0-148/149'
|
||||||
|
|
||||||
|
# resp = self.get('/live/{0}httpbin.org/response-headers?Content-Length=xyz', fmod_sl,
|
||||||
|
# headers={'Range': 'bytes=0-'}, status=206)
|
||||||
|
# assert resp.headers['Content-Length'] == '90'
|
||||||
|
# assert resp.headers['Content-Range'] == 'bytes 0-89/90'
|
||||||
|
|
||||||
def test_custom_unicode_header(self, fmod_sl):
|
def test_custom_unicode_header(self, fmod_sl):
|
||||||
value = u'⛄'
|
value = u'⛄'
|
||||||
|
135
tests/test_manager.py
Normal file
135
tests/test_manager.py
Normal file
@ -0,0 +1,135 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from pywb.manager.manager import CollectionsManager
|
||||||
|
|
||||||
|
VALID_WACZ_PATH = 'sample_archive/waczs/valid_example_1.wacz'
|
||||||
|
INVALID_WACZ_PATH = 'sample_archive/waczs/invalid_example_1.wacz'
|
||||||
|
|
||||||
|
TEST_COLLECTION_NAME = 'test-col'
|
||||||
|
|
||||||
|
|
||||||
|
class TestManager:
|
||||||
|
def test_add_valid_wacz_unpacked(self, tmp_path):
|
||||||
|
"""Test if adding a valid wacz file to a collection succeeds"""
|
||||||
|
manager = self.get_test_collections_manager(tmp_path)
|
||||||
|
manager._add_wacz_unpacked(VALID_WACZ_PATH)
|
||||||
|
assert 'valid_example_1-0.warc' in os.listdir(manager.archive_dir)
|
||||||
|
assert manager.DEF_INDEX_FILE in os.listdir(manager.indexes_dir)
|
||||||
|
with open(os.path.join(manager.indexes_dir, manager.DEF_INDEX_FILE), 'r') as f:
|
||||||
|
assert '"filename": "valid_example_1-0.warc"' in f.read()
|
||||||
|
|
||||||
|
def test_add_valid_wacz_unpacked_dupe_name(self, tmp_path):
|
||||||
|
"""Test if warc that already exists is renamed with -index suffix"""
|
||||||
|
manager = self.get_test_collections_manager(tmp_path)
|
||||||
|
manager._add_wacz_unpacked(VALID_WACZ_PATH)
|
||||||
|
# Add it again to see if there are name conflicts
|
||||||
|
manager._add_wacz_unpacked(VALID_WACZ_PATH)
|
||||||
|
assert 'valid_example_1-0.warc' in os.listdir(manager.archive_dir)
|
||||||
|
assert 'valid_example_1-0-1.warc' in os.listdir(manager.archive_dir)
|
||||||
|
assert manager.DEF_INDEX_FILE in os.listdir(manager.indexes_dir)
|
||||||
|
with open(os.path.join(manager.indexes_dir, manager.DEF_INDEX_FILE), 'r') as f:
|
||||||
|
data = f.read()
|
||||||
|
assert '"filename": "valid_example_1-0.warc"' in data
|
||||||
|
assert '"filename": "valid_example_1-0-1.warc"' in data
|
||||||
|
|
||||||
|
def test_add_invalid_wacz_unpacked(self, tmp_path, caplog):
|
||||||
|
"""Test if adding an invalid wacz file to a collection fails"""
|
||||||
|
manager = self.get_test_collections_manager(tmp_path)
|
||||||
|
manager._add_wacz_unpacked(INVALID_WACZ_PATH)
|
||||||
|
assert 'invalid_example_1-0.warc' not in os.listdir(manager.archive_dir)
|
||||||
|
assert 'sample_archive/waczs/invalid_example_1.wacz does not contain any warc files.' in caplog.text
|
||||||
|
|
||||||
|
index_path = os.path.join(manager.indexes_dir, manager.DEF_INDEX_FILE)
|
||||||
|
if os.path.exists(index_path):
|
||||||
|
with open(index_path, 'r') as f:
|
||||||
|
assert '"filename": "invalid_example_1-0.warc"' not in f.read()
|
||||||
|
|
||||||
|
def test_add_valid_archives_unpack_wacz(self, tmp_path):
|
||||||
|
manager = self.get_test_collections_manager(tmp_path)
|
||||||
|
archives = ['sample_archive/warcs/example.arc', 'sample_archive/warcs/example.arc.gz',
|
||||||
|
'sample_archive/warcs/example.warc', 'sample_archive/warcs/example.warc.gz',
|
||||||
|
'sample_archive/waczs/valid_example_1.wacz']
|
||||||
|
manager.add_archives(archives, unpack_wacz=True)
|
||||||
|
|
||||||
|
with open(os.path.join(manager.indexes_dir, manager.DEF_INDEX_FILE), 'r') as f:
|
||||||
|
index_text = f.read()
|
||||||
|
|
||||||
|
for archive in archives:
|
||||||
|
archive = os.path.basename(archive)
|
||||||
|
|
||||||
|
if archive.endswith('wacz'):
|
||||||
|
archive = 'valid_example_1-0.warc'
|
||||||
|
|
||||||
|
assert archive in os.listdir(manager.archive_dir)
|
||||||
|
assert archive in index_text
|
||||||
|
|
||||||
|
def test_add_valid_archives_dupe_name(self, tmp_path):
|
||||||
|
manager = self.get_test_collections_manager(tmp_path)
|
||||||
|
warc_filename = 'sample_archive/warcs/example.warc.gz'
|
||||||
|
manager.add_archives([warc_filename, warc_filename])
|
||||||
|
|
||||||
|
with open(os.path.join(manager.indexes_dir, manager.DEF_INDEX_FILE), 'r') as f:
|
||||||
|
index_text = f.read()
|
||||||
|
|
||||||
|
expected_archives = ('example.warc.gz', 'example-1.warc.gz')
|
||||||
|
|
||||||
|
for archive in expected_archives:
|
||||||
|
assert archive in os.listdir(manager.archive_dir)
|
||||||
|
assert archive in index_text
|
||||||
|
|
||||||
|
def test_add_valid_archives_dont_unpack_wacz(self, tmp_path):
|
||||||
|
manager = self.get_test_collections_manager(tmp_path)
|
||||||
|
archives = ['sample_archive/warcs/example.arc', 'sample_archive/warcs/example.arc.gz',
|
||||||
|
'sample_archive/warcs/example.warc', 'sample_archive/warcs/example.warc.gz',
|
||||||
|
'sample_archive/waczs/valid_example_1.wacz']
|
||||||
|
|
||||||
|
with pytest.raises(NotImplementedError):
|
||||||
|
manager.add_archives(archives, unpack_wacz=False)
|
||||||
|
|
||||||
|
def test_add_invalid_archives_unpack_wacz(self, tmp_path, caplog):
|
||||||
|
manager = self.get_test_collections_manager(tmp_path)
|
||||||
|
manager.add_archives(['sample_archive/warcs/example.warc', 'sample_archive/text_content/sample.html'],
|
||||||
|
unpack_wacz=True)
|
||||||
|
assert 'sample.html' not in os.listdir(manager.archive_dir)
|
||||||
|
assert 'example.warc' in os.listdir(manager.archive_dir)
|
||||||
|
assert "Invalid archives weren't added: sample_archive/text_content/sample.html" in caplog.messages
|
||||||
|
|
||||||
|
def test_merge_wacz_index(self, tmp_path):
|
||||||
|
manager = self.get_test_collections_manager(tmp_path)
|
||||||
|
manager._add_wacz_index(os.path.join(manager.indexes_dir, manager.DEF_INDEX_FILE),
|
||||||
|
'sample_archive/cdxj/example.cdxj',
|
||||||
|
{'example.warc.gz': 'rewritten.warc.gz'})
|
||||||
|
with open(os.path.join(manager.indexes_dir, manager.DEF_INDEX_FILE), 'r') as f:
|
||||||
|
index_content = f.read()
|
||||||
|
index_content = index_content.strip()
|
||||||
|
|
||||||
|
assert 'example.warc.gz' not in index_content
|
||||||
|
assert 'rewritten.warc.gz' in index_content
|
||||||
|
|
||||||
|
# check that collection index is sorted
|
||||||
|
index_lines = index_content.split('\n')
|
||||||
|
assert sorted(index_lines) == index_lines
|
||||||
|
|
||||||
|
def test_merge_wacz_index_gzip(self, tmp_path):
|
||||||
|
manager = self.get_test_collections_manager(tmp_path)
|
||||||
|
manager._add_wacz_index(os.path.join(manager.indexes_dir, manager.DEF_INDEX_FILE),
|
||||||
|
'sample_archive/cdxj/example.cdx.gz',
|
||||||
|
{'example-collection.warc': 'rewritten.warc'})
|
||||||
|
with open(os.path.join(manager.indexes_dir, manager.DEF_INDEX_FILE), 'r') as f:
|
||||||
|
index_content = f.read()
|
||||||
|
index_content = index_content.strip()
|
||||||
|
|
||||||
|
assert 'example-collection.warc' not in index_content
|
||||||
|
assert 'rewritten.warc' in index_content
|
||||||
|
|
||||||
|
# check that collection index is sorted
|
||||||
|
index_lines = index_content.split('\n')
|
||||||
|
assert sorted(index_lines) == index_lines
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_test_collections_manager(collections_path):
|
||||||
|
manager = CollectionsManager(TEST_COLLECTION_NAME, colls_dir=collections_path, must_exist=False)
|
||||||
|
manager.add_collection()
|
||||||
|
return manager
|
7
tox.ini
7
tox.ini
@ -4,23 +4,24 @@ testpaths =
|
|||||||
tests
|
tests
|
||||||
|
|
||||||
[tox]
|
[tox]
|
||||||
envlist = py36, py37, py38, py39, py310
|
envlist = py37, py38, py39, py310, py311
|
||||||
|
|
||||||
[gh-actions]
|
[gh-actions]
|
||||||
python =
|
python =
|
||||||
3.6: py36
|
|
||||||
3.7: py37
|
3.7: py37
|
||||||
3.8: py38
|
3.8: py38
|
||||||
3.9: py39
|
3.9: py39
|
||||||
3.10: py310
|
3.10: py310
|
||||||
|
3.11: py311
|
||||||
|
|
||||||
[testenv]
|
[testenv]
|
||||||
setenv = PYWB_NO_VERIFY_SSL = 1
|
setenv = PYWB_NO_VERIFY_SSL = 1
|
||||||
|
passenv = *
|
||||||
deps =
|
deps =
|
||||||
-rtest_requirements.txt
|
-rtest_requirements.txt
|
||||||
-rrequirements.txt
|
-rrequirements.txt
|
||||||
-rextra_requirements.txt
|
-rextra_requirements.txt
|
||||||
commands =
|
commands =
|
||||||
py.test --cov-config .coveragerc --cov pywb -v --doctest-modules ./pywb/ tests/
|
pytest --cov-config .coveragerc --cov pywb -v --doctest-modules ./pywb/ tests/
|
||||||
|
|
||||||
|
|
||||||
|
2
wombat
2
wombat
@ -1 +1 @@
|
|||||||
Subproject commit 74a6087d41d335c371e3a1f52f0f008944705118
|
Subproject commit 20596ca1e66928cae6f309af781f961aa112ca7f
|
Loading…
x
Reference in New Issue
Block a user