mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Docs, custom metadata improvements (#509)
* metadata/coll_config: don't confuse user metadata with collection config, don't display collection config settings as metadata (ukwa/ukwa-pywb#47) - for collection template, add separate 'coll_config' dict, keep user metadata only in 'metadata' dict (default to empty) - for static collections, assume metadata is in the 'metadata' dict of collection config - for dynamic collections, load metadata.yaml into 'metadata' dict - ensure 'metadata' key is passed to frame_insert - ensure 'metadata' added consistently in framed and non-framed mode - tests: update tests to ensure metadata is added consistently - fuzzymatch: don't match 204 OPTIONS responses, update fuzzymatcher test * documentation - add documentation for metadata in ui-customization, rebuild docs, - add link to ui customization from configuring - work on access control docs * fixed small typo's in ui-customization.rst * frontendapp: fix doc string - misc: remove warning on urllib3 Retry init - set version to pywb 2.4.0rc0 Co-Authored-By: John Berlin <n0tan3rd@gmail.com>
This commit is contained in:
parent
35004c1675
commit
6f79840b79
10
README.rst
10
README.rst
@ -1,4 +1,4 @@
|
|||||||
Webrecorder pywb 2.3
|
Webrecorder pywb 2.4
|
||||||
====================
|
====================
|
||||||
|
|
||||||
.. image:: https://travis-ci.org/webrecorder/pywb.svg?branch=master
|
.. image:: https://travis-ci.org/webrecorder/pywb.svg?branch=master
|
||||||
@ -41,6 +41,8 @@ The 2.x release included a major overhaul of pywb and introduces many new featur
|
|||||||
|
|
||||||
* Improved 'calendar' query UI, grouping results by year and month, and updated replay banner.
|
* Improved 'calendar' query UI, grouping results by year and month, and updated replay banner.
|
||||||
|
|
||||||
|
* New with 2.4: An extensible access control system.
|
||||||
|
|
||||||
|
|
||||||
Please see the `full documentation <https://pywb.readthedocs.org>`_ for more detailed info on all these features.
|
Please see the `full documentation <https://pywb.readthedocs.org>`_ for more detailed info on all these features.
|
||||||
|
|
||||||
@ -67,12 +69,6 @@ Contributions & Bug Reports
|
|||||||
|
|
||||||
Users are encouraged to fork and contribute to this project to keep improving web archiving tools.
|
Users are encouraged to fork and contribute to this project to keep improving web archiving tools.
|
||||||
|
|
||||||
A few key features are high on list of priorities, but have not yet been implemented, including:
|
|
||||||
|
|
||||||
* Url Exclusion System
|
|
||||||
|
|
||||||
* UI Improvements
|
|
||||||
|
|
||||||
If you are interested in contributing, especially to any of these areas, please let us know!
|
If you are interested in contributing, especially to any of these areas, please let us know!
|
||||||
|
|
||||||
Otherwise, please take a look at `list of current issues <https://github.com/webrecorder/pywb/issues>`_ and feel free to open new ones about any aspect of pywb, including the new documentation.
|
Otherwise, please take a look at `list of current issues <https://github.com/webrecorder/pywb/issues>`_ and feel free to open new ones about any aspect of pywb, including the new documentation.
|
||||||
|
@ -4,6 +4,14 @@ pywb\.manager package
|
|||||||
Submodules
|
Submodules
|
||||||
----------
|
----------
|
||||||
|
|
||||||
|
pywb\.manager\.aclmanager module
|
||||||
|
--------------------------------
|
||||||
|
|
||||||
|
.. automodule:: pywb.manager.aclmanager
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
pywb\.manager\.autoindex module
|
pywb\.manager\.autoindex module
|
||||||
-------------------------------
|
-------------------------------
|
||||||
|
|
||||||
|
@ -100,6 +100,14 @@ pywb\.rewrite\.rewrite\_hls module
|
|||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
|
pywb\.rewrite\.rewrite\_js\_workers module
|
||||||
|
------------------------------------------
|
||||||
|
|
||||||
|
.. automodule:: pywb.rewrite.rewrite_js_workers
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
pywb\.rewrite\.rewriteinputreq module
|
pywb\.rewrite\.rewriteinputreq module
|
||||||
-------------------------------------
|
-------------------------------------
|
||||||
|
|
||||||
|
@ -14,6 +14,18 @@ Subpackages
|
|||||||
pywb.utils
|
pywb.utils
|
||||||
pywb.warcserver
|
pywb.warcserver
|
||||||
|
|
||||||
|
Submodules
|
||||||
|
----------
|
||||||
|
|
||||||
|
pywb\.version module
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
.. automodule:: pywb.version
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
|
||||||
Module contents
|
Module contents
|
||||||
---------------
|
---------------
|
||||||
|
|
||||||
|
@ -60,6 +60,14 @@ pywb\.utils\.memento module
|
|||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
|
pywb\.utils\.merge module
|
||||||
|
-------------------------
|
||||||
|
|
||||||
|
.. automodule:: pywb.utils.merge
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
pywb\.utils\.wbexception module
|
pywb\.utils\.wbexception module
|
||||||
-------------------------------
|
-------------------------------
|
||||||
|
|
||||||
|
@ -12,6 +12,22 @@ Subpackages
|
|||||||
Submodules
|
Submodules
|
||||||
----------
|
----------
|
||||||
|
|
||||||
|
pywb\.warcserver\.access\_checker module
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
.. automodule:: pywb.warcserver.access_checker
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
pywb\.warcserver\.amf module
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
.. automodule:: pywb.warcserver.amf
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
pywb\.warcserver\.basewarcserver module
|
pywb\.warcserver\.basewarcserver module
|
||||||
---------------------------------------
|
---------------------------------------
|
||||||
|
|
||||||
|
148
docs/manual/access-control.rst
Normal file
148
docs/manual/access-control.rst
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
.. _access-control:
|
||||||
|
|
||||||
|
Access Control System
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
The access control system allows for a flexible configuration of rules to allow,
|
||||||
|
block or exclude access to individual urls by longest-prefix match.
|
||||||
|
|
||||||
|
Access Control Files (.aclj)
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
Access controls are set in one or more access control json files (.aclj), sorted in reverse alphabetical order.
|
||||||
|
To determine the best match, a binary search is used (similar to CDXJ lookup) and then the best match is found forward.
|
||||||
|
|
||||||
|
An .aclj file may look as follows::
|
||||||
|
|
||||||
|
org,httpbin)/anything/something - {"access": "allow", "url": "http://httpbin.org/anything/something"}
|
||||||
|
org,httpbin)/anything - {"access": "exclude", "url": "http://httpbin.org/anything"}
|
||||||
|
org,httpbin)/ - {"access": "block", "url": "httpbin.org/"}
|
||||||
|
com, - {"access": "allow", "url": "com,"}
|
||||||
|
|
||||||
|
|
||||||
|
Each JSON entry contains an ``access`` field and the original ``url`` field that was used to convert to the SURT (if any).
|
||||||
|
|
||||||
|
The prefix consists of a SURT key and a ``-`` (currently reserved for a timestamp/date range field to be added later)
|
||||||
|
|
||||||
|
Given these rules, a user would:
|
||||||
|
* be allowed to visit ``http://httpbin.org/anything/something`` (allow)
|
||||||
|
* but would receive an 'access blocked' error message when viewing ``http://httpbin.org/`` (block)
|
||||||
|
* would receive a 404 not found error when viewing ``http://httpbin.org/anything`` (exclude)
|
||||||
|
|
||||||
|
|
||||||
|
Access Types: allow, block, exclude
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
The available access types are as follows:
|
||||||
|
|
||||||
|
- ``exclude`` - when matched, results are excluded from the index, as if they do not exist. User will receive a 404.
|
||||||
|
- ``block`` - when matched, results are not excluded from the index but are marked with ``access: block``, and access to the actual resource is blocked. User will see a 451.
|
||||||
|
- ``allow`` - full access to the index and the resource.
|
||||||
|
|
||||||
|
The difference between ``exclude`` and ``block`` is that when blocked, the user can be notified that access is blocked, while
|
||||||
|
with exclude, no trace of the resource is presented to the user.
|
||||||
|
|
||||||
|
The use of ``allow`` is useful to provide access to more specific resources within a broader block/exclude rule.
|
||||||
|
|
||||||
|
Access Error Messages
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
The special error code 451 is used to indicate that a resource has been blocked (access setting ``block``)
|
||||||
|
|
||||||
|
The `error.html <https://github.com/webrecorder/pywb/blob/master/pywb/templates/error.html>`_ template contains a special message for this access and can be customized further.
|
||||||
|
|
||||||
|
By design, resources that are ``exclude``-ed simply appear as 404 not found and no special error is provided.
|
||||||
|
|
||||||
|
|
||||||
|
Managing Access Lists via Command-Line
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
The .aclj files need not ever be added or edited manually.
|
||||||
|
|
||||||
|
The pywb ``wb-manager`` utility has been extended to provide tools for adding, removing and checking access control rules.
|
||||||
|
|
||||||
|
The access rules are written to ``<collection>/acl/access-rules.aclj`` for a given collection ``<collection>`` for automatic collections.
|
||||||
|
|
||||||
|
For example, to add the first line to an ACL file ``access.aclj``, one could run::
|
||||||
|
|
||||||
|
wb-manager acl add <collection> http://httpbin.org/anything/something exclude
|
||||||
|
|
||||||
|
|
||||||
|
The URL supplied can be a URL or a SURT prefix. If a SURT is supplied, it is used as is::
|
||||||
|
|
||||||
|
wb-manager acl add <collection> com, allow
|
||||||
|
|
||||||
|
|
||||||
|
By default, access control rules apply to a prefix of a given URL or SURT.
|
||||||
|
|
||||||
|
To have the rule apply only to the exact match, use::
|
||||||
|
|
||||||
|
wb-manager acl add <collection> http://httpbin.org/anything/something allow --exact-match
|
||||||
|
|
||||||
|
Rules added with and without the ``--exact-match`` flag are considered distinct rules, and can be added
|
||||||
|
and removed separately.
|
||||||
|
|
||||||
|
With the above rules, ``http://httpbin.org/anything/something`` would be allowed, but
|
||||||
|
``http://httpbin.org/anything/something/subpath`` would be excluded for any ``subpath``.
|
||||||
|
|
||||||
|
To remove a rule, one can run::
|
||||||
|
|
||||||
|
wb-manager acl remove <collection> http://httpbin.org/anything/something
|
||||||
|
|
||||||
|
To import rules in bulk, such as from an OpenWayback-style excludes.txt and mark them as ``exclude``::
|
||||||
|
|
||||||
|
wb-manager acl importtxt <collection> ./excludes.txt exclude
|
||||||
|
|
||||||
|
|
||||||
|
See ``wb-manager acl -h`` for a list of additional commands such as for validating rules files and running a match against
|
||||||
|
an existing rule set.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Access Controls for Custom Collections
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
For manually configured collections, there are additional options for configuring access controls.
|
||||||
|
The access control files can be specified explicitly using the ``acl_paths`` key, which allows specifying multiple ACL files
|
||||||
|
and sharing access control files between different collections.
|
||||||
|
|
||||||
|
Single ACLJ::
|
||||||
|
|
||||||
|
collections:
|
||||||
|
test:
|
||||||
|
acl_paths: ./path/to/file.aclj
|
||||||
|
default_access: block
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Multiple ACLJ::
|
||||||
|
|
||||||
|
collections:
|
||||||
|
test:
|
||||||
|
acl_paths:
|
||||||
|
- ./path/to/allows.aclj
|
||||||
|
- ./path/to/blocks.aclj
|
||||||
|
- ./path/to/other.aclj
|
||||||
|
- ./path/to/directory
|
||||||
|
|
||||||
|
default_access: block
|
||||||
|
|
||||||
|
The ``acl_paths`` can be a single entry or a list, and can also include directories. If a directory is specified, all ``.aclj`` files
|
||||||
|
in the directory are checked.
|
||||||
|
|
||||||
|
When finding the best rule from multiple ``.aclj`` files, each file is binary searched and the result
|
||||||
|
set merge-sorted to find the best match (very similar to the CDXJ index lookup).
|
||||||
|
|
||||||
|
Note: It might make sense to separate ``allows.aclj`` and ``blocks.aclj`` into individual files for organizational reasons,
|
||||||
|
but there is no specific need to keep more than one access control file.
|
||||||
|
|
||||||
|
Default Access
|
||||||
|
^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
An additional ``default_access`` setting can be added to specify the default rule if no other rules match for custom collections.
|
||||||
|
If omitted, this setting is ``default_access: allow``, which is usually the desired default.
|
||||||
|
|
||||||
|
Setting ``default_access: block`` and providing a list of ``allow`` rules provides a flexible way to allow access
|
||||||
|
to only a limited set of resources, and block access to anything out of scope by default.
|
||||||
|
|
||||||
|
|
@ -105,6 +105,11 @@ When resolving a ``example.warc.gz``, pywb will then check (in order):
|
|||||||
* Then, ``http://remote-backup.example.com/collections/<coll name>/example.warc.gz`` (if first lookup unsuccessful)
|
* Then, ``http://remote-backup.example.com/collections/<coll name>/example.warc.gz`` (if first lookup unsuccessful)
|
||||||
|
|
||||||
|
|
||||||
|
UI Customizations
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
See :ref:`ui-customizations` for more details on how to customize the UI.
|
||||||
|
|
||||||
|
|
||||||
Special and Custom Collections
|
Special and Custom Collections
|
||||||
------------------------------
|
------------------------------
|
||||||
|
@ -1,34 +1,26 @@
|
|||||||
.. _configuring-pywb-ui:
|
.. _ui-customizations:
|
||||||
|
|
||||||
UI Customizations
|
UI Customizations
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
pywb supports UI customizations, either for an entire archive,
|
pywb supports UI customizations, either for an entire archive,
|
||||||
or per-collection.
|
or per-collection. Jinja2 templates are used for rendering all views,
|
||||||
|
and static files can also be added as needed.
|
||||||
Static Files
|
|
||||||
^^^^^^^^^^^^
|
|
||||||
|
|
||||||
The replay server will automatically support static files placed under the following directories:
|
|
||||||
|
|
||||||
* Files under the root ``static`` directory can be accessed via ``http://my-archive.example.com/static/<filename>``
|
|
||||||
|
|
||||||
* Files under the per-collection ``./collections/<coll name>/static`` directory can be accessed via ``http://my-archive.example.com/static/_/<coll name>/<filename>``
|
|
||||||
|
|
||||||
Templates
|
Templates
|
||||||
^^^^^^^^^
|
^^^^^^^^^
|
||||||
|
|
||||||
pywb users Jinja2 templates to render HTML to render the HTML for all aspects of the application.
|
Default templates, listed below, are found in the ``./pywb/templates/`` directory.
|
||||||
|
|
||||||
A version placed in the ``templates`` directory, either in the root or per collection, will override that template.
|
Custom template files placed in the ``templates`` directory, either in the root or per collection, will override that template.
|
||||||
|
|
||||||
To copy the default pywb template to the template directory run:
|
To copy the default pywb template to the template directory using the cli tools, run:
|
||||||
|
|
||||||
``wb-manager template --add search_html``
|
``wb-manager template --add search_html``
|
||||||
|
|
||||||
The following templates are available:
|
The following page-level templates are available, corresponding to home page, collection page or search results:
|
||||||
|
|
||||||
* ``home.html`` -- Home Page Template, used for ``http://my-archive.example.com/``
|
* ``index.html`` -- Home Page Template, used for ``http://my-archive.example.com/``
|
||||||
|
|
||||||
* ``search.html`` -- Collection Template, used for each collection page ``http://my-archive.example.com/<coll name>/``
|
* ``search.html`` -- Collection Template, used for each collection page ``http://my-archive.example.com/<coll name>/``
|
||||||
|
|
||||||
@ -50,8 +42,8 @@ Replay and Banner templates:
|
|||||||
* ``banner.html`` -- The banner used for frameless replay. Can be set to blank to disable the banner.
|
* ``banner.html`` -- The banner used for frameless replay. Can be set to blank to disable the banner.
|
||||||
|
|
||||||
|
|
||||||
For those looking to customize the default template(s) when deploying pywb, the following templates located in the
|
To customize the default pywb UI across multiple pages, the following generic templates
|
||||||
pywb/templates directory.
|
can also be overridden:
|
||||||
|
|
||||||
* ``base.html`` -- The base template used for non-replay related pages.
|
* ``base.html`` -- The base template used for non-replay related pages.
|
||||||
|
|
||||||
@ -74,6 +66,40 @@ The ``base.html`` template also provides five blocks that can be supplied by tem
|
|||||||
|
|
||||||
* ``footer`` -- Block for adding content to the ``<body>`` after the ``body`` block, includes the ``footer.html`` template
|
* ``footer`` -- Block for adding content to the ``<body>`` after the ``body`` block, includes the ``footer.html`` template
|
||||||
|
|
||||||
|
Static Files
|
||||||
|
^^^^^^^^^^^^
|
||||||
|
|
||||||
|
The pywb server will automatically support static files placed under the following directories:
|
||||||
|
|
||||||
|
* Files under the root ``static`` directory can be accessed via ``http://my-archive.example.com/static/<filename>``
|
||||||
|
|
||||||
|
* Files under the per-collection ``./collections/<coll name>/static`` directory can be accessed via ``http://my-archive.example.com/static/_/<coll name>/<filename>``
|
||||||
|
|
||||||
|
|
||||||
|
Custom Metadata
|
||||||
|
^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
It is possible to also add custom metadata that will be available in the Jinja2 template.
|
||||||
|
|
||||||
|
For dynamic collections, any fields placed in the ``<coll_name>/metadata.yaml`` file can be accessed
|
||||||
|
|
||||||
|
via the ``{{ metadata }}`` variable.
|
||||||
|
|
||||||
|
For example, if metadata file contains:
|
||||||
|
|
||||||
|
.. code-block:: yaml
|
||||||
|
|
||||||
|
somedata: value
|
||||||
|
|
||||||
|
Accessing ``{{ metadata.somedata }}`` will resolve to ``value``
|
||||||
|
|
||||||
|
The metadata can also be added via commandline: ``wb-manager metadata myCollection --set somedata=value``
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
The default collection UI template (search.html) currently lists all of the available metadata fields.
|
||||||
|
|
||||||
|
|
||||||
Custom Outer Replay Frame
|
Custom Outer Replay Frame
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
@ -289,25 +289,22 @@ class FrontEndApp(object):
|
|||||||
except Exception:
|
except Exception:
|
||||||
self.raise_not_found(environ, 'static_file_not_found', filepath)
|
self.raise_not_found(environ, 'static_file_not_found', filepath)
|
||||||
|
|
||||||
def get_metadata(self, coll):
|
def get_coll_config(self, coll):
|
||||||
"""Retrieve the metadata associated with a collection
|
"""Retrieve the collection config, including metadata, associated with a collection
|
||||||
|
|
||||||
:param str coll: The name of the collection to receive metadata for
|
:param str coll: The name of the collection to receive config info for
|
||||||
:return: The collections metadata if it exists
|
:return: The collections config
|
||||||
:rtype: dict
|
:rtype: dict
|
||||||
"""
|
"""
|
||||||
# if coll == self.all_coll:
|
coll_config = {'coll': coll,
|
||||||
# coll = '*'
|
'type': 'replay'}
|
||||||
|
|
||||||
metadata = {'coll': coll,
|
|
||||||
'type': 'replay'}
|
|
||||||
|
|
||||||
if coll in self.warcserver.list_fixed_routes():
|
if coll in self.warcserver.list_fixed_routes():
|
||||||
metadata.update(self.warcserver.get_coll_config(coll))
|
coll_config.update(self.warcserver.get_coll_config(coll))
|
||||||
else:
|
else:
|
||||||
metadata.update(self.metadata_cache.load(coll))
|
coll_config['metadata'] = self.metadata_cache.load(coll) or {}
|
||||||
|
|
||||||
return metadata
|
return coll_config
|
||||||
|
|
||||||
def serve_coll_page(self, environ, coll='$root'):
|
def serve_coll_page(self, environ, coll='$root'):
|
||||||
"""Render and serve a collections search page (search.html).
|
"""Render and serve a collections search page (search.html).
|
||||||
@ -322,7 +319,8 @@ class FrontEndApp(object):
|
|||||||
|
|
||||||
self.setup_paths(environ, coll)
|
self.setup_paths(environ, coll)
|
||||||
|
|
||||||
metadata = self.get_metadata(coll)
|
coll_config = self.get_coll_config(coll)
|
||||||
|
metadata = coll_config.get('metadata')
|
||||||
|
|
||||||
view = BaseInsertView(self.rewriterapp.jinja_env, 'search.html')
|
view = BaseInsertView(self.rewriterapp.jinja_env, 'search.html')
|
||||||
|
|
||||||
@ -332,8 +330,9 @@ class FrontEndApp(object):
|
|||||||
|
|
||||||
content = view.render_to_string(environ,
|
content = view.render_to_string(environ,
|
||||||
wb_prefix=wb_prefix,
|
wb_prefix=wb_prefix,
|
||||||
metadata=metadata,
|
coll=coll,
|
||||||
coll=coll)
|
coll_config=coll_config,
|
||||||
|
metadata=metadata)
|
||||||
|
|
||||||
return WbResponse.text_response(content, content_type='text/html; charset="utf-8"')
|
return WbResponse.text_response(content, content_type='text/html; charset="utf-8"')
|
||||||
|
|
||||||
@ -409,16 +408,16 @@ class FrontEndApp(object):
|
|||||||
if environ.get('QUERY_STRING'):
|
if environ.get('QUERY_STRING'):
|
||||||
wb_url_str += '?' + environ.get('QUERY_STRING')
|
wb_url_str += '?' + environ.get('QUERY_STRING')
|
||||||
|
|
||||||
metadata = self.get_metadata(coll)
|
coll_config = self.get_coll_config(coll)
|
||||||
if record:
|
if record:
|
||||||
metadata['type'] = 'record'
|
coll_config['type'] = 'record'
|
||||||
|
|
||||||
if timemap_output:
|
if timemap_output:
|
||||||
metadata['output'] = timemap_output
|
coll_config['output'] = timemap_output
|
||||||
# ensure that the timemap path information is not included
|
# ensure that the timemap path information is not included
|
||||||
wb_url_str = wb_url_str.replace('timemap/{0}/'.format(timemap_output), '')
|
wb_url_str = wb_url_str.replace('timemap/{0}/'.format(timemap_output), '')
|
||||||
|
|
||||||
return self.rewriterapp.render_content(wb_url_str, metadata, environ)
|
return self.rewriterapp.render_content(wb_url_str, coll_config, environ)
|
||||||
|
|
||||||
def setup_paths(self, environ, coll, record=False):
|
def setup_paths(self, environ, coll, record=False):
|
||||||
"""Populates the WSGI environment dictionary with the path information necessary to perform a response for
|
"""Populates the WSGI environment dictionary with the path information necessary to perform a response for
|
||||||
|
@ -493,6 +493,7 @@ class RewriterApp(object):
|
|||||||
framed_replay,
|
framed_replay,
|
||||||
coll=kwargs.get('coll', ''),
|
coll=kwargs.get('coll', ''),
|
||||||
replay_mod=self.replay_mod,
|
replay_mod=self.replay_mod,
|
||||||
|
metadata=kwargs.get('metadata', {}),
|
||||||
config=self.config))
|
config=self.config))
|
||||||
|
|
||||||
cookie_rewriter = None
|
cookie_rewriter = None
|
||||||
@ -835,7 +836,7 @@ class RewriterApp(object):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
def get_top_frame_params(self, wb_url, kwargs):
|
def get_top_frame_params(self, wb_url, kwargs):
|
||||||
return None
|
return {'metadata': kwargs.get('metadata', {})}
|
||||||
|
|
||||||
def handle_custom_response(self, environ, wb_url, full_prefix, host_prefix, kwargs):
|
def handle_custom_response(self, environ, wb_url, full_prefix, host_prefix, kwargs):
|
||||||
if kwargs.get('output'):
|
if kwargs.get('output'):
|
||||||
|
@ -18,6 +18,7 @@ html, body
|
|||||||
|
|
||||||
</head>
|
</head>
|
||||||
<body style="margin: 0px; padding: 0px;">
|
<body style="margin: 0px; padding: 0px;">
|
||||||
|
|
||||||
<div id="wb_iframe_div">
|
<div id="wb_iframe_div">
|
||||||
<iframe id="replay_iframe" frameborder="0" seamless="seamless" scrolling="yes" class="wb_iframe"></iframe>
|
<iframe id="replay_iframe" frameborder="0" seamless="seamless" scrolling="yes" class="wb_iframe"></iframe>
|
||||||
</div>
|
</div>
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
__version__ = '2.4.0'
|
__version__ = '2.4.0rc0'
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print(__version__)
|
print(__version__)
|
||||||
|
@ -4,6 +4,7 @@ import requests
|
|||||||
import six.moves.http_client
|
import six.moves.http_client
|
||||||
from requests.adapters import DEFAULT_POOLBLOCK, HTTPAdapter
|
from requests.adapters import DEFAULT_POOLBLOCK, HTTPAdapter
|
||||||
from urllib3.poolmanager import PoolManager
|
from urllib3.poolmanager import PoolManager
|
||||||
|
from urllib3.util.retry import Retry
|
||||||
|
|
||||||
six.moves.http_client._MAXHEADERS = 10000
|
six.moves.http_client._MAXHEADERS = 10000
|
||||||
six.moves.http_client._MAXLINE = 131072
|
six.moves.http_client._MAXLINE = 131072
|
||||||
@ -41,8 +42,8 @@ class PywbHttpAdapter(HTTPAdapter):
|
|||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
class DefaultAdapters(object):
|
class DefaultAdapters(object):
|
||||||
live_adapter = PywbHttpAdapter(max_retries=3)
|
live_adapter = PywbHttpAdapter(max_retries=Retry(3))
|
||||||
remote_adapter = PywbHttpAdapter(max_retries=3)
|
remote_adapter = PywbHttpAdapter(max_retries=Retry(3))
|
||||||
|
|
||||||
|
|
||||||
requests.packages.urllib3.disable_warnings()
|
requests.packages.urllib3.disable_warnings()
|
||||||
|
@ -194,6 +194,11 @@ class FuzzyMatcher(object):
|
|||||||
check_query = False
|
check_query = False
|
||||||
url_no_query, ext = self.get_ext(url)
|
url_no_query, ext = self.get_ext(url)
|
||||||
|
|
||||||
|
# don't fuzzy match to 204
|
||||||
|
if cdx.get('status') == '204':
|
||||||
|
if '__pywb_method=options' in cdx['urlkey']:
|
||||||
|
return False
|
||||||
|
|
||||||
# check ext
|
# check ext
|
||||||
if ext and ext not in self.default_filters['not_exts']:
|
if ext and ext not in self.default_filters['not_exts']:
|
||||||
check_query = True
|
check_query = True
|
||||||
|
@ -283,6 +283,7 @@ class TestManagedColls(CollsDirMixin, BaseConfigTest):
|
|||||||
|
|
||||||
with open(banner_file, 'w+b') as fh:
|
with open(banner_file, 'w+b') as fh:
|
||||||
fh.write(b'<div>Custom Banner Here!</div>')
|
fh.write(b'<div>Custom Banner Here!</div>')
|
||||||
|
fh.write(b'\n{{ metadata | tojson }}')
|
||||||
|
|
||||||
def test_add_custom_banner_replay(self, fmod):
|
def test_add_custom_banner_replay(self, fmod):
|
||||||
resp = self.get('/test/20140103030321/http://example.com/?example=1', fmod)
|
resp = self.get('/test/20140103030321/http://example.com/?example=1', fmod)
|
||||||
@ -314,6 +315,13 @@ class TestManagedColls(CollsDirMixin, BaseConfigTest):
|
|||||||
assert 'overriden search page: ' in resp.text
|
assert 'overriden search page: ' in resp.text
|
||||||
assert '"some":"value"' in resp.text
|
assert '"some":"value"' in resp.text
|
||||||
|
|
||||||
|
def test_replay_banner_metadata(self, fmod):
|
||||||
|
""" Test adding metadata in replay banner (both framed and non-frame)
|
||||||
|
"""
|
||||||
|
resp = self.get('/test/20140103030321{0}/http://example.com/?example=1', fmod)
|
||||||
|
assert '<div>Custom Banner Here!</div>' in resp.text
|
||||||
|
assert '"some":"value"' in resp.text
|
||||||
|
|
||||||
def test_more_custom_templates_replay(self, fmod):
|
def test_more_custom_templates_replay(self, fmod):
|
||||||
resp = self.get('/test/20140103030321{0}/http://example.com/?example=1', fmod)
|
resp = self.get('/test/20140103030321{0}/http://example.com/?example=1', fmod)
|
||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
|
Loading…
x
Reference in New Issue
Block a user