1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

2.4.2 Develop->Master (#572)

* ensure that the RemoteCDXIndexSource also adds a 'matchType=' param, fix for ukwa-pywb/ukwa#57

* 2.4.2 fixes:
- cdxindexer: don't treat first param as output, require '-o <output>' instead, update tests
- cleanup: move url-polyfill.min.js to correct static dir, addresses #571
- update to latest wombat
- move logo to ./pywb/static, fix README path
- tests: update indexing tests for cdx-indexer fix
- bump version to 2.4.2
- Fix link in access-control docs to use RST instead of MD syntax (#568) (by @machawk1)
This commit is contained in:
Ilya Kreymer 2020-07-10 20:22:58 -07:00 committed by GitHub
parent 2e35c3e1ed
commit 9b8c187b3a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 43 additions and 16 deletions

View File

@ -1,3 +1,13 @@
pywb 2.4.2 changelist
~~~~~~~~~~~~~~~~~~~~~
* ensure RemoteCDXIndexSource also passes ``matchType`` to upstream
* cdx-indexer: use ``-o`` flag to specify output, not first param (output to stdout by default)
* static paths cleanup, move ``url-polyfill.min.js`` to correct dir (fixes `#571 <https://github.com/webrecorder/pywb/issues/571>`_)
pywb 2.4.1 changelist pywb 2.4.1 changelist
~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~

View File

@ -3,7 +3,7 @@ Webrecorder pywb 2.4
.. raw:: html .. raw:: html
<img src="static/pywb-logo.png" width="200"/> <img src="pywb/static/pywb-logo.png" width="200"/>
.. image:: https://travis-ci.org/webrecorder/pywb.svg?branch=master .. image:: https://travis-ci.org/webrecorder/pywb.svg?branch=master
:target: https://travis-ci.org/webrecorder/pywb :target: https://travis-ci.org/webrecorder/pywb

View File

@ -49,7 +49,7 @@ Access Error Messages
The special error code 451 is used to indicate that a resource has been blocked (access setting ``block``) The special error code 451 is used to indicate that a resource has been blocked (access setting ``block``)
The [error.html](https://github.com/webrecorder/pywb/blob/master/pywb/templates/error.html) template contains a special message for this access and can be customized further. The `error.html <https://github.com/webrecorder/pywb/blob/master/pywb/templates/error.html>`_ template contains a special message for this access and can be customized further.
By design, resources that are ``exclude``-ed simply appear as 404 not found and no special error is provided. By design, resources that are ``exclude``-ed simply appear as 404 not found and no special error is provided.

View File

@ -451,7 +451,9 @@ instead of current working directory
action='store_true', action='store_true',
help=minimal_json_help) help=minimal_json_help)
parser.add_argument('output', nargs='?', default='-', help=output_help) parser.add_argument('-o', '--output',
default='-', help=output_help)
parser.add_argument('inputs', nargs='+', help=input_help) parser.add_argument('inputs', nargs='+', help=input_help)
cmd = parser.parse_args(args=args) cmd = parser.parse_args(args=args)

View File

@ -149,13 +149,13 @@ StatusAndHeadersParserException: Expected Status Line starting with ['HTTP/1.0',
#================================================================= #=================================================================
# test sort, multiple inputs # test sort, multiple inputs
>>> cli_lines(['--sort', '-', TEST_WARC_DIR]) >>> cli_lines(['--sort', '-o', '-', TEST_WARC_DIR])
com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 example-url-agnostic-revisit.warc.gz com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 example-url-agnostic-revisit.warc.gz
urn:x-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 example-wpull.warc.gz urn:x-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 example-wpull.warc.gz
Total: 213 Total: 213
# test sort, multiple inputs, recursive, from base test dir # test sort, multiple inputs, recursive, from base test dir
>>> cli_lines(['--sort', '-r', '-', get_test_dir()]) >>> cli_lines(['--sort', '-r', '-o', '-', get_test_dir()])
com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 warcs/example-url-agnostic-revisit.warc.gz com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 warcs/example-url-agnostic-revisit.warc.gz
urn:x-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 warcs/example-wpull.warc.gz urn:x-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 warcs/example-wpull.warc.gz
Total: 213 Total: 213
@ -167,7 +167,7 @@ urn:x-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX
Total: 408 Total: 408
# test writing to stdout # test writing to stdout
>>> cli_lines(['-', TEST_WARC_DIR + 'example.warc.gz']) >>> cli_lines([TEST_WARC_DIR + 'example.warc.gz'])
com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1043 333 example.warc.gz com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1043 333 example.warc.gz
org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 example.warc.gz org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 example.warc.gz
Total: 4 Total: 4
@ -178,7 +178,7 @@ com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 20
org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 example.warc.gz org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 example.warc.gz
Total: 4 Total: 4
# test custom root dir for cdx filenames, singlw warc # test custom root dir for cdx filenames, single warc
>>> cli_lines(['--dir-root', get_test_dir() + 'other/', TEST_WARC_DIR + 'example.warc.gz']) >>> cli_lines(['--dir-root', get_test_dir() + 'other/', TEST_WARC_DIR + 'example.warc.gz'])
com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1043 333 ../warcs/example.warc.gz com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1043 333 ../warcs/example.warc.gz
org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 ../warcs/example.warc.gz org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 ../warcs/example.warc.gz
@ -265,7 +265,7 @@ def cli_lines_with_dir(input_):
tmp_dir = None tmp_dir = None
tmp_dir = tempfile.mkdtemp() tmp_dir = tempfile.mkdtemp()
main([tmp_dir, input_]) main(['-o', tmp_dir, input_])
filename = cdx_filename(os.path.basename(input_)) filename = cdx_filename(os.path.basename(input_))

View File

Before

Width:  |  Height:  |  Size: 12 KiB

After

Width:  |  Height:  |  Size: 12 KiB

File diff suppressed because one or more lines are too long

View File

@ -7,7 +7,7 @@
{% block head %} {% block head %}
{{ super() }} {{ super() }}
<link rel="stylesheet" href="{{ static_prefix }}/css/query.css"> <link rel="stylesheet" href="{{ static_prefix }}/css/query.css">
<script src="{{ static_prefix }}/url-polyfill.min.js"></script> <script src="{{ static_prefix }}/js/url-polyfill.min.js"></script>
<script src="{{ static_prefix }}/query.js"></script> <script src="{{ static_prefix }}/query.js"></script>
{% endblock %} {% endblock %}

View File

@ -1,4 +1,4 @@
__version__ = '2.4.1' __version__ = '2.4.2'
if __name__ == '__main__': if __name__ == '__main__':
print(__version__) print(__version__)

View File

@ -125,6 +125,9 @@ class RemoteIndexSource(BaseIndexSource):
if 'closest' in params and self.closest_limit: if 'closest' in params and self.closest_limit:
api_url += '&limit=' + str(self.closest_limit) api_url += '&limit=' + str(self.closest_limit)
if 'matchType' in params:
api_url += '&matchType=' + params.get('matchType')
return api_url return api_url
def load_index(self, params): def load_index(self, params):

View File

@ -110,6 +110,17 @@ com,instagram)/amaliaulman 20141014171954 https://webenact.rhizome.org/all/20141
assert(key_ts_res(res, 'load_url') == expected) assert(key_ts_res(res, 'load_url') == expected)
assert(errs == {}) assert(errs == {})
# Url Match -- Remote Loaders
def test_remote_loader_with_prefix(self):
url = 'http://instagram.com/amaliaulman?__=1234234234'
remote_source = self.all_sources['remote_cdx']
res, errs = self.query_single_source(remote_source, dict(url=url, closest='20141014162332', limit=1, allowFuzzy='0'))
expected = """\
com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman"""
assert(key_ts_res(res, 'load_url') == expected)
assert(errs == {})
# Url Match -- Remote Loaders Closest # Url Match -- Remote Loaders Closest
def test_remote_closest_loader(self, remote_source): def test_remote_closest_loader(self, remote_source):
@ -123,7 +134,7 @@ com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/all/20141
assert(errs == {}) assert(errs == {})
# Url Match -- Wb Memento # Url Match -- Wb Memento
def test_remote_closest_wb_memnto_loader(self): def test_remote_closest_wb_memento_loader(self):
replay = 'https://webenact.rhizome.org/all/{timestamp}id_/{url}' replay = 'https://webenact.rhizome.org/all/{timestamp}id_/{url}'
source = WBMementoIndexSource(replay, '', replay) source = WBMementoIndexSource(replay, '', replay)

View File

@ -313,7 +313,8 @@ class TestManagedColls(CollsDirMixin, BaseConfigTest):
assert resp.status_int == 200 assert resp.status_int == 200
assert resp.content_type == 'text/html' assert resp.content_type == 'text/html'
assert 'overriden search page: ' in resp.text assert 'overriden search page: ' in resp.text
assert '"some":"value"' in resp.text print(resp.text)
assert '"some":"value"' in resp.text, resp.text
def test_replay_banner_metadata(self, fmod): def test_replay_banner_metadata(self, fmod):
""" Test adding metadata in replay banner (both framed and non-frame) """ Test adding metadata in replay banner (both framed and non-frame)
@ -429,10 +430,10 @@ class TestManagedColls(CollsDirMixin, BaseConfigTest):
os.mkdir(migrate_dir) os.mkdir(migrate_dir)
cdxindexer_main(['-u', migrate_dir, self._get_sample_warc('')]) cdxindexer_main(['-u', '-o', migrate_dir, self._get_sample_warc('')])
# try one file with -9 # try one file with -9
cdxindexer_main(['-u', '-9', migrate_dir, self._get_sample_warc('example.warc.gz')]) cdxindexer_main(['-u', '-9', '-o', migrate_dir, self._get_sample_warc('example.warc.gz')])
cdxs = os.listdir(migrate_dir) cdxs = os.listdir(migrate_dir)
assert all(x.endswith('.cdx') for x in cdxs) assert all(x.endswith('.cdx') for x in cdxs)

2
wombat

@ -1 +1 @@
Subproject commit b05b406b331050318caffa84f27348f689bdf53a Subproject commit 3f04dcdcb071042d498c4912599454a15c11f0e4