mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
2.4.2 Develop->Master (#572)
* ensure that the RemoteCDXIndexSource also adds a 'matchType=' param, fix for ukwa-pywb/ukwa#57
* 2.4.2 fixes:
- cdxindexer: don't treat first param as output, require '-o <output>' instead, update tests
- cleanup: move url-polyfill.min.js to correct static dir, addresses #571
- update to latest wombat
- move logo to ./pywb/static, fix README path
- tests: update indexing tests for cdx-indexer fix
- bump version to 2.4.2
- Fix link in access-control docs to use RST instead of MD syntax (#568) (by @machawk1)
This commit is contained in:
parent
2e35c3e1ed
commit
9b8c187b3a
10
CHANGES.rst
10
CHANGES.rst
@ -1,3 +1,13 @@
|
|||||||
|
pywb 2.4.2 changelist
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
* ensure RemoteCDXIndexSource also passes ``matchType`` to upstream
|
||||||
|
|
||||||
|
* cdx-indexer: use ``-o`` flag to specify output, not first param (output to stdout by default)
|
||||||
|
|
||||||
|
* static paths cleanup, move ``url-polyfill.min.js`` to correct dir (fixes `#571 <https://github.com/webrecorder/pywb/issues/571>`_)
|
||||||
|
|
||||||
|
|
||||||
pywb 2.4.1 changelist
|
pywb 2.4.1 changelist
|
||||||
~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@ Webrecorder pywb 2.4
|
|||||||
|
|
||||||
.. raw:: html
|
.. raw:: html
|
||||||
|
|
||||||
<img src="static/pywb-logo.png" width="200"/>
|
<img src="pywb/static/pywb-logo.png" width="200"/>
|
||||||
|
|
||||||
.. image:: https://travis-ci.org/webrecorder/pywb.svg?branch=master
|
.. image:: https://travis-ci.org/webrecorder/pywb.svg?branch=master
|
||||||
:target: https://travis-ci.org/webrecorder/pywb
|
:target: https://travis-ci.org/webrecorder/pywb
|
||||||
|
@ -49,7 +49,7 @@ Access Error Messages
|
|||||||
|
|
||||||
The special error code 451 is used to indicate that a resource has been blocked (access setting ``block``)
|
The special error code 451 is used to indicate that a resource has been blocked (access setting ``block``)
|
||||||
|
|
||||||
The [error.html](https://github.com/webrecorder/pywb/blob/master/pywb/templates/error.html) template contains a special message for this access and can be customized further.
|
The `error.html <https://github.com/webrecorder/pywb/blob/master/pywb/templates/error.html>`_ template contains a special message for this access and can be customized further.
|
||||||
|
|
||||||
By design, resources that are ``exclude``-ed simply appear as 404 not found and no special error is provided.
|
By design, resources that are ``exclude``-ed simply appear as 404 not found and no special error is provided.
|
||||||
|
|
||||||
|
@ -451,7 +451,9 @@ instead of current working directory
|
|||||||
action='store_true',
|
action='store_true',
|
||||||
help=minimal_json_help)
|
help=minimal_json_help)
|
||||||
|
|
||||||
parser.add_argument('output', nargs='?', default='-', help=output_help)
|
parser.add_argument('-o', '--output',
|
||||||
|
default='-', help=output_help)
|
||||||
|
|
||||||
parser.add_argument('inputs', nargs='+', help=input_help)
|
parser.add_argument('inputs', nargs='+', help=input_help)
|
||||||
|
|
||||||
cmd = parser.parse_args(args=args)
|
cmd = parser.parse_args(args=args)
|
||||||
|
@ -149,13 +149,13 @@ StatusAndHeadersParserException: Expected Status Line starting with ['HTTP/1.0',
|
|||||||
#=================================================================
|
#=================================================================
|
||||||
|
|
||||||
# test sort, multiple inputs
|
# test sort, multiple inputs
|
||||||
>>> cli_lines(['--sort', '-', TEST_WARC_DIR])
|
>>> cli_lines(['--sort', '-o', '-', TEST_WARC_DIR])
|
||||||
com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 example-url-agnostic-revisit.warc.gz
|
com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 example-url-agnostic-revisit.warc.gz
|
||||||
urn:x-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 example-wpull.warc.gz
|
urn:x-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 example-wpull.warc.gz
|
||||||
Total: 213
|
Total: 213
|
||||||
|
|
||||||
# test sort, multiple inputs, recursive, from base test dir
|
# test sort, multiple inputs, recursive, from base test dir
|
||||||
>>> cli_lines(['--sort', '-r', '-', get_test_dir()])
|
>>> cli_lines(['--sort', '-r', '-o', '-', get_test_dir()])
|
||||||
com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 warcs/example-url-agnostic-revisit.warc.gz
|
com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 warcs/example-url-agnostic-revisit.warc.gz
|
||||||
urn:x-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 warcs/example-wpull.warc.gz
|
urn:x-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 warcs/example-wpull.warc.gz
|
||||||
Total: 213
|
Total: 213
|
||||||
@ -167,7 +167,7 @@ urn:x-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX
|
|||||||
Total: 408
|
Total: 408
|
||||||
|
|
||||||
# test writing to stdout
|
# test writing to stdout
|
||||||
>>> cli_lines(['-', TEST_WARC_DIR + 'example.warc.gz'])
|
>>> cli_lines([TEST_WARC_DIR + 'example.warc.gz'])
|
||||||
com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1043 333 example.warc.gz
|
com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1043 333 example.warc.gz
|
||||||
org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 example.warc.gz
|
org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 example.warc.gz
|
||||||
Total: 4
|
Total: 4
|
||||||
@ -178,7 +178,7 @@ com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 20
|
|||||||
org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 example.warc.gz
|
org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 example.warc.gz
|
||||||
Total: 4
|
Total: 4
|
||||||
|
|
||||||
# test custom root dir for cdx filenames, singlw warc
|
# test custom root dir for cdx filenames, single warc
|
||||||
>>> cli_lines(['--dir-root', get_test_dir() + 'other/', TEST_WARC_DIR + 'example.warc.gz'])
|
>>> cli_lines(['--dir-root', get_test_dir() + 'other/', TEST_WARC_DIR + 'example.warc.gz'])
|
||||||
com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1043 333 ../warcs/example.warc.gz
|
com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1043 333 ../warcs/example.warc.gz
|
||||||
org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 ../warcs/example.warc.gz
|
org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 ../warcs/example.warc.gz
|
||||||
@ -265,7 +265,7 @@ def cli_lines_with_dir(input_):
|
|||||||
tmp_dir = None
|
tmp_dir = None
|
||||||
tmp_dir = tempfile.mkdtemp()
|
tmp_dir = tempfile.mkdtemp()
|
||||||
|
|
||||||
main([tmp_dir, input_])
|
main(['-o', tmp_dir, input_])
|
||||||
|
|
||||||
filename = cdx_filename(os.path.basename(input_))
|
filename = cdx_filename(os.path.basename(input_))
|
||||||
|
|
||||||
|
Before Width: | Height: | Size: 12 KiB After Width: | Height: | Size: 12 KiB |
File diff suppressed because one or more lines are too long
@ -7,7 +7,7 @@
|
|||||||
{% block head %}
|
{% block head %}
|
||||||
{{ super() }}
|
{{ super() }}
|
||||||
<link rel="stylesheet" href="{{ static_prefix }}/css/query.css">
|
<link rel="stylesheet" href="{{ static_prefix }}/css/query.css">
|
||||||
<script src="{{ static_prefix }}/url-polyfill.min.js"></script>
|
<script src="{{ static_prefix }}/js/url-polyfill.min.js"></script>
|
||||||
<script src="{{ static_prefix }}/query.js"></script>
|
<script src="{{ static_prefix }}/query.js"></script>
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
__version__ = '2.4.1'
|
__version__ = '2.4.2'
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print(__version__)
|
print(__version__)
|
||||||
|
@ -125,6 +125,9 @@ class RemoteIndexSource(BaseIndexSource):
|
|||||||
if 'closest' in params and self.closest_limit:
|
if 'closest' in params and self.closest_limit:
|
||||||
api_url += '&limit=' + str(self.closest_limit)
|
api_url += '&limit=' + str(self.closest_limit)
|
||||||
|
|
||||||
|
if 'matchType' in params:
|
||||||
|
api_url += '&matchType=' + params.get('matchType')
|
||||||
|
|
||||||
return api_url
|
return api_url
|
||||||
|
|
||||||
def load_index(self, params):
|
def load_index(self, params):
|
||||||
|
@ -110,6 +110,17 @@ com,instagram)/amaliaulman 20141014171954 https://webenact.rhizome.org/all/20141
|
|||||||
assert(key_ts_res(res, 'load_url') == expected)
|
assert(key_ts_res(res, 'load_url') == expected)
|
||||||
assert(errs == {})
|
assert(errs == {})
|
||||||
|
|
||||||
|
# Url Match -- Remote Loaders
|
||||||
|
def test_remote_loader_with_prefix(self):
|
||||||
|
url = 'http://instagram.com/amaliaulman?__=1234234234'
|
||||||
|
remote_source = self.all_sources['remote_cdx']
|
||||||
|
res, errs = self.query_single_source(remote_source, dict(url=url, closest='20141014162332', limit=1, allowFuzzy='0'))
|
||||||
|
|
||||||
|
expected = """\
|
||||||
|
com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman"""
|
||||||
|
|
||||||
|
assert(key_ts_res(res, 'load_url') == expected)
|
||||||
|
assert(errs == {})
|
||||||
|
|
||||||
# Url Match -- Remote Loaders Closest
|
# Url Match -- Remote Loaders Closest
|
||||||
def test_remote_closest_loader(self, remote_source):
|
def test_remote_closest_loader(self, remote_source):
|
||||||
@ -123,7 +134,7 @@ com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/all/20141
|
|||||||
assert(errs == {})
|
assert(errs == {})
|
||||||
|
|
||||||
# Url Match -- Wb Memento
|
# Url Match -- Wb Memento
|
||||||
def test_remote_closest_wb_memnto_loader(self):
|
def test_remote_closest_wb_memento_loader(self):
|
||||||
replay = 'https://webenact.rhizome.org/all/{timestamp}id_/{url}'
|
replay = 'https://webenact.rhizome.org/all/{timestamp}id_/{url}'
|
||||||
source = WBMementoIndexSource(replay, '', replay)
|
source = WBMementoIndexSource(replay, '', replay)
|
||||||
|
|
||||||
|
@ -313,7 +313,8 @@ class TestManagedColls(CollsDirMixin, BaseConfigTest):
|
|||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
assert resp.content_type == 'text/html'
|
assert resp.content_type == 'text/html'
|
||||||
assert 'overriden search page: ' in resp.text
|
assert 'overriden search page: ' in resp.text
|
||||||
assert '"some":"value"' in resp.text
|
print(resp.text)
|
||||||
|
assert '"some":"value"' in resp.text, resp.text
|
||||||
|
|
||||||
def test_replay_banner_metadata(self, fmod):
|
def test_replay_banner_metadata(self, fmod):
|
||||||
""" Test adding metadata in replay banner (both framed and non-frame)
|
""" Test adding metadata in replay banner (both framed and non-frame)
|
||||||
@ -429,10 +430,10 @@ class TestManagedColls(CollsDirMixin, BaseConfigTest):
|
|||||||
|
|
||||||
os.mkdir(migrate_dir)
|
os.mkdir(migrate_dir)
|
||||||
|
|
||||||
cdxindexer_main(['-u', migrate_dir, self._get_sample_warc('')])
|
cdxindexer_main(['-u', '-o', migrate_dir, self._get_sample_warc('')])
|
||||||
|
|
||||||
# try one file with -9
|
# try one file with -9
|
||||||
cdxindexer_main(['-u', '-9', migrate_dir, self._get_sample_warc('example.warc.gz')])
|
cdxindexer_main(['-u', '-9', '-o', migrate_dir, self._get_sample_warc('example.warc.gz')])
|
||||||
|
|
||||||
cdxs = os.listdir(migrate_dir)
|
cdxs = os.listdir(migrate_dir)
|
||||||
assert all(x.endswith('.cdx') for x in cdxs)
|
assert all(x.endswith('.cdx') for x in cdxs)
|
||||||
|
2
wombat
2
wombat
@ -1 +1 @@
|
|||||||
Subproject commit b05b406b331050318caffa84f27348f689bdf53a
|
Subproject commit 3f04dcdcb071042d498c4912599454a15c11f0e4
|
Loading…
x
Reference in New Issue
Block a user