From 8765de4fe74fd53f6fecaff4e72bdce7099f09c5 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Fri, 11 Nov 2016 10:32:19 -0800 Subject: [PATCH] refactor: updated dependencies, remove watchdog, add gevent and webassets update tests, tests should pass for python 2 and 3! --- pywb/recorder/test/simplerec.py | 45 ++++++++++++++++++--------------- pywb/utils/test/test_loaders.py | 2 +- pywb/warc/test/test_indexing.py | 8 +++--- pywb/webagg/responseloader.py | 2 -- setup.py | 3 ++- 5 files changed, 32 insertions(+), 28 deletions(-) diff --git a/pywb/recorder/test/simplerec.py b/pywb/recorder/test/simplerec.py index 2a927498..e7665c6b 100644 --- a/pywb/recorder/test/simplerec.py +++ b/pywb/recorder/test/simplerec.py @@ -9,34 +9,39 @@ from pywb.recorder.filters import SkipDupePolicy import atexit import tempfile import redis +import shutil -upstream_url = 'http://localhost:8080' +def main(): + upstream_url = 'http://localhost:8080' -target = tempfile.mkdtemp(prefix='tmprec') + '/' + target = tempfile.mkdtemp(prefix='tmprec') + '/' -print('Recording to ' + target) + print('Recording to ' + target) -def rm_target(): - print('Removing ' + target) - shutil.rmtree(target) + def rm_target(): + print('Removing ' + target) + shutil.rmtree(target) -atexit.register(rm_target) + atexit.register(rm_target) -local_r = redis.StrictRedis.from_url('redis://localhost/2') -local_r.delete('rec:cdxj') -local_r.delete('rec:warc') + local_r = redis.StrictRedis.from_url('redis://localhost/2') + local_r.delete('rec:cdxj') + local_r.delete('rec:warc') -#target = './_recordings/' + #target = './_recordings/' -dedup_index = WritableRedisIndexer( - redis_url='redis://localhost/2/rec:cdxj', - file_key_template='rec:warc', - rel_path_template=target, - dupe_policy=SkipDupePolicy()) + dedup_index = WritableRedisIndexer( + redis_url='redis://localhost/2/rec:cdxj', + file_key_template='rec:warc', + rel_path_template=target, + dupe_policy=SkipDupePolicy()) -recorder_app = RecorderApp(upstream_url, - MultiFileWARCWriter(target, dedup_index=dedup_index), - accept_colls='live') + recorder_app = RecorderApp(upstream_url, + MultiFileWARCWriter(target, dedup_index=dedup_index), + accept_colls='live') -application = recorder_app + return recorder_app + +if __name__ == '__main__': + application = main() diff --git a/pywb/utils/test/test_loaders.py b/pywb/utils/test/test_loaders.py index 4b755726..dd5c3861 100644 --- a/pywb/utils/test/test_loaders.py +++ b/pywb/utils/test/test_loaders.py @@ -166,7 +166,7 @@ def seek_read_full(seekable_reader, offset): def test_s3_read_1(): pytest.importorskip('boto') - res = BlockLoader().load('s3://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2015-11/segments/1424936462700.28/warc/CC-MAIN-20150226074102-00159-ip-10-28-5-156.ec2.internal.warc.gz', + res = BlockLoader().load('s3://commoncrawl/crawl-data/CC-MAIN-2015-11/segments/1424936462700.28/warc/CC-MAIN-20150226074102-00159-ip-10-28-5-156.ec2.internal.warc.gz', offset=53235662, length=2526) diff --git a/pywb/warc/test/test_indexing.py b/pywb/warc/test/test_indexing.py index daabf7bb..40238bca 100644 --- a/pywb/warc/test/test_indexing.py +++ b/pywb/warc/test/test_indexing.py @@ -152,19 +152,19 @@ StatusAndHeadersParserException: Expected Status Line starting with ['HTTP/1.0', >>> cli_lines(['--sort', '-', TEST_WARC_DIR]) com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 example-url-agnostic-revisit.warc.gz urn:X-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 example-wpull.warc.gz -Total: 210 +Total: 211 # test sort, multiple inputs, recursive, from base test dir >>> cli_lines(['--sort', '-r', '-', get_test_dir()]) com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 warcs/example-url-agnostic-revisit.warc.gz urn:X-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 warcs/example-wpull.warc.gz -Total: 210 +Total: 211 # test sort, 9-field, multiple inputs, all records + post query >>> cli_lines(['--sort', '-a', '-p', '-9', TEST_WARC_DIR]) com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - 355 example-url-agnostic-revisit.warc.gz urn:X-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - 3181 example-wpull.warc.gz -Total: 404 +Total: 406 # test writing to stdout >>> cli_lines(['-', TEST_WARC_DIR + 'example.warc.gz']) @@ -188,7 +188,7 @@ Total: 4 >>> cli_lines(['--sort', '--dir-root', get_test_dir() + 'other/', TEST_WARC_DIR]) com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 ../warcs/example-url-agnostic-revisit.warc.gz urn:X-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 ../warcs/example-wpull.warc.gz -Total: 210 +Total: 211 # test writing to temp dir, also use unicode filename >>> cli_lines_with_dir(TEST_WARC_DIR + 'example.warc.gz') diff --git a/pywb/webagg/responseloader.py b/pywb/webagg/responseloader.py index ecebe82a..0716d33d 100644 --- a/pywb/webagg/responseloader.py +++ b/pywb/webagg/responseloader.py @@ -200,8 +200,6 @@ class WARCPathLoader(BaseLoader): cdx._formatter = formatter yield cdx - return cdx_iter - failed_files = [] headers, payload = (self.resolve_loader. load_headers_and_payload(cdx, diff --git a/setup.py b/setup.py index 7eafa305..7e4ecb0e 100755 --- a/setup.py +++ b/setup.py @@ -84,8 +84,9 @@ setup( 'surt>=0.3.0', 'brotlipy', 'pyyaml', - 'watchdog', 'webencodings', + 'gevent>=1.1.1', + 'webassets', ], tests_require=[ 'pytest',