1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

refactor: update dependencies, remove watchdog, add gevent and webassets

update tests; tests should pass on both Python 2 and 3!
This commit is contained in:
Ilya Kreymer 2016-11-11 10:32:19 -08:00
parent ab77c1b6d9
commit 8765de4fe7
5 changed files with 32 additions and 28 deletions

View File

@ -9,7 +9,9 @@ from pywb.recorder.filters import SkipDupePolicy
import atexit
import tempfile
import redis
import shutil
def main():
upstream_url = 'http://localhost:8080'
target = tempfile.mkdtemp(prefix='tmprec') + '/'
@ -38,5 +40,8 @@ recorder_app = RecorderApp(upstream_url,
MultiFileWARCWriter(target, dedup_index=dedup_index),
accept_colls='live')
application = recorder_app
return recorder_app
if __name__ == '__main__':
application = main()

View File

@ -166,7 +166,7 @@ def seek_read_full(seekable_reader, offset):
def test_s3_read_1():
pytest.importorskip('boto')
res = BlockLoader().load('s3://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2015-11/segments/1424936462700.28/warc/CC-MAIN-20150226074102-00159-ip-10-28-5-156.ec2.internal.warc.gz',
res = BlockLoader().load('s3://commoncrawl/crawl-data/CC-MAIN-2015-11/segments/1424936462700.28/warc/CC-MAIN-20150226074102-00159-ip-10-28-5-156.ec2.internal.warc.gz',
offset=53235662,
length=2526)

View File

@ -152,19 +152,19 @@ StatusAndHeadersParserException: Expected Status Line starting with ['HTTP/1.0',
>>> cli_lines(['--sort', '-', TEST_WARC_DIR])
com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 example-url-agnostic-revisit.warc.gz
urn:X-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 example-wpull.warc.gz
Total: 210
Total: 211
# test sort, multiple inputs, recursive, from base test dir
>>> cli_lines(['--sort', '-r', '-', get_test_dir()])
com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 warcs/example-url-agnostic-revisit.warc.gz
urn:X-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 warcs/example-wpull.warc.gz
Total: 210
Total: 211
# test sort, 9-field, multiple inputs, all records + post query
>>> cli_lines(['--sort', '-a', '-p', '-9', TEST_WARC_DIR])
com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - 355 example-url-agnostic-revisit.warc.gz
urn:X-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - 3181 example-wpull.warc.gz
Total: 404
Total: 406
# test writing to stdout
>>> cli_lines(['-', TEST_WARC_DIR + 'example.warc.gz'])
@ -188,7 +188,7 @@ Total: 4
>>> cli_lines(['--sort', '--dir-root', get_test_dir() + 'other/', TEST_WARC_DIR])
com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 ../warcs/example-url-agnostic-revisit.warc.gz
urn:X-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 ../warcs/example-wpull.warc.gz
Total: 210
Total: 211
# test writing to temp dir, also use unicode filename
>>> cli_lines_with_dir(TEST_WARC_DIR + 'example.warc.gz')

View File

@ -200,8 +200,6 @@ class WARCPathLoader(BaseLoader):
cdx._formatter = formatter
yield cdx
return cdx_iter
failed_files = []
headers, payload = (self.resolve_loader.
load_headers_and_payload(cdx,

View File

@ -84,8 +84,9 @@ setup(
'surt>=0.3.0',
'brotlipy',
'pyyaml',
'watchdog',
'webencodings',
'gevent>=1.1.1',
'webassets',
],
tests_require=[
'pytest',