mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
refactor: updated dependencies, remove watchdog, add gevent and webassets
update tests, tests should pass for python 2 and 3!
This commit is contained in:
parent
ab77c1b6d9
commit
8765de4fe7
@ -9,7 +9,9 @@ from pywb.recorder.filters import SkipDupePolicy
|
||||
import atexit
|
||||
import tempfile
|
||||
import redis
|
||||
import shutil
|
||||
|
||||
def main():
|
||||
upstream_url = 'http://localhost:8080'
|
||||
|
||||
target = tempfile.mkdtemp(prefix='tmprec') + '/'
|
||||
@ -38,5 +40,8 @@ recorder_app = RecorderApp(upstream_url,
|
||||
MultiFileWARCWriter(target, dedup_index=dedup_index),
|
||||
accept_colls='live')
|
||||
|
||||
application = recorder_app
|
||||
return recorder_app
|
||||
|
||||
if __name__ == '__main__':
|
||||
application = main()
|
||||
|
||||
|
@ -166,7 +166,7 @@ def seek_read_full(seekable_reader, offset):
|
||||
def test_s3_read_1():
|
||||
pytest.importorskip('boto')
|
||||
|
||||
res = BlockLoader().load('s3://aws-publicdatasets/common-crawl/crawl-data/CC-MAIN-2015-11/segments/1424936462700.28/warc/CC-MAIN-20150226074102-00159-ip-10-28-5-156.ec2.internal.warc.gz',
|
||||
res = BlockLoader().load('s3://commoncrawl/crawl-data/CC-MAIN-2015-11/segments/1424936462700.28/warc/CC-MAIN-20150226074102-00159-ip-10-28-5-156.ec2.internal.warc.gz',
|
||||
offset=53235662,
|
||||
length=2526)
|
||||
|
||||
|
@ -152,19 +152,19 @@ StatusAndHeadersParserException: Expected Status Line starting with ['HTTP/1.0',
|
||||
>>> cli_lines(['--sort', '-', TEST_WARC_DIR])
|
||||
com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 example-url-agnostic-revisit.warc.gz
|
||||
urn:X-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 example-wpull.warc.gz
|
||||
Total: 210
|
||||
Total: 211
|
||||
|
||||
# test sort, multiple inputs, recursive, from base test dir
|
||||
>>> cli_lines(['--sort', '-r', '-', get_test_dir()])
|
||||
com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 warcs/example-url-agnostic-revisit.warc.gz
|
||||
urn:X-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 warcs/example-wpull.warc.gz
|
||||
Total: 210
|
||||
Total: 211
|
||||
|
||||
# test sort, 9-field, multiple inputs, all records + post query
|
||||
>>> cli_lines(['--sort', '-a', '-p', '-9', TEST_WARC_DIR])
|
||||
com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - 355 example-url-agnostic-revisit.warc.gz
|
||||
urn:X-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - 3181 example-wpull.warc.gz
|
||||
Total: 404
|
||||
Total: 406
|
||||
|
||||
# test writing to stdout
|
||||
>>> cli_lines(['-', TEST_WARC_DIR + 'example.warc.gz'])
|
||||
@ -188,7 +188,7 @@ Total: 4
|
||||
>>> cli_lines(['--sort', '--dir-root', get_test_dir() + 'other/', TEST_WARC_DIR])
|
||||
com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 ../warcs/example-url-agnostic-revisit.warc.gz
|
||||
urn:X-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 ../warcs/example-wpull.warc.gz
|
||||
Total: 210
|
||||
Total: 211
|
||||
|
||||
# test writing to temp dir, also use unicode filename
|
||||
>>> cli_lines_with_dir(TEST_WARC_DIR + 'example.warc.gz')
|
||||
|
@ -200,8 +200,6 @@ class WARCPathLoader(BaseLoader):
|
||||
cdx._formatter = formatter
|
||||
yield cdx
|
||||
|
||||
return cdx_iter
|
||||
|
||||
failed_files = []
|
||||
headers, payload = (self.resolve_loader.
|
||||
load_headers_and_payload(cdx,
|
||||
|
Loading…
x
Reference in New Issue
Block a user