mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
add first set of zipnum tests #17
still need to test timed reload, multi sources
This commit is contained in:
parent
7863b2bade
commit
bff39626b5
@ -214,7 +214,7 @@ def create_cdx_source(filename, config):
|
||||
if filename.endswith('.cdx'):
|
||||
return CDXFile(filename)
|
||||
|
||||
if filename.endswith('.summary'):
|
||||
if filename.endswith(('.summary', '.idx')):
|
||||
return ZipNumCluster(filename, config)
|
||||
|
||||
return None
|
||||
|
@ -142,8 +142,8 @@ org,iana)/domains/root/db 20140126200928 http://www.iana.org/domains/root/db tex
|
||||
('filename', 'dupes.warc.gz')]
|
||||
|
||||
# NOTE: external dependency -- need self-contained test
|
||||
#>>> x = CDXServer('http://web.archive.org/cdx/search/cdx').load_cdx(url = 'example.com', output = 'raw', limit = '2')
|
||||
#>>> pprint.pprint(x.next().items())
|
||||
>>> x = CDXServer('http://web.archive.org/cdx/search/cdx').load_cdx(url = 'example.com', output = 'raw', limit = '2')
|
||||
>>> pprint.pprint(x.next().items())
|
||||
[('urlkey', 'com,example)/'),
|
||||
('timestamp', '20020120142510'),
|
||||
('original', 'http://example.com:80/'),
|
||||
@ -172,6 +172,7 @@ def cdx_ops_test(url, sources = [test_cdx_dir + 'iana.cdx'], **kwparams):
|
||||
results = server.load_cdx(**kwparams)
|
||||
|
||||
for x in results:
|
||||
x = x.replace('\t', ' ')
|
||||
sys.stdout.write(x)
|
||||
|
||||
|
||||
|
44
pywb/cdx/test/zipnum_test.py
Normal file
44
pywb/cdx/test/zipnum_test.py
Normal file
@ -0,0 +1,44 @@
|
||||
"""
|
||||
>>> zip_ops_test(url = 'http://iana.org')
|
||||
org,iana)/ 20140126200624 http://www.iana.org/ text/html 200 OSSAPWJ23L56IYVRW3GFEAR4MCJMGPTB - - 2258 334 iana.warc.gz
|
||||
org,iana)/ 20140127171238 http://iana.org unk 302 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 343 1858 dupes.warc.gz
|
||||
org,iana)/ 20140127171238 http://www.iana.org/ warc/revisit - OSSAPWJ23L56IYVRW3GFEAR4MCJMGPTB - - 536 2678 dupes.warc.gz
|
||||
|
||||
# test idx index (tabs replaced with 4 spaces)
|
||||
>>> zip_ops_test(url = 'http://iana.org/domains/', matchType = 'prefix', showPagedIndex = True)
|
||||
org,iana)/dnssec 20140126201307 zipnum 8511 373
|
||||
org,iana)/domains/int 20140126201239 zipnum 8884 353
|
||||
org,iana)/domains/root/servers 20140126201227 zipnum 9237 386
|
||||
|
||||
>>> zip_ops_test(url = 'http://iana.org/domains/', matchType = 'prefix')
|
||||
org,iana)/domains/arpa 20140126201248 http://www.iana.org/domains/arpa text/html 200 QOFZZRN6JIKAL2JRL6ZC2VVG42SPKGHT - - 2939 759039 iana.warc.gz
|
||||
org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 example.warc.gz
|
||||
org,iana)/domains/idn-tables 20140126201127 http://www.iana.org/domains/idn-tables text/html 200 HNCUFTJMOQOGAEY6T56KVC3T7TVLKGEW - - 8118 715878 iana.warc.gz
|
||||
org,iana)/domains/int 20140126201239 http://www.iana.org/domains/int text/html 200 X32BBNNORV4SPEHTQF5KI5NFHSKTZK6Q - - 2482 746788 iana.warc.gz
|
||||
org,iana)/domains/reserved 20140126201054 http://www.iana.org/domains/reserved text/html 200 R5AAEQX5XY5X5DG66B23ODN5DUBWRA27 - - 3573 701457 iana.warc.gz
|
||||
org,iana)/domains/root 20140126200912 http://www.iana.org/domains/root text/html 200 YWA2R6UVWCYNHBZJKBTPYPZ5CJWKGGUX - - 2691 657746 iana.warc.gz
|
||||
org,iana)/domains/root/db 20140126200927 http://www.iana.org/domains/root/db/ text/html 302 3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ - - 446 671278 iana.warc.gz
|
||||
org,iana)/domains/root/db 20140126200928 http://www.iana.org/domains/root/db text/html 200 DHXA725IW5VJJFRTWBQT6BEZKRE7H57S - - 18365 672225 iana.warc.gz
|
||||
org,iana)/domains/root/servers 20140126201227 http://www.iana.org/domains/root/servers text/html 200 AFW34N3S4NK2RJ6QWMVPB5E2AIUETAHU - - 3137 733840 iana.warc.gz
|
||||
|
||||
"""
|
||||
|
||||
|
||||
|
||||
|
||||
from cdxserver_test import cdx_ops_test
|
||||
|
||||
from pywb import get_test_dir
|
||||
test_zipnum = get_test_dir() + 'zipcdx/zipnum-sample.idx'
|
||||
print test_zipnum
|
||||
|
||||
def zip_ops_test(url, **kwargs):
|
||||
sources = test_zipnum
|
||||
cdx_ops_test(url, sources, **kwargs)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import doctest
|
||||
doctest.testmod()
|
||||
|
||||
|
@ -1 +1 @@
|
||||
zipnum ./sample_archive/zipcdx/zipnum-sample.cdx.gz
|
||||
zipnum ./sample_archive/zipcdx/zipnum-sample.cdx.gz
|
||||
|
1
setup.py
1
setup.py
@ -15,6 +15,7 @@ setuptools.setup(name='pywb',
|
||||
provides=['pywb','pywb.utils','pywb.cdx','pywb.warc','pywb.rewrite'],
|
||||
package_data={'pywb': ['ui/*', 'static/*', '*.yaml']},
|
||||
data_files = [('sample_archive/cdx/', glob.glob('sample_archive/cdx/*')),
|
||||
('sample_archive/zipcdx/', glob.glob('sample_archive/zipcdx/*')),
|
||||
('sample_archive/warcs/', glob.glob('sample_archive/warcs/*')),
|
||||
('sample_archive/text_content/', glob.glob('sample_archive/text_content/*'))],
|
||||
install_requires=['uwsgi', 'rfc3987', 'chardet', 'redis', 'jinja2', 'surt', 'pyyaml', 'WebTest','pytest'],
|
||||
|
Loading…
x
Reference in New Issue
Block a user