mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
* s3 and zipnum fixes: - update s3 to use boto3 - ensure zipnum indexes (.idx, .summary) are picked up automatically via DirectoryAggregator - ensure showNumPages query always returns a json object, ignoring output= - add tests for auto-configured zipnum indexes * reqs: add boto3 dependency, init boto Config only if avail * s3 loader: first try with credentials, then with no-cred config archive paths: don't add anything if path is fully qualified (contains '://') * s3 loader: on first load, if credentialed load fails, try uncredentialed fix typo tests: add zipnum auto collection tests * zipnum page count query: don't add 'source' field to page count query (if 'url' key not present in dict) * s3 loader: fix no-range load, add test, update skip check to boto3 * fix spacing * boto -> boto3 rename error message, cleanup comments
51 lines
2.4 KiB
Python
51 lines
2.4 KiB
Python
from .base_config_test import BaseConfigTest, CollsDirMixin
|
|
from pywb.manager.manager import main as manager
|
|
|
|
from pywb.warcserver.index.cdxobject import CDXObject
|
|
import shutil
|
|
from pywb import get_test_dir
|
|
import os
|
|
import json
|
|
|
|
|
|
# ============================================================================
|
|
class TestZipnumAutoDir(CollsDirMixin, BaseConfigTest):
    """CDX endpoint tests for a collection backed by zipnum index files.

    Class setup creates a ``testzip`` collection, copies the sample zipnum
    files (``.idx``, ``.cdx.gz``, ``.loc``) into its ``indexes`` directory and
    one WARC into its ``archive`` directory. The tests then query
    ``/testzip/cdx`` without any further index configuration in this file.
    """

    @classmethod
    def setup_class(cls):
        """Initialise the ``testzip`` collection and copy fixtures into it."""
        super(TestZipnumAutoDir, cls).setup_class('config_test.yaml')

        manager(['init', 'testzip'])

        # Both target directories live under <root_dir>/_test_colls/testzip.
        coll_dir = os.path.join(cls.root_dir, '_test_colls', 'testzip')
        cls.archive_dir = os.path.join(coll_dir, 'archive')
        cls.index_dir = os.path.join(coll_dir, 'indexes')

        # Sample zipnum files: index, compressed cdx data, and location file.
        sample_dir = os.path.join(get_test_dir(), 'zipcdx')
        zipnum_files = (
            'zipnum-sample.idx',
            'zipnum-sample.cdx.gz',
            'zipnum-sample.loc',
        )
        for filename in zipnum_files:
            shutil.copy(os.path.join(sample_dir, filename), cls.index_dir)

        warc_path = os.path.join(get_test_dir(), 'warcs', 'iana.warc.gz')
        shutil.copy(warc_path, cls.archive_dir)

    def test_cdxj_query(self):
        """A wildcard query under ``iana.org/domains/`` returns nine lines."""
        resp = self.testapp.get('/testzip/cdx?url=iana.org/domains/*')

        cdx_lines = resp.text.rstrip().split('\n')
        assert len(cdx_lines) == 9

    def test_num_pages_query(self):
        """``showNumPages`` reports block, page and page-size counts as JSON."""
        query = '/testzip/cdx?url=http://iana.org/domains/&matchType=domain&showNumPages=true&pageSize=4'
        resp = self.testapp.get(query)

        # Override the content type on the response object before reading
        # ``.json`` -- presumably so the JSON accessor accepts the body
        # regardless of the header the server sent; confirm against webtest.
        resp.content_type = 'text/json'

        expected = {"blocks": 38, "pages": 10, "pageSize": 4}
        assert resp.json == expected

    def test_paged_index_query(self):
        """``showPagedIndex`` with ``page=1`` returns the expected index rows."""
        query = '/testzip/cdx?url=http://iana.org/domains/&matchType=domain&output=json&showPagedIndex=true&pageSize=4&page=1'
        resp = self.testapp.get(query)

        # The response body is one JSON object per line.
        lines = [json.loads(raw) for raw in resp.text.rstrip().split('\n')]

        expected_rows = [
            {"urlkey": "org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126200912", "part": "zipnum", "offset": 1150, "length": 235, "lineno": 5},
            {"urlkey": "org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126201240", "part": "zipnum", "offset": 1385, "length": 307, "lineno": 6},
            {"urlkey": "org,iana)/_css/2013.1/fonts/opensans-regular.ttf 20140126200654", "part": "zipnum", "offset": 1692, "length": 235, "lineno": 7},
            {"urlkey": "org,iana)/_css/2013.1/fonts/opensans-regular.ttf 20140126200816", "part": "zipnum", "offset": 1927, "length": 231, "lineno": 8},
        ]

        # Index explicitly (rather than zip) so a short response still fails.
        for position, expected in enumerate(expected_rows):
            assert lines[position] == expected
|
|
|
|
|
|
|