mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Address test cases broken by the previous commit.
Move the py.test fixture and fixture classes (TestExclusionPerms, PrintReporter) to the tests.fixture module. Update test_config.yaml accordingly.
This commit is contained in:
parent
2c40c9b112
commit
9eda5ad97e
@ -75,6 +75,16 @@ class CDXObject(OrderedDict):
|
||||
return (self['mimetype'] == 'warc/revisit' or
|
||||
self['filename'] == '-')
|
||||
|
||||
def to_text(self, fields=None):
    """
    Render this record as one newline-terminated line of plaintext CDX.

    :param fields: list of field names to output. When None, the
        whole record as produced by ``str(self)`` is returned.
    """
    if fields is not None:
        # Look up each requested field and join the values with spaces.
        selected = [self[name] for name in fields]
        return ' '.join(selected) + '\n'

    return str(self) + '\n'
|
||||
|
||||
def __str__(self):
|
||||
if self.cdxline:
|
||||
return self.cdxline
|
||||
@ -109,5 +119,12 @@ class IDXObject(OrderedDict):
|
||||
|
||||
self.idxline = idxline
|
||||
|
||||
def to_text(self, fields=None):
    """
    Render this record as one newline-terminated line of plaintext IDX.

    :param fields: list of field names to output (currently ignored)
    """
    line = str(self)
    return line + '\n'
|
||||
|
||||
def __str__(self):
|
||||
return self.idxline
|
||||
|
@ -10,10 +10,11 @@ from collections import deque
|
||||
|
||||
|
||||
#=================================================================
|
||||
def cdx_load(sources, params, filter=True, perms_checker=None):
|
||||
def cdx_load(sources, params, perms_checker=None, filter=True):
|
||||
"""
|
||||
merge text CDX lines from sources, return an iterator for
|
||||
filtered and access-checked sequence of CDX objects.
|
||||
|
||||
:param sources: iterable for text CDX sources.
|
||||
:param perms_checker: access check filter object implementing
|
||||
allow_url_lookup(key, url), allow_capture(cdxobj) and
|
||||
|
@ -107,7 +107,8 @@ class CDXServer(BaseCDXServer):
|
||||
params['key'] = key
|
||||
params['end_key'] = end_key
|
||||
|
||||
cdx_iter = cdx_load(self.sources, params, self.perms_checker)
|
||||
cdx_iter = cdx_load(self.sources, params,
|
||||
perms_checker=self.perms_checker)
|
||||
return self._check_cdx_iter(cdx_iter, params)
|
||||
|
||||
def _create_cdx_sources(self, paths, config):
|
||||
|
@ -153,21 +153,41 @@ import pprint
|
||||
from pywb import get_test_dir
|
||||
#test_cdx_dir = os.path.dirname(os.path.realpath(__file__)) + '/../sample_data/'
|
||||
test_cdx_dir = get_test_dir() + 'cdx/'
|
||||
from pywb.cdx.cdxobject import AccessException
|
||||
|
||||
from tests.fixture import testconfig, TestExclusionPerms
|
||||
|
||||
import pytest
|
||||
|
||||
def cdx_ops_test(url, sources = [test_cdx_dir + 'iana.cdx'], **kwparams):
|
||||
kwparams['url'] = url
|
||||
kwparams['output'] = 'text'
|
||||
fields = kwparams.get('fields')
|
||||
if fields:
|
||||
fields = fields.split(',')
|
||||
|
||||
server = CDXServer(sources)
|
||||
results = server.load_cdx(**kwparams)
|
||||
|
||||
for x in results:
|
||||
sys.stdout.write(x)
|
||||
sys.stdout.write(x.to_text(fields))
|
||||
|
||||
#================================================================
|
||||
|
||||
def test_excluded(testconfig):
    """
    Loading CDX for the URL excluded by TestExclusionPerms must raise
    AccessException.

    :param testconfig: config dict supplied by the ``testconfig`` fixture;
        its ``index_paths`` entry is used as the CDX source.
    """
    testconfig['perms_checker'] = TestExclusionPerms()
    sources = testconfig.get('index_paths')
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(sources)
    server = CDXServer(sources, perms_checker=testconfig['perms_checker'])
    assert isinstance(server, CDXServer)
    assert server.perms_checker

    # NOTE(review): presumably canonicalizes to the urlkey
    # 'org,iana)/_img/bookmark_icon.ico' -- confirm against the
    # excluded key configured on the perms checker.
    url = 'http://www.iana.org/_img/bookmark_icon.ico'
    with pytest.raises(AccessException):
        # list() forces the lookup to run inside the raises block.
        cdxobjs = list(server.load_cdx(url=url))
        # Only reached if no exception was raised; shows what came back.
        print(cdxobjs)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import doctest
|
||||
doctest.testmod()
|
||||
|
||||
|
||||
|
@ -1,10 +1,10 @@
|
||||
import webtest
|
||||
from pywb.cdx.wsgi_cdxserver import main
|
||||
from pywb.cdx.wsgi_cdxserver import create_app
|
||||
from pywb import get_test_dir
|
||||
|
||||
class TestCdx:
|
||||
def setup(self):
|
||||
self.app = main(get_test_dir() + 'cdx/')
|
||||
self.app = create_app(get_test_dir() + 'cdx/')
|
||||
self.testapp = webtest.TestApp(self.app)
|
||||
|
||||
def test_cdx(self):
|
||||
|
@ -83,8 +83,7 @@ class PlainTextResponse(BaseResponse):
|
||||
def __init__(self, cdxitr, fields, status=200, content_type='text/plain'):
|
||||
super(PlainTextResponse, self).__init__(
|
||||
response=(
|
||||
cdx_text_out(cdx, fields)
|
||||
for cdx in cdxitr
|
||||
cdx.to_text(fields) for cdx in cdxitr
|
||||
),
|
||||
status=status, content_type=content_type)
|
||||
|
||||
|
@ -30,7 +30,7 @@ class IndexReader(object):
|
||||
|
||||
params['allowFuzzy'] = True
|
||||
|
||||
cdxlines = self.load_cdx(url=wburl.url, output='raw', **params)
|
||||
cdxlines = self.load_cdx(url=wburl.url, **params)
|
||||
|
||||
return cdxlines
|
||||
|
||||
|
@ -92,10 +92,10 @@ enable_cdx_api: true
|
||||
|
||||
# optional reporter callback func
|
||||
# if set, called with request and cdx object
|
||||
reporter: !!python/object/new:tests.test_integration.PrintReporter []
|
||||
reporter: !!python/object/new:tests.fixture.PrintReporter []
|
||||
|
||||
# custom rules for domain specific matching
|
||||
#domain_specific_rules: rules.yaml
|
||||
|
||||
#perms_checker: !!python/object/new:pywb.cdx.perms.AllowAllPerms []
|
||||
perms_checker: !!python/object/new:tests.test_integration.TestExclusionPerms []
|
||||
perms_checker: !!python/object/new:tests.fixture.TestExclusionPerms []
|
||||
|
56
tests/fixture.py
Normal file
56
tests/fixture.py
Normal file
@ -0,0 +1,56 @@
|
||||
import os
|
||||
import pytest
|
||||
|
||||
import yaml
|
||||
|
||||
@pytest.fixture
def testconfig():
    """
    py.test fixture: load ``test_config.yaml`` and return the parsed config.

    The file name is relative, so it is resolved against the current
    working directory. If the loaded config has no ``index_paths`` entry,
    one is added pointing at ``../sample_archive/cdx`` relative to the
    directory containing this module.

    :return: the parsed configuration mapping.
    """
    # NOTE(review): the full (unsafe) Loader is requested explicitly and on
    # purpose -- the test config appears to rely on
    # ``!!python/object/new:`` tags, which yaml.safe_load would reject.
    # Never point this at untrusted YAML.
    # The ``with`` block closes the file handle, which was previously leaked.
    with open('test_config.yaml') as stream:
        config = yaml.load(stream, Loader=yaml.Loader)
    assert config
    if 'index_paths' not in config:
        # !!! assumes this module is in a sub-directory of project root.
        config['index_paths'] = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            '../sample_archive/cdx')
    return config
|
||||
|
||||
#================================================================
|
||||
# Reporter callback for replay view
|
||||
class PrintReporter:
    """
    Reporter callback for replay view.

    Prints the request and the cdx object it is called with. The
    response argument is accepted but not used.
    """
    def __call__(self, wbrequest, cdx, response):
        # Single-argument print(...) gives the same output under the
        # Python 2 print statement and the Python 3 print function.
        print(wbrequest)
        print(cdx)
|
||||
|
||||
#================================================================
|
||||
class TestExclusionPerms:
    """
    Perm Checker fixture which can block one URL.
    """
    # sample_archive has captures for this URLKEY
    URLKEY_EXCLUDED = 'org,iana)/_img/bookmark_icon.ico'

    def allow_url_lookup(self, urlkey, url):
        """
        Return True if a lookup for urlkey (canonicalized url) is allowed.

        Only URLKEY_EXCLUDED is refused; the url argument is not examined.
        """
        # Single-argument print(...) works on both Python 2 and Python 3.
        print("allow_url_lookup:urlkey={}".format(urlkey))
        return urlkey != self.URLKEY_EXCLUDED

    def allow_capture(self, cdx):
        """
        Return True if specified capture (cdx) is allowed.

        This fixture allows every capture.
        """
        return True

    def filter_fields(self, cdx):
        """
        Filter out any forbidden cdx fields from cdx object.

        This fixture removes nothing and returns cdx unchanged.
        """
        return cdx
|
@ -8,7 +8,9 @@ class TestWb:
|
||||
|
||||
def setup(self):
|
||||
#self.app = pywb.wbapp.create_wb_app(pywb.pywb_init.pywb_config())
|
||||
self.app = create_wb_app(pywb_config(self.TEST_CONFIG))
|
||||
# save it in self - useful for debugging
|
||||
self.router = pywb_config(self.TEST_CONFIG)
|
||||
self.app = create_wb_app(self.router)
|
||||
self.testapp = webtest.TestApp(self.app)
|
||||
|
||||
def _assert_basic_html(self, resp):
|
||||
@ -193,43 +195,3 @@ class TestWb:
|
||||
resp = self.testapp.get('/pywb/?abc', status = 400)
|
||||
assert resp.status_int == 400
|
||||
assert 'Invalid Url: http://?abc' in resp.body
|
||||
|
||||
#=================================================================
|
||||
# Reporter callback for replay view
|
||||
class PrintReporter:
|
||||
def __call__(self, wbrequest, cdx, response):
|
||||
print wbrequest
|
||||
print cdx
|
||||
pass
|
||||
|
||||
#=================================================================
|
||||
class TestExclusionPerms:
|
||||
"""
|
||||
Sample Perm Checker which allows all
|
||||
"""
|
||||
def allow_url_lookup(self, urlkey, url):
|
||||
"""
|
||||
Return true/false if url or urlkey (canonicalized url)
|
||||
should be allowed
|
||||
"""
|
||||
print urlkey
|
||||
if urlkey == 'org,iana)/_img/bookmark_icon.ico':
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def allow_capture(self, cdx):
|
||||
"""
|
||||
Return true/false is specified capture (cdx) should be
|
||||
allowed
|
||||
"""
|
||||
return True
|
||||
|
||||
def filter_fields(self, cdx):
|
||||
"""
|
||||
Filter out any forbidden cdx fields from cdx dictionary
|
||||
"""
|
||||
return cdx
|
||||
|
||||
|
||||
|
||||
|
@ -12,15 +12,7 @@ import yaml
|
||||
from pywb.cdx.cdxobject import CDXObject
|
||||
from pywb.cdx.wsgi_cdxserver import create_app
|
||||
|
||||
@pytest.fixture
|
||||
def testconfig():
|
||||
config = yaml.load(open('test_config.yaml'))
|
||||
assert config
|
||||
if 'index_paths' not in config:
|
||||
config['index_paths'] = os.path.join(
|
||||
os.path.dirname(os.path.realpath(__file__)),
|
||||
'../sample_archive/cdx')
|
||||
return config
|
||||
from tests.fixture import testconfig
|
||||
|
||||
@pytest.fixture
|
||||
def client(testconfig):
|
||||
|
Loading…
x
Reference in New Issue
Block a user