mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
Address test cases broken by previous commit.
Move py.test fixture and fixture classes (TestExclusionPerms, PrintReporter) to tests.fixture module. Update test_config.yaml accordingly.
This commit is contained in:
parent
2c40c9b112
commit
9eda5ad97e
@ -75,6 +75,16 @@ class CDXObject(OrderedDict):
|
|||||||
return (self['mimetype'] == 'warc/revisit' or
|
return (self['mimetype'] == 'warc/revisit' or
|
||||||
self['filename'] == '-')
|
self['filename'] == '-')
|
||||||
|
|
||||||
|
def to_text(self, fields=None):
    """
    Render this CDX record as one line of plain text.

    :param fields: list of field names to output, in the given order.
        When None, the whole record is rendered via ``str(self)``.
    :return: the text line, terminated by a newline.
    """
    if fields is not None:
        # only the requested fields, space-separated
        selected = [self[name] for name in fields]
        return ' '.join(selected) + '\n'

    return str(self) + '\n'
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
if self.cdxline:
|
if self.cdxline:
|
||||||
return self.cdxline
|
return self.cdxline
|
||||||
@ -109,5 +119,12 @@ class IDXObject(OrderedDict):
|
|||||||
|
|
||||||
self.idxline = idxline
|
self.idxline = idxline
|
||||||
|
|
||||||
|
def to_text(self, fields=None):
    """
    Render this IDX record as one line of plain text.

    :param fields: accepted for signature parity with CDX records;
        currently ignored.
    :return: ``str(self)`` followed by a newline.
    """
    line = str(self)
    return line + '\n'
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return self.idxline
|
return self.idxline
|
||||||
|
@ -10,10 +10,11 @@ from collections import deque
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
def cdx_load(sources, params, filter=True, perms_checker=None):
|
def cdx_load(sources, params, perms_checker=None, filter=True):
|
||||||
"""
|
"""
|
||||||
merge text CDX lines from sources, return an iterator for
|
merge text CDX lines from sources, return an iterator for
|
||||||
filtered and access-checked sequence of CDX objects.
|
filtered and access-checked sequence of CDX objects.
|
||||||
|
|
||||||
:param sources: iterable for text CDX sources.
|
:param sources: iterable for text CDX sources.
|
||||||
:param perms_checker: access check filter object implementing
|
:param perms_checker: access check filter object implementing
|
||||||
allow_url_lookup(key, url), allow_capture(cdxobj) and
|
allow_url_lookup(key, url), allow_capture(cdxobj) and
|
||||||
|
@ -107,7 +107,8 @@ class CDXServer(BaseCDXServer):
|
|||||||
params['key'] = key
|
params['key'] = key
|
||||||
params['end_key'] = end_key
|
params['end_key'] = end_key
|
||||||
|
|
||||||
cdx_iter = cdx_load(self.sources, params, self.perms_checker)
|
cdx_iter = cdx_load(self.sources, params,
|
||||||
|
perms_checker=self.perms_checker)
|
||||||
return self._check_cdx_iter(cdx_iter, params)
|
return self._check_cdx_iter(cdx_iter, params)
|
||||||
|
|
||||||
def _create_cdx_sources(self, paths, config):
|
def _create_cdx_sources(self, paths, config):
|
||||||
|
@ -153,21 +153,41 @@ import pprint
|
|||||||
from pywb import get_test_dir
|
from pywb import get_test_dir
|
||||||
#test_cdx_dir = os.path.dirname(os.path.realpath(__file__)) + '/../sample_data/'
|
#test_cdx_dir = os.path.dirname(os.path.realpath(__file__)) + '/../sample_data/'
|
||||||
test_cdx_dir = get_test_dir() + 'cdx/'
|
test_cdx_dir = get_test_dir() + 'cdx/'
|
||||||
|
from pywb.cdx.cdxobject import AccessException
|
||||||
|
|
||||||
|
from tests.fixture import testconfig, TestExclusionPerms
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
def cdx_ops_test(url, sources=None, **kwparams):
    """
    Test helper: query a CDXServer and write each result to stdout as text.

    :param url: url to look up.
    :param sources: list of CDX source paths; defaults to the sample
        ``iana.cdx`` under ``test_cdx_dir``.
    :param kwparams: extra query params passed to ``CDXServer.load_cdx``.
        A comma-separated ``fields`` value also selects which fields are
        written for each record.
    """
    if sources is None:
        # built per call, so no default list object is shared between calls
        sources = [test_cdx_dir + 'iana.cdx']

    kwparams['url'] = url

    # to_text() expects a list of field names, the query param is a string
    fields = kwparams.get('fields')
    if fields:
        fields = fields.split(',')

    server = CDXServer(sources)
    results = server.load_cdx(**kwparams)

    for x in results:
        sys.stdout.write(x.to_text(fields))
|
||||||
|
|
||||||
|
#================================================================
|
||||||
|
|
||||||
|
def test_excluded(testconfig):
    """
    Looking up a URL blocked by the perms checker must raise AccessException.

    :param testconfig: config dict supplied by the ``testconfig`` fixture.
    """
    perms_checker = TestExclusionPerms()
    testconfig['perms_checker'] = perms_checker
    sources = testconfig.get('index_paths')
    # single-argument print() behaves the same on Python 2 and 3
    print(sources)
    server = CDXServer(sources, perms_checker=perms_checker)
    assert isinstance(server, CDXServer)
    assert server.perms_checker

    # expected urlkey for this url: 'org,iana)/_img/bookmark_icon.ico',
    # the one key TestExclusionPerms refuses to look up
    url = 'http://www.iana.org/_img/bookmark_icon.ico'
    with pytest.raises(AccessException):
        # list() drains the result in case it is produced lazily;
        # nothing placed after this call would run once it raises
        list(server.load_cdx(url=url))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import doctest
|
import doctest
|
||||||
doctest.testmod()
|
doctest.testmod()
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
import webtest
|
import webtest
|
||||||
from pywb.cdx.wsgi_cdxserver import main
|
from pywb.cdx.wsgi_cdxserver import create_app
|
||||||
from pywb import get_test_dir
|
from pywb import get_test_dir
|
||||||
|
|
||||||
class TestCdx:
|
class TestCdx:
|
||||||
def setup(self):
    """
    Create the CDX server app over the sample 'cdx/' test dir and
    wrap it in a webtest.TestApp.
    """
    cdx_dir = get_test_dir() + 'cdx/'
    self.app = create_app(cdx_dir)
    self.testapp = webtest.TestApp(self.app)
|
||||||
|
|
||||||
def test_cdx(self):
|
def test_cdx(self):
|
||||||
|
@ -83,8 +83,7 @@ class PlainTextResponse(BaseResponse):
|
|||||||
def __init__(self, cdxitr, fields, status=200, content_type='text/plain'):
    """
    Build a response whose body is the text form of each cdx record.

    :param cdxitr: iterable of records providing ``to_text(fields)``.
    :param fields: passed through to each record's ``to_text``.
    :param status: response status, 200 by default.
    :param content_type: response content type, 'text/plain' by default.
    """
    # generator: each record is rendered only when the body is consumed
    text_lines = (record.to_text(fields) for record in cdxitr)
    super(PlainTextResponse, self).__init__(
        response=text_lines,
        status=status, content_type=content_type)
|
||||||
|
|
||||||
|
@ -30,7 +30,7 @@ class IndexReader(object):
|
|||||||
|
|
||||||
params['allowFuzzy'] = True
|
params['allowFuzzy'] = True
|
||||||
|
|
||||||
cdxlines = self.load_cdx(url=wburl.url, output='raw', **params)
|
cdxlines = self.load_cdx(url=wburl.url, **params)
|
||||||
|
|
||||||
return cdxlines
|
return cdxlines
|
||||||
|
|
||||||
|
@ -92,10 +92,10 @@ enable_cdx_api: true
|
|||||||
|
|
||||||
# optional reporter callback func
|
# optional reporter callback func
|
||||||
# if set, called with request, cdx object and response
|
# if set, called with request, cdx object and response
|
||||||
reporter: !!python/object/new:tests.test_integration.PrintReporter []
|
reporter: !!python/object/new:tests.fixture.PrintReporter []
|
||||||
|
|
||||||
# custom rules for domain specific matching
|
# custom rules for domain specific matching
|
||||||
#domain_specific_rules: rules.yaml
|
#domain_specific_rules: rules.yaml
|
||||||
|
|
||||||
#perms_checker: !!python/object/new:pywb.cdx.perms.AllowAllPerms []
|
#perms_checker: !!python/object/new:pywb.cdx.perms.AllowAllPerms []
|
||||||
perms_checker: !!python/object/new:tests.test_integration.TestExclusionPerms []
|
perms_checker: !!python/object/new:tests.fixture.TestExclusionPerms []
|
||||||
|
56
tests/fixture.py
Normal file
56
tests/fixture.py
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
import os
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
@pytest.fixture
def testconfig():
    """
    Load ``test_config.yaml`` and return it as a config dict.

    If the file does not set ``index_paths``, it is pointed at the
    ``sample_archive/cdx`` directory next to this module's parent.

    :return: the parsed config dict.
    """
    # NOTE: the path is relative to the current working directory, so the
    # tests must be run from the directory holding test_config.yaml.
    # NOTE(review): yaml.load can construct arbitrary Python objects; the
    # test config appears to rely on that (!!python/object/new tags), so
    # it is kept - only ever feed it trusted files.
    with open('test_config.yaml') as config_file:
        # the with-block closes the file handle once parsing is done
        config = yaml.load(config_file)
    assert config
    if 'index_paths' not in config:
        # !!! assumes this module is in a sub-directory of project root.
        config['index_paths'] = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            '../sample_archive/cdx')
    return config
|
||||||
|
|
||||||
|
#================================================================
|
||||||
|
# Reporter callback for replay view
|
||||||
|
class PrintReporter:
    """Reporter callback for replay view.

    Prints the request and the cdx object it is called with.
    """
    def __call__(self, wbrequest, cdx, response):
        """
        :param wbrequest: request object, printed to stdout.
        :param cdx: cdx object, printed to stdout.
        :param response: accepted but not used.
        """
        # single-argument print() behaves the same on Python 2 and 3
        print(wbrequest)
        print(cdx)
|
||||||
|
|
||||||
|
#================================================================
|
||||||
|
class TestExclusionPerms:
    """
    Perm Checker fixture which can block one URL.
    """
    # sample_archive has captures for this URLKEY
    URLKEY_EXCLUDED = 'org,iana)/_img/bookmark_icon.ico'

    def allow_url_lookup(self, urlkey, url):
        """
        Return False for the single excluded urlkey, True for anything else.

        :param urlkey: canonicalized url, compared against URLKEY_EXCLUDED.
        :param url: original url (not consulted by this fixture).
        """
        # single-argument print() behaves the same on Python 2 and 3
        print("allow_url_lookup:urlkey={}".format(urlkey))
        return urlkey != self.URLKEY_EXCLUDED

    def allow_capture(self, cdx):
        """
        Return True if specified capture (cdx) is allowed.
        This fixture allows every capture.
        """
        return True

    def filter_fields(self, cdx):
        """
        Filter out any forbidden cdx fields from cdx object.
        This fixture returns the cdx object unchanged.
        """
        return cdx
|
@ -8,7 +8,9 @@ class TestWb:
|
|||||||
|
|
||||||
def setup(self):
    """
    Build the wayback app from TEST_CONFIG and wrap it in a
    webtest.TestApp.
    """
    router = pywb_config(self.TEST_CONFIG)
    # keep the router on the instance - useful for debugging
    self.router = router
    self.app = create_wb_app(router)
    self.testapp = webtest.TestApp(self.app)
|
||||||
|
|
||||||
def _assert_basic_html(self, resp):
|
def _assert_basic_html(self, resp):
|
||||||
@ -193,43 +195,3 @@ class TestWb:
|
|||||||
resp = self.testapp.get('/pywb/?abc', status = 400)
|
resp = self.testapp.get('/pywb/?abc', status = 400)
|
||||||
assert resp.status_int == 400
|
assert resp.status_int == 400
|
||||||
assert 'Invalid Url: http://?abc' in resp.body
|
assert 'Invalid Url: http://?abc' in resp.body
|
||||||
|
|
||||||
#=================================================================
|
|
||||||
# Reporter callback for replay view
|
|
||||||
class PrintReporter:
|
|
||||||
def __call__(self, wbrequest, cdx, response):
|
|
||||||
print wbrequest
|
|
||||||
print cdx
|
|
||||||
pass
|
|
||||||
|
|
||||||
#=================================================================
|
|
||||||
class TestExclusionPerms:
|
|
||||||
"""
|
|
||||||
Sample Perm Checker which allows all
|
|
||||||
"""
|
|
||||||
def allow_url_lookup(self, urlkey, url):
|
|
||||||
"""
|
|
||||||
Return true/false if url or urlkey (canonicalized url)
|
|
||||||
should be allowed
|
|
||||||
"""
|
|
||||||
print urlkey
|
|
||||||
if urlkey == 'org,iana)/_img/bookmark_icon.ico':
|
|
||||||
return False
|
|
||||||
|
|
||||||
return True
|
|
||||||
|
|
||||||
def allow_capture(self, cdx):
|
|
||||||
"""
|
|
||||||
Return true/false is specified capture (cdx) should be
|
|
||||||
allowed
|
|
||||||
"""
|
|
||||||
return True
|
|
||||||
|
|
||||||
def filter_fields(self, cdx):
|
|
||||||
"""
|
|
||||||
Filter out any forbidden cdx fields from cdx dictionary
|
|
||||||
"""
|
|
||||||
return cdx
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -12,15 +12,7 @@ import yaml
|
|||||||
from pywb.cdx.cdxobject import CDXObject
|
from pywb.cdx.cdxobject import CDXObject
|
||||||
from pywb.cdx.wsgi_cdxserver import create_app
|
from pywb.cdx.wsgi_cdxserver import create_app
|
||||||
|
|
||||||
@pytest.fixture
|
from tests.fixture import testconfig
|
||||||
def testconfig():
|
|
||||||
config = yaml.load(open('test_config.yaml'))
|
|
||||||
assert config
|
|
||||||
if 'index_paths' not in config:
|
|
||||||
config['index_paths'] = os.path.join(
|
|
||||||
os.path.dirname(os.path.realpath(__file__)),
|
|
||||||
'../sample_archive/cdx')
|
|
||||||
return config
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def client(testconfig):
|
def client(testconfig):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user