1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

Address test cases broken by the previous commit.

Move the py.test fixture and fixture classes (TestExclusionPerms, PrintReporter)
  to the tests.fixture module. Update test_config.yaml accordingly.
This commit is contained in:
Kenji Nagahashi 2014-02-28 01:39:04 +00:00
parent 2c40c9b112
commit 9eda5ad97e
11 changed files with 111 additions and 63 deletions

View File

@ -75,6 +75,16 @@ class CDXObject(OrderedDict):
return (self['mimetype'] == 'warc/revisit' or
self['filename'] == '-')
def to_text(self, fields=None):
    """
    Render this CDX record as one line of plain text (newline included).

    :param fields: list of field names to output, joined by single
        spaces in the given order. When None, ``str(self)`` is used
        as the line instead.
    """
    if fields is not None:
        # look every requested field up on the record itself
        selected = (self[name] for name in fields)
        return ' '.join(selected) + '\n'
    return str(self) + '\n'
def __str__(self):
if self.cdxline:
return self.cdxline
@ -109,5 +119,12 @@ class IDXObject(OrderedDict):
self.idxline = idxline
def to_text(self, fields=None):
    """
    Render this IDX record as plain text terminated by a newline.

    :param fields: list of field names to output (currently ignored;
        the result is always ``str(self)`` followed by the newline)
    """
    line = str(self)
    return line + '\n'
def __str__(self):
return self.idxline

View File

@ -10,10 +10,11 @@ from collections import deque
#=================================================================
def cdx_load(sources, params, filter=True, perms_checker=None):
def cdx_load(sources, params, perms_checker=None, filter=True):
"""
merge text CDX lines from sources, return an iterator for
filtered and access-checked sequence of CDX objects.
:param sources: iterable for text CDX sources.
:param perms_checker: access check filter object implementing
allow_url_lookup(key, url), allow_capture(cdxobj) and

View File

@ -107,7 +107,8 @@ class CDXServer(BaseCDXServer):
params['key'] = key
params['end_key'] = end_key
cdx_iter = cdx_load(self.sources, params, self.perms_checker)
cdx_iter = cdx_load(self.sources, params,
perms_checker=self.perms_checker)
return self._check_cdx_iter(cdx_iter, params)
def _create_cdx_sources(self, paths, config):

View File

@ -153,21 +153,41 @@ import pprint
from pywb import get_test_dir
#test_cdx_dir = os.path.dirname(os.path.realpath(__file__)) + '/../sample_data/'
test_cdx_dir = get_test_dir() + 'cdx/'
from pywb.cdx.cdxobject import AccessException
from tests.fixture import testconfig, TestExclusionPerms
import pytest
def cdx_ops_test(url, sources = [test_cdx_dir + 'iana.cdx'], **kwparams):
# Test helper: query a CDXServer built over `sources` for `url` and write
# every result to stdout.
# :param url: url to look up; stored as kwparams['url'].
# :param sources: CDX sources handed to CDXServer
#     (default: [test_cdx_dir + 'iana.cdx']; the list is not mutated here).
# :param kwparams: extra query parameters passed through to load_cdx().
kwparams['url'] = url
kwparams['output'] = 'text'
# 'fields' arrives as a comma-separated string; split it into a list of
# field names before handing it to to_text().
fields = kwparams.get('fields')
if fields:
fields = fields.split(',')
server = CDXServer(sources)
results = server.load_cdx(**kwparams)
for x in results:
# NOTE(review): two writes per result. The bare write(x) looks like the
# pre-change side of this diff hunk, superseded by the to_text() call
# right after it -- confirm before relying on either line.
sys.stdout.write(x)
sys.stdout.write(x.to_text(fields))
#================================================================
def test_excluded(testconfig):
    """
    A CDXServer carrying a TestExclusionPerms checker must raise
    AccessException when the excluded url is looked up.
    """
    checker = TestExclusionPerms()
    testconfig['perms_checker'] = checker
    index_paths = testconfig.get('index_paths')
    print(index_paths)

    server = CDXServer(index_paths,
                       perms_checker=testconfig['perms_checker'])
    assert isinstance(server, CDXServer)
    assert server.perms_checker

    url = 'http://www.iana.org/_img/bookmark_icon.ico'
    # canonicalized form of `url`; not used by the lookup below
    key = 'org,iana)/_img/bookmark_icon.ico'

    with pytest.raises(AccessException):
        cdxobjs = list(server.load_cdx(url=url))
        # only reached when no exception was raised; shows what came back
        print(cdxobjs)
if __name__ == "__main__":
import doctest
doctest.testmod()

View File

@ -1,10 +1,10 @@
import webtest
from pywb.cdx.wsgi_cdxserver import main
from pywb.cdx.wsgi_cdxserver import create_app
from pywb import get_test_dir
class TestCdx:
def setup(self):
# Create the app under test over the test 'cdx/' directory and wrap it in a
# webtest.TestApp client stored on self.testapp.
# NOTE(review): self.app is assigned twice. The main(...) line looks like the
# pre-change side of this diff hunk and create_app(...) its replacement --
# confirm which one is live.
self.app = main(get_test_dir() + 'cdx/')
self.app = create_app(get_test_dir() + 'cdx/')
self.testapp = webtest.TestApp(self.app)
def test_cdx(self):

View File

@ -83,8 +83,7 @@ class PlainTextResponse(BaseResponse):
def __init__(self, cdxitr, fields, status=200, content_type='text/plain'):
# Build the response body with a generator expression that yields one text
# chunk per cdx object in cdxitr.
# :param cdxitr: iterable of cdx objects to render.
# :param fields: field names forwarded to the per-record text conversion.
# :param status: passed through to the base class constructor (default 200).
# :param content_type: passed through likewise (default 'text/plain').
super(PlainTextResponse, self).__init__(
response=(
# NOTE(review): two generator bodies appear here. The cdx_text_out(...)
# pair looks like the pre-change side of this diff hunk and the
# cdx.to_text(fields) line its replacement -- confirm.
cdx_text_out(cdx, fields)
for cdx in cdxitr
cdx.to_text(fields) for cdx in cdxitr
),
status=status, content_type=content_type)

View File

@ -30,7 +30,7 @@ class IndexReader(object):
params['allowFuzzy'] = True
cdxlines = self.load_cdx(url=wburl.url, output='raw', **params)
cdxlines = self.load_cdx(url=wburl.url, **params)
return cdxlines

View File

@ -92,10 +92,10 @@ enable_cdx_api: true
# optional reporter callback func
# if set, called with request and cdx object
reporter: !!python/object/new:tests.test_integration.PrintReporter []
reporter: !!python/object/new:tests.fixture.PrintReporter []
# custom rules for domain specific matching
#domain_specific_rules: rules.yaml
#perms_checker: !!python/object/new:pywb.cdx.perms.AllowAllPerms []
perms_checker: !!python/object/new:tests.test_integration.TestExclusionPerms []
perms_checker: !!python/object/new:tests.fixture.TestExclusionPerms []

56
tests/fixture.py Normal file
View File

@ -0,0 +1,56 @@
import os
import pytest
import yaml
@pytest.fixture
def testconfig():
    """
    py.test fixture: load ``test_config.yaml`` (path relative to the
    current working directory) and return the parsed configuration.

    When the configuration has no ``index_paths`` entry, one is filled
    in that points at ``../sample_archive/cdx`` relative to this module.

    :return: the configuration mapping.
    :raises AssertionError: if the file parses to an empty/false value.
    """
    # NOTE: test_config.yaml uses !!python/object/new tags, which only the
    # full (unsafe) loader can construct, so yaml.safe_load() cannot be
    # used. Never point this at an untrusted file. The Loader is given
    # explicitly because newer PyYAML releases refuse to guess one.
    with open('test_config.yaml') as config_file:
        config = yaml.load(config_file, Loader=yaml.Loader)
    assert config
    if 'index_paths' not in config:
        # !!! assumes this module is in a sub-directory of project root.
        module_dir = os.path.dirname(os.path.realpath(__file__))
        config['index_paths'] = os.path.join(module_dir,
                                             '../sample_archive/cdx')
    return config
#================================================================
# Reporter callback for replay view
class PrintReporter:
    """
    Reporter callback for replay view: prints the request and the cdx
    object it is called with. The response argument is not used.
    """
    def __call__(self, wbrequest, cdx, response):
        for item in (wbrequest, cdx):
            print(item)
#================================================================
class TestExclusionPerms:
    """
    Perm Checker fixture which can block one URL.
    """
    # sample_archive has captures for this URLKEY
    URLKEY_EXCLUDED = 'org,iana)/_img/bookmark_icon.ico'

    def allow_url_lookup(self, urlkey, url):
        """
        Tell whether a lookup for url / urlkey (canonicalized url) is
        allowed: False for URLKEY_EXCLUDED, True for anything else.
        """
        print("allow_url_lookup:urlkey={}".format(urlkey))
        is_excluded = (urlkey == self.URLKEY_EXCLUDED)
        return not is_excluded

    def allow_capture(self, cdx):
        """
        Every capture (cdx) is allowed; always returns True.
        """
        return True

    def filter_fields(self, cdx):
        """
        No cdx fields are forbidden; the cdx object is returned as is.
        """
        return cdx

View File

@ -8,7 +8,9 @@ class TestWb:
def setup(self):
# Build the app with create_wb_app() from the config for self.TEST_CONFIG and
# wrap it in a webtest.TestApp client stored on self.testapp.
# NOTE(review): self.app is assigned twice. The one-line
# create_wb_app(pywb_config(...)) form looks like the pre-change side of this
# diff hunk; the self.router form after it looks like its replacement --
# confirm which one is live.
#self.app = pywb.wbapp.create_wb_app(pywb.pywb_init.pywb_config())
self.app = create_wb_app(pywb_config(self.TEST_CONFIG))
# save it in self - useful for debugging
self.router = pywb_config(self.TEST_CONFIG)
self.app = create_wb_app(self.router)
self.testapp = webtest.TestApp(self.app)
def _assert_basic_html(self, resp):
@ -193,43 +195,3 @@ class TestWb:
resp = self.testapp.get('/pywb/?abc', status = 400)
assert resp.status_int == 400
assert 'Invalid Url: http://?abc' in resp.body
#=================================================================
# Reporter callback for replay view
class PrintReporter:
    """
    Reporter callback for replay view: prints the request and the cdx
    object it is called with. The response argument is not used.
    """
    def __call__(self, wbrequest, cdx, response):
        for item in (wbrequest, cdx):
            print(item)
#=================================================================
class TestExclusionPerms:
    """
    Sample Perm Checker: allows everything except url lookups for the
    urlkey 'org,iana)/_img/bookmark_icon.ico'.
    """
    def allow_url_lookup(self, urlkey, url):
        """
        Tell whether a lookup for url / urlkey (canonicalized url) is
        allowed: False for the one blocked urlkey, True otherwise.
        """
        print(urlkey)
        blocked = (urlkey == 'org,iana)/_img/bookmark_icon.ico')
        return not blocked

    def allow_capture(self, cdx):
        """
        Every capture (cdx) is allowed; always returns True.
        """
        return True

    def filter_fields(self, cdx):
        """
        No cdx fields are forbidden; the cdx dictionary is returned as is.
        """
        return cdx

View File

@ -12,15 +12,7 @@ import yaml
from pywb.cdx.cdxobject import CDXObject
from pywb.cdx.wsgi_cdxserver import create_app
@pytest.fixture
def testconfig():
    """
    py.test fixture: load ``test_config.yaml`` (path relative to the
    current working directory) and return the parsed configuration.

    When the configuration has no ``index_paths`` entry, one is filled
    in that points at ``../sample_archive/cdx`` relative to this module.

    :return: the configuration mapping.
    :raises AssertionError: if the file parses to an empty/false value.
    """
    # NOTE: yaml.load() with the full Loader can construct arbitrary Python
    # objects; only ever point this at the project's own test config. The
    # Loader is given explicitly because newer PyYAML releases refuse to
    # guess one.
    with open('test_config.yaml') as config_file:
        config = yaml.load(config_file, Loader=yaml.Loader)
    assert config
    if 'index_paths' not in config:
        module_dir = os.path.dirname(os.path.realpath(__file__))
        config['index_paths'] = os.path.join(module_dir,
                                             '../sample_archive/cdx')
    return config
from tests.fixture import testconfig
@pytest.fixture
def client(testconfig):