1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

Ensure CDX loading happens lazily

Add a perms test to ensure evaluation 'short-circuits' when a
permission exception is raised
This commit is contained in:
Ilya Kreymer 2014-03-01 18:40:16 -08:00
parent 15d2cdd1b3
commit 06a22c845b
3 changed files with 38 additions and 5 deletions

View File

@ -89,11 +89,13 @@ def process_cdx(cdx_iter, query):
def load_cdx_streams(sources, query):
    """
    Lazily yield cdx entries for ``query`` from one or more sources.

    This is a generator, so no source's ``load_cdx()`` is called until the
    first entry is requested. A caller that raises before iterating
    (e.g. on a failed permissions check) never touches the sources.

    :param sources: sequence of objects exposing ``load_cdx(query)``
    :param query: query object passed unchanged to every source
    :return: generator over cdx entries; with more than one source the
             per-source streams are combined via ``merge``
             (presumably ``heapq.merge``, which expects each stream to be
             already sorted -- confirm against the module imports)
    """
    # Optimize: no need to merge if just one input
    if len(sources) == 1:
        cdx_iter = sources[0].load_cdx(query)
    else:
        source_iters = [src.load_cdx(query) for src in sources]
        cdx_iter = merge(*source_iters)

    # Re-yield instead of returning the iterator so that the loading above
    # is deferred until the caller actually starts iterating.
    for cdx in cdx_iter:
        yield cdx
#=================================================================
@ -156,6 +158,7 @@ def cdx_filter(cdx_iter, filter_strings):
if string.startswith('='):
string = string[1:]
self.compare_func = self.exact
# contains match
elif string.startswith('~'):
string = string[1:]
self.compare_func = self.contains

View File

@ -0,0 +1,28 @@
from pywb.cdx.cdxops import cdx_load
from pywb.cdx.perms import AllowAllPerms
from pywb.cdx.query import CDXQuery
from pywb.cdx.cdxobject import AccessException
from pytest import raises
class BlockAllPerms(AllowAllPerms):
    """Permissions checker for tests that refuses every url lookup."""

    def allow_url_lookup(self, urlkey, url):
        """Always deny the lookup, whatever ``urlkey`` / ``url`` are given."""
        return False
def test_exclusion_short_circuit():
    """
    Verify that the exclusion check 'short-circuits' further evaluation:
    a bad cdx source is not even loaded if the exclusion check does not pass.
    """
    query = CDXQuery(url='example.com', key='com,example)/')

    # 'bogus ignored' stands in for a bad cdx source; the test expects the
    # permission failure to surface before any attempt to load it.
    cdx_iter = cdx_load(['bogus ignored'], query,
                        perms_checker=BlockAllPerms(), process=True)

    # exception happens on first access attempt
    with raises(AccessException):
        # builtin next() works on Python 2.6+ and 3, unlike iterator.next()
        next(cdx_iter)

View File

@ -2,7 +2,8 @@ import webtest
from pywb.pywb_init import pywb_config
from pywb.wbapp import create_wb_app
from pywb.cdx.cdxobject import CDXObject
from pywb.cdx.perms import AllowAllPerms
from fixture import TestExclusionPerms
class TestWb:
TEST_CONFIG = 'test_config.yaml'
@ -208,3 +209,4 @@ class TestWb:
resp = self.testapp.get('/pywb/?abc', status = 400)
assert resp.status_int == 400
assert 'Invalid Url: http://?abc' in resp.body