diff --git a/pywb/cdx/cdxops.py b/pywb/cdx/cdxops.py index 10be9bb1..c4f865c2 100644 --- a/pywb/cdx/cdxops.py +++ b/pywb/cdx/cdxops.py @@ -89,11 +89,13 @@ def process_cdx(cdx_iter, query): def load_cdx_streams(sources, query): # Optimize: no need to merge if just one input if len(sources) == 1: - return sources[0].load_cdx(query) + cdx_iter = sources[0].load_cdx(query) + else: + source_iters = map(lambda src: src.load_cdx(query), sources) + cdx_iter = merge(*(source_iters)) - source_iters = map(lambda src: src.load_cdx(query), sources) - merged_stream = merge(*(source_iters)) - return merged_stream + for cdx in cdx_iter: + yield cdx #================================================================= @@ -156,6 +158,7 @@ def cdx_filter(cdx_iter, filter_strings): if string.startswith('='): string = string[1:] self.compare_func = self.exact + # contains match elif string.startswith('~'): string = string[1:] self.compare_func = self.contains diff --git a/pywb/cdx/test/test_perms.py b/pywb/cdx/test/test_perms.py new file mode 100644 index 00000000..eb5a30ac --- /dev/null +++ b/pywb/cdx/test/test_perms.py @@ -0,0 +1,28 @@ +from pywb.cdx.cdxops import cdx_load +from pywb.cdx.perms import AllowAllPerms +from pywb.cdx.query import CDXQuery +from pywb.cdx.cdxobject import AccessException + +from pytest import raises + +class BlockAllPerms(AllowAllPerms): + def allow_url_lookup(self, urlkey, url): + return False + + +def test_exclusion_short_circuit(): + """ + # Verify that exclusion check 'short-circuits' further evaluation.. eg, a bad cdx source is not even loaded + # if exclusion check does not pass + """ + cdx_iter = cdx_load(['bogus ignored'], CDXQuery(url='example.com', key='com,example)/'), + perms_checker=BlockAllPerms(), process=True) + + # exception happens on first access attempt + with raises(AccessException): + cdx_iter.next() + + + + + diff --git a/tests/test_integration.py b/tests/test_integration.py index 4c815677..6e24ec6a 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -2,7 +2,8 @@ import webtest from pywb.pywb_init import pywb_config from pywb.wbapp import create_wb_app from pywb.cdx.cdxobject import CDXObject -from pywb.cdx.perms import AllowAllPerms + +from fixture import TestExclusionPerms class TestWb: TEST_CONFIG = 'test_config.yaml' @@ -208,3 +209,4 @@ class TestWb: resp = self.testapp.get('/pywb/?abc', status = 400) assert resp.status_int == 400 assert 'Invalid Url: http://?abc' in resp.body +