diff --git a/pywb/warcserver/access_checker.py b/pywb/warcserver/access_checker.py index 7502eb4d..c648e4f9 100644 --- a/pywb/warcserver/access_checker.py +++ b/pywb/warcserver/access_checker.py @@ -15,8 +15,9 @@ class FileAccessIndexSource(FileIndexSource): def rev_cmp(a, b): return (a < b) - (a > b) - def _get_gen(self, fh, params): - return search(fh, params['key'], prev_size=1, compare_func=self.rev_cmp) + def _do_iter(self, fh, params): + for line in search(fh, params['key'], prev_size=1, compare_func=self.rev_cmp): + yield line # ============================================================================ @@ -75,26 +76,28 @@ class AccessChecker(object): raise Exception('Invalid Access Source: ' + filename) def find_access_rule(self, url, ts=None, urlkey=None): - params = {'url': url, 'urlkey': urlkey} + params = {'url': url, 'urlkey': urlkey, 'nosource': 'true'} acl_iter, errs = self.aggregator(params) if errs: print(errs) - key = params['key'].decode('utf-8') + key = params['key'] - tld = key.split(',')[0] + tld = key.split(b',')[0] for acl in acl_iter: # skip empty/invalid lines - if 'urlkey' not in acl: + if not acl: continue - if key.startswith(acl['urlkey']): - return acl + acl_key = acl.split(b' ')[0] + + if key.startswith(acl_key): + return CDXObject(acl) # if acl key already less than first tld, # no match can be found - if acl['urlkey'] < tld: + if acl_key < tld: break return self.default_rule diff --git a/pywb/warcserver/index/indexsource.py b/pywb/warcserver/index/indexsource.py index 32a5161b..ff74628b 100644 --- a/pywb/warcserver/index/indexsource.py +++ b/pywb/warcserver/index/indexsource.py @@ -76,20 +76,21 @@ class FileIndexSource(BaseIndexSource): except IOError: raise NotFoundException(filename) - def _get_gen(self, fh, params): - return iter_range(fh, params['key'], params['end_key']) - def load_index(self, params): filename = res_template(self.filename_template, params) fh = self._do_open(filename) - def do_load(fh): + def do_iter(): with fh: - for line in self._get_gen(fh, params): - yield CDXObject(line) + for obj in self._do_iter(fh, params): + yield obj - return do_load(fh) + return do_iter() + + def _do_iter(self, fh, params): + for line in iter_range(fh, params['key'], params['end_key']): + yield CDXObject(line) def __repr__(self): return '{0}(file://{1})'.format(self.__class__.__name__, diff --git a/tests/test_acl_manager.py b/tests/test_acl_manager.py index dc54b76f..945c4bec 100644 --- a/tests/test_acl_manager.py +++ b/tests/test_acl_manager.py @@ -69,9 +69,9 @@ com,example)/ - {"access": "allow", "url": "http://example.com/"} assert out == """\ Matched rule: - com,example, - {"access": "exclude", "url": "com,example,", "source": "%s", "source-coll": "%s"} + com,example, - {"access": "exclude", "url": "com,example,"} -""" % (self.acl_filename, self.acl_filename) +""" def test_remove_acl(self): wb_manager(['acl', 'remove', self.acl_filename, 'com,example,'])