mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
acl optimization: fixes ukwa/ukwa-pywb#39
- Don't parse JSON on every aclj line: compare the raw key prefix first and parse only once it matches, resulting in a speed boost! - Convert the aclj line to a dict (via CDXObject) only when a match is found (aggregator source tracking is disabled via the 'nosource' param).
This commit is contained in:
parent
ce0ed610bd
commit
9b2ae35b93
@ -15,8 +15,9 @@ class FileAccessIndexSource(FileIndexSource):
|
||||
def rev_cmp(a, b):
|
||||
return (a < b) - (a > b)
|
||||
|
||||
def _get_gen(self, fh, params):
|
||||
return search(fh, params['key'], prev_size=1, compare_func=self.rev_cmp)
|
||||
def _do_iter(self, fh, params):
|
||||
for line in search(fh, params['key'], prev_size=1, compare_func=self.rev_cmp):
|
||||
yield line
|
||||
|
||||
|
||||
# ============================================================================
|
||||
@ -75,26 +76,28 @@ class AccessChecker(object):
|
||||
raise Exception('Invalid Access Source: ' + filename)
|
||||
|
||||
def find_access_rule(self, url, ts=None, urlkey=None):
|
||||
params = {'url': url, 'urlkey': urlkey}
|
||||
params = {'url': url, 'urlkey': urlkey, 'nosource': 'true'}
|
||||
acl_iter, errs = self.aggregator(params)
|
||||
if errs:
|
||||
print(errs)
|
||||
|
||||
key = params['key'].decode('utf-8')
|
||||
key = params['key']
|
||||
|
||||
tld = key.split(',')[0]
|
||||
tld = key.split(b',')[0]
|
||||
|
||||
for acl in acl_iter:
|
||||
# skip empty/invalid lines
|
||||
if 'urlkey' not in acl:
|
||||
if not acl:
|
||||
continue
|
||||
|
||||
if key.startswith(acl['urlkey']):
|
||||
return acl
|
||||
acl_key = acl.split(b' ')[0]
|
||||
|
||||
if key.startswith(acl_key):
|
||||
return CDXObject(acl)
|
||||
|
||||
# if acl key already less than first tld,
|
||||
# no match can be found
|
||||
if acl['urlkey'] < tld:
|
||||
if acl_key < tld:
|
||||
break
|
||||
|
||||
return self.default_rule
|
||||
|
@ -76,20 +76,21 @@ class FileIndexSource(BaseIndexSource):
|
||||
except IOError:
|
||||
raise NotFoundException(filename)
|
||||
|
||||
def _get_gen(self, fh, params):
|
||||
return iter_range(fh, params['key'], params['end_key'])
|
||||
|
||||
def load_index(self, params):
|
||||
filename = res_template(self.filename_template, params)
|
||||
|
||||
fh = self._do_open(filename)
|
||||
|
||||
def do_load(fh):
|
||||
def do_iter():
|
||||
with fh:
|
||||
for line in self._get_gen(fh, params):
|
||||
yield CDXObject(line)
|
||||
for obj in self._do_iter(fh, params):
|
||||
yield obj
|
||||
|
||||
return do_load(fh)
|
||||
return do_iter()
|
||||
|
||||
def _do_iter(self, fh, params):
|
||||
for line in iter_range(fh, params['key'], params['end_key']):
|
||||
yield CDXObject(line)
|
||||
|
||||
def __repr__(self):
|
||||
return '{0}(file://{1})'.format(self.__class__.__name__,
|
||||
|
@ -69,9 +69,9 @@ com,example)/ - {"access": "allow", "url": "http://example.com/"}
|
||||
assert out == """\
|
||||
Matched rule:
|
||||
|
||||
com,example, - {"access": "exclude", "url": "com,example,", "source": "%s", "source-coll": "%s"}
|
||||
com,example, - {"access": "exclude", "url": "com,example,"}
|
||||
|
||||
""" % (self.acl_filename, self.acl_filename)
|
||||
"""
|
||||
|
||||
def test_remove_acl(self):
|
||||
wb_manager(['acl', 'remove', self.acl_filename, 'com,example,'])
|
||||
|
Loading…
x
Reference in New Issue
Block a user