1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

acl optimization: addresses ukwa/ukwa-pywb#38

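- stop the linear scan of acl rules as soon as an acl key sorts below the tld of the lookup key, since no later rule can match
- reuse the previously found rule when consecutive entries have the same url (at least until date-range checking is supported)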
This commit is contained in:
Ilya Kreymer 2019-02-14 12:15:15 -08:00 committed by John Berlin
parent 60ad1739b7
commit 0c08b9b5d5
No known key found for this signature in database
GPG Key ID: 6EF5E4B442011B02
2 changed files with 18 additions and 9 deletions

View File

@ -76,21 +76,27 @@ class AccessChecker(object):
def find_access_rule(self, url, ts=None, urlkey=None):
    """Return the first access rule whose urlkey is a prefix of the lookup key.

    :param url: url to look up; passed to the aggregator as ``params['url']``
    :param ts: timestamp of the capture; accepted but not used by this lookup
    :param urlkey: url key passed to the aggregator as ``params['urlkey']``
    :return: the first matching rule yielded by the aggregator, or
        ``self.default_rule`` when no rule matches
    """
    params = {'url': url, 'urlkey': urlkey}
    acl_iter, errs = self.aggregator(params)
    if errs:
        print(errs)

    # 'key' is not set above, so it is presumably filled in (as bytes)
    # by the aggregator call -- verify against the aggregator
    key = params['key'].decode('utf-8')

    # first comma-separated component of the key, e.g. 'org'
    tld = key.split(',')[0]

    for acl in acl_iter:
        # skip empty/invalid lines
        if 'urlkey' not in acl:
            continue

        acl_key = acl['urlkey']

        if key.startswith(acl_key):
            return acl

        # if acl key already less than first tld,
        # no match can be found
        # NOTE(review): relies on rules arriving in descending key order -- confirm
        if acl_key < tld:
            break

    return self.default_rule
def __call__(self, res):
@ -102,21 +108,24 @@ class AccessChecker(object):
last_url = None
for cdx in cdx_iter:
print("Looking at",cdx)
url = cdx.get('url')
print(url)
# if no url, possible idx or other object, don't apply any checks and pass through
if not url:
yield cdx
continue
rule = self.find_access_rule(url, cdx.get('timestamp'), cdx.get('urlkey'))
print(rule)
# TODO: optimization until date range support is included
if url == last_url:
rule = last_rule
else:
rule = self.find_access_rule(url, cdx.get('timestamp'), cdx.get('urlkey'))
access = rule.get('access', 'exclude')
print(access)
if access == 'exclude':
continue
print("Yielding...")
cdx['access'] = access
yield cdx
last_rule = rule
last_url = url

View File

@ -1,5 +1,5 @@
org,iana)/about - {"access": "block"}
org,iana)/_css/2013.1/fonts/opensans-semibold.ttf - {"access": "allow"}
org,iana)/_css - {"access": "exclude"}
org,example)/?example=1 - {"access": "block"}
org,iana)/ - {"access": "exclude"}
org,example)/?example=1 - {"access": "block"}