mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
acl optimization: addresses ukwa/ukwa-pywb#38
- stop checking acl rules linearly if acl key < tld
- use existing rule for same url (at least until date-range checking)
This commit is contained in:
parent
60ad1739b7
commit
0c08b9b5d5
@ -76,21 +76,27 @@ class AccessChecker(object):
|
||||
|
||||
def find_access_rule(self, url, ts=None, urlkey=None):
    """Return the first access rule whose urlkey is a prefix of the lookup key.

    The rule source is queried through ``self.aggregator`` and the returned
    rules are scanned in the order they are produced.

    :param url: URL for which the access rule is looked up.
    :param ts: timestamp of the capture; accepted for interface
        compatibility but not used by this lookup.
    :param urlkey: canonicalized url key passed through to the aggregator.
    :return: the first rule dict whose ``'urlkey'`` is a prefix of the
        lookup key, or ``self.default_rule`` when none matches.
    """
    params = {'url': url, 'urlkey': urlkey}
    acl_iter, errs = self.aggregator(params)
    if errs:
        print(errs)

    # The lookup key is read back from ``params`` after the aggregator call
    # and is decoded from bytes here.
    # NOTE(review): presumably the aggregator stores it there -- confirm.
    key = params['key'].decode('utf-8')

    # First comma-separated component of the key (e.g. 'org' for
    # 'org,iana)/about'); used as a lower bound for the scan below.
    tld = key.split(',')[0]

    for acl in acl_iter:
        # skip empty/invalid lines
        if 'urlkey' not in acl:
            continue

        rule_key = acl['urlkey']

        if key.startswith(rule_key):
            return acl

        # If the rule key already sorts below the tld, no later rule can
        # match, so stop scanning.
        # NOTE(review): this relies on rules arriving in descending urlkey
        # order -- confirm against the aggregator.
        if rule_key < tld:
            break

    return self.default_rule
|
||||
|
||||
def __call__(self, res):
|
||||
@ -102,21 +108,24 @@ class AccessChecker(object):
|
||||
last_url = None
|
||||
|
||||
for cdx in cdx_iter:
|
||||
print("Looking at",cdx)
|
||||
url = cdx.get('url')
|
||||
print(url)
|
||||
# if no url, possible idx or other object, don't apply any checks and pass through
|
||||
if not url:
|
||||
yield cdx
|
||||
continue
|
||||
|
||||
rule = self.find_access_rule(url, cdx.get('timestamp'), cdx.get('urlkey'))
|
||||
print(rule)
|
||||
# TODO: optimization until date range support is included
|
||||
if url == last_url:
|
||||
rule = last_rule
|
||||
else:
|
||||
rule = self.find_access_rule(url, cdx.get('timestamp'), cdx.get('urlkey'))
|
||||
|
||||
access = rule.get('access', 'exclude')
|
||||
print(access)
|
||||
if access == 'exclude':
|
||||
continue
|
||||
|
||||
print("Yielding...")
|
||||
cdx['access'] = access
|
||||
yield cdx
|
||||
|
||||
last_rule = rule
|
||||
last_url = url
|
||||
|
@ -1,5 +1,5 @@
|
||||
org,iana)/about - {"access": "block"}
|
||||
org,iana)/_css/2013.1/fonts/opensans-semibold.ttf - {"access": "allow"}
|
||||
org,iana)/_css - {"access": "exclude"}
|
||||
org,example)/?example=1 - {"access": "block"}
|
||||
org,iana)/ - {"access": "exclude"}
|
||||
org,example)/?example=1 - {"access": "block"}
|
||||
|
Loading…
x
Reference in New Issue
Block a user