1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

AccessChecker: exact-match rules not found in single-line ACLJ file, fixes #628 (#629)

* Add unit test to verify that ACL exact-match rules in a single-line `*.aclj` file are found

* Fix AccessChecker to match exact rules in a single-line rule file
This commit is contained in:
Sebastian Nagel 2021-04-27 05:07:19 +02:00 committed by GitHub
parent 084be82550
commit ca14bdd8b2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 21 additions and 2 deletions

View File

@ -78,6 +78,11 @@ class AccessChecker(object):
EXACT_SUFFIX = '###' # type: str
EXACT_SUFFIX_B = b'###' # type: bytes
# Rules in the ACL file are followed by a space character (U+0020).
# For searching, we need a match suffix which sorts/compares after the
# rule (or rather before it, because the rev_cmp function is used),
# so we simply append another '#' (U+0023 > U+0020).
EXACT_SUFFIX_SEARCH_B = b'####' # type: bytes
def __init__(self, access_source, default_access='allow'):
"""Initialize a new AccessChecker
@ -148,7 +153,7 @@ class AccessChecker(object):
params = {'url': url,
'urlkey': urlkey,
'nosource': 'true',
'exact_match_suffix': self.EXACT_SUFFIX_B
'exact_match_suffix': self.EXACT_SUFFIX_SEARCH_B
}
if collection:
params['param.coll'] = collection

View File

@ -53,6 +53,10 @@ class TestAccess(TempDirTests, BaseTestClass):
assert edx['urlkey'] == 'com,example)/foo'
assert edx['access'] == 'exclude'
edx = access.find_access_rule('https://example.net/abc/path')
assert edx['urlkey'] == 'net,example)/abc/path'
assert edx['access'] == 'block'
edx = access.find_access_rule('https://example.net/abc/path/other')
assert edx['urlkey'] == 'net,example)/abc/path'
assert edx['access'] == 'block'
@ -114,7 +118,7 @@ class TestAccess(TempDirTests, BaseTestClass):
assert edx['urlkey'] == 'net,example)/abc/path'
assert edx['access'] == 'block'
# exact-only matchc
# exact-only match
edx = access.find_access_rule('https://www.iana.org/')
assert edx['urlkey'] == 'org,iana)/###'
assert edx['access'] == 'allow'
@ -127,4 +131,12 @@ class TestAccess(TempDirTests, BaseTestClass):
assert edx['urlkey'] == 'org,iana)/'
assert edx['access'] == 'exclude'
# exact-only match, first line in *.aclj file
edx = access.find_access_rule('https://www.iana.org/exact/match/first/line/aclj/')
assert edx['urlkey'] == 'org,iana)/exact/match/first/line/aclj###'
assert edx['access'] == 'allow'
# exact-only match, single rule in *.aclj file
edx = access.find_access_rule('https://www.lonesome-rule.org/')
assert edx['urlkey'] == 'org,lonesome-rule)/###'
assert edx['access'] == 'allow'

View File

@ -1,3 +1,4 @@
org,iana)/exact/match/first/line/aclj### - {"access": "allow", "url": "https://www.iana.org/exact/match/first/line/aclj/"}
org,iana)/about - {"access": "block"}
org,iana)/_css/2013.1/fonts/opensans-semibold.ttf - {"access": "allow"}
org,iana)/_css - {"access": "exclude"}

View File

@ -0,0 +1 @@
org,lonesome-rule)/### - {"access": "allow", "url": "https://www.lonesome-rule.org/"}