1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

Fix ACL loading for auto collections (#620)

* Pass collection name to ACL checker to load ACL lists
for automatic collections

* Typo: file suffix must be `.aclj`
This commit is contained in:
Sebastian Nagel 2021-04-27 04:58:56 +02:00 committed by GitHub
parent b475d85c4f
commit 662fc747bf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 10 additions and 5 deletions

View File

@ -9,7 +9,7 @@ block or exclude access to individual urls by longest-prefix match.
Access Control Files (.aclj)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Access controls are set in one or more access control json files (.aclj), sorted in reverse alphabetical order.
Access controls are set in one or more access control JSON files (.aclj), sorted in reverse alphabetical order.
To determine the best match, a binary search (similar to a CDXJ lookup) is used, and then the best match is found by scanning forward.
An .aclj file may look as follows::
@ -61,7 +61,7 @@ The .aclj files need not ever be added or edited manually.
The pywb ``wb-manager`` utility has been extended to provide tools for adding, removing and checking access control rules.
The access rules are written to ``<collection>/acl/access-rules.acl`` for a given collection ``<collection>`` for automatic collections.
The access rules are written to ``<collection>/acl/access-rules.aclj`` for a given collection ``<collection>`` for automatic collections.
For example, to add the first line to an ACL file ``access.aclj``, one could run::

View File

@ -134,7 +134,7 @@ class AccessChecker(object):
else:
raise Exception('Invalid Access Source: ' + filename)
def find_access_rule(self, url, ts=None, urlkey=None):
def find_access_rule(self, url, ts=None, urlkey=None, collection=None):
"""Attempts to find the access control rule for the
supplied URL otherwise returns the default rule
@ -150,6 +150,8 @@ class AccessChecker(object):
'nosource': 'true',
'exact_match_suffix': self.EXACT_SUFFIX_B
}
if collection:
params['param.coll'] = collection
acl_iter, errs = self.aggregator(params)
if errs:
@ -214,7 +216,8 @@ class AccessChecker(object):
if url == last_url:
rule = last_rule
else:
rule = self.find_access_rule(url, cdx.get('timestamp'), cdx.get('urlkey'))
rule = self.find_access_rule(url, cdx.get('timestamp'), cdx.get('urlkey'),
cdx.get('source-coll'))
access = rule.get('access', 'exclude')
if access == 'exclude':

View File

@ -142,7 +142,9 @@ class WarcServer(BaseWarcServer):
base_dir=self.index_paths,
config=self.config)
access_checker = AccessChecker(CacheDirectoryAccessSource(self.acl_paths),
access_checker = AccessChecker(CacheDirectoryAccessSource(base_prefix=self.root_dir,
base_dir=self.acl_paths,
config=self.config),
self.default_access)
if self.dedup_index_url: