mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Fix ACL loading for auto collections (#620)
* Pass collection name to ACL checker to load ACL lists for automatic collections * Typo: file suffix must be `.aclj`
This commit is contained in:
parent
b475d85c4f
commit
662fc747bf
@ -9,7 +9,7 @@ block or exclude access to individual urls by longest-prefix match.
|
||||
Access Control Files (.aclj)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Access controls are set in one or more access control json files (.aclj), sorted in reverse alphabetical order.
|
||||
Access controls are set in one or more access control JSON files (.aclj), sorted in reverse alphabetical order.
|
||||
To determine the best match, a binary search is used (similar to a CDXJ lookup), and then the best match is found by searching forward from there.
|
||||
|
||||
An .aclj file may look as follows::
|
||||
@ -61,7 +61,7 @@ The .aclj files need not ever be added or edited manually.
|
||||
|
||||
The pywb ``wb-manager`` utility has been extended to provide tools for adding, removing and checking access control rules.
|
||||
|
||||
The access rules are written to ``<collection>/acl/access-rules.acl`` for a given collection ``<collection>`` for automatic collections.
|
||||
For automatic collections, the access rules for a given collection ``<collection>`` are written to ``<collection>/acl/access-rules.aclj``.
|
||||
|
||||
For example, to add the first line to an ACL file ``access.aclj``, one could run::
|
||||
|
||||
|
@ -134,7 +134,7 @@ class AccessChecker(object):
|
||||
else:
|
||||
raise Exception('Invalid Access Source: ' + filename)
|
||||
|
||||
def find_access_rule(self, url, ts=None, urlkey=None):
|
||||
def find_access_rule(self, url, ts=None, urlkey=None, collection=None):
|
||||
"""Attempts to find the access control rule for the
|
||||
supplied URL otherwise returns the default rule
|
||||
|
||||
@ -150,6 +150,8 @@ class AccessChecker(object):
|
||||
'nosource': 'true',
|
||||
'exact_match_suffix': self.EXACT_SUFFIX_B
|
||||
}
|
||||
if collection:
|
||||
params['param.coll'] = collection
|
||||
|
||||
acl_iter, errs = self.aggregator(params)
|
||||
if errs:
|
||||
@ -214,7 +216,8 @@ class AccessChecker(object):
|
||||
if url == last_url:
|
||||
rule = last_rule
|
||||
else:
|
||||
rule = self.find_access_rule(url, cdx.get('timestamp'), cdx.get('urlkey'))
|
||||
rule = self.find_access_rule(url, cdx.get('timestamp'), cdx.get('urlkey'),
|
||||
cdx.get('source-coll'))
|
||||
|
||||
access = rule.get('access', 'exclude')
|
||||
if access == 'exclude':
|
||||
|
@ -142,7 +142,9 @@ class WarcServer(BaseWarcServer):
|
||||
base_dir=self.index_paths,
|
||||
config=self.config)
|
||||
|
||||
access_checker = AccessChecker(CacheDirectoryAccessSource(self.acl_paths),
|
||||
access_checker = AccessChecker(CacheDirectoryAccessSource(base_prefix=self.root_dir,
|
||||
base_dir=self.acl_paths,
|
||||
config=self.config),
|
||||
self.default_access)
|
||||
|
||||
if self.dedup_index_url:
|
||||
|
Loading…
x
Reference in New Issue
Block a user