mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
cdx: add basic test for CustomUrlCanonicalizer for now
(will likely refactor this configuration)
This commit is contained in:
parent
304a33aa5b
commit
1e3ef6ec5c
@ -10,9 +10,11 @@ from pywb.utils.canonicalize import unsurt, UrlCanonicalizer
|
|||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
def load_domain_specific_cdx_rules(ds_rules_file, surt_ordered):
|
def load_domain_specific_cdx_rules(ds_rules_file, surt_ordered):
|
||||||
#fh = pkgutil.get_data(__package__, filename)
|
"""
|
||||||
#config = yaml.load(fh)
|
>>> (canon, fuzzy) = load_domain_specific_cdx_rules(None, True)
|
||||||
|
>>> canon('http://test.example.example/path/index.html?a=b&id=value&c=d')
|
||||||
|
'example,example,test)/path/index.html?id=value'
|
||||||
|
"""
|
||||||
canon = None
|
canon = None
|
||||||
fuzzy = None
|
fuzzy = None
|
||||||
|
|
||||||
@ -128,15 +130,7 @@ class CDXDomainSpecificRule(BaseRule):
|
|||||||
if self.replace:
|
if self.replace:
|
||||||
self.replace = unsurt(self.replace)
|
self.replace = unsurt(self.replace)
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def load_rules(rules_config, surt_ordered=True):
|
|
||||||
if not rules_config:
|
|
||||||
return []
|
|
||||||
|
|
||||||
rules = map(StartsWithRule, rules_config)
|
if __name__ == "__main__":
|
||||||
|
import doctest
|
||||||
if not surt_ordered:
|
doctest.testmod()
|
||||||
for rule in rules:
|
|
||||||
rule.unsurt()
|
|
||||||
|
|
||||||
return rules
|
|
||||||
|
@ -37,7 +37,11 @@ rules:
|
|||||||
# testing rules -- not for valid domain
|
# testing rules -- not for valid domain
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# this rule block is a non-existent prefix merely for testing
|
# this rule block is a non-existent prefix merely for testing
|
||||||
- url_prefix: 'example,example,test)/nolocation_rewrite'
|
- url_prefix: 'example,example,test)/'
|
||||||
|
|
||||||
|
canonicalize:
|
||||||
|
match: '(example,example,test\)/.*?)[?].*?(id=value).*'
|
||||||
|
replace: '\1?\2'
|
||||||
|
|
||||||
rewrite:
|
rewrite:
|
||||||
js_rewrite_location: False
|
js_rewrite_location: False
|
||||||
|
Loading…
x
Reference in New Issue
Block a user