1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

cdx: add basic test for CustomUrlCanonicalizer for now

(will likely refactor this configuration)
This commit is contained in:
Ilya Kreymer 2014-02-28 09:40:51 -08:00
parent 304a33aa5b
commit 1e3ef6ec5c
2 changed files with 13 additions and 15 deletions

View File

@ -10,9 +10,11 @@ from pywb.utils.canonicalize import unsurt, UrlCanonicalizer
#=================================================================
def load_domain_specific_cdx_rules(ds_rules_file, surt_ordered):
#fh = pkgutil.get_data(__package__, filename)
#config = yaml.load(fh)
"""
>>> (canon, fuzzy) = load_domain_specific_cdx_rules(None, True)
>>> canon('http://test.example.example/path/index.html?a=b&id=value&c=d')
'example,example,test)/path/index.html?id=value'
"""
canon = None
fuzzy = None
@ -128,15 +130,7 @@ class CDXDomainSpecificRule(BaseRule):
if self.replace:
self.replace = unsurt(self.replace)
@staticmethod
def load_rules(rules_config, surt_ordered=True):
    """Build the list of StartsWithRule objects for a rules configuration.

    :param rules_config: iterable of per-rule configuration entries; each
        entry is passed to ``StartsWithRule``. A falsy value (``None``,
        empty list) yields no rules.
    :param surt_ordered: when false, ``unsurt()`` is called on every rule
        before the rules are returned.
    :return: a list of rules, empty when ``rules_config`` is falsy.
    """
    if not rules_config:
        return []

    # Materialize a real list: on Python 3 ``map`` returns a one-shot
    # iterator, which the loop below would exhaust before it is returned,
    # leaving the caller with nothing to iterate over.
    rules = [StartsWithRule(config) for config in rules_config]

    if not surt_ordered:
        for rule in rules:
            rule.unsurt()

    return rules
# Script entry point: when this module is executed directly, run the doctests embedded in its docstrings.
if __name__ == "__main__":
import doctest
doctest.testmod()

View File

@ -37,7 +37,11 @@ rules:
# testing rules -- not for a valid domain
#=================================================================
# this rule block uses a non-existent URL prefix, purely for testing
- url_prefix: 'example,example,test)/nolocation_rewrite'
- url_prefix: 'example,example,test)/'
canonicalize:
match: '(example,example,test\)/.*?)[?].*?(id=value).*'
replace: '\1?\2'
rewrite:
js_rewrite_location: False