mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
fuzzy match rules: to simplify custom fuzzy match use cases, add support
for matching fuzzy match query params as a list
This commit is contained in:
parent
7ac98fbfe2
commit
ec27ccfbb6
@ -125,12 +125,12 @@ class CDXDomainSpecificRule(BaseRule):
|
|||||||
def __init__(self, name, config):
|
def __init__(self, name, config):
|
||||||
super(CDXDomainSpecificRule, self).__init__(name, config)
|
super(CDXDomainSpecificRule, self).__init__(name, config)
|
||||||
|
|
||||||
if isinstance(config, basestring):
|
if not isinstance(config, dict):
|
||||||
self.regex = re.compile(config)
|
self.regex = self.make_regex(config)
|
||||||
self.replace = None
|
self.replace = None
|
||||||
self.filter = self.DEFAULT_FILTER
|
self.filter = self.DEFAULT_FILTER
|
||||||
else:
|
else:
|
||||||
self.regex = re.compile(config.get('match'))
|
self.regex = self.make_regex(config.get('match'))
|
||||||
self.replace = config.get('replace')
|
self.replace = config.get('replace')
|
||||||
self.filter = config.get('filter', self.DEFAULT_FILTER)
|
self.filter = config.get('filter', self.DEFAULT_FILTER)
|
||||||
|
|
||||||
@ -147,6 +147,35 @@ class CDXDomainSpecificRule(BaseRule):
|
|||||||
if self.replace:
|
if self.replace:
|
||||||
self.replace = unsurt(self.replace)
|
self.replace = unsurt(self.replace)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
@staticmethod
|
||||||
import doctest
|
def make_regex(config):
|
||||||
doctest.testmod()
|
if isinstance(config, list):
|
||||||
|
string = CDXDomainSpecificRule.make_query_match_regex(config)
|
||||||
|
# assumes string
|
||||||
|
else:
|
||||||
|
string = config
|
||||||
|
|
||||||
|
return re.compile(string)
|
||||||
|
|
||||||
|
@staticmethod
def make_query_match_regex(params_list):
    r"""
    Build a fuzzy-match regex string from a list of query param names.

    The names are sorted, each one is regex-escaped and wrapped in a
    capturing group of the form ``(<name>=[^&]+)``, and the groups are
    joined with ``.*``. The sequence passed in is not modified.

    :param params_list: iterable of query param names (strings)
    :return: the regex as an uncompiled string; an empty input
             yields an empty string

    >>> CDXDomainSpecificRule.make_query_match_regex(['param1', 'id', 'abc'])
    '(abc=[^&]+).*(id=[^&]+).*(param1=[^&]+)'

    >>> CDXDomainSpecificRule.make_query_match_regex(['id[0]', 'abc()'])
    '(abc\\(\\)=[^&]+).*(id\\[0\\]=[^&]+)'

    """
    def conv(value):
        # escape the name so chars like '[' or '(' match literally
        return '({}=[^&]+)'.format(re.escape(value))

    # sorted() works on a copy: list.sort() would reorder the caller's
    # list in place and would fail for tuples or other iterables
    return '.*'.join(conv(name) for name in sorted(params_list))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
import doctest
|
||||||
|
doctest.testmod()
|
||||||
|
@ -15,12 +15,14 @@ rules:
|
|||||||
fuzzy_lookup: 'com,facebook\)/.*[?&]data=(.*?(?:[&]|query_type[^,]+))'
|
fuzzy_lookup: 'com,facebook\)/.*[?&]data=(.*?(?:[&]|query_type[^,]+))'
|
||||||
|
|
||||||
- url_prefix: 'com,facebook)/ajax/ufi/'
|
- url_prefix: 'com,facebook)/ajax/ufi/'
|
||||||
|
|
||||||
fuzzy_lookup: '(ft_ent_identifier=[^&]+).*(lsd=[^&]+)'
|
fuzzy_lookup: ['ft_ent_identifier', 'lsd']
|
||||||
|
#fuzzy_lookup: '(ft_ent_identifier=[^&]+).*(lsd=[^&]+)'
|
||||||
|
|
||||||
- url_prefix: 'com,facebook)/ajax/chat/hovercard/sidebar.php'
|
- url_prefix: 'com,facebook)/ajax/chat/hovercard/sidebar.php'
|
||||||
|
|
||||||
fuzzy_lookup: '(ids\[0\]=[^&]+)'
|
fuzzy_lookup: ['ids[0]']
|
||||||
|
#fuzzy_lookup: '(ids\[0\]=[^&]+)'
|
||||||
|
|
||||||
- url_prefix: 'com,facebook)/ajax/'
|
- url_prefix: 'com,facebook)/ajax/'
|
||||||
|
|
||||||
@ -28,7 +30,8 @@ rules:
|
|||||||
|
|
||||||
- url_prefix: 'com,facebook)/login.php'
|
- url_prefix: 'com,facebook)/login.php'
|
||||||
|
|
||||||
fuzzy_lookup: '(email=[^&]+).*(lgnrnd=[^&]+).*(lsd=[^&]+)'
|
#fuzzy_lookup: '(email=[^&]+).*(lgnrnd=[^&]+).*(lsd=[^&]+)'
|
||||||
|
fuzzy_lookup: ['email', 'lgnrnd', 'lsd']
|
||||||
|
|
||||||
# not actually needed, fuzzy match is used instead here
|
# not actually needed, fuzzy match is used instead here
|
||||||
# canonicalize:
|
# canonicalize:
|
||||||
@ -88,6 +91,8 @@ rules:
|
|||||||
match: '(example,example,test\)/.*?)[?].*?(id=value).*'
|
match: '(example,example,test\)/.*?)[?].*?(id=value).*'
|
||||||
replace: '\1?\2'
|
replace: '\1?\2'
|
||||||
|
|
||||||
|
fuzzy_lookup: ['param1', 'id']
|
||||||
|
|
||||||
rewrite:
|
rewrite:
|
||||||
js_rewrite_location: False
|
js_rewrite_location: False
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user