mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
fuzzy match rules: to simplify custom fuzzy match use cases, add support
for matching fuzzy match query params as a list
This commit is contained in:
parent
7ac98fbfe2
commit
ec27ccfbb6
@ -125,12 +125,12 @@ class CDXDomainSpecificRule(BaseRule):
|
||||
def __init__(self, name, config):
|
||||
super(CDXDomainSpecificRule, self).__init__(name, config)
|
||||
|
||||
if isinstance(config, basestring):
|
||||
self.regex = re.compile(config)
|
||||
if not isinstance(config, dict):
|
||||
self.regex = self.make_regex(config)
|
||||
self.replace = None
|
||||
self.filter = self.DEFAULT_FILTER
|
||||
else:
|
||||
self.regex = re.compile(config.get('match'))
|
||||
self.regex = self.make_regex(config.get('match'))
|
||||
self.replace = config.get('replace')
|
||||
self.filter = config.get('filter', self.DEFAULT_FILTER)
|
||||
|
||||
@ -147,6 +147,35 @@ class CDXDomainSpecificRule(BaseRule):
|
||||
if self.replace:
|
||||
self.replace = unsurt(self.replace)
|
||||
|
||||
if __name__ == "__main__":
|
||||
import doctest
|
||||
doctest.testmod()
|
||||
@staticmethod
def make_regex(config):
    """
    Compile a fuzzy-match rule value into a regex object.

    :param config: either a list of query param names, which is first
        converted to a pattern string via ``make_query_match_regex``,
        or a value that is handed to ``re.compile`` unchanged
        (assumed to be a pattern string).
    :return: the compiled regular expression
    """
    # Default: treat the value as a ready-made pattern string.
    pattern = config

    # A list is shorthand for "match these query params".
    if isinstance(config, list):
        pattern = CDXDomainSpecificRule.make_query_match_regex(config)

    return re.compile(pattern)
|
||||
|
||||
@staticmethod
def make_query_match_regex(params_list):
    r"""
    Build a fuzzy-match regex string from a list of query param names.

    Each name is regex-escaped and wrapped in a capture group of the
    form ``(name=[^&]+)``. The groups are emitted in sorted name order
    and joined with ``.*``. An empty input yields an empty string.

    The caller's sequence is not modified.

    :param params_list: iterable of query parameter names (strings)
    :return: the regex pattern as a string (not compiled)

    >>> CDXDomainSpecificRule.make_query_match_regex(['param1', 'id', 'abc'])
    '(abc=[^&]+).*(id=[^&]+).*(param1=[^&]+)'

    >>> CDXDomainSpecificRule.make_query_match_regex(['id[0]', 'abc()'])
    '(abc\\(\\)=[^&]+).*(id\\[0\\]=[^&]+)'

    """
    # sorted() instead of list.sort(): make_regex passes the rule's list
    # through as-is, so sorting in place would reorder the caller's data.
    # NOTE(review): the sorted order presumably mirrors the order of query
    # args in the url key being matched -- confirm against the canonicalizer.
    groups = ['({}=[^&]+)'.format(re.escape(name))
              for name in sorted(params_list)]

    return '.*'.join(groups)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Only when run directly as a script: execute the doctests embedded
    # in this module's docstrings.
    from doctest import testmod
    testmod()
|
||||
|
@ -15,12 +15,14 @@ rules:
|
||||
fuzzy_lookup: 'com,facebook\)/.*[?&]data=(.*?(?:[&]|query_type[^,]+))'
|
||||
|
||||
- url_prefix: 'com,facebook)/ajax/ufi/'
|
||||
|
||||
fuzzy_lookup: '(ft_ent_identifier=[^&]+).*(lsd=[^&]+)'
|
||||
|
||||
fuzzy_lookup: ['ft_ent_identifier', 'lsd']
|
||||
#fuzzy_lookup: '(ft_ent_identifier=[^&]+).*(lsd=[^&]+)'
|
||||
|
||||
- url_prefix: 'com,facebook)/ajax/chat/hovercard/sidebar.php'
|
||||
|
||||
fuzzy_lookup: '(ids\[0\]=[^&]+)'
|
||||
fuzzy_lookup: ['ids[0]']
|
||||
#fuzzy_lookup: '(ids\[0\]=[^&]+)'
|
||||
|
||||
- url_prefix: 'com,facebook)/ajax/'
|
||||
|
||||
@ -28,7 +30,8 @@ rules:
|
||||
|
||||
- url_prefix: 'com,facebook)/login.php'
|
||||
|
||||
fuzzy_lookup: '(email=[^&]+).*(lgnrnd=[^&]+).*(lsd=[^&]+)'
|
||||
#fuzzy_lookup: '(email=[^&]+).*(lgnrnd=[^&]+).*(lsd=[^&]+)'
|
||||
fuzzy_lookup: ['email', 'lgnrnd', 'lsd']
|
||||
|
||||
# not actually needed, fuzzy match is used instead here
|
||||
# canonicalize:
|
||||
@ -88,6 +91,8 @@ rules:
|
||||
match: '(example,example,test\)/.*?)[?].*?(id=value).*'
|
||||
replace: '\1?\2'
|
||||
|
||||
fuzzy_lookup: ['param1', 'id']
|
||||
|
||||
rewrite:
|
||||
js_rewrite_location: False
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user