mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
fuzzymatcher: don't modify original params, instead create new fuzzy_params for fuzzy query
This commit is contained in:
parent
685804919a
commit
f0fdc50574
@ -4,6 +4,7 @@ from pywb.utils.loaders import load_yaml_config
|
|||||||
import re
|
import re
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
from six import iterkeys
|
||||||
from six.moves.urllib.parse import urlsplit
|
from six.moves.urllib.parse import urlsplit
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
|
|
||||||
@ -20,7 +21,8 @@ class FuzzyMatcher(object):
|
|||||||
DEFAULT_MATCH_TYPE = 'prefix'
|
DEFAULT_MATCH_TYPE = 'prefix'
|
||||||
DEFAULT_REPLACE_AFTER = '?'
|
DEFAULT_REPLACE_AFTER = '?'
|
||||||
|
|
||||||
REMOVE_PARAMS = ['alt_url', 'reverse', 'closest', 'end_key']
|
FUZZY_SKIP_PARAMS = ('alt_url', 'reverse', 'closest', 'end_key',
|
||||||
|
'url', 'matchType', 'filter')
|
||||||
|
|
||||||
def __init__(self, filename):
|
def __init__(self, filename):
|
||||||
config = load_yaml_config(filename)
|
config = load_yaml_config(filename)
|
||||||
@ -103,14 +105,15 @@ class FuzzyMatcher(object):
|
|||||||
host = urlsplit(url).netloc
|
host = urlsplit(url).netloc
|
||||||
url = host.split('.', 1)[1]
|
url = host.split('.', 1)[1]
|
||||||
|
|
||||||
params.update({'url': url,
|
fuzzy_params = {'url': url,
|
||||||
'matchType': matched_rule.match_type,
|
'matchType': matched_rule.match_type,
|
||||||
'filter': filters})
|
'filter': filters}
|
||||||
|
|
||||||
for param in self.REMOVE_PARAMS:
|
for key in iterkeys(params):
|
||||||
params.pop(param, '')
|
if key not in self.FUZZY_SKIP_PARAMS:
|
||||||
|
fuzzy_params[key] = params[key]
|
||||||
|
|
||||||
return matched_rule
|
return matched_rule, fuzzy_params
|
||||||
|
|
||||||
def make_regex(self, config):
|
def make_regex(self, config):
|
||||||
if isinstance(config, list):
|
if isinstance(config, list):
|
||||||
@ -148,11 +151,13 @@ class FuzzyMatcher(object):
|
|||||||
|
|
||||||
url = params['url']
|
url = params['url']
|
||||||
|
|
||||||
rule = self.get_fuzzy_match(params)
|
res = self.get_fuzzy_match(params)
|
||||||
if not rule:
|
if not res:
|
||||||
return
|
return
|
||||||
|
|
||||||
new_iter, errs = index_source(params)
|
rule, fuzzy_params = res
|
||||||
|
|
||||||
|
new_iter, errs = index_source(fuzzy_params)
|
||||||
|
|
||||||
for cdx in new_iter:
|
for cdx in new_iter:
|
||||||
if self.allow_fuzzy_result(rule, url, cdx):
|
if self.allow_fuzzy_result(rule, url, cdx):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user