1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

fuzzy match: make filter string optionally overridable

setup.py: unset the PYWB_CONFIG_FILE environment variable before running tests
This commit is contained in:
Ilya Kreymer 2014-03-27 21:43:30 -07:00
parent 41d51a6427
commit 2c74ea9f23
4 changed files with 24 additions and 16 deletions

View File

@ -88,7 +88,8 @@ class FuzzyQuery:
matched_rule = rule
if len(m.groups()) == 1:
filter_.append('~urlkey:' + m.group(1))
#filter_.append('~urlkey:' + m.group(1))
filter_.append(rule.filter.format(m.group(1)))
break
@ -113,15 +114,19 @@ class FuzzyQuery:
#=================================================================
class CDXDomainSpecificRule(BaseRule):
DEFAULT_FILTER = '~urlkey:{0}'
def __init__(self, name, config):
    """Set up the rule's regex, replacement and filter template.

    `config` is either a plain string, which is used directly as the
    regex pattern, or an object exposing ``get`` from which the
    'match', 'replace' and 'filter' entries are read.  When no
    'filter' entry is supplied (or `config` is a plain string) the
    class-level DEFAULT_FILTER template is used.
    """
    super(CDXDomainSpecificRule, self).__init__(name, config)

    is_plain_pattern = isinstance(config, basestring)

    # The pattern is compiled before anything else is stored, so an
    # invalid regex fails before any attribute is set on the rule.
    pattern = config if is_plain_pattern else config.get('match')
    self.regex = re.compile(pattern)

    if is_plain_pattern:
        # A bare pattern carries no extra settings: fall back to defaults.
        self.replace = None
        self.filter = self.DEFAULT_FILTER
        return

    self.replace = config.get('replace')
    self.filter = config.get('filter', self.DEFAULT_FILTER)
def unsurt(self):
"""

View File

@ -35,9 +35,6 @@ class BaseCDXServer(object):
if not self.url_canon:
self.url_canon = UrlCanonicalizer(surt_ordered)
# set perms checker, if any
#self.perms_checker = kwargs.get('perms_checker')
def _check_cdx_iter(self, cdx_iter, query):
""" Check cdx iter semantics
If `cdx_iter` is empty (no matches), check if fuzzy matching
@ -61,17 +58,18 @@ class BaseCDXServer(object):
return self.load_cdx(**fuzzy_query_params)
msg = 'No Captures found for: ' + query.url
print self.fuzzy_query
print query.params
raise NotFoundException(msg)
def _calc_search_keys(self, query):
    """Compute the search range for `query`.

    Forwards the query's url and match type, together with this
    server's url canonicalizer, to calc_search_range and returns its
    result unchanged.  Kept as a separate method so the key
    computation is isolated from the rest of the cdx loading code.
    """
    range_args = dict(url=query.url,
                      match_type=query.match_type,
                      url_canon=self.url_canon)
    return calc_search_range(**range_args)
def load_cdx(self, **params):
query = CDXQuery(**params)
url = query.url
key, end_key = calc_search_range(url=url,
match_type=query.match_type,
url_canon=self.url_canon)
key, end_key = self._calc_search_keys(query)
query.set_key(key, end_key)
cdx_iter = self._load_cdx_query(query)
@ -211,7 +209,7 @@ class RemoteCDXServer(BaseCDXServer):
#=================================================================
def create_cdx_server(config, ds_rules_file=None):
def create_cdx_server(config, ds_rules_file=None, server_cls=None):
if hasattr(config, 'get'):
paths = config.get('index_paths')
surt_ordered = config.get('surt_ordered', True)
@ -223,10 +221,11 @@ def create_cdx_server(config, ds_rules_file=None):
logging.debug('CDX Surt-Ordered? ' + str(surt_ordered))
if isinstance(paths, str) and is_http(paths):
server_cls = RemoteCDXServer
else:
server_cls = CDXServer
if not server_cls:
if isinstance(paths, str) and is_http(paths):
server_cls = RemoteCDXServer
else:
server_cls = CDXServer
return server_cls(paths,
config=pass_config,

View File

@ -58,4 +58,6 @@ rules:
#=================================================================
# Applies to all urls -- should be last
- url_prefix: ''
fuzzy_lookup: '[&?](?:_|uncache)=[\d]+[&]?'
fuzzy_lookup:
match: '(.*)[&?](?:_|uncache)=[\d]+[&]?'
filter: '=urlkey:{0}'

View File

@ -25,6 +25,8 @@ class PyTest(TestCommand):
def run_tests(self):
import pytest
import sys
import os
os.environ.pop('PYWB_CONFIG_FILE', None)
cmdline = ' --cov-config .coveragerc --cov pywb'
cmdline += ' -v --doctest-module ./pywb/ tests/'
errcode = pytest.main(cmdline)