1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

fuzzymatch fix: when the default rule fuzzy-matches a URL prefix that ends with a trailing '/', e.g. 'path/?_123', strip the trailing slash so the prefix becomes 'path' instead of 'path/'. This matches the canonicalizer's behavior of removing trailing slashes.

tests: add test to verify fuzzy matching with trailing slash before query
This commit is contained in:
Ilya Kreymer 2017-11-09 20:45:15 -08:00
parent 5cc1e60048
commit 41f227d8ae
2 changed files with 11 additions and 0 deletions

View File

@ -102,8 +102,12 @@ class FuzzyMatcher(object):
inx = url.find(matched_rule.replace_after)
if inx > 0:
length = inx + len(matched_rule.replace_after)
# don't include trailing '?' for default filter
if no_filters:
length -= 1
# don't include trailing '/' if match '/?'
if url[length - 1] == '/':
length -= 1
url = url[:length]
elif not no_filters:
url += matched_rule.replace_after[0]

View File

@ -55,6 +55,13 @@ class TestWbIntegration(BaseConfigTest):
# 17 Captures + header
assert len(resp.html.find_all('tr')) == 18
def test_calendar_query_fuzzy_match_add_slash(self):
    """Calendar query fuzzy-matches when a '/' precedes the query string.

    The requested URL ends in '/?_=...'; the '_=' argument is expected to
    be removed by fuzzy matching according to the standard rules.yaml.
    """
    query_url = '/pywb/*/http://www.iana.org/_css/2013.1/screen.css/?_=3141592653'
    response = self.testapp.get(query_url)
    self._assert_basic_html(response)

    # expect 18 table rows: 17 captures plus the header row
    table_rows = response.html.find_all('tr')
    assert len(table_rows) == 18
def test_calendar_not_found(self):
# query with no results
resp = self.testapp.get('/pywb/*/http://not-exist.example.com')