mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
memento 404 fix: ensure timemap only includes memento headers on a successful (200) response
fuzzy match limit: add 'fuzzy_search_limit' option to default_filters in rules.yaml; default the fuzzy matching search limit to 100 results to avoid timeouts for large result sets that don't have any matches
This commit is contained in:
parent
0a9ad5c8dc
commit
54a4e38531
@ -506,7 +506,7 @@ class RewriterApp(object):
|
||||
|
||||
response = WbResponse.text_response(response, content_type=content_type)
|
||||
|
||||
if self.enable_memento:
|
||||
if self.enable_memento and response.status_headers.statusline.startswith('200'):
|
||||
self._add_memento_links(wb_url.url, full_prefix, None, memento_ts,
|
||||
response.status_headers, is_timegate, is_proxy)
|
||||
return response
|
||||
|
@ -1,5 +1,8 @@
|
||||
# Default Filters
|
||||
default_filters:
|
||||
# max number of results for a fuzzy match prefix search
|
||||
fuzzy_search_limit: '100'
|
||||
|
||||
# exts that should *not* be treated as files (ignore all query args)
|
||||
not_exts:
|
||||
- asp
|
||||
|
@ -55,6 +55,10 @@
|
||||
<script src='{{ static_prefix }}/vidrw.js'> </script>
|
||||
{% endif %}
|
||||
|
||||
{% if config.enable_transclusions %}
|
||||
<script src="{{ static_prefix }}/transclusions.js"> </script>
|
||||
{% endif %}
|
||||
|
||||
{{ banner_html }}
|
||||
|
||||
<!-- End WB Insert -->
|
||||
|
@ -38,6 +38,8 @@ class FuzzyMatcher(object):
|
||||
|
||||
self.default_filters = config.get('default_filters')
|
||||
|
||||
self.fuzzy_search_limit = self.default_filters.get('fuzzy_search_limit')
|
||||
|
||||
self.url_normalize_rx = [(re.compile(rule['match']), rule['replace']) for rule in self.default_filters['url_normalize']]
|
||||
|
||||
def parse_fuzzy_rule(self, rule):
|
||||
@ -121,6 +123,9 @@ class FuzzyMatcher(object):
|
||||
'filter': filters,
|
||||
'is_fuzzy': '1'}
|
||||
|
||||
if self.fuzzy_search_limit:
|
||||
fuzzy_params['limit'] = self.fuzzy_search_limit
|
||||
|
||||
for key in iterkeys(params):
|
||||
if key not in self.FUZZY_SKIP_PARAMS:
|
||||
fuzzy_params[key] = params[key]
|
||||
|
@ -12,6 +12,9 @@ class EchoParamsSource(BaseIndexSource):
|
||||
if params.get('matchType', 'exact') == 'exact':
|
||||
return iter([])
|
||||
|
||||
assert params.get('is_fuzzy') == '1'
|
||||
assert params.get('limit') == '100'
|
||||
|
||||
cdx = {'urlkey': canonicalize(params.get('cdx_url')),
|
||||
'mime': params.get('mime'),
|
||||
'filter': params.get('filter'),
|
||||
|
@ -273,7 +273,25 @@ class TestMementoRedirectClassic(MementoMixin, BaseConfigTest):
|
||||
resp = self.testapp.get('/pywb/2/http://www.iana.org/', headers=headers)
|
||||
assert resp.status_code == 200
|
||||
|
||||
assert VARY not in resp.headers
|
||||
assert MEMENTO_DATETIME in resp.headers
|
||||
|
||||
def test_timegate_error_not_found(self):
|
||||
resp = self.testapp.get('/pywb/http://example.com/x-not-found', status=404)
|
||||
assert resp.status_code == 404
|
||||
|
||||
# No Memento Headers
|
||||
assert VARY not in resp.headers
|
||||
assert MEMENTO_DATETIME not in resp.headers
|
||||
assert 'Link' not in resp.headers
|
||||
|
||||
def test_timemap_error_not_found(self):
|
||||
resp = self.testapp.get('/pywb/timemap/link/http://example.com/x-not-found', status=404)
|
||||
assert resp.status_code == 404
|
||||
|
||||
# No Memento Headers
|
||||
assert VARY not in resp.headers
|
||||
assert MEMENTO_DATETIME not in resp.headers
|
||||
assert 'Link' not in resp.headers
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user