diff --git a/pywb/apps/rewriterapp.py b/pywb/apps/rewriterapp.py index 4118999e..27b9a874 100644 --- a/pywb/apps/rewriterapp.py +++ b/pywb/apps/rewriterapp.py @@ -506,7 +506,7 @@ class RewriterApp(object): response = WbResponse.text_response(response, content_type=content_type) - if self.enable_memento: + if self.enable_memento and response.status_headers.statusline.startswith('200'): self._add_memento_links(wb_url.url, full_prefix, None, memento_ts, response.status_headers, is_timegate, is_proxy) return response diff --git a/pywb/rules.yaml b/pywb/rules.yaml index d89f2549..79fe23fb 100644 --- a/pywb/rules.yaml +++ b/pywb/rules.yaml @@ -1,5 +1,8 @@ # Default Filters default_filters: + # limit to fuzzy match prefix results + fuzzy_search_limit: '100' + # exts that should *not* be treated as files (ignore all query args) not_exts: - asp diff --git a/pywb/templates/head_insert.html b/pywb/templates/head_insert.html index e1506d83..d42d0a35 100644 --- a/pywb/templates/head_insert.html +++ b/pywb/templates/head_insert.html @@ -55,6 +55,10 @@ {% endif %} +{% if config.enable_transclusions %} + +{% endif %} + {{ banner_html }} diff --git a/pywb/warcserver/index/fuzzymatcher.py b/pywb/warcserver/index/fuzzymatcher.py index b758b3ec..d47d00ab 100644 --- a/pywb/warcserver/index/fuzzymatcher.py +++ b/pywb/warcserver/index/fuzzymatcher.py @@ -38,6 +38,8 @@ class FuzzyMatcher(object): self.default_filters = config.get('default_filters') + self.fuzzy_search_limit = self.default_filters.get('fuzzy_search_limit') + self.url_normalize_rx = [(re.compile(rule['match']), rule['replace']) for rule in self.default_filters['url_normalize']] def parse_fuzzy_rule(self, rule): @@ -121,6 +123,9 @@ class FuzzyMatcher(object): 'filter': filters, 'is_fuzzy': '1'} + if self.fuzzy_search_limit: + fuzzy_params['limit'] = self.fuzzy_search_limit + for key in iterkeys(params): if key not in self.FUZZY_SKIP_PARAMS: fuzzy_params[key] = params[key] diff --git a/pywb/warcserver/index/test/test_fuzzymatcher.py b/pywb/warcserver/index/test/test_fuzzymatcher.py index a9b1cd72..13b17976 100644 --- a/pywb/warcserver/index/test/test_fuzzymatcher.py +++ b/pywb/warcserver/index/test/test_fuzzymatcher.py @@ -12,6 +12,9 @@ class EchoParamsSource(BaseIndexSource): if params.get('matchType', 'exact') == 'exact': return iter([]) + assert params.get('is_fuzzy') == '1' + assert params.get('limit') == '100' + cdx = {'urlkey': canonicalize(params.get('cdx_url')), 'mime': params.get('mime'), 'filter': params.get('filter'), diff --git a/tests/test_memento.py b/tests/test_memento.py index f13a6a63..7c6d6164 100644 --- a/tests/test_memento.py +++ b/tests/test_memento.py @@ -273,7 +273,25 @@ class TestMementoRedirectClassic(MementoMixin, BaseConfigTest): resp = self.testapp.get('/pywb/2/http://www.iana.org/', headers=headers) assert resp.status_code == 200 + assert VARY not in resp.headers + assert MEMENTO_DATETIME in resp.headers + def test_timegate_error_not_found(self): resp = self.testapp.get('/pywb/http://example.com/x-not-found', status=404) assert resp.status_code == 404 + # No Memento Headers + assert VARY not in resp.headers + assert MEMENTO_DATETIME not in resp.headers + assert 'Link' not in resp.headers + + def test_timemap_error_not_found(self): + resp = self.testapp.get('/pywb/timemap/link/http://example.com/x-not-found', status=404) + assert resp.status_code == 404 + + # No Memento Headers + assert VARY not in resp.headers + assert MEMENTO_DATETIME not in resp.headers + assert 'Link' not in resp.headers + +