diff --git a/pywb/warcserver/index/indexsource.py b/pywb/warcserver/index/indexsource.py index 8305f3cc..c7b91d04 100644 --- a/pywb/warcserver/index/indexsource.py +++ b/pywb/warcserver/index/indexsource.py @@ -222,6 +222,8 @@ class RemoteIndexSource(BaseIndexSource): # ============================================================================= class XmlQueryIndexSource(BaseIndexSource): + EXACT_QUERY = 'type:urlquery url:' + PREFIX_QUERY = 'type:prefixquery url:' def __init__(self, query_api_url): self.query_api_url = query_api_url @@ -235,13 +237,16 @@ class XmlQueryIndexSource(BaseIndexSource): matchType = params.get('matchType', 'exact') if matchType == 'exact': - query_url = self.query_api_url + '?q=' + quote_plus('type:urlquery url:' + quote_plus(url)) + query = self.EXACT_QUERY elif matchType == 'prefix': - query_url = self.query_api_url + '?q=' + quote_plus('type:prefixquery url:' + quote_plus(url)) + query = self.PREFIX_QUERY else: raise BadRequestException('matchType={0} is not supported'.format(matchType=matchType)) try: + #OpenSearch API requires double-escaping + #TODO: add option to not double escape if needed + query_url = self.query_api_url + '?q=' + quote_plus(query + quote_plus(url)) self.logger.debug("Running query: %s" % query_url) response = self.session.get(query_url) response.raise_for_status() diff --git a/pywb/warcserver/index/test/test_xmlquery_indexsource.py b/pywb/warcserver/index/test/test_xmlquery_indexsource.py index 22158148..3a3da4bb 100644 --- a/pywb/warcserver/index/test/test_xmlquery_indexsource.py +++ b/pywb/warcserver/index/test/test_xmlquery_indexsource.py @@ -10,14 +10,14 @@ import pytest # ============================================================================ def mock_get(self, url): string = '' - if 'type:urlquery' in url: - if 'http%3A%2F%2Fexample.com%2Fsome%2Fpath' in url: + if 'type%3Aurlquery' in url: + if 'http%253A%252F%252Fexample.com%252Fsome%252Fpath' in url: string = URL_RESPONSE_2 - elif
'http%3A%2F%2Fexample.com%2F' in url: + elif 'http%253A%252F%252Fexample.com%252F' in url: string = URL_RESPONSE_1 - elif 'type:prefixquery' in url: + elif 'type%3Aprefixquery' in url: string = PREFIX_QUERY class MockResponse(object):