From 0be84520edebd084fcd1af84938227edeb4b87c1 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Fri, 22 Nov 2019 12:25:18 -0800 Subject: [PATCH] index query limit: ensure 'limit' is correctly applied to XmlQueryIndexSource, fixes ukwa/ukwa-pywb#49 (#523) --- pywb/warcserver/index/indexsource.py | 4 ++++ .../index/test/test_xmlquery_indexsource.py | 11 ++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/pywb/warcserver/index/indexsource.py b/pywb/warcserver/index/indexsource.py index afd9dce6..cd12a5f4 100644 --- a/pywb/warcserver/index/indexsource.py +++ b/pywb/warcserver/index/indexsource.py @@ -243,6 +243,10 @@ class XmlQueryIndexSource(BaseIndexSource): raise BadRequestException('matchType={0} is not supported'.format(matchType=matchType)) try: + limit = params.get('limit') + if limit: + query = 'limit: {0} '.format(limit) + query + # OpenSearch API requires double-escaping # TODO: add option to not double escape if needed query_url = self.query_api_url + '?q=' + quote_plus(query + quote_plus(url)) diff --git a/pywb/warcserver/index/test/test_xmlquery_indexsource.py b/pywb/warcserver/index/test/test_xmlquery_indexsource.py index 77a05823..6861aff8 100644 --- a/pywb/warcserver/index/test/test_xmlquery_indexsource.py +++ b/pywb/warcserver/index/test/test_xmlquery_indexsource.py @@ -9,9 +9,14 @@ from mock import patch import pytest +query_url = None + + # ============================================================================ def mock_get(self, url): string = '' + global query_url + query_url = url if quote_plus(XmlQueryIndexSource.EXACT_QUERY) in url: if quote_plus(quote_plus('http://example.com/some/path')) in url: string = URL_RESPONSE_2 @@ -65,12 +70,14 @@ class TestXmlQueryIndexSource(BaseTestClass): @patch('pywb.warcserver.index.indexsource.requests.sessions.Session.get', mock_get) def test_exact_query(self): - res, errs = self.do_query({'url': 'http://example.com/'}) + res, errs = self.do_query({'url': 'http://example.com/', 'limit': 100}) + expected = """\ com,example)/ 20180112200243 example.warc.gz com,example)/ 20180216200300 example.warc.gz""" assert(key_ts_res(res) == expected) assert(errs == {}) + assert query_url == 'http://localhost:8080/path?q=limit%3A+100+type%3Aurlquery+url%3Ahttp%253A%252F%252Fexample.com%252F' @patch('pywb.warcserver.index.indexsource.requests.sessions.Session.get', mock_get) @@ -82,6 +89,8 @@ com,example)/some/path 20180216200300 example.warc.gz""" assert(key_ts_res(res) == expected) assert(errs == {}) + assert query_url == 'http://localhost:8080/path?q=type%3Aurlquery+url%3Ahttp%253A%252F%252Fexample.com%252Fsome%252Fpath' + @patch('pywb.warcserver.index.indexsource.requests.sessions.Session.get', mock_get) def test_prefix_query(self):