1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

index query limit: ensure 'limit' is correctly applied to XmlQueryIndexSource, fixes ukwa/ukwa-pywb#49 (#523)

This commit is contained in:
Ilya Kreymer 2019-11-22 12:25:18 -08:00 committed by GitHub
parent 30680803e8
commit 0be84520ed
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 14 additions and 1 deletion

View File

@ -243,6 +243,10 @@ class XmlQueryIndexSource(BaseIndexSource):
raise BadRequestException('matchType={0} is not supported'.format(matchType=matchType)) raise BadRequestException('matchType={0} is not supported'.format(matchType=matchType))
try: try:
limit = params.get('limit')
if limit:
query = 'limit: {0} '.format(limit) + query
# OpenSearch API requires double-escaping # OpenSearch API requires double-escaping
# TODO: add option to not double escape if needed # TODO: add option to not double escape if needed
query_url = self.query_api_url + '?q=' + quote_plus(query + quote_plus(url)) query_url = self.query_api_url + '?q=' + quote_plus(query + quote_plus(url))

View File

@ -9,9 +9,14 @@ from mock import patch
import pytest import pytest
query_url = None
# ============================================================================ # ============================================================================
def mock_get(self, url): def mock_get(self, url):
string = '' string = ''
global query_url
query_url = url
if quote_plus(XmlQueryIndexSource.EXACT_QUERY) in url: if quote_plus(XmlQueryIndexSource.EXACT_QUERY) in url:
if quote_plus(quote_plus('http://example.com/some/path')) in url: if quote_plus(quote_plus('http://example.com/some/path')) in url:
string = URL_RESPONSE_2 string = URL_RESPONSE_2
@ -65,12 +70,14 @@ class TestXmlQueryIndexSource(BaseTestClass):
@patch('pywb.warcserver.index.indexsource.requests.sessions.Session.get', mock_get) @patch('pywb.warcserver.index.indexsource.requests.sessions.Session.get', mock_get)
def test_exact_query(self): def test_exact_query(self):
res, errs = self.do_query({'url': 'http://example.com/'}) res, errs = self.do_query({'url': 'http://example.com/', 'limit': 100})
expected = """\ expected = """\
com,example)/ 20180112200243 example.warc.gz com,example)/ 20180112200243 example.warc.gz
com,example)/ 20180216200300 example.warc.gz""" com,example)/ 20180216200300 example.warc.gz"""
assert(key_ts_res(res) == expected) assert(key_ts_res(res) == expected)
assert(errs == {}) assert(errs == {})
assert query_url == 'http://localhost:8080/path?q=limit%3A+100+type%3Aurlquery+url%3Ahttp%253A%252F%252Fexample.com%252F'
@patch('pywb.warcserver.index.indexsource.requests.sessions.Session.get', mock_get) @patch('pywb.warcserver.index.indexsource.requests.sessions.Session.get', mock_get)
@ -82,6 +89,8 @@ com,example)/some/path 20180216200300 example.warc.gz"""
assert(key_ts_res(res) == expected) assert(key_ts_res(res) == expected)
assert(errs == {}) assert(errs == {})
assert query_url == 'http://localhost:8080/path?q=type%3Aurlquery+url%3Ahttp%253A%252F%252Fexample.com%252Fsome%252Fpath'
@patch('pywb.warcserver.index.indexsource.requests.sessions.Session.get', mock_get) @patch('pywb.warcserver.index.indexsource.requests.sessions.Session.get', mock_get)
def test_prefix_query(self): def test_prefix_query(self):