Review note (patch tools ignore text before the first "diff --git" header):
- The one-line-per-hunk-line layout and the Python indentation were rebuilt
  from a whitespace-collapsed copy. The blank and indented context lines of
  the first hunk are a best guess -- confirm against indexsource.py before
  applying.
- The XML element names in the three fixtures were absent from that copy and
  are restored on the assumption that they follow the OpenWayback xmlquery
  response format -- TODO confirm against the XmlQueryIndexSource parser.
- The "index" blob-id lines are omitted because the added content changed.

diff --git a/pywb/warcserver/index/indexsource.py b/pywb/warcserver/index/indexsource.py
--- a/pywb/warcserver/index/indexsource.py
+++ b/pywb/warcserver/index/indexsource.py
@@ -299,6 +299,19 @@ class XmlQueryIndexSource(BaseIndexSource):
             return cls(value[9:])
 
 
+    @classmethod
+    def init_from_config(cls, config):
+        """Create the source from a config dict, or decline it.
+
+        Returns None when ``config['type']`` is not ``'xmlquery'``; otherwise
+        returns an instance built from ``config['api_url']``.
+        """
+        if config['type'] != 'xmlquery':
+            return
+
+        return cls(config['api_url'])
+
+
 # =============================================================================
 class LiveIndexSource(BaseIndexSource):
     def __init__(self):
diff --git a/pywb/warcserver/index/test/test_xmlquery_indexsource.py b/pywb/warcserver/index/test/test_xmlquery_indexsource.py
new file mode 100644
--- /dev/null
+++ b/pywb/warcserver/index/test/test_xmlquery_indexsource.py
@@ -0,0 +1,170 @@
+from pywb.warcserver.test.testutils import BaseTestClass, key_ts_res
+
+from pywb.warcserver.index.indexsource import XmlQueryIndexSource
+from pywb.warcserver.index.aggregator import SimpleAggregator
+
+from mock import patch
+
+
+# ============================================================================
+def mock_get(self, url):
+    """Stand-in for ``Session.get`` that answers with canned XML.
+
+    The fixture is chosen by substring checks on the requested url; a url
+    matching none of them gets an empty body.
+    """
+    string = ''
+    if 'type:urlquery' in url:
+        # test the longer path first: the bare-host pattern is a prefix of it
+        if 'http%3A%2F%2Fexample.com%2Fsome%2Fpath' in url:
+            string = URL_RESPONSE_2
+
+        elif 'http%3A%2F%2Fexample.com%2F' in url:
+            string = URL_RESPONSE_1
+
+    elif 'type:prefixquery' in url:
+        string = PREFIX_QUERY
+
+    class MockResponse(object):
+        """Response double exposing only ``text`` and ``raise_for_status``."""
+
+        def __init__(self, string):
+            self.string = string
+
+        @property
+        def text(self):
+            return self.string
+
+        def raise_for_status(self):
+            pass
+
+    return MockResponse(string)
+
+
+# ============================================================================
+class TestXmlQueryIndexSource(BaseTestClass):
+    """Run queries against XmlQueryIndexSource with the HTTP GET mocked."""
+
+    @classmethod
+    def setup_class(cls):
+        super(TestXmlQueryIndexSource, cls).setup_class()
+
+    def do_query(self, params):
+        """Query a SimpleAggregator wrapping a single XmlQueryIndexSource."""
+        return SimpleAggregator({'source': XmlQueryIndexSource('http://localhost:8080/path')})(params)
+
+    @patch('pywb.warcserver.index.indexsource.requests.sessions.Session.get', mock_get)
+    def test_exact_query(self):
+        res, errs = self.do_query({'url': 'http://example.com/'})
+        expected = """\
+com,example)/ 20180112200243 example.warc.gz
+com,example)/ 20180216200300 example.warc.gz"""
+        assert key_ts_res(res) == expected
+        assert errs == {}
+
+    @patch('pywb.warcserver.index.indexsource.requests.sessions.Session.get', mock_get)
+    def test_exact_query_2(self):
+        res, errs = self.do_query({'url': 'http://example.com/some/path'})
+        expected = """\
+com,example)/some/path 20180112200243 example.warc.gz
+com,example)/some/path 20180216200300 example.warc.gz"""
+        assert key_ts_res(res) == expected
+        assert errs == {}
+
+    @patch('pywb.warcserver.index.indexsource.requests.sessions.Session.get', mock_get)
+    def test_prefix_query(self):
+        res, errs = self.do_query({'url': 'http://example.com/', 'matchType': 'prefix'})
+        expected = """\
+com,example)/ 20180112200243 example.warc.gz
+com,example)/ 20180216200300 example.warc.gz
+com,example)/some/path 20180112200243 example.warc.gz
+com,example)/some/path 20180216200300 example.warc.gz"""
+        assert key_ts_res(res) == expected
+        assert errs == {}
+
+
+# ============================================================================
+URL_RESPONSE_1 = """
+<wayback>
+    <results>
+        <result>
+            <compressedoffset>10</compressedoffset>
+            <mimetype>text/html</mimetype>
+            <file>example.warc.gz</file>
+            <redirecturl>-</redirecturl>
+            <urlkey>com,example)/</urlkey>
+            <digest>7NZ7K6ZTRC4SOJODXH3S4AGZV7QSBWLF</digest>
+            <httpresponsecode>200</httpresponsecode>
+            <robotflags>-</robotflags>
+            <url>http://example.com/</url>
+            <capturedate>20180112200243</capturedate>
+        </result>
+        <result>
+            <compressedoffset>29570</compressedoffset>
+            <mimetype>text/html</mimetype>
+            <file>example.warc.gz</file>
+            <redirecturl>-</redirecturl>
+            <urlkey>com,example)/</urlkey>
+            <digest>LCKPKJJU5VPEN6HUJZ6JUYRGTPFD7ZC3</digest>
+            <httpresponsecode>200</httpresponsecode>
+            <robotflags>-</robotflags>
+            <url>http://example.com/</url>
+            <capturedate>20180216200300</capturedate>
+        </result>
+    </results>
+</wayback>
+"""
+
+URL_RESPONSE_2 = """
+<wayback>
+    <results>
+        <result>
+            <compressedoffset>10</compressedoffset>
+            <mimetype>text/html</mimetype>
+            <file>example.warc.gz</file>
+            <redirecturl>-</redirecturl>
+            <urlkey>com,example)/some/path</urlkey>
+            <digest>7NZ7K6ZTRC4SOJODXH3S4AGZV7QSBWLF</digest>
+            <httpresponsecode>200</httpresponsecode>
+            <robotflags>-</robotflags>
+            <url>http://example.com/some/path</url>
+            <capturedate>20180112200243</capturedate>
+        </result>
+        <result>
+            <compressedoffset>29570</compressedoffset>
+            <mimetype>text/html</mimetype>
+            <file>example.warc.gz</file>
+            <redirecturl>-</redirecturl>
+            <urlkey>com,example)/some/path</urlkey>
+            <digest>LCKPKJJU5VPEN6HUJZ6JUYRGTPFD7ZC3</digest>
+            <httpresponsecode>200</httpresponsecode>
+            <robotflags>-</robotflags>
+            <url>http://example.com/some/path</url>
+            <capturedate>20180216200300</capturedate>
+        </result>
+    </results>
+</wayback>
+"""
+
+PREFIX_QUERY = """
+<wayback>
+    <results>
+        <result>
+            <urlkey>com,example)/</urlkey>
+            <originalurl>http://example.com/</originalurl>
+            <numversions>2</numversions>
+            <numcaptures>2</numcaptures>
+            <firstcapturets>20180112200243</firstcapturets>
+            <lastcapturets>20180216200300</lastcapturets>
+        </result>
+        <result>
+            <urlkey>com,example)/some/path</urlkey>
+            <originalurl>http://example.com/some/path</originalurl>
+            <numversions>2</numversions>
+            <numcaptures>2</numcaptures>
+            <firstcapturets>20180112200243</firstcapturets>
+            <lastcapturets>20180216200300</lastcapturets>
+        </result>
+    </results>
+</wayback>
+"""