diff --git a/pywb/warcserver/index/indexsource.py b/pywb/warcserver/index/indexsource.py
index 8807e1a5..efd5e1c5 100644
--- a/pywb/warcserver/index/indexsource.py
+++ b/pywb/warcserver/index/indexsource.py
@@ -299,6 +299,14 @@ class XmlQueryIndexSource(BaseIndexSource):
return cls(value[9:])
+    @classmethod
+    def init_from_config(cls, config):
+        """Create an index source from a dict-style config entry.
+
+        :param dict config: source config; its ``'type'`` entry is always
+            read, and ``'api_url'`` is read when the type is ``'xmlquery'``
+        :return: ``cls(config['api_url'])`` when ``config['type']`` is
+            ``'xmlquery'``, otherwise ``None``
+        """
+        if config['type'] == 'xmlquery':
+            return cls(config['api_url'])
+
+        # Any other type is not handled by this source.
+        return None
+
+
# =============================================================================
class LiveIndexSource(BaseIndexSource):
def __init__(self):
diff --git a/pywb/warcserver/index/test/test_xmlquery_indexsource.py b/pywb/warcserver/index/test/test_xmlquery_indexsource.py
new file mode 100644
index 00000000..fc5d45b2
--- /dev/null
+++ b/pywb/warcserver/index/test/test_xmlquery_indexsource.py
@@ -0,0 +1,162 @@
+from pywb.warcserver.test.testutils import BaseTestClass, key_ts_res
+
+from pywb.warcserver.index.indexsource import XmlQueryIndexSource
+from pywb.warcserver.index.aggregator import SimpleAggregator
+
+from mock import patch
+
+
+# ============================================================================
+def mock_get(self, url):
+    """Stand-in for a session ``get`` call that answers from canned bodies.
+
+    The body is picked by substring checks on ``url``:
+
+    * contains ``type:urlquery`` and the encoded ``http://example.com/some/path``
+      -> ``URL_RESPONSE_2``
+    * contains ``type:urlquery`` and the encoded ``http://example.com/``
+      -> ``URL_RESPONSE_1``
+    * contains ``type:prefixquery`` (and not ``type:urlquery``)
+      -> ``PREFIX_QUERY``
+    * anything else -> empty string
+
+    :param self: unused; present because this replaces a bound method
+    :param str url: the requested url
+    :return: object exposing ``text`` and a no-op ``raise_for_status()``
+    """
+    class MockResponse(object):
+        def __init__(self, string):
+            self.string = string
+
+        @property
+        def text(self):
+            return self.string
+
+        def raise_for_status(self):
+            # Canned responses never signal an HTTP error.
+            pass
+
+    if 'type:urlquery' in url:
+        # The longer path must be tested first: the shorter encoded url
+        # is a substring of it.
+        if 'http%3A%2F%2Fexample.com%2Fsome%2Fpath' in url:
+            return MockResponse(URL_RESPONSE_2)
+
+        if 'http%3A%2F%2Fexample.com%2F' in url:
+            return MockResponse(URL_RESPONSE_1)
+
+        return MockResponse('')
+
+    if 'type:prefixquery' in url:
+        return MockResponse(PREFIX_QUERY)
+
+    return MockResponse('')
+
+
+# ============================================================================
+class TestXmlQueryIndexSource(BaseTestClass):
+    """Query an XmlQueryIndexSource whose session GETs are served by mock_get."""
+
+    @classmethod
+    def setup_class(cls):
+        # Nothing extra to prepare; defer entirely to the base class.
+        super(TestXmlQueryIndexSource, cls).setup_class()
+
+    def do_query(self, params):
+        """Run ``params`` through a SimpleAggregator wrapping one xmlquery source.
+
+        Returns whatever the aggregator call returns; the tests below
+        unpack it as ``(results, errors)``.
+        """
+        index_source = XmlQueryIndexSource('http://localhost:8080/path')
+        aggregator = SimpleAggregator({'source': index_source})
+        return aggregator(params)
+
+    @patch('pywb.warcserver.index.indexsource.requests.sessions.Session.get', mock_get)
+    def test_exact_query(self):
+        query = {'url': 'http://example.com/'}
+        res, errs = self.do_query(query)
+
+        expected = """\
+com,example)/ 20180112200243 example.warc.gz
+com,example)/ 20180216200300 example.warc.gz"""
+
+        assert key_ts_res(res) == expected
+        assert errs == {}
+
+    @patch('pywb.warcserver.index.indexsource.requests.sessions.Session.get', mock_get)
+    def test_exact_query_2(self):
+        query = {'url': 'http://example.com/some/path'}
+        res, errs = self.do_query(query)
+
+        expected = """\
+com,example)/some/path 20180112200243 example.warc.gz
+com,example)/some/path 20180216200300 example.warc.gz"""
+
+        assert key_ts_res(res) == expected
+        assert errs == {}
+
+    @patch('pywb.warcserver.index.indexsource.requests.sessions.Session.get', mock_get)
+    def test_prefix_query(self):
+        query = {'url': 'http://example.com/', 'matchType': 'prefix'}
+        res, errs = self.do_query(query)
+
+        expected = """\
+com,example)/ 20180112200243 example.warc.gz
+com,example)/ 20180216200300 example.warc.gz
+com,example)/some/path 20180112200243 example.warc.gz
+com,example)/some/path 20180216200300 example.warc.gz"""
+
+        assert key_ts_res(res) == expected
+        assert errs == {}
+
+
+# ============================================================================
+# Canned response body that mock_get returns for a 'type:urlquery' request
+# whose url contains the encoded http://example.com/ (but not .../some/path).
+# It carries two records for the key com,example)/ with timestamps
+# 20180112200243 and 20180216200300.
+# NOTE(review): the first record contains 'http://example.ccom/' (double "c"),
+# which looks like a typo for 'http://example.com/'. The tests in this module
+# only compare the key_ts_res() output, which presumably ignores that field --
+# confirm before correcting.
+URL_RESPONSE_1 = """
+
+
+
+ 10
+ text/html
+ example.warc.gz
+ -
+ com,example)/
+ 7NZ7K6ZTRC4SOJODXH3S4AGZV7QSBWLF
+ 200
+ -
+ http://example.ccom/
+ 20180112200243
+
+
+ 29570
+ text/html
+ example.warc.gz
+ -
+ com,example)/
+ LCKPKJJU5VPEN6HUJZ6JUYRGTPFD7ZC3
+ 200
+ -
+ http://example.com/
+ 20180216200300
+
+
+
+"""
+
+# Canned response body that mock_get returns for a 'type:urlquery' request
+# whose url contains the encoded http://example.com/some/path.
+# It carries two records for the key com,example)/some/path with timestamps
+# 20180112200243 and 20180216200300.
+URL_RESPONSE_2 = """
+
+
+
+ 10
+ text/html
+ example.warc.gz
+ -
+ com,example)/some/path
+ 7NZ7K6ZTRC4SOJODXH3S4AGZV7QSBWLF
+ 200
+ -
+ http://example.com/some/path
+ 20180112200243
+
+
+ 29570
+ text/html
+ example.warc.gz
+ -
+ com,example)/some/path
+ LCKPKJJU5VPEN6HUJZ6JUYRGTPFD7ZC3
+ 200
+ -
+ http://example.com/some/path
+ 20180216200300
+
+
+
+"""
+
+# Canned response body that mock_get returns when the url contains
+# 'type:prefixquery' (and not 'type:urlquery').
+# It lists two entries: com,example)/ and com,example)/some/path.
+# NOTE(review): test_prefix_query expects the captures of both keys in its
+# output, so the index source presumably issues a follow-up 'type:urlquery'
+# request per listed entry -- confirm against XmlQueryIndexSource.
+PREFIX_QUERY = """
+
+
+
+ com,example)/
+ http://example.com/
+ 2
+ 2
+ 20180112200243
+ 20180216200300
+
+
+ com,example)/some/path
+ http://example.com/some/path
+ 2
+ 2
+ 20180112200243
+ 20180216200300
+
+
+
+"""