mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
indexsource: add tests for XmlQueryIndexSource, add missing init_from_config() (ukwa/ukwa-pywb#2)
This commit is contained in:
parent
94eb4ad206
commit
ec88e962b3
@ -299,6 +299,14 @@ class XmlQueryIndexSource(BaseIndexSource):
|
|||||||
return cls(value[9:])
|
return cls(value[9:])
|
||||||
|
|
||||||
|
|
||||||
|
@classmethod
def init_from_config(cls, config):
    """Build an index source from a config dict, if it describes this type.

    :param dict config: source configuration; must contain a ``'type'`` key,
        and an ``'api_url'`` key when the type is ``'xmlquery'``.
    :return: ``cls(config['api_url'])`` when ``config['type']`` is
        ``'xmlquery'``, otherwise ``None``.
    :raises KeyError: if ``'type'`` is missing, or ``'api_url'`` is missing
        from an ``'xmlquery'`` config.
    """
    if config['type'] != 'xmlquery':
        # Not ours: return None explicitly so both exits return a value.
        return None

    return cls(config['api_url'])
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
class LiveIndexSource(BaseIndexSource):
|
class LiveIndexSource(BaseIndexSource):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
162
pywb/warcserver/index/test/test_xmlquery_indexsource.py
Normal file
162
pywb/warcserver/index/test/test_xmlquery_indexsource.py
Normal file
@ -0,0 +1,162 @@
|
|||||||
|
from pywb.warcserver.test.testutils import BaseTestClass, key_ts_res
|
||||||
|
|
||||||
|
from pywb.warcserver.index.indexsource import XmlQueryIndexSource
|
||||||
|
from pywb.warcserver.index.aggregator import SimpleAggregator
|
||||||
|
|
||||||
|
from mock import patch
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
def mock_get(self, url):
    """Replacement for ``Session.get`` that serves canned XML responses.

    The response body is picked by substring-matching ``url``:

    * ``type:urlquery`` plus the percent-encoded ``http://example.com/some/path``
      -> ``URL_RESPONSE_2``
    * ``type:urlquery`` plus the percent-encoded ``http://example.com/``
      -> ``URL_RESPONSE_1``
    * ``type:prefixquery`` -> ``PREFIX_QUERY``
    * anything else -> empty string

    :param self: the session instance the patched method is bound to (unused).
    :param str url: full request URL, including the query.
    :return: an object exposing ``text`` and a no-op ``raise_for_status()``.
    """
    body = ''

    if 'type:urlquery' in url:
        # Check the longer path first: the encoded root URL is a substring
        # of the encoded '/some/path' URL and would otherwise match it too.
        if 'http%3A%2F%2Fexample.com%2Fsome%2Fpath' in url:
            body = URL_RESPONSE_2

        elif 'http%3A%2F%2Fexample.com%2F' in url:
            body = URL_RESPONSE_1

    elif 'type:prefixquery' in url:
        body = PREFIX_QUERY

    class MockResponse(object):
        """Minimal response double: just ``text`` and ``raise_for_status``."""

        def __init__(self, string):
            self.string = string

        @property
        def text(self):
            return self.string

        def raise_for_status(self):
            # Canned responses never represent an HTTP error.
            pass

    return MockResponse(body)
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
class TestXmlQueryIndexSource(BaseTestClass):
    """Tests for ``XmlQueryIndexSource`` run through a ``SimpleAggregator``.

    Each test patches ``Session.get`` with ``mock_get``, so the responses
    come from that function rather than from a real HTTP request.
    """

    @classmethod
    def setup_class(cls):
        super(TestXmlQueryIndexSource, cls).setup_class()

    def do_query(self, params):
        """Run ``params`` against a one-source aggregator.

        :param dict params: query parameters (e.g. ``url``, ``matchType``).
        :return: whatever the aggregator call returns; the tests unpack it
            as ``(results, errors)``.
        """
        return SimpleAggregator({'source': XmlQueryIndexSource('http://localhost:8080/path')})(params)

    @patch('pywb.warcserver.index.indexsource.requests.sessions.Session.get', mock_get)
    def test_exact_query(self):
        """Exact lookup of the root URL yields its two captures."""
        res, errs = self.do_query({'url': 'http://example.com/'})
        expected = """\
com,example)/ 20180112200243 example.warc.gz
com,example)/ 20180216200300 example.warc.gz"""
        assert key_ts_res(res) == expected
        assert errs == {}

    @patch('pywb.warcserver.index.indexsource.requests.sessions.Session.get', mock_get)
    def test_exact_query_2(self):
        """Exact lookup of ``/some/path`` yields its two captures."""
        res, errs = self.do_query({'url': 'http://example.com/some/path'})
        expected = """\
com,example)/some/path 20180112200243 example.warc.gz
com,example)/some/path 20180216200300 example.warc.gz"""
        assert key_ts_res(res) == expected
        assert errs == {}

    @patch('pywb.warcserver.index.indexsource.requests.sessions.Session.get', mock_get)
    def test_prefix_query(self):
        """Prefix lookup yields the captures of both URLs, ordered by key."""
        res, errs = self.do_query({'url': 'http://example.com/', 'matchType': 'prefix'})
        expected = """\
com,example)/ 20180112200243 example.warc.gz
com,example)/ 20180216200300 example.warc.gz
com,example)/some/path 20180112200243 example.warc.gz
com,example)/some/path 20180216200300 example.warc.gz"""
        assert key_ts_res(res) == expected
        assert errs == {}
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Canned XML body for an exact query on http://example.com/ : two captures
# (20180112200243 and 20180216200300), both in example.warc.gz.
URL_RESPONSE_1 = """
<wayback>
<results>
<result>
<compressedoffset>10</compressedoffset>
<mimetype>text/html</mimetype>
<file>example.warc.gz</file>
<redirecturl>-</redirecturl>
<urlkey>com,example)/</urlkey>
<digest>7NZ7K6ZTRC4SOJODXH3S4AGZV7QSBWLF</digest>
<httpresponsecode>200</httpresponsecode>
<robotflags>-</robotflags>
<url>http://example.com/</url>
<capturedate>20180112200243</capturedate>
</result>
<result>
<compressedoffset>29570</compressedoffset>
<mimetype>text/html</mimetype>
<file>example.warc.gz</file>
<redirecturl>-</redirecturl>
<urlkey>com,example)/</urlkey>
<digest>LCKPKJJU5VPEN6HUJZ6JUYRGTPFD7ZC3</digest>
<httpresponsecode>200</httpresponsecode>
<robotflags>-</robotflags>
<url>http://example.com/</url>
<capturedate>20180216200300</capturedate>
</result>
</results>
</wayback>
"""
|
||||||
|
|
||||||
|
# Canned XML body for an exact query on http://example.com/some/path : two
# captures (20180112200243 and 20180216200300), both in example.warc.gz.
URL_RESPONSE_2 = """
<wayback>
<results>
<result>
<compressedoffset>10</compressedoffset>
<mimetype>text/html</mimetype>
<file>example.warc.gz</file>
<redirecturl>-</redirecturl>
<urlkey>com,example)/some/path</urlkey>
<digest>7NZ7K6ZTRC4SOJODXH3S4AGZV7QSBWLF</digest>
<httpresponsecode>200</httpresponsecode>
<robotflags>-</robotflags>
<url>http://example.com/some/path</url>
<capturedate>20180112200243</capturedate>
</result>
<result>
<compressedoffset>29570</compressedoffset>
<mimetype>text/html</mimetype>
<file>example.warc.gz</file>
<redirecturl>-</redirecturl>
<urlkey>com,example)/some/path</urlkey>
<digest>LCKPKJJU5VPEN6HUJZ6JUYRGTPFD7ZC3</digest>
<httpresponsecode>200</httpresponsecode>
<robotflags>-</robotflags>
<url>http://example.com/some/path</url>
<capturedate>20180216200300</capturedate>
</result>
</results>
</wayback>
"""
|
||||||
|
|
||||||
|
# Canned XML body for a prefix query: one summary record per matching URL
# (no per-capture details), covering '/' and '/some/path' on example.com.
PREFIX_QUERY = """
<wayback>
<results>
<result>
<urlkey>com,example)/</urlkey>
<originalurl>http://example.com/</originalurl>
<numversions>2</numversions>
<numcaptures>2</numcaptures>
<firstcapturets>20180112200243</firstcapturets>
<lastcapturets>20180216200300</lastcapturets>
</result>
<result>
<urlkey>com,example)/some/path</urlkey>
<originalurl>http://example.com/some/path</originalurl>
<numversions>2</numversions>
<numcaptures>2</numcaptures>
<firstcapturets>20180112200243</firstcapturets>
<lastcapturets>20180216200300</lastcapturets>
</result>
</results>
</wayback>
"""
|
Loading…
x
Reference in New Issue
Block a user