1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

indexsource: add tests for XmlQueryIndexSource, add missing init_from_config() (ukwa/ukwa-pywb#2)

This commit is contained in:
Ilya Kreymer 2018-02-05 13:35:54 -08:00 committed by John Berlin
parent 94eb4ad206
commit ec88e962b3
No known key found for this signature in database
GPG Key ID: 6EF5E4B442011B02
2 changed files with 170 additions and 0 deletions

View File

@ -299,6 +299,14 @@ class XmlQueryIndexSource(BaseIndexSource):
return cls(value[9:])
@classmethod
def init_from_config(cls, config):
    """Create an instance from a config dict, or decline other types.

    :param config: mapping that must contain a ``'type'`` key and, when
        that type is ``'xmlquery'``, an ``'api_url'`` key.
    :return: ``cls(config['api_url'])`` when ``config['type']`` equals
        ``'xmlquery'``; ``None`` for any other type.
    :raises KeyError: if a key that is read is missing from ``config``.
    """
    handles_type = config['type'] == 'xmlquery'
    if handles_type:
        return cls(config['api_url'])

    # Any other type is not ours to build.
    return None
# =============================================================================
class LiveIndexSource(BaseIndexSource):
def __init__(self):

View File

@ -0,0 +1,162 @@
from pywb.warcserver.test.testutils import BaseTestClass, key_ts_res
from pywb.warcserver.index.indexsource import XmlQueryIndexSource
from pywb.warcserver.index.aggregator import SimpleAggregator
from mock import patch
# ============================================================================
def mock_get(self, url):
    """Stand-in for a session ``get`` call that serves canned XML bodies.

    The response body is picked purely from substrings of ``url``:

    * ``type:urlquery`` plus the encoded ``example.com/some/path`` URL
      yields ``URL_RESPONSE_2``;
    * ``type:urlquery`` plus the encoded ``example.com/`` URL yields
      ``URL_RESPONSE_1``;
    * ``type:prefixquery`` yields ``PREFIX_QUERY``;
    * anything else yields an empty string.

    :param self: unused; present so the function can replace a method.
    :param url: the request URL to inspect.
    :return: an object exposing ``text`` and ``raise_for_status()``.
    """
    if 'type:urlquery' in url:
        # The longer pattern is tested first: the '/' pattern is a
        # substring of it and would otherwise match both URLs.
        if 'http%3A%2F%2Fexample.com%2Fsome%2Fpath' in url:
            payload = URL_RESPONSE_2
        elif 'http%3A%2F%2Fexample.com%2F' in url:
            payload = URL_RESPONSE_1
        else:
            payload = ''
    elif 'type:prefixquery' in url:
        payload = PREFIX_QUERY
    else:
        payload = ''

    class MockResponse(object):
        """Minimal response object holding a fixed body."""

        def __init__(self, string):
            self.string = string

        @property
        def text(self):
            """Return the stored body unchanged."""
            return self.string

        def raise_for_status(self):
            """Do nothing: the canned response never signals an error."""
            return None

    return MockResponse(payload)
# ============================================================================
class TestXmlQueryIndexSource(BaseTestClass):
    """Query tests for XmlQueryIndexSource against canned XML responses.

    Each test patches the session ``get`` used by the index source with
    ``mock_get`` and compares the ``key_ts_res`` rendering of the results
    with the expected text.
    """

    @classmethod
    def setup_class(cls):
        """Defer to the base class setup."""
        super(TestXmlQueryIndexSource, cls).setup_class()

    def do_query(self, params):
        """Run ``params`` through an aggregator wrapping one XML source.

        Returns whatever the aggregator call returns; the tests unpack it
        as a ``(results, errors)`` pair.
        """
        source = XmlQueryIndexSource('http://localhost:8080/path')
        aggregator = SimpleAggregator({'source': source})
        return aggregator(params)

    @patch('pywb.warcserver.index.indexsource.requests.sessions.Session.get', mock_get)
    def test_exact_query(self):
        """Exact lookup of the root URL lists both of its captures."""
        params = {'url': 'http://example.com/'}
        results, errors = self.do_query(params)

        expected = """\
com,example)/ 20180112200243 example.warc.gz
com,example)/ 20180216200300 example.warc.gz"""

        assert key_ts_res(results) == expected
        assert errors == {}

    @patch('pywb.warcserver.index.indexsource.requests.sessions.Session.get', mock_get)
    def test_exact_query_2(self):
        """Exact lookup of a deeper path lists both of its captures."""
        params = {'url': 'http://example.com/some/path'}
        results, errors = self.do_query(params)

        expected = """\
com,example)/some/path 20180112200243 example.warc.gz
com,example)/some/path 20180216200300 example.warc.gz"""

        assert key_ts_res(results) == expected
        assert errors == {}

    @patch('pywb.warcserver.index.indexsource.requests.sessions.Session.get', mock_get)
    def test_prefix_query(self):
        """Prefix lookup lists the captures of every matching URL key."""
        params = {'url': 'http://example.com/', 'matchType': 'prefix'}
        results, errors = self.do_query(params)

        expected = """\
com,example)/ 20180112200243 example.warc.gz
com,example)/ 20180216200300 example.warc.gz
com,example)/some/path 20180112200243 example.warc.gz
com,example)/some/path 20180216200300 example.warc.gz"""

        assert key_ts_res(results) == expected
        assert errors == {}
# ============================================================================
# Canned XML body that mock_get serves for a 'type:urlquery' request whose
# URL contains the encoded form of http://example.com/. It holds two
# <result> records for the url key com,example)/ (capture dates
# 20180112200243 and 20180216200300), both in example.warc.gz.
# The first record's <url> previously read "example.ccom"; it now matches
# the host used by its <urlkey> and by the second record.
URL_RESPONSE_1 = """
<wayback>
<results>
<result>
<compressedoffset>10</compressedoffset>
<mimetype>text/html</mimetype>
<file>example.warc.gz</file>
<redirecturl>-</redirecturl>
<urlkey>com,example)/</urlkey>
<digest>7NZ7K6ZTRC4SOJODXH3S4AGZV7QSBWLF</digest>
<httpresponsecode>200</httpresponsecode>
<robotflags>-</robotflags>
<url>http://example.com/</url>
<capturedate>20180112200243</capturedate>
</result>
<result>
<compressedoffset>29570</compressedoffset>
<mimetype>text/html</mimetype>
<file>example.warc.gz</file>
<redirecturl>-</redirecturl>
<urlkey>com,example)/</urlkey>
<digest>LCKPKJJU5VPEN6HUJZ6JUYRGTPFD7ZC3</digest>
<httpresponsecode>200</httpresponsecode>
<robotflags>-</robotflags>
<url>http://example.com/</url>
<capturedate>20180216200300</capturedate>
</result>
</results>
</wayback>
"""
# Canned XML body that mock_get serves for a 'type:urlquery' request whose
# URL contains the encoded form of http://example.com/some/path. It holds
# two <result> records for the url key com,example)/some/path (capture
# dates 20180112200243 and 20180216200300), both in example.warc.gz.
URL_RESPONSE_2 = """
<wayback>
<results>
<result>
<compressedoffset>10</compressedoffset>
<mimetype>text/html</mimetype>
<file>example.warc.gz</file>
<redirecturl>-</redirecturl>
<urlkey>com,example)/some/path</urlkey>
<digest>7NZ7K6ZTRC4SOJODXH3S4AGZV7QSBWLF</digest>
<httpresponsecode>200</httpresponsecode>
<robotflags>-</robotflags>
<url>http://example.com/some/path</url>
<capturedate>20180112200243</capturedate>
</result>
<result>
<compressedoffset>29570</compressedoffset>
<mimetype>text/html</mimetype>
<file>example.warc.gz</file>
<redirecturl>-</redirecturl>
<urlkey>com,example)/some/path</urlkey>
<digest>LCKPKJJU5VPEN6HUJZ6JUYRGTPFD7ZC3</digest>
<httpresponsecode>200</httpresponsecode>
<robotflags>-</robotflags>
<url>http://example.com/some/path</url>
<capturedate>20180216200300</capturedate>
</result>
</results>
</wayback>
"""
# Canned XML body that mock_get serves for a 'type:prefixquery' request.
# Unlike the urlquery fixtures it carries one summary <result> per url key
# (com,example)/ and com,example)/some/path), each giving the original
# URL, version/capture counts and first/last capture timestamps rather
# than individual captures.
PREFIX_QUERY = """
<wayback>
<results>
<result>
<urlkey>com,example)/</urlkey>
<originalurl>http://example.com/</originalurl>
<numversions>2</numversions>
<numcaptures>2</numcaptures>
<firstcapturets>20180112200243</firstcapturets>
<lastcapturets>20180216200300</lastcapturets>
</result>
<result>
<urlkey>com,example)/some/path</urlkey>
<originalurl>http://example.com/some/path</originalurl>
<numversions>2</numversions>
<numcaptures>2</numcaptures>
<firstcapturets>20180112200243</firstcapturets>
<lastcapturets>20180216200300</lastcapturets>
</result>
</results>
</wayback>
"""