diff --git a/pywb/warcserver/index/indexsource.py b/pywb/warcserver/index/indexsource.py index 84a54800..e5e3f7bb 100644 --- a/pywb/warcserver/index/indexsource.py +++ b/pywb/warcserver/index/indexsource.py @@ -314,6 +314,11 @@ class XmlQueryIndexSource(BaseIndexSource): cdx['digest'] = self.gettext(item, 'digest') cdx['offset'] = self.gettext(item, 'compressedoffset') cdx['filename'] = self.gettext(item, 'file') + + length = self.gettext(item, 'compressedendoffset') + if length: + cdx['length'] = length + return cdx def gettext(self, item, name): diff --git a/pywb/warcserver/index/test/test_xmlquery_indexsource.py b/pywb/warcserver/index/test/test_xmlquery_indexsource.py index 6861aff8..63f832ce 100644 --- a/pywb/warcserver/index/test/test_xmlquery_indexsource.py +++ b/pywb/warcserver/index/test/test_xmlquery_indexsource.py @@ -71,13 +71,16 @@ class TestXmlQueryIndexSource(BaseTestClass): @patch('pywb.warcserver.index.indexsource.requests.sessions.Session.get', mock_get) def test_exact_query(self): res, errs = self.do_query({'url': 'http://example.com/', 'limit': 100}) + reslist = list(res) expected = """\ com,example)/ 20180112200243 example.warc.gz com,example)/ 20180216200300 example.warc.gz""" - assert(key_ts_res(res) == expected) + assert(key_ts_res(reslist) == expected) assert(errs == {}) assert query_url == 'http://localhost:8080/path?q=limit%3A+100+type%3Aurlquery+url%3Ahttp%253A%252F%252Fexample.com%252F' + assert reslist[0]['length'] == '123' + assert 'length' not in reslist[1] @patch('pywb.warcserver.index.indexsource.requests.sessions.Session.get', mock_get) @@ -119,6 +122,7 @@ URL_RESPONSE_1 = """ 10 + 123 text/html example.warc.gz -