1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 08:04:49 +01:00

zipnum: fix block count off-by-1 error in showNumPages query

This commit is contained in:
Ilya Kreymer 2015-03-25 20:43:59 -07:00
parent 72ddb54f82
commit 1cfe73c9db
3 changed files with 10 additions and 6 deletions

View File

@ -234,7 +234,7 @@ def cdx_collapse_time_status(cdx_iter, timelen=10):
last_token = None
for cdx in cdx_iter:
curr_token = (cdx[TIMESTAMP][:timelen], cdx[STATUSCODE])
curr_token = (cdx[TIMESTAMP][:timelen], cdx.get(STATUSCODE, ''))
# yield if curr_token differs from last_token, otherwise skip
if curr_token != last_token:

View File

@ -22,16 +22,19 @@ org,iana)/domains/root/db 20140126200928 http://www.iana.org/domains/root/db tex
org,iana)/domains/root/servers 20140126201227 http://www.iana.org/domains/root/servers text/html 200 AFW34N3S4NK2RJ6QWMVPB5E2AIUETAHU - - 3137 733840 iana.warc.gz
# Pages -- default page size
>>> zip_ops_test(url='http://iana.org/domains/example', matchType='exact', showNumPages=True)
{"blocks": 1, "pages": 1, "pageSize": 10}
>>> zip_ops_test(url='http://iana.org/domains/', matchType='domain', showNumPages=True)
{"blocks": 37, "pages": 4, "pageSize": 10}
{"blocks": 38, "pages": 4, "pageSize": 10}
# set page size
>>> zip_ops_test(url='http://iana.org/domains/', matchType='domain', pageSize=4, showNumPages=True)
{"blocks": 37, "pages": 10, "pageSize": 4}
{"blocks": 38, "pages": 10, "pageSize": 4}
# set page size -- alt domain query
>>> zip_ops_test(url='*.iana.org', pageSize=4, showNumPages=True)
{"blocks": 37, "pages": 10, "pageSize": 4}
{"blocks": 38, "pages": 10, "pageSize": 4}
# first page
>>> zip_ops_test(url='http://iana.org/domains/', matchType='domain', showPagedIndex=True, pageSize=4, page=0)
@ -145,7 +148,7 @@ def test_zip_prefix_load():
results = list(results)
assert len(results) == 1, results
assert json.loads(results[0]) == {"blocks": 37, "pages": 4, "pageSize": 10}
assert json.loads(results[0]) == {"blocks": 38, "pages": 4, "pageSize": 10}
# Test simple query

View File

@ -172,6 +172,7 @@ class ZipNumCluster(CDXSource):
try:
first_line = first_iter.next()
except StopIteration:
reader.close()
raise
first = IDXObject(first_line)
@ -188,7 +189,7 @@ class ZipNumCluster(CDXSource):
if query.page_count:
info = dict(pages=total_pages,
pageSize=pagesize,
blocks=diff)
blocks=diff + 1)
yield json.dumps(info)
reader.close()
return