mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
cdx-server query & zipnum: fixes for the showNumPages query:
- if the query is contained in at most one secondary index block, the first line of the cdx must be read to determine whether there are any matches
- if there are no matches, don't throw a 404 exception; always return the json info with 0 pages
This commit is contained in:
parent
313a2efeac
commit
f3a066f58b
@ -33,9 +33,17 @@ org,iana)/domains/root/servers 20140126201227 http://www.iana.org/domains/root/s
|
|||||||
{"blocks": 38, "pages": 10, "pageSize": 4}
|
{"blocks": 38, "pages": 10, "pageSize": 4}
|
||||||
|
|
||||||
# set page size -- alt domain query
|
# set page size -- alt domain query
|
||||||
>>> zip_ops_test(url='*.iana.org', pageSize=4, showNumPages=True)
|
>>> zip_ops_test(url='*.iana.org', pageSize='4', showNumPages=True)
|
||||||
{"blocks": 38, "pages": 10, "pageSize": 4}
|
{"blocks": 38, "pages": 10, "pageSize": 4}
|
||||||
|
|
||||||
|
# page size for non-existent, but secondary index match
|
||||||
|
>>> zip_ops_test(url='iana.org/domains/int/blah', pageSize=4, showNumPages=True)
|
||||||
|
{"blocks": 0, "pages": 0, "pageSize": 4}
|
||||||
|
|
||||||
|
# page size for non-existent, no secondary index match
|
||||||
|
>>> zip_ops_test(url='*.foo.bar', showNumPages=True)
|
||||||
|
{"blocks": 0, "pages": 0, "pageSize": 10}
|
||||||
|
|
||||||
# first page
|
# first page
|
||||||
>>> zip_ops_test(url='http://iana.org/domains/', matchType='domain', showPagedIndex=True, pageSize=4, page=0)
|
>>> zip_ops_test(url='http://iana.org/domains/', matchType='domain', showPagedIndex=True, pageSize=4, page=0)
|
||||||
com,example)/ 20140127171200 zipnum 0 276 1
|
com,example)/ 20140127171200 zipnum 0 276 1
|
||||||
|
@ -152,7 +152,18 @@ class ZipNumCluster(CDXSource):
|
|||||||
return gen_cdx()
|
return gen_cdx()
|
||||||
|
|
||||||
|
|
||||||
|
def _page_info(self, pages, pagesize, blocks):
|
||||||
|
info = dict(pages=pages,
|
||||||
|
pageSize=pagesize,
|
||||||
|
blocks=blocks)
|
||||||
|
return json.dumps(info)
|
||||||
|
|
||||||
def compute_page_range(self, reader, query):
|
def compute_page_range(self, reader, query):
|
||||||
|
pagesize = query.page_size
|
||||||
|
if not pagesize:
|
||||||
|
pagesize = self.max_blocks
|
||||||
|
else:
|
||||||
|
pagesize = int(pagesize)
|
||||||
|
|
||||||
# Get End
|
# Get End
|
||||||
end_iter = search(reader, query.end_key, prev_size=1)
|
end_iter = search(reader, query.end_key, prev_size=1)
|
||||||
@ -163,7 +174,6 @@ class ZipNumCluster(CDXSource):
|
|||||||
end_line = read_last_line(reader)
|
end_line = read_last_line(reader)
|
||||||
|
|
||||||
# Get Start
|
# Get Start
|
||||||
|
|
||||||
first_iter = iter_range(reader,
|
first_iter = iter_range(reader,
|
||||||
query.key,
|
query.key,
|
||||||
query.end_key,
|
query.end_key,
|
||||||
@ -173,24 +183,33 @@ class ZipNumCluster(CDXSource):
|
|||||||
first_line = first_iter.next()
|
first_line = first_iter.next()
|
||||||
except StopIteration:
|
except StopIteration:
|
||||||
reader.close()
|
reader.close()
|
||||||
raise
|
if query.page_count:
|
||||||
|
yield self._page_info(0, pagesize, 0)
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
|
||||||
first = IDXObject(first_line)
|
first = IDXObject(first_line)
|
||||||
|
|
||||||
end = IDXObject(end_line)
|
end = IDXObject(end_line)
|
||||||
diff = end['lineno'] - first['lineno']
|
diff = end['lineno'] - first['lineno']
|
||||||
|
|
||||||
pagesize = query.page_size
|
|
||||||
if not pagesize:
|
|
||||||
pagesize = self.max_blocks
|
|
||||||
|
|
||||||
total_pages = diff / pagesize + 1
|
total_pages = diff / pagesize + 1
|
||||||
|
|
||||||
if query.page_count:
|
if query.page_count:
|
||||||
info = dict(pages=total_pages,
|
blocks = diff + 1
|
||||||
pageSize=pagesize,
|
# same line, so actually need to look at cdx
|
||||||
blocks=diff + 1)
|
# to determine if it exists
|
||||||
yield json.dumps(info)
|
if total_pages == 1:
|
||||||
|
try:
|
||||||
|
block_cdx_iter = self.idx_to_cdx([first_line], query)
|
||||||
|
block = block_cdx_iter.next()
|
||||||
|
cdx = block.next()
|
||||||
|
except StopIteration:
|
||||||
|
total_pages = 0
|
||||||
|
blocks = 0
|
||||||
|
|
||||||
|
yield self._page_info(total_pages, pagesize, blocks)
|
||||||
reader.close()
|
reader.close()
|
||||||
return
|
return
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user