From a51b2936f31b7f383d4420a26655c4f8560ea2c7 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Fri, 29 May 2015 11:42:30 -0700 Subject: [PATCH] zipnum: fix bug with urls in last block not being accessible. when iter_range() fails, if check to see if last_line == end_line, and if so, check if start_line should also be end_line #112 support non-linenumbered idx files w/o pagination queries add new zipnum-sample to test cdx lines in last block (previous sample had only one line in last block except the first) --- pywb/cdx/test/test_zipnum.py | 60 +++++++++++++++------ pywb/cdx/zipnum.py | 42 ++++++++++----- sample_archive/zipcdx/zipnum-sample.cdx.gz | Bin 9768 -> 9797 bytes sample_archive/zipcdx/zipnum-sample.idx | 60 ++++++++++----------- 4 files changed, 102 insertions(+), 60 deletions(-) diff --git a/pywb/cdx/test/test_zipnum.py b/pywb/cdx/test/test_zipnum.py index b6a398e7..5c671c83 100644 --- a/pywb/cdx/test/test_zipnum.py +++ b/pywb/cdx/test/test_zipnum.py @@ -6,9 +6,10 @@ org,iana)/ 20140127171238 http://www.iana.org/ warc/revisit - OSSAPWJ23L56IYVRW3 # test idx index (tabs replacad with 4 spaces) >>> zip_ops_test(url='http://iana.org/domains/', matchType='prefix', showPagedIndex=True) -org,iana)/dnssec 20140126201307 zipnum 8511 373 35 -org,iana)/domains/int 20140126201239 zipnum 8884 353 36 -org,iana)/domains/root/servers 20140126201227 zipnum 9237 386 37 +org,iana)/dnssec 20140126201307 zipnum 8517 373 35 +org,iana)/domains/int 20140126201239 zipnum 8890 355 36 +org,iana)/domains/root/servers 20140126201227 zipnum 9245 386 37 + >>> zip_ops_test(url='http://iana.org/domains/*') org,iana)/domains/arpa 20140126201248 http://www.iana.org/domains/arpa text/html 200 QOFZZRN6JIKAL2JRL6ZC2VVG42SPKGHT - - 2939 759039 iana.warc.gz @@ -46,29 +47,30 @@ org,iana)/domains/root/servers 20140126201227 http://www.iana.org/domains/root/s # first page >>> zip_ops_test(url='http://iana.org/domains/', matchType='domain', showPagedIndex=True, pageSize=4, page=0) -com,example)/ 20140127171200 zipnum 0 276 1 -org,iana)/ 20140127171238 zipnum 276 328 2 -org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126201055 zipnum 604 312 3 -org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126200718 zipnum 916 235 4 +com,example)/ 20140127171200 zipnum 0 275 1 +org,iana)/ 20140127171238 zipnum 275 328 2 +org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126201055 zipnum 603 312 3 +org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126200718 zipnum 915 235 4 + # first page -- simplified query >>> zip_ops_test(url='*.iana.org/path_part_ignored/', showPagedIndex=True, pageSize=4) -com,example)/ 20140127171200 zipnum 0 276 1 -org,iana)/ 20140127171238 zipnum 276 328 2 -org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126201055 zipnum 604 312 3 -org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126200718 zipnum 916 235 4 +com,example)/ 20140127171200 zipnum 0 275 1 +org,iana)/ 20140127171238 zipnum 275 328 2 +org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126201055 zipnum 603 312 3 +org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126200718 zipnum 915 235 4 # next page + json >>> zip_ops_test(url='http://iana.org/domains/', matchType='domain', output='json', showPagedIndex=True, pageSize=4, page=1) -{"urlkey": "org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126200912", "part": "zipnum", "offset": 1151, "length": 235, "lineno": 5} -{"urlkey": "org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126201240", "part": "zipnum", "offset": 1386, "length": 306, "lineno": 6} +{"urlkey": "org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126200912", "part": "zipnum", "offset": 1150, "length": 235, "lineno": 5} +{"urlkey": "org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126201240", "part": "zipnum", "offset": 1385, "length": 307, "lineno": 6} {"urlkey": "org,iana)/_css/2013.1/fonts/opensans-regular.ttf 20140126200654", "part": "zipnum", "offset": 1692, "length": 235, "lineno": 7} {"urlkey": "org,iana)/_css/2013.1/fonts/opensans-regular.ttf 20140126200816", "part": "zipnum", "offset": 1927, "length": 231, "lineno": 8} # last page >>> zip_ops_test(url='http://iana.org/domains/', matchType='domain', showPagedIndex=True, pageSize=4, page=9) -org,iana)/domains/root/servers 20140126201227 zipnum 9237 386 37 -org,iana)/time-zones 20140126200737 zipnum 9623 145 38 +org,iana)/domains/root/servers 20140126201227 zipnum 9245 386 37 +org,iana)/time-zones 20140126200737 zipnum 9631 166 38 # last page cdx >>> zip_ops_test(url='http://iana.org/domains/', matchType='domain', pageSize=4, page=9) @@ -78,7 +80,8 @@ org,iana)/performance/ietf-draft-status 20140126200815 http://www.iana.org/perfo org,iana)/performance/ietf-statistics 20140126200804 http://www.iana.org/performance/ietf-statistics text/html 200 XOFML5WNBQMTSULLIIPLSP6U5MX33HN6 - - 3712 582987 iana.warc.gz org,iana)/protocols 20140126200715 http://www.iana.org/protocols text/html 200 IRUJZEUAXOUUG224ZMI4VWTUPJX6XJTT - - 63663 496277 iana.warc.gz org,iana)/time-zones 20140126200737 http://www.iana.org/time-zones text/html 200 4Z27MYWOSXY2XDRAJRW7WRMT56LXDD4R - - 2449 569675 iana.warc.gz - +org,iana)/time-zones/x 20140126200737 http://www.iana.org/time-zones/X text/html 200 4Z27MYWOSXY2XDRAJRW7WRMT56LXDD4R - - 2449 569675 iana.warc.gz +org,iana)/time-zones/y 20140126200737 http://www.iana.org/time-zones/Y text/html 200 4Z27MYWOSXY2XDRAJRW7WRMT56LXDD4R - - 2449 569675 iana.warc.gz # last page reverse -- not yet supported #>>> zip_ops_test(url='http://iana.org/domains/', matchType='domain', reverse=True, showPagedIndex=True, pageSize=4, page=9) @@ -88,6 +91,8 @@ org,iana)/time-zones 20140126200737 http://www.iana.org/time-zones text/html 200 # last page reverse CDX >>> zip_ops_test(url='http://iana.org/domains/', matchType='domain', reverse=True, pageSize=4, page=9) +org,iana)/time-zones/y 20140126200737 http://www.iana.org/time-zones/Y text/html 200 4Z27MYWOSXY2XDRAJRW7WRMT56LXDD4R - - 2449 569675 iana.warc.gz +org,iana)/time-zones/x 20140126200737 http://www.iana.org/time-zones/X text/html 200 4Z27MYWOSXY2XDRAJRW7WRMT56LXDD4R - - 2449 569675 iana.warc.gz org,iana)/time-zones 20140126200737 http://www.iana.org/time-zones text/html 200 4Z27MYWOSXY2XDRAJRW7WRMT56LXDD4R - - 2449 569675 iana.warc.gz org,iana)/protocols 20140126200715 http://www.iana.org/protocols text/html 200 IRUJZEUAXOUUG224ZMI4VWTUPJX6XJTT - - 63663 496277 iana.warc.gz org,iana)/performance/ietf-statistics 20140126200804 http://www.iana.org/performance/ietf-statistics text/html 200 XOFML5WNBQMTSULLIIPLSP6U5MX33HN6 - - 3712 582987 iana.warc.gz @@ -95,6 +100,20 @@ org,iana)/performance/ietf-draft-status 20140126200815 http://www.iana.org/perfo org,iana)/numbers 20140126200651 http://www.iana.org/numbers text/html 200 HWT5UZKURYLW5QNWVZCWFCANGEMU7XWK - - 3498 321385 iana.warc.gz org,iana)/domains/root/servers 20140126201227 http://www.iana.org/domains/root/servers text/html 200 AFW34N3S4NK2RJ6QWMVPB5E2AIUETAHU - - 3137 733840 iana.warc.gz +# last url prefix +>>> zip_ops_test(url='http://iana.org/time-zones*') +org,iana)/time-zones 20140126200737 http://www.iana.org/time-zones text/html 200 4Z27MYWOSXY2XDRAJRW7WRMT56LXDD4R - - 2449 569675 iana.warc.gz +org,iana)/time-zones/x 20140126200737 http://www.iana.org/time-zones/X text/html 200 4Z27MYWOSXY2XDRAJRW7WRMT56LXDD4R - - 2449 569675 iana.warc.gz +org,iana)/time-zones/y 20140126200737 http://www.iana.org/time-zones/Y text/html 200 4Z27MYWOSXY2XDRAJRW7WRMT56LXDD4R - - 2449 569675 iana.warc.gz + +# last url prefix w/ slash +>>> zip_ops_test(url='http://iana.org/time-zones/*') +org,iana)/time-zones/x 20140126200737 http://www.iana.org/time-zones/X text/html 200 4Z27MYWOSXY2XDRAJRW7WRMT56LXDD4R - - 2449 569675 iana.warc.gz +org,iana)/time-zones/y 20140126200737 http://www.iana.org/time-zones/Y text/html 200 4Z27MYWOSXY2XDRAJRW7WRMT56LXDD4R - - 2449 569675 iana.warc.gz + +# last url exact +>>> zip_ops_test(url='http://iana.org/time-zones/Y') +org,iana)/time-zones/y 20140126200737 http://www.iana.org/time-zones/Y text/html 200 4Z27MYWOSXY2XDRAJRW7WRMT56LXDD4R - - 2449 569675 iana.warc.gz # invalid page >>> zip_ops_test(url='http://iana.org/domains/', matchType='domain', showPagedIndex=True, pageSize=4, page=10) @@ -110,7 +129,16 @@ NotFoundException: No Captures found for: http://aaa.aaa/ Traceback (most recent call last): NotFoundException: No Captures found for: http://aaa.aaa/ (domain query) +# list last index line, as we don't know if there are any captures at end >>> zip_ops_test(url='http://aaa.zz/', matchType='domain', showPagedIndex=True) +org,iana)/time-zones 20140126200737 zipnum 9631 166 38 + +# read cdx to find 0 pages +>>> zip_ops_test(url='http://aaa.zz/', matchType='domain', showNumPages=True) +{"blocks": 0, "pages": 0, "pageSize": 10} + +# read cdx to find no captures +>>> zip_ops_test(url='http://aaa.zz/', matchType='domain') Traceback (most recent call last): NotFoundException: No Captures found for: http://aaa.zz/ (domain query) diff --git a/pywb/cdx/zipnum.py b/pywb/cdx/zipnum.py index a1bf4c18..a81f359f 100644 --- a/pywb/cdx/zipnum.py +++ b/pywb/cdx/zipnum.py @@ -22,6 +22,7 @@ class ZipBlocks: self.length = length self.count = count + #================================================================= #TODO: see if these could be combined with warc path resolvers @@ -134,8 +135,10 @@ class ZipNumCluster(CDXSource): def load_cdx(self, query): self.loc_resolver.load_loc() + return self._do_load_cdx(self.summary, query) - reader = open(self.summary, 'rb') + def _do_load_cdx(self, filename, query): + reader = open(filename, 'rb') idx_iter = self.compute_page_range(reader, query) @@ -165,13 +168,16 @@ class ZipNumCluster(CDXSource): else: pagesize = int(pagesize) + last_line = None + # Get End end_iter = search(reader, query.end_key, prev_size=1) try: end_line = end_iter.next() except StopIteration: - end_line = read_last_line(reader) + last_line = read_last_line(reader) + end_line = last_line # Get Start first_iter = iter_range(reader, @@ -182,34 +188,40 @@ class ZipNumCluster(CDXSource): try: first_line = first_iter.next() except StopIteration: - reader.close() - if query.page_count: - yield self._page_info(0, pagesize, 0) - return + if end_line == last_line and query.key >= last_line: + first_line = last_line else: - raise + reader.close() + if query.page_count: + yield self._page_info(0, pagesize, 0) + return + else: + raise first = IDXObject(first_line) end = IDXObject(end_line) - diff = end['lineno'] - first['lineno'] - total_pages = diff / pagesize + 1 + try: + blocks = end['lineno'] - first['lineno'] + total_pages = blocks / pagesize + 1 + except: + blocks = -1 + total_pages = 1 if query.page_count: - blocks = diff + 1 # same line, so actually need to look at cdx # to determine if it exists - if total_pages == 1: + if blocks == 0: try: block_cdx_iter = self.idx_to_cdx([first_line], query) block = block_cdx_iter.next() cdx = block.next() except StopIteration: total_pages = 0 - blocks = 0 + blocks = -1 - yield self._page_info(total_pages, pagesize, blocks) + yield self._page_info(total_pages, pagesize, blocks + 1) reader.close() return @@ -220,7 +232,9 @@ class ZipNumCluster(CDXSource): raise CDXException(msg.format(curr_page, total_pages - 1)) startline = curr_page * pagesize - endline = min(startline + pagesize - 1, diff) + endline = startline + pagesize - 1 + if blocks >= 0: + endline = min(endline, blocks) if curr_page == 0: yield first_line diff --git a/sample_archive/zipcdx/zipnum-sample.cdx.gz b/sample_archive/zipcdx/zipnum-sample.cdx.gz index 8687b97a4f9db618b9b3defac200e2ffb89e865b..540f7a6699c53d9f74b09b8c10dd198ebfecb60c 100644 GIT binary patch delta 4658 zcmZ9PcQ_l|`^IDMCPu8<#MV$d)K=7}y{WDCRT~P3ITxYh4+2vQ+hqfgMAp?CHg_#Cz zvcwl=6Fc5{Tz`vb)aW^>k6t55ZBOZ(E8Bs8TOg;Y++xrf^S*rg3ut139(of#cBj{t za7ZpJqOytII};Xv(4KO-;-YE*Y2P(`+qr{TwZtlGQuPA3)9934H_of?&YCzZ-g%&T ztoq!ZenO_Vr}tP{@i$Xp^REEDKZK@>2dagN0>e3o^Ea6%MCx-=4PcdOjUsd|Np$sB z3Re^E;p(Tf%`-P|F`KvkTj{5qvj-(YV1o-0SdP*KyUYh)zcfMs;QvEu<{*$H03+Z8 zMe-tI_lWS28zCfs9oFGstJ=_*1ay4l*9^^)_?HX zk$-9%6&_zx&$`&8dub~~LeZD@`k#j_)~Ykd=8fi7P$x`V>pEaAl$O7%azJy_?BIyC zuM78`s2|SQbW0*Bk$XiD^fPG>Ra3l;d+jOJpu(c;YM;eF`1Qlcja>4U=;!98uupk| z>+0sYv{Ydnr1PbYpeRRdv?KNx)$z0F9}|SchRH%927H^v4j@DKNYzb4Jp%`ad&fYQ z6M}~X$Gs8QfRKWy?K9u;?Q|&D#&lZnNLPj>*5^@v^4`N1L(@DjU60YFe5>f5NTs3- znMygkh>6#q4r=7fZH(=X^64{@2ULA3)TEnhPY`C)7~3P>#m_Lo znB>^DU<8jbrX^5}@%LGq@0}Qieb$pM#f&Va{A~|J+P5oXvrI#eciU^bnqtcYEkv=& zY}}*_J+*bXAffznTDwPCO26p5jp2yf>yhe?1fhaZcgK!(j%PDF4K2R}ts$_Tg~faF zp`$!f-^Ung-29^-hW_xy!8Fd1_(%_#64TkG3iDo+vGeX9AKSJ;0+Op;wKO8~-d6!I{1X zYwvs|_n6WNr%4>4{iB&gKss+$U;~NC;(nFWUe_eR?Bh}8`K)+0$Fv~sH{!55z3_#_ zCCIO2MeD7|V0dr^Dq=cmYtKmpDS)Qib8?|J^4~OgD5tqitKEA`7+xGmSMET~OoRG; zgM*g7M(M-wx{A~ULEy_)mLCQ}zo2fuEG=i33M;s)znsWVpr;%q_6xLNaNhy{NLEu# z0Ew~6wSIS<2*-vTIy$$CY{+9KzO90>Rh8D{4(PWNi+kKUS$U$yDA75y5}D)4nsX^^ z{+D#(Vpn3}RFJgdS5B6i4x0VP;+lVQlU43k#@!H@EU!xl5#RHAGUt09;S)L@3IV8-r_x%Hv|p?Njee(9b&E@|7FI^R zcKQuOmO~w_dBoLkP|WMP7wRxXwEX?Vn}V84vs&aJ=uS@15$Bi)iV0f-y`6zL26@I;S^g={y5yHr2`aJOUK2wQ~p;kG*B8g!HJbX*#u`|9^flu7P zf&>i6ba077#T@dm3w<>LEcG8>9eCqK;d8bb{tpi&QOPJ&1jS$6CS#;2FK&A5Y#LjfJZ(VZLm7|i zWQEdg8<^ppEg~U~&Up=aqb`#usDO!8q)*d*OdPa)I)h8kj{38Vh_1WE-k8rh>#0eC zIBg_^I=a2B){pBDJ^4U;ZEU}L-p;KZKE3mNE|*b`E4OaefmVBeLo8=B44H~eS)xyJ zR_cDeI2Yc%1(+nxC=fE}Js#-oUnpW)=nm*~^k>OX3yh!af3M9Rkk7qkX1OW3m2K&y zMwW~vGUfqxbYa7TVis8s7_4qYtoh^=lCJGMll68RaCJ(_c7Aa^aq%;;Y?{^ZU}S<6 z_wm3*(O_btq2eS@C|m==AOaMx%euUxdSb-aW#ECAU1#Q~XTSJ=(?%qv9aU>9bWcp$ zZp)b7`DuZDjAu-BSyHO#)@()nmM!{I97kVI!4CGt3)X{us?zprwtExh2~&%8vdgth zjta374-+j;TwzX#9eX5UkAKSukC`i-~VL(dBGi7dzuuOzJs( zV=&yj)XL{!;A&hxD5cYU_;82b+T}g5k)RA6rQw8DYV=E+3Og_s=YAgMsVA{txx(9c zp-^mdr04a*L5?y@lB8^lGD&sPjOs#MBH?DB>c;7f{f~{$w|bFn*NnBN2JP9BuW_}r z6ui$`K+#YE;Gnb6c*U=)Fnd18RQ=9guQ1RU!DfCfD_l{b0gj?G0AF*~P;&+v*X!w= zez#Nx^v@5`I^m^-j9~2nKlvgHc+++0fKMLZ<%d(j;>b{Q)HrIO$>JVQbNpMR*E*;y z6_fgc)OTHFgL2G}`+WlU>T;Vk-E7EP1->!ZO{;75ej%jo^H_}H+f^lRWe}CS@N%|>6(^PujR*NN23fuSV_VDgh-m%$Vz6IE>z+x9f+Fs;z zF8CEmv-g2hE@_S~p&WuCS1 zw$53DaF(*sXGV~1Au_q{N8WMLC87JTbI4zo*Z#A--13w*rj|58-d$-@NDnr$<-A&f z=_rc{$oJ1Hf+l@Jbi}!FvoN7iwp?sg`%)z>bzUb7h`zFokPz?lc|L5-gm6Poh=o&E z&vgIs{b}oZ0ULN51(Ai;?LjU$=h(#cM!wJ!d=tT>F^fu!hAz z5D&%A>iWf~fXIG4S01#_H(esbq;wj^X{x9cj=dH(qJLsUr>`8@{(*LmxcSwm3HZtQ z;bHoCddMe{jqJgr;Hn*fdYM`rs)jhQq3WQ2&%|hQH|S4&eYf>y025Wwu%8+6ajF z-FMC7EI7R+Z@8_-VsHVF=p6h>=gm-rBH@E8@g3I90{QD1i&IPnBrQ0d##X zX1Rct!rr>i@--y$oS-BFLx2j1VM-!*lwklH8s2B%%Xc48aHm)ls=e@jl|yF1?!9XA z2fdKWBLrEh3Uxt@W5IqdxpkTM!*aA&_4)|#_dFX*`BR6ipd{q5fBp=C4K%b9{d?W? z`yX}kp9VG}XC1ODvc@6-L%n<;w7fHgtFK!|oB(Q z!i&za75W(TiYaE_+(u@_v-$a)3??ngHy2Hy_g&?c%UVc1Ft2R`NA_w-b*?VrtKH3v z$ARy!k{B3*A&$J?ssIHaT|Jw9n%c|I-Luxs_|N~vmL($9RGflaYdxtJhIt63WDiB81Nx zbcMk)1}>&#Y8CupQjU&tX`2$Gd^xU$Uo`J$GAH#ZkLf|2Rsk=s?*56j!uGtB-U*xC ziJD`R+~6GcxL1-z(N67@^LzwZI^H0t4j}>@j&=cvBrZ>n@k5>-_3aa9AeoNr?pr~M z@`2z_%VH4Dj~42ovDOQ8rrAt>Kab94ke)wH!I-TX>ro%0@1D9km9=P7-1{nm7Itri zj6(i6>JHCNcw5ca<1dP@@{E0acZ09q)mn5$DoB6n!BNJUNDX=r3lPB=N4@8H2u{^G zTwWH%4()N1N%VjM8)@!+8&(s`&z~Z@QeTC$w?WIt$_GAGUfWA^M-3f?iDOy2p&duL-4(NQ0#gK)9g^MOJe)Et;+GexfS^ahs> zwEoJRf$Om#XeMY%V@NBuU?)}-;~-eVKyYX@qW76eHqDWa6Yo_)zJt8vIBAsA>W+~j b+tZp}*U9FC31#LP5CTBhpRp|j0KoqML1Ga4 delta 4621 zcmZ9PcQ_l~+s3V^S%ipLd)HQ?)K)}nS}Tg8RmRK=@BC#o7)T&xlo7y!>t=gMZ ziB;4n%CGNV@0I@jxzBa(=YFp9oO60%r7$E{90ieB8l4%*^&1S_BSMM=wSOw+t+aXg zhbKb%qbFRysQy7w3nSP%->_!(_QkQ8$=!PY#le@1{R4_dh8Wze+Wz|c8N3IXupkHH zyBJ=k$JNrsjoPL2(R#VWg{R~QSUn)2BY(n~DX`PiS+&f5XL!A>&po1@?-o3Nu3#|hNZ#!;W z)PoI#v#AvAP;oH{x43oyFQOS=;JC%#$Y+^4)U_A*ZTYAY7yKdllSo2D^#7^UauUlD zgHn;i$U>;8Bq;%MCgCFn(VPKZ$qSEvB|z9Wq*Ea3OA@6nj7~2hi6IjuQD(w^BzuP> zz9c!1_4!B8i_tfVP^0hL4?(wOQCzIltk5R78ssqr{&IW(O(wJY(31K#Zwb}YBWLHH z6_nKt3&AcA<;CHPUBp|Ws!%CO+@6^%djaMAIQT4d3c8-4U}ILsIf`I+Si?g-$(B;_ zUOYP&OB^hMNbsa&2I}+s8$*i<#ZL-C3ohR~SA-HD{uJ;I-{KF>EsA9)S7$N6y51R) zFCPs=o8EII>gx=>`|VU&YLAV@#jkM<3H~X0TaBnF!*m-)!hCq2IB*Ocz}$25d_hRfD)t`FVSdOJJKi1`F|CAPlvbduCwRwt9HeG`-XI&I9EW5};V=GtYo1Ur9w;-M5>ge8A)k2(>FNTQyxYb#Pg zYT@wDTg9`1;tKcelL*eIysYs{kKB+Fb8EwE;$ijGnvYeX$9;AXs@UmFKi#!U^)P6FmEayeNo_>rZqD?_^B^g=X?;2Djl#R7fb*a<t^$yJQ!_hYs)*{ ziAV94TDoemQSwR~@TS{$lqk$uQ2(szCw?yvpNG4c2?@H7U+Km8wV$tyAD?+<3f`w4%?E^XFqhQVP9eAd|Pz7K5SWIpj2W&c<_X_=(?1v%JD&)*N| z-xK}(NU~R7bDYT_D9cG^KU-+zV{>!akzl)E6-qj?`Ie9J_*Qex*XmG*PI%&=gXvy( zOWh(TPw=u*bjk!CuT?$br|$dPDtoSr4Re9*yD)fX~(YN5o1K{i1&vh+=tNO-+_kR}Duw>U*5DuC+HyzY6Ohgq`m#iP!% zF!N>%x;9Bi&nCp`&}LPmk$NxBf>}G~1|3@4Nc45Uhq*6jC@-xKY^=dd9B16!d3=Xt zZ|#v`Ii)!$5f;+>!Gc29SHv)ccCZp*moaTrSbcgPq%{?@Xbq>oi>iOU|M$|tud_*9 zN5!j=8Zw>P3!vW2KtOAOaT1SK9JwLlC9BVpALq*%%iX~jUnP7LU-Z_wzhfrJ@Di%^ z3(^G^3C5WVe(_B$HE5MBzc+j=c)~~Q${I$+r$sw*rb90FG~%2QHl|w4J9=N#0ojpK z{{lv(E@9Jd5R%ARfajVBzsU#uMY_QSJ)M>l2DDvna2gD)&TA6b#6!r(wj}X`@h+tN`086`2c832Kqn6q2x>@o0R3aK4 zsryc#g{2p&S_ew+uDW_$1ByxH(FdxqtWS|{u~Ib0PaFdz7wNXN$T#}APkcVCJ*+ci z)`Zx8Z9)oHjidns|1sSI6e1}EV|Rg-px}(B(PZbe<%IQCGm4OMz7)YOFlK@9c<`+h zU8)j7MI2K^l#sZBgo(+q$)E!J%F`%)_dSHt^M29_Z9zxZDw54KIFbqZKN@du=hR8 zY$p<>>351J+zQLw23lSK|8&}|@gOA5U;z@H8`yYo2Gac$(1;H`Ar!Rzy=A+M=~1Uw z=$1QkOQ(fCLt08;4+L4Oz_nmt->VR=ijt#{Ug8`|Zbh@4@b%b;=<@cecgXdi?iCqs z6pjiWrJsCvOJ0ekISPt!Sq%Sr~j_07~^igk2jIr_v?@54`0FxMbeMMTN@826f_Jbi3G5fCh zNi3rF1GkY-1Km8o--Eh8id3&8pY>^eznCX7T2&k$`_59e_Kb1Bo|CI%OJ5fNHR--X zqoZUah%*kY!DEoEvc$6#%bS(;TN*(z2m;jtZ+G;!~uqqzy#?s#sKzgZ1)9=Vh4ZXbF zXj|ZqkW=Xk{xm`*0hpr+^l#%Fj&KHiqZTm6Fm-8$n_1aK$N)$uihxq~<6Jjnu&Wlz*yhdARevoex^# zp~X(2r@zQ-Qh2F}7I*T8;ybNWL4-NVafPc|8czb*ReG~ zT%&rjL+Zy+pCU@{hSjV13*Yo~b15%I!=obsX$4IB=xzRW4FxQ9q^8B2`zcd?2D!YZ z8NXU8Uk~SI;j&V#0L+bh&>=8O5X6q$di&UEdqm!01>c_+IQCJoMokYOZ2TM`c~o*k zZ@+EriPqS?0$Slr$6W@*4>}RiFo`+01mS2nA5^X4ZTCFy+z#A#7BK-aA1W z_pTH{{IzvlC}NQ10i|b+K(+`0!W!`~BQ4a7a|aVdO&9@%{8=IAUcp+rj6=6JWUdD! zYt1;Px#nAkmQyVP?}GG0c}#h_U=K(XXTBY;{wfpxany7@Jze#BTgL-#>+Wo}|3ZeW z(-%j+n!_l|TgQPaT8dq6_p9 zx14O2Yq*kk+pw`>4j8(n)Eu|l;hY(U)hdPL+;wDH)N>+&aH?6^C86~{sqUbop%F|b znRwHCyhbcxEHkX#kSS>fxwzSI8o%fbNCEw7m!3lbd0;*KdsJyP>Q4rO# z`2xXD@XaXvX}A;<`O3u~^peCZnW3Wi-fJp!QvNFnl#)C$HVbBzH00bKGAR(}IUuJm z8RWBcqWE;8a!-D;vUxIXxPETuWX@%pVU*TPA+(HrGwFTH-m&_`&FJnGh?47^#naaE zWhKNS;*q(r5GI`70TL%Go7rN=mLQ$s2sy!Ui<2@Zzx3<)82SwR{0nc10-eQ$vSsjs zT)P^gt!f_G5ENKBK6Yt#{AO#y@gqV3BTSvL31eYYdtw;#RRdQ;d#P2}FS3f4#DU!m zeC*Oxdo$0-+ySr5LEDsH2G^>U@V_1ogfx<=^jT*+m%$_RmtP_r6dzfqXOCa-osv%%}3eI zr^D@a=6>S39N1(q(uTa|%RSt{fU{*vK8Bd+#5s?pO0@itAY-f$KPQqKXpd_w!cV8G zYa0MShiizXHkBaL3OlrC`aO2)reQSPO<1JPWCPFlP$0AzWk5Z?9+?zYH+Au_8^Uq( z*sN4y67O&LBX7=fdv{~To}@vX{`DA3A2G-Arm+@QJtAQAxGB96e$%}#`gRYIX8&hF zNII9yovNx&xwzEEm&oN$78Zg6QwEbG?x1lHMw(7@ znJaRg_&Yz80sr}zx3i17n&;-sX(!ZOF+H0qaj|97uL*si)2eXPn0=zSuNLzpIZ=pr rHNp-$VTZ|)W->t&`3pNC<(7e60cdW*sZk~nLP@l;O)QW|L`3vIWIZZA diff --git a/sample_archive/zipcdx/zipnum-sample.idx b/sample_archive/zipcdx/zipnum-sample.idx index 6697c131..e7ef2645 100644 --- a/sample_archive/zipcdx/zipnum-sample.idx +++ b/sample_archive/zipcdx/zipnum-sample.idx @@ -1,9 +1,9 @@ -com,example)/ 20140127171200 zipnum 0 276 1 -org,iana)/ 20140127171238 zipnum 276 328 2 -org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126201055 zipnum 604 312 3 -org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126200718 zipnum 916 235 4 -org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126200912 zipnum 1151 235 5 -org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126201240 zipnum 1386 306 6 +com,example)/ 20140127171200 zipnum 0 275 1 +org,iana)/ 20140127171238 zipnum 275 328 2 +org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126201055 zipnum 603 312 3 +org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126200718 zipnum 915 235 4 +org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126200912 zipnum 1150 235 5 +org,iana)/_css/2013.1/fonts/opensans-bold.ttf 20140126201240 zipnum 1385 307 6 org,iana)/_css/2013.1/fonts/opensans-regular.ttf 20140126200654 zipnum 1692 235 7 org,iana)/_css/2013.1/fonts/opensans-regular.ttf 20140126200816 zipnum 1927 231 8 org,iana)/_css/2013.1/fonts/opensans-regular.ttf 20140126201128 zipnum 2158 236 9 @@ -12,27 +12,27 @@ org,iana)/_css/2013.1/fonts/opensans-semibold.ttf 20140126200805 zipnum 2706 234 org,iana)/_css/2013.1/fonts/opensans-semibold.ttf 20140126201055 zipnum 2940 235 12 org,iana)/_css/2013.1/fonts/opensans-semibold.ttf 20140126201308 zipnum 3175 289 13 org,iana)/_css/2013.1/print.css 20140126200737 zipnum 3464 208 14 -org,iana)/_css/2013.1/print.css 20140126200929 zipnum 3672 207 15 -org,iana)/_css/2013.1/print.css 20140126201248 zipnum 3879 276 16 -org,iana)/_css/2013.1/screen.css 20140126200706 zipnum 4155 210 17 -org,iana)/_css/2013.1/screen.css 20140126200825 zipnum 4365 211 18 -org,iana)/_css/2013.1/screen.css 20140126201227 zipnum 4576 216 19 -org,iana)/_img/2013.1/iana-logo-header.svg 20140126200654 zipnum 4792 236 20 -org,iana)/_img/2013.1/iana-logo-header.svg 20140126200816 zipnum 5028 219 21 -org,iana)/_img/2013.1/iana-logo-header.svg 20140126201128 zipnum 5247 221 22 -org,iana)/_img/2013.1/iana-logo-homepage.png 20140126200625 zipnum 5468 299 23 -org,iana)/_img/2013.1/icann-logo.svg 20140126200719 zipnum 5767 210 24 -org,iana)/_img/2013.1/icann-logo.svg 20140126200912 zipnum 5977 212 25 -org,iana)/_img/2013.1/icann-logo.svg 20140126201240 zipnum 6189 281 26 -org,iana)/_img/bookmark_icon.ico 20140126200631 zipnum 6470 298 27 -org,iana)/_js/2013.1/iana.js 20140126200716 zipnum 6768 213 28 -org,iana)/_js/2013.1/iana.js 20140126200912 zipnum 6981 216 29 -org,iana)/_js/2013.1/iana.js 20140126201239 zipnum 7197 270 30 -org,iana)/_js/2013.1/jquery.js 20140126200653 zipnum 7467 215 31 -org,iana)/_js/2013.1/jquery.js 20140126200816 zipnum 7682 209 32 -org,iana)/_js/2013.1/jquery.js 20140126201127 zipnum 7891 210 33 -org,iana)/_js/2013.1/jquery.js 20140127171239 zipnum 8101 410 34 -org,iana)/dnssec 20140126201307 zipnum 8511 373 35 -org,iana)/domains/int 20140126201239 zipnum 8884 353 36 -org,iana)/domains/root/servers 20140126201227 zipnum 9237 386 37 -org,iana)/time-zones 20140126200737 zipnum 9623 145 38 +org,iana)/_css/2013.1/print.css 20140126200929 zipnum 3672 209 15 +org,iana)/_css/2013.1/print.css 20140126201248 zipnum 3881 276 16 +org,iana)/_css/2013.1/screen.css 20140126200706 zipnum 4157 210 17 +org,iana)/_css/2013.1/screen.css 20140126200825 zipnum 4367 211 18 +org,iana)/_css/2013.1/screen.css 20140126201227 zipnum 4578 216 19 +org,iana)/_img/2013.1/iana-logo-header.svg 20140126200654 zipnum 4794 236 20 +org,iana)/_img/2013.1/iana-logo-header.svg 20140126200816 zipnum 5030 219 21 +org,iana)/_img/2013.1/iana-logo-header.svg 20140126201128 zipnum 5249 221 22 +org,iana)/_img/2013.1/iana-logo-homepage.png 20140126200625 zipnum 5470 299 23 +org,iana)/_img/2013.1/icann-logo.svg 20140126200719 zipnum 5769 210 24 +org,iana)/_img/2013.1/icann-logo.svg 20140126200912 zipnum 5979 212 25 +org,iana)/_img/2013.1/icann-logo.svg 20140126201240 zipnum 6191 281 26 +org,iana)/_img/bookmark_icon.ico 20140126200631 zipnum 6472 300 27 +org,iana)/_js/2013.1/iana.js 20140126200716 zipnum 6772 213 28 +org,iana)/_js/2013.1/iana.js 20140126200912 zipnum 6985 216 29 +org,iana)/_js/2013.1/iana.js 20140126201239 zipnum 7201 270 30 +org,iana)/_js/2013.1/jquery.js 20140126200653 zipnum 7471 215 31 +org,iana)/_js/2013.1/jquery.js 20140126200816 zipnum 7686 210 32 +org,iana)/_js/2013.1/jquery.js 20140126201127 zipnum 7896 211 33 +org,iana)/_js/2013.1/jquery.js 20140127171239 zipnum 8107 410 34 +org,iana)/dnssec 20140126201307 zipnum 8517 373 35 +org,iana)/domains/int 20140126201239 zipnum 8890 355 36 +org,iana)/domains/root/servers 20140126201227 zipnum 9245 386 37 +org,iana)/time-zones 20140126200737 zipnum 9631 166 38