mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Remove SeekableTextFileReader; replace it with standard file-like objects,
using seek(0, 2) followed by tell() to get the file length
This commit is contained in:
parent
46449ac188
commit
e7957a5cae
@ -1,5 +1,4 @@
|
||||
from pywb.utils.binsearch import iter_range
|
||||
from pywb.utils.loaders import SeekableTextFileReader
|
||||
|
||||
from pywb.utils.wbexception import AccessException, NotFoundException
|
||||
from pywb.utils.wbexception import BadRequestException, WbException
|
||||
@ -29,7 +28,7 @@ class CDXFile(CDXSource):
|
||||
self.filename = filename
|
||||
|
||||
def load_cdx(self, query):
|
||||
source = SeekableTextFileReader(self.filename)
|
||||
source = open(self.filename)
|
||||
return iter_range(source, query.key, query.end_key)
|
||||
|
||||
def __str__(self):
|
||||
|
@ -9,7 +9,6 @@ from cdxsource import CDXSource
|
||||
from cdxobject import IDXObject
|
||||
|
||||
from pywb.utils.loaders import BlockLoader
|
||||
from pywb.utils.loaders import SeekableTextFileReader
|
||||
from pywb.utils.bufferedreaders import gzip_decompressor
|
||||
from pywb.utils.binsearch import iter_range, linearsearch
|
||||
|
||||
@ -113,7 +112,7 @@ class ZipNumCluster(CDXSource):
|
||||
def load_cdx(self, query):
|
||||
self.load_loc()
|
||||
|
||||
reader = SeekableTextFileReader(self.summary)
|
||||
reader = open(self.summary)
|
||||
|
||||
idx_iter = iter_range(reader,
|
||||
query.key,
|
||||
|
@ -16,7 +16,9 @@ def binsearch_offset(reader, key, compare_func=cmp, block_size=8192):
|
||||
Optional compare_func may be specified
|
||||
"""
|
||||
min_ = 0
|
||||
max_ = reader.getsize() / block_size
|
||||
|
||||
reader.seek(0, 2)
|
||||
max_ = reader.tell() / block_size
|
||||
|
||||
while max_ - min_ > 1:
|
||||
mid = min_ + ((max_ - min_) / 2)
|
||||
|
@ -198,34 +198,3 @@ class LimitReader(object):
|
||||
pass
|
||||
|
||||
return stream
|
||||
|
||||
|
||||
#=================================================================
|
||||
# Local text file with known size -- used for binsearch
|
||||
#=================================================================
|
||||
class SeekableTextFileReader(object):
|
||||
"""
|
||||
A very simple file-like object wrapper that knows its total size,
|
||||
via getsize()
|
||||
Supports seek() operation.
|
||||
Assumed to be a text file. Used for binsearch.
|
||||
"""
|
||||
def __init__(self, filename):
|
||||
self.fh = open(filename, 'rb')
|
||||
self.filename = filename
|
||||
self.size = os.path.getsize(filename)
|
||||
|
||||
def getsize(self):
|
||||
return self.size
|
||||
|
||||
def read(self, length=None):
|
||||
return self.fh.read(length)
|
||||
|
||||
def readline(self, length=None):
|
||||
return self.fh.readline(length)
|
||||
|
||||
def seek(self, offset):
|
||||
return self.fh.seek(offset)
|
||||
|
||||
def close(self):
|
||||
return self.fh.close()
|
||||
|
@ -59,7 +59,6 @@ org,iana)/about 20140126200706 http://www.iana.org/about text/html 200 6G77LZKFA
|
||||
#=================================================================
|
||||
import os
|
||||
from pywb.utils.binsearch import iter_prefix, iter_exact, iter_range
|
||||
from pywb.utils.loaders import SeekableTextFileReader
|
||||
|
||||
from pywb import get_test_dir
|
||||
|
||||
@ -67,17 +66,14 @@ from pywb import get_test_dir
|
||||
test_cdx_dir = get_test_dir() + 'cdx/'
|
||||
|
||||
def print_binsearch_results(key, iter_func):
|
||||
cdx = SeekableTextFileReader(test_cdx_dir + 'iana.cdx')
|
||||
|
||||
for line in iter_func(cdx, key):
|
||||
print line
|
||||
|
||||
with open(test_cdx_dir + 'iana.cdx') as cdx:
|
||||
for line in iter_func(cdx, key):
|
||||
print line
|
||||
|
||||
def print_binsearch_results_range(key, end_key, iter_func, prev_size=0):
|
||||
cdx = SeekableTextFileReader(test_cdx_dir + 'iana.cdx')
|
||||
|
||||
for line in iter_func(cdx, key, end_key, prev_size=prev_size):
|
||||
print line
|
||||
with open(test_cdx_dir + 'iana.cdx') as cdx:
|
||||
for line in iter_func(cdx, key, end_key, prev_size=prev_size):
|
||||
print line
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
@ -39,18 +39,6 @@ True
|
||||
# test with extra id, ensure 4 parts of the A-B=C-D form are present
|
||||
>>> len(re.split('[-=]', HMACCookieMaker('test', 'test', 5).make('extra')))
|
||||
4
|
||||
|
||||
# SeekableTextFileReader Test
|
||||
>>> sr = SeekableTextFileReader(test_cdx_dir + 'iana.cdx')
|
||||
>>> sr.getsize()
|
||||
30399
|
||||
|
||||
>>> seek_read_full(sr, 100)
|
||||
'org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126200826 http://www.iana.org/_css/2013.1/fonts/Inconsolata.otf application/octet-stream 200 LNMEDYOENSOEI5VPADCKL3CB6N3GWXPR - - 34054 620049 iana.warc.gz\\n'
|
||||
|
||||
# seek, read, close
|
||||
>>> r = sr.seek(0); sr.read(10); sr.close()
|
||||
' CDX N b a'
|
||||
"""
|
||||
|
||||
|
||||
@ -58,7 +46,7 @@ True
|
||||
import re
|
||||
from io import BytesIO
|
||||
from pywb.utils.loaders import BlockLoader, HMACCookieMaker
|
||||
from pywb.utils.loaders import LimitReader, SeekableTextFileReader
|
||||
from pywb.utils.loaders import LimitReader
|
||||
|
||||
from pywb import get_test_dir
|
||||
|
||||
|
@ -1,7 +1,6 @@
|
||||
import redis
|
||||
|
||||
from pywb.utils.binsearch import iter_exact
|
||||
from pywb.utils.loaders import SeekableTextFileReader
|
||||
|
||||
import urlparse
|
||||
import os
|
||||
@ -57,7 +56,7 @@ class RedisResolver:
|
||||
class PathIndexResolver:
|
||||
def __init__(self, pathindex_file):
|
||||
self.pathindex_file = pathindex_file
|
||||
self.reader = SeekableTextFileReader(pathindex_file)
|
||||
self.reader = open(pathindex_file)
|
||||
|
||||
def __call__(self, filename):
|
||||
result = iter_exact(self.reader, filename, '\t')
|
||||
|
Loading…
x
Reference in New Issue
Block a user