1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

Remove SeekableTextFileReader and replace it with standard file-like objects

Use seek(0, 2) and tell() to get the file length
This commit is contained in:
Ilya Kreymer 2014-05-06 20:54:42 -07:00
parent 46449ac188
commit e7957a5cae
7 changed files with 13 additions and 61 deletions

View File

@ -1,5 +1,4 @@
from pywb.utils.binsearch import iter_range
from pywb.utils.loaders import SeekableTextFileReader
from pywb.utils.wbexception import AccessException, NotFoundException
from pywb.utils.wbexception import BadRequestException, WbException
@ -29,7 +28,7 @@ class CDXFile(CDXSource):
self.filename = filename
def load_cdx(self, query):
source = SeekableTextFileReader(self.filename)
source = open(self.filename)
return iter_range(source, query.key, query.end_key)
def __str__(self):

View File

@ -9,7 +9,6 @@ from cdxsource import CDXSource
from cdxobject import IDXObject
from pywb.utils.loaders import BlockLoader
from pywb.utils.loaders import SeekableTextFileReader
from pywb.utils.bufferedreaders import gzip_decompressor
from pywb.utils.binsearch import iter_range, linearsearch
@ -113,7 +112,7 @@ class ZipNumCluster(CDXSource):
def load_cdx(self, query):
self.load_loc()
reader = SeekableTextFileReader(self.summary)
reader = open(self.summary)
idx_iter = iter_range(reader,
query.key,

View File

@ -16,7 +16,9 @@ def binsearch_offset(reader, key, compare_func=cmp, block_size=8192):
Optional compare_func may be specified
"""
min_ = 0
max_ = reader.getsize() / block_size
reader.seek(0, 2)
max_ = reader.tell() / block_size
while max_ - min_ > 1:
mid = min_ + ((max_ - min_) / 2)

View File

@ -198,34 +198,3 @@ class LimitReader(object):
pass
return stream
#=================================================================
# Local text file with known size -- used for binsearch
#=================================================================
class SeekableTextFileReader(object):
    """
    A very simple file-like object wrapper that knows its total size,
    via getsize()
    Supports seek() and tell() operations, and can be used as a
    context manager so the underlying handle is always closed.
    Assumed to be a text file. Used for binsearch.

    The file is opened in binary mode, so read() and readline() return
    the raw bytes of the file. The size is captured once at construction
    time and is not refreshed afterwards.
    """
    def __init__(self, filename):
        """
        Open filename for reading and record its size.

        Raises whatever open() / os.path.getsize() raise for a missing
        or unreadable file.
        """
        self.filename = filename
        self.fh = open(filename, 'rb')
        try:
            self.size = os.path.getsize(filename)
        except OSError:
            # don't leak the handle opened above if the stat fails
            self.fh.close()
            raise

    def getsize(self):
        """Return the file size in bytes, as recorded at construction."""
        return self.size

    def read(self, length=None):
        """
        Read up to length bytes, or everything up to EOF when length
        is None.
        """
        # Built-in Python 2 file objects reject an explicit None, so only
        # forward the length when the caller actually supplied one.
        if length is None:
            return self.fh.read()
        return self.fh.read(length)

    def readline(self, length=None):
        """
        Read one line, limited to at most length bytes when length is
        not None.
        """
        # Same None handling as read().
        if length is None:
            return self.fh.readline()
        return self.fh.readline(length)

    def seek(self, offset, whence=0):
        """
        Move the file position; whence has the same meaning as for a
        regular file (0 = start, 1 = current, 2 = end) and defaults to
        0, so existing seek(offset) callers are unaffected.
        """
        return self.fh.seek(offset, whence)

    def tell(self):
        """Return the current position in the underlying file."""
        return self.fh.tell()

    def close(self):
        """Close the underlying file handle."""
        return self.fh.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Always release the handle; exceptions are not suppressed.
        self.close()

View File

@ -59,7 +59,6 @@ org,iana)/about 20140126200706 http://www.iana.org/about text/html 200 6G77LZKFA
#=================================================================
import os
from pywb.utils.binsearch import iter_prefix, iter_exact, iter_range
from pywb.utils.loaders import SeekableTextFileReader
from pywb import get_test_dir
@ -67,17 +66,14 @@ from pywb import get_test_dir
test_cdx_dir = get_test_dir() + 'cdx/'
def print_binsearch_results(key, iter_func):
cdx = SeekableTextFileReader(test_cdx_dir + 'iana.cdx')
for line in iter_func(cdx, key):
print line
with open(test_cdx_dir + 'iana.cdx') as cdx:
for line in iter_func(cdx, key):
print line
def print_binsearch_results_range(key, end_key, iter_func, prev_size=0):
cdx = SeekableTextFileReader(test_cdx_dir + 'iana.cdx')
for line in iter_func(cdx, key, end_key, prev_size=prev_size):
print line
with open(test_cdx_dir + 'iana.cdx') as cdx:
for line in iter_func(cdx, key, end_key, prev_size=prev_size):
print line
if __name__ == "__main__":

View File

@ -39,18 +39,6 @@ True
# test with extra id, ensure 4 parts of the A-B=C-D form are present
>>> len(re.split('[-=]', HMACCookieMaker('test', 'test', 5).make('extra')))
4
# SeekableTextFileReader Test
>>> sr = SeekableTextFileReader(test_cdx_dir + 'iana.cdx')
>>> sr.getsize()
30399
>>> seek_read_full(sr, 100)
'org,iana)/_css/2013.1/fonts/inconsolata.otf 20140126200826 http://www.iana.org/_css/2013.1/fonts/Inconsolata.otf application/octet-stream 200 LNMEDYOENSOEI5VPADCKL3CB6N3GWXPR - - 34054 620049 iana.warc.gz\\n'
# seek, read, close
>>> r = sr.seek(0); sr.read(10); sr.close()
' CDX N b a'
"""
@ -58,7 +46,7 @@ True
import re
from io import BytesIO
from pywb.utils.loaders import BlockLoader, HMACCookieMaker
from pywb.utils.loaders import LimitReader, SeekableTextFileReader
from pywb.utils.loaders import LimitReader
from pywb import get_test_dir

View File

@ -1,7 +1,6 @@
import redis
from pywb.utils.binsearch import iter_exact
from pywb.utils.loaders import SeekableTextFileReader
import urlparse
import os
@ -57,7 +56,7 @@ class RedisResolver:
class PathIndexResolver:
def __init__(self, pathindex_file):
self.pathindex_file = pathindex_file
self.reader = SeekableTextFileReader(pathindex_file)
self.reader = open(pathindex_file)
def __call__(self, filename):
result = iter_exact(self.reader, filename, '\t')