1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

fixup loading from archive, add LimitReader to ensure record length is respected

rename FileReader -> FileLoader, HttpReader -> HttpLoader
loaders create 'readers', which support read()/readline()
This commit is contained in:
Ilya Kreymer 2014-02-01 14:02:53 -08:00
parent d9c4e5cba4
commit b685772b96

View File

@ -9,7 +9,10 @@ import wbexceptions
from wbrequestresponse import StatusAndHeaders
#=================================================================
class HttpReader:
# load a reader from http
#=================================================================
class HttpLoader:
def __init__(self, hmac = None, hmac_duration = 30):
self.hmac = hmac
self.hmac_duration = hmac_duration
@ -31,16 +34,67 @@ class HttpReader:
#=================================================================
# Untested, but for completeness
class FileReader:
# load a reader from local filesystem
#=================================================================
class FileLoader:
    """
    Load a reader for a byte range of a file on the local filesystem.

    # Ensure attempt to read more than 100 bytes, only reads 100 bytes
    >>> len(FileLoader().load(utils.test_data_dir() + 'warcs/iana.warc.gz', 0, 100).read(400))
    100
    """

    def load(self, url, offset, length):
        """
        Open ``url`` in binary mode, seek to ``offset`` and return a reader.

        url    -- local path, optionally prefixed with 'file://'
        offset -- byte position to seek to before returning
        length -- when greater than zero, the returned reader is a
                  LimitReader capped at this many bytes; otherwise the
                  plain file object is returned and reads are unbounded

        The caller owns the returned reader and must close() it.
        Errors from open() or seek() propagate to the caller.
        """
        if url.startswith('file://'):
            url = url[len('file://'):]

        afile = open(url, 'rb')

        # The handle is handed to the caller on success, so it can only be
        # cleaned up here on the failure path.
        try:
            afile.seek(offset)
        except Exception:
            afile.close()
            raise

        if length is not None and length > 0:
            return LimitReader(afile, length)

        return afile
#=================================================================
# A reader which will not read past the specified limit
#=================================================================
class LimitReader:
    """
    Wrap a stream so that no more than ``limit`` bytes are read from it.

    Each read()/readline() call is capped at the number of bytes still
    allowed, and that allowance shrinks by however many bytes the wrapped
    stream actually returned.

    >>> LimitReader(StringIO.StringIO('abcdefghjiklmnopqrstuvwxyz'), 10).read(26)
    'abcdefghji'
    >>> LimitReader(StringIO.StringIO('abcdefghjiklmnopqrstuvwxyz'), 8).readline(26)
    'abcdefgh'
    >>> test_multiple_reads(LimitReader(StringIO.StringIO('abcdefghjiklmnopqrstuvwxyz'), 10), [2, 2, 20])
    'efghji'
    """

    def __init__(self, stream, limit):
        """
        stream -- object providing read(), readline() and close()
        limit  -- maximum number of bytes that may be read in total
        """
        self.stream = stream
        self.limit = limit
        # A falsy limit (0 or None) is replaced by 1, so such a reader
        # still allows a single byte to be read.
        if not self.limit:
            self.limit = 1

    def _clamp(self, length):
        """Return how many bytes the next call may request from the stream."""
        # None and negative sizes conventionally mean "read everything";
        # passing them straight through would let the wrapped stream read
        # past the limit, so they map to "everything still allowed".
        if length is None or length < 0:
            return self.limit
        return min(length, self.limit)

    def read(self, length = None):
        """
        Read up to ``length`` bytes, never exceeding the remaining limit.

        With ``length`` omitted, None or negative, everything still allowed
        is requested. ``read(0)`` returns an empty result.
        """
        buff = self.stream.read(self._clamp(length))
        self.limit -= len(buff)
        return buff

    def readline(self, length = None):
        """
        Read one line of at most ``length`` bytes, never exceeding the
        remaining limit; a line longer than the allowance is truncated.
        """
        buff = self.stream.readline(self._clamp(length))
        self.limit -= len(buff)
        return buff

    def close(self):
        """Close the wrapped stream."""
        self.stream.close()
#=================================================================
@ -115,8 +169,8 @@ class ArchiveLoader:
@staticmethod
def create_default_loaders():
http = HttpReader()
file = FileReader()
http = HttpLoader()
file = FileLoader()
return {
'http': http,
'https': http,
@ -385,6 +439,12 @@ if __name__ == "__main__" or utils.enable_doctests():
archive = testloader.load(path, offset, length)
pprint.pprint((archive.type, archive.rec_headers, archive.status_headers))
def test_multiple_reads(reader, inc_reads):
result = None
for x in inc_reads:
result = reader.read(x)
return result
import doctest
doctest.testmod()