mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
fixup loading from archive, add LimitReader to ensure record length is respected
rename FileReader -> FileLoader and HttpReader -> HttpLoader; loaders create 'readers', which support read()/readline()
This commit is contained in:
parent
d9c4e5cba4
commit
b685772b96
@ -9,7 +9,10 @@ import wbexceptions
|
|||||||
from wbrequestresponse import StatusAndHeaders
|
from wbrequestresponse import StatusAndHeaders
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class HttpReader:
|
# load a reader from http
|
||||||
|
#=================================================================
|
||||||
|
|
||||||
|
class HttpLoader:
|
||||||
def __init__(self, hmac = None, hmac_duration = 30):
|
def __init__(self, hmac = None, hmac_duration = 30):
|
||||||
self.hmac = hmac
|
self.hmac = hmac
|
||||||
self.hmac_duration = hmac_duration
|
self.hmac_duration = hmac_duration
|
||||||
@ -31,16 +34,67 @@ class HttpReader:
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# Untested, but for completeness
|
# load a reader from local filesystem
|
||||||
class FileReader:
|
#=================================================================
|
||||||
|
class FileLoader:
|
||||||
|
"""
|
||||||
|
# Ensure attempt to read more than 100 bytes, only reads 100 bytes
|
||||||
|
>>> len(FileLoader().load(utils.test_data_dir() + 'warcs/iana.warc.gz', 0, 100).read('400'))
|
||||||
|
100
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
def load(self, url, offset, length):
|
def load(self, url, offset, length):
|
||||||
if url.startswith('file://'):
|
if url.startswith('file://'):
|
||||||
url = url[len('file://'):]
|
url = url[len('file://'):]
|
||||||
|
|
||||||
afile = open(url, 'rb')
|
afile = open(url, 'rb')
|
||||||
afile.seek(offset)
|
afile.seek(offset)
|
||||||
return afile
|
|
||||||
|
|
||||||
|
if length > 0:
|
||||||
|
return LimitReader(afile, length)
|
||||||
|
else:
|
||||||
|
return afile
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
# A reader which will not read past the specified limit
|
||||||
|
#=================================================================
|
||||||
|
class LimitReader:
|
||||||
|
"""
|
||||||
|
>>> LimitReader(StringIO.StringIO('abcdefghjiklmnopqrstuvwxyz'), 10).read(26)
|
||||||
|
'abcdefghji'
|
||||||
|
|
||||||
|
>>> LimitReader(StringIO.StringIO('abcdefghjiklmnopqrstuvwxyz'), 8).readline(26)
|
||||||
|
'abcdefgh'
|
||||||
|
|
||||||
|
>>> test_multiple_reads(LimitReader(StringIO.StringIO('abcdefghjiklmnopqrstuvwxyz'), 10), [2, 2, 20])
|
||||||
|
'efghji'
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, stream, limit):
|
||||||
|
self.stream = stream
|
||||||
|
self.limit = limit
|
||||||
|
|
||||||
|
if not self.limit:
|
||||||
|
self.limit = 1
|
||||||
|
|
||||||
|
|
||||||
|
def read(self, length = None):
|
||||||
|
length = min(length, self.limit) if length else self.limit
|
||||||
|
buff = self.stream.read(length)
|
||||||
|
self.limit -= len(buff)
|
||||||
|
return buff
|
||||||
|
|
||||||
|
|
||||||
|
def readline(self, length = None):
|
||||||
|
length = min(length, self.limit) if length else self.limit
|
||||||
|
buff = self.stream.readline(length)
|
||||||
|
self.limit -= len(buff)
|
||||||
|
return buff
|
||||||
|
|
||||||
|
def close(self):
|
||||||
|
self.stream.close()
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
@ -115,8 +169,8 @@ class ArchiveLoader:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def create_default_loaders():
|
def create_default_loaders():
|
||||||
http = HttpReader()
|
http = HttpLoader()
|
||||||
file = FileReader()
|
file = FileLoader()
|
||||||
return {
|
return {
|
||||||
'http': http,
|
'http': http,
|
||||||
'https': http,
|
'https': http,
|
||||||
@ -385,6 +439,12 @@ if __name__ == "__main__" or utils.enable_doctests():
|
|||||||
archive = testloader.load(path, offset, length)
|
archive = testloader.load(path, offset, length)
|
||||||
pprint.pprint((archive.type, archive.rec_headers, archive.status_headers))
|
pprint.pprint((archive.type, archive.rec_headers, archive.status_headers))
|
||||||
|
|
||||||
|
def test_multiple_reads(reader, inc_reads):
|
||||||
|
result = None
|
||||||
|
for x in inc_reads:
|
||||||
|
result = reader.read(x)
|
||||||
|
return result
|
||||||
|
|
||||||
import doctest
|
import doctest
|
||||||
doctest.testmod()
|
doctest.testmod()
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user