diff --git a/pywb/utils/limitreader.py b/pywb/utils/limitreader.py new file mode 100644 index 00000000..49d5d713 --- /dev/null +++ b/pywb/utils/limitreader.py @@ -0,0 +1,69 @@ +# ============================================================================ +class LimitReader(object): + """ + A reader which will not read more than specified limit + """ + + def __init__(self, stream, limit): + self.stream = stream + self.limit = limit + + if hasattr(stream, 'tell'): + self.tell = self._tell + + def _update(self, buff): + length = len(buff) + self.limit -= length + return buff + + def read(self, length=None): + if length is not None: + length = min(length, self.limit) + else: + length = self.limit + + if length == 0: + return b'' + + buff = self.stream.read(length) + return self._update(buff) + + def readline(self, length=None): + if length is not None: + length = min(length, self.limit) + else: + length = self.limit + + if length == 0: + return b'' + + buff = self.stream.readline(length) + return self._update(buff) + + def close(self): + self.stream.close() + + def _tell(self): + return self.stream.tell() + + @staticmethod + def wrap_stream(stream, content_length): + """ + If given content_length is an int >= 0, wrap the stream + in a LimitReader. 
Otherwise, return the stream unaltered + """ + try: + content_length = int(content_length) + if content_length >= 0: + # optimize: if already a LimitReader, set limit to + # the smaller of the two limits + if isinstance(stream, LimitReader): + stream.limit = min(stream.limit, content_length) + else: + stream = LimitReader(stream, content_length) + + except (ValueError, TypeError): + pass + + return stream + diff --git a/pywb/utils/loaders.py b/pywb/utils/loaders.py index d803333d..53be7896 100644 --- a/pywb/utils/loaders.py +++ b/pywb/utils/loaders.py @@ -17,6 +17,7 @@ import base64 import cgi from io import open, BytesIO +from pywb.utils.limitreader import LimitReader try: from boto import connect_s3 @@ -500,78 +501,6 @@ class HMACCookieMaker(object): return cookie -#================================================================= -# Limit Reader -#================================================================= -class LimitReader(object): - """ - A reader which will not read more than specified limit - """ - - def __init__(self, stream, limit): - self.stream = stream - self.limit = limit - - if hasattr(stream, 'tell'): - self.tell = self._tell - - def _update(self, buff): - length = len(buff) - self.limit -= length - return buff - - def read(self, length=None): - if length is not None: - length = min(length, self.limit) - else: - length = self.limit - - if length == 0: - return b'' - - buff = self.stream.read(length) - return self._update(buff) - - def readline(self, length=None): - if length is not None: - length = min(length, self.limit) - else: - length = self.limit - - if length == 0: - return b'' - - buff = self.stream.readline(length) - return self._update(buff) - - def close(self): - self.stream.close() - - def _tell(self): - return self.stream.tell() - - @staticmethod - def wrap_stream(stream, content_length): - """ - If given content_length is an int > 0, wrap the stream - in a LimitReader. 
Otherwise, return the stream unaltered - """ - try: - content_length = int(content_length) - if content_length >= 0: - # optimize: if already a LimitStream, set limit to - # the smaller of the two limits - if isinstance(stream, LimitReader): - stream.limit = min(stream.limit, content_length) - else: - stream = LimitReader(stream, content_length) - - except (ValueError, TypeError): - pass - - return stream - - # ============================================================================ BlockLoader.init_default_loaders() diff --git a/pywb/utils/test/test_bufferedreaders.py b/pywb/utils/test/test_bufferedreaders.py index 1960e374..c5b91f21 100644 --- a/pywb/utils/test/test_bufferedreaders.py +++ b/pywb/utils/test/test_bufferedreaders.py @@ -101,7 +101,7 @@ Zero-Length chunk: from io import BytesIO from pywb.utils.bufferedreaders import ChunkedDataReader, ChunkedDataException from pywb.utils.bufferedreaders import DecompressingBufferedReader -from pywb.utils.loaders import LimitReader +from pywb.utils.limitreader import LimitReader from pywb import get_test_dir diff --git a/pywb/utils/test/test_loaders.py b/pywb/utils/test/test_loaders.py index dd5c3861..703ef81e 100644 --- a/pywb/utils/test/test_loaders.py +++ b/pywb/utils/test/test_loaders.py @@ -141,8 +141,9 @@ from io import BytesIO import requests from pywb.utils.loaders import BlockLoader, HMACCookieMaker, to_file_url -from pywb.utils.loaders import LimitReader, extract_client_cookie, extract_post_query +from pywb.utils.loaders import extract_client_cookie, extract_post_query from pywb.utils.loaders import append_post_query, read_last_line +from pywb.utils.limitreader import LimitReader from pywb.utils.bufferedreaders import DecompressingBufferedReader diff --git a/pywb/warc/recordloader.py b/pywb/warc/recordloader.py index 3becb294..90af9496 100644 --- a/pywb/warc/recordloader.py +++ b/pywb/warc/recordloader.py @@ -4,7 +4,7 @@ from pywb.utils.statusandheaders import StatusAndHeaders from 
pywb.utils.statusandheaders import StatusAndHeadersParser from pywb.utils.statusandheaders import StatusAndHeadersParserException -from pywb.utils.loaders import LimitReader +from pywb.utils.limitreader import LimitReader from pywb.utils.loaders import to_native_str from pywb.utils.wbexception import WbException diff --git a/pywb/webagg/inputrequest.py b/pywb/webagg/inputrequest.py index 50112959..1d2196cf 100644 --- a/pywb/webagg/inputrequest.py +++ b/pywb/webagg/inputrequest.py @@ -1,5 +1,5 @@ from pywb.utils.loaders import extract_post_query, append_post_query -from pywb.utils.loaders import LimitReader +from pywb.utils.limitreader import LimitReader from pywb.utils.statusandheaders import StatusAndHeadersParser from six.moves.urllib.parse import urlsplit, quote diff --git a/pywb/webapp/rangecache.py b/pywb/webapp/rangecache.py index a45b36f9..53a5e2ac 100644 --- a/pywb/webapp/rangecache.py +++ b/pywb/webapp/rangecache.py @@ -1,5 +1,5 @@ from pywb.utils.statusandheaders import StatusAndHeaders -from pywb.utils.loaders import LimitReader +from pywb.utils.limitreader import LimitReader from pywb.framework.cache import create_cache from tempfile import NamedTemporaryFile, mkdtemp diff --git a/pywb/webapp/replay_views.py b/pywb/webapp/replay_views.py index 69e9ea2d..b05ade12 100644 --- a/pywb/webapp/replay_views.py +++ b/pywb/webapp/replay_views.py @@ -7,7 +7,7 @@ from itertools import chain from pywb.utils.statusandheaders import StatusAndHeaders from pywb.utils.wbexception import WbException, NotFoundException -from pywb.utils.loaders import LimitReader +from pywb.utils.limitreader import LimitReader from pywb.utils.timeutils import timestamp_now from pywb.framework.wbrequestresponse import WbResponse