1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 08:04:49 +01:00

better fix:

- wrap HttpLoader streams in StreamClosingReader(), which should close the connection even if the stream is not fully consumed
This commit is contained in:
Ilya Kreymer 2020-01-22 20:12:38 -08:00
parent aebd9d6359
commit 5278d5128b
3 changed files with 19 additions and 8 deletions

View File

@ -121,3 +121,18 @@ class OffsetLimitReader(LimitReader):
def readline(self, length=None):
    """Read a single line, running the pending skip step first.

    ``self._skip()`` is invoked before every read; it is defined outside
    this view (presumably it advances past the configured offset -- confirm
    against the class body). The call is then delegated unchanged to the
    parent class's ``readline``.

    :param length: passed through to the parent ``readline`` as-is.
    :return: whatever the parent ``readline`` returns.
    """
    self._skip()
    return super(OffsetLimitReader, self).readline(length)
# ============================================================================
class StreamClosingReader(object):
    """Minimal reader wrapper whose ``close()`` closes the wrapped stream.

    ``read`` and ``readline`` are forwarded untouched to the underlying
    stream; ``close`` hands the stream to ``no_except_close``. Per the
    accompanying commit note, the intent is that the underlying connection
    gets closed even when the stream has not been fully consumed.

    The wrapper can also be used as a context manager, so the stream is
    closed when the ``with`` block exits, including on an exception.
    """

    def __init__(self, stream):
        """Store the stream to wrap.

        :param stream: object providing ``read`` and ``readline``.
        """
        self.stream = stream

    def read(self, length=None):
        """Forward ``read(length)`` to the wrapped stream and return its result."""
        return self.stream.read(length)

    def readline(self, length=None):
        """Forward ``readline(length)`` to the wrapped stream and return its result."""
        return self.stream.readline(length)

    def close(self):
        """Close the wrapped stream via ``no_except_close``.

        ``no_except_close`` is defined elsewhere in this module; by its name
        it is expected to suppress errors raised while closing -- confirm.
        """
        no_except_close(self.stream)

    def __enter__(self):
        """Return the wrapper itself so it can be bound in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the stream on block exit; never suppress the active exception."""
        self.close()
        return False

View File

@ -21,7 +21,7 @@ import re
from io import open, BytesIO
from warcio.limitreader import LimitReader
from pywb.utils.io import no_except_close
from pywb.utils.io import no_except_close, StreamClosingReader
try:
import boto3
@ -355,7 +355,7 @@ class HttpLoader(BaseLoader):
r = self.session.get(url, headers=headers, stream=True)
r.raise_for_status()
return r.raw
return StreamClosingReader(r.raw)
# =================================================================

View File

@ -2,7 +2,7 @@ from warcio.bufferedreaders import DecompressingBufferedReader
from warcio.recordloader import ArcWarcRecordLoader
from pywb.utils.loaders import BlockLoader
from pywb.utils.io import BUFF_SIZE, no_except_close
from pywb.utils.io import BUFF_SIZE
#=================================================================
@ -32,8 +32,4 @@ class BlockArcWarcRecordLoader(ArcWarcRecordLoader):
decomp_type=decomp_type,
block_size=self.block_size)
res = self.parse_record_stream(stream, no_record_parse=no_record_parse)
no_except_close(stream)
return res
return self.parse_record_stream(stream, no_record_parse=no_record_parse)