diff --git a/pywb/utils/io.py b/pywb/utils/io.py index 1c96741a..aa55abce 100644 --- a/pywb/utils/io.py +++ b/pywb/utils/io.py @@ -1,13 +1,13 @@ import zlib -from contextlib import closing +from contextlib import closing, contextmanager from warcio.utils import BUFF_SIZE from tempfile import SpooledTemporaryFile #============================================================================= -def StreamIter(stream, header1=None, header2=None, size=BUFF_SIZE): - with closing(stream): +def StreamIter(stream, header1=None, header2=None, size=BUFF_SIZE, closer=closing): + with closer(stream): if header1: yield header1 @@ -21,6 +21,15 @@ def StreamIter(stream, header1=None, header2=None, size=BUFF_SIZE): yield buff +#============================================================================= +@contextmanager +def call_release_conn(stream): + try: + yield stream + finally: + stream.release_conn() + + #============================================================================= def chunk_encode_iter(orig_iter): for chunk in orig_iter: diff --git a/pywb/warcserver/index/indexsource.py b/pywb/warcserver/index/indexsource.py index 8c40d665..5fc62fab 100644 --- a/pywb/warcserver/index/indexsource.py +++ b/pywb/warcserver/index/indexsource.py @@ -32,7 +32,6 @@ class BaseIndexSource(object): return None - #============================================================================= class FileIndexSource(BaseIndexSource): CDX_EXT = ('.cdx', '.cdxj') @@ -360,10 +359,11 @@ class MementoIndexSource(BaseIndexSource): try: headers = self._get_headers(params) headers['Accept-Datetime'] = accept_dt - res = self.sesh.head(url, headers=headers, timeout=None) + res = self.sesh.head(url, headers=headers) if res.status_code >= 400: raise NotFoundException(url) - except: + except Exception as e: + print('FAILED:', e) raise NotFoundException(url) links = res.headers.get('Link') @@ -374,7 +374,8 @@ class MementoIndexSource(BaseIndexSource): return links def _get_headers(self, params): - return {} + headers = {'Connection': 'close'} + return headers def handle_timemap(self, params): url = res_template(self.timemap_url, params) @@ -469,11 +470,11 @@ class WBMementoIndexSource(MementoIndexSource): return ref_url def _get_headers(self, params): + headers = super(WBMementoIndexSource, self)._get_headers(params) ref_url = self._get_referrer(params) if ref_url: - return {'Referer': ref_url} - else: - return {} + headers['Referer'] = ref_url + return headers def _extract_location(self, url, location): if not location or not location.startswith(self.prefix): diff --git a/pywb/warcserver/resource/responseloader.py b/pywb/warcserver/resource/responseloader.py index c7e57ca8..9cc828e0 100644 --- a/pywb/warcserver/resource/responseloader.py +++ b/pywb/warcserver/resource/responseloader.py @@ -8,7 +8,7 @@ from warcio.statusandheaders import StatusAndHeaders, StatusAndHeadersParser from pywb.utils.wbexception import LiveResourceException, WbException from pywb.utils.memento import MementoUtils -from pywb.utils.io import StreamIter, compress_gzip_iter +from pywb.utils.io import StreamIter, compress_gzip_iter, call_release_conn from pywb.utils.format import ParamFormatter from pywb.warcserver.resource.resolvingloader import ResolvingLoader @@ -63,7 +63,7 @@ class BaseLoader(object): if not compress: out_headers['Content-Length'] = other_headers.get('Content-Length') - return out_headers, StreamIter(stream) + return out_headers, StreamIter(stream, closer=call_release_conn) target_uri = warc_headers.get_header('WARC-Target-URI') @@ -85,7 +85,8 @@ class BaseLoader(object): streamiter = StreamIter(stream, header1=warc_headers_buff, - header2=other_headers) + header2=other_headers, + closer=call_release_conn) if compress: streamiter = compress_gzip_iter(streamiter)