mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 08:04:49 +01:00
mementoindexsource: add a 'Connection: close' header to ensure the connection is closed after a memento timegate query.
io utils: StreamIter() supports a custom closer.
responseloader: use release_conn() instead of close() to recycle urllib3 connections.
This commit is contained in:
parent
9bda61cab5
commit
1bd8a85a4d
@ -1,13 +1,13 @@
|
||||
import zlib
|
||||
from contextlib import closing
|
||||
from contextlib import closing, contextmanager
|
||||
|
||||
from warcio.utils import BUFF_SIZE
|
||||
from tempfile import SpooledTemporaryFile
|
||||
|
||||
|
||||
#=============================================================================
|
||||
def StreamIter(stream, header1=None, header2=None, size=BUFF_SIZE):
|
||||
with closing(stream):
|
||||
def StreamIter(stream, header1=None, header2=None, size=BUFF_SIZE, closer=closing):
|
||||
with closer(stream):
|
||||
if header1:
|
||||
yield header1
|
||||
|
||||
@ -21,6 +21,15 @@ def StreamIter(stream, header1=None, header2=None, size=BUFF_SIZE):
|
||||
yield buff
|
||||
|
||||
|
||||
#=============================================================================
|
||||
@contextmanager
def call_release_conn(stream):
    """Context manager that yields ``stream`` and releases it on exit.

    Meant to be passed as the ``closer`` argument of ``StreamIter`` in place
    of ``contextlib.closing``. On exit (normal or via an exception) it calls
    ``stream.release_conn()`` when the stream provides that method, so the
    underlying connection can be recycled rather than torn down.

    Streams without ``release_conn`` (e.g. ordinary file-like objects) are
    closed with ``stream.close()`` instead, so using this closer on a mixed
    set of stream types never raises ``AttributeError`` during cleanup and
    never leaves the stream open.

    :param stream: object exposing ``release_conn()`` or ``close()``
    :return: context manager yielding ``stream`` unchanged
    """
    try:
        yield stream
    finally:
        # Prefer release_conn() when available; fall back to close() so
        # non-pooled streams are still cleaned up.
        release = getattr(stream, 'release_conn', None)
        if release is not None:
            release()
        else:
            stream.close()
|
||||
|
||||
|
||||
#=============================================================================
|
||||
def chunk_encode_iter(orig_iter):
|
||||
for chunk in orig_iter:
|
||||
|
@ -32,7 +32,6 @@ class BaseIndexSource(object):
|
||||
return None
|
||||
|
||||
|
||||
|
||||
#=============================================================================
|
||||
class FileIndexSource(BaseIndexSource):
|
||||
CDX_EXT = ('.cdx', '.cdxj')
|
||||
@ -360,10 +359,11 @@ class MementoIndexSource(BaseIndexSource):
|
||||
try:
|
||||
headers = self._get_headers(params)
|
||||
headers['Accept-Datetime'] = accept_dt
|
||||
res = self.sesh.head(url, headers=headers, timeout=None)
|
||||
res = self.sesh.head(url, headers=headers)
|
||||
if res.status_code >= 400:
|
||||
raise NotFoundException(url)
|
||||
except:
|
||||
except Exception as e:
|
||||
print('FAILED:', e)
|
||||
raise NotFoundException(url)
|
||||
|
||||
links = res.headers.get('Link')
|
||||
@ -374,7 +374,8 @@ class MementoIndexSource(BaseIndexSource):
|
||||
return links
|
||||
|
||||
def _get_headers(self, params):
|
||||
return {}
|
||||
headers = {'Connection': 'close'}
|
||||
return headers
|
||||
|
||||
def handle_timemap(self, params):
|
||||
url = res_template(self.timemap_url, params)
|
||||
@ -469,11 +470,11 @@ class WBMementoIndexSource(MementoIndexSource):
|
||||
return ref_url
|
||||
|
||||
def _get_headers(self, params):
|
||||
headers = super(WBMementoIndexSource, self)._get_headers(params)
|
||||
ref_url = self._get_referrer(params)
|
||||
if ref_url:
|
||||
return {'Referer': ref_url}
|
||||
else:
|
||||
return {}
|
||||
headers['Referer'] = ref_url
|
||||
return headers
|
||||
|
||||
def _extract_location(self, url, location):
|
||||
if not location or not location.startswith(self.prefix):
|
||||
|
@ -8,7 +8,7 @@ from warcio.statusandheaders import StatusAndHeaders, StatusAndHeadersParser
|
||||
from pywb.utils.wbexception import LiveResourceException, WbException
|
||||
|
||||
from pywb.utils.memento import MementoUtils
|
||||
from pywb.utils.io import StreamIter, compress_gzip_iter
|
||||
from pywb.utils.io import StreamIter, compress_gzip_iter, call_release_conn
|
||||
from pywb.utils.format import ParamFormatter
|
||||
|
||||
from pywb.warcserver.resource.resolvingloader import ResolvingLoader
|
||||
@ -63,7 +63,7 @@ class BaseLoader(object):
|
||||
if not compress:
|
||||
out_headers['Content-Length'] = other_headers.get('Content-Length')
|
||||
|
||||
return out_headers, StreamIter(stream)
|
||||
return out_headers, StreamIter(stream, closer=call_release_conn)
|
||||
|
||||
target_uri = warc_headers.get_header('WARC-Target-URI')
|
||||
|
||||
@ -85,7 +85,8 @@ class BaseLoader(object):
|
||||
|
||||
streamiter = StreamIter(stream,
|
||||
header1=warc_headers_buff,
|
||||
header2=other_headers)
|
||||
header2=other_headers,
|
||||
closer=call_release_conn)
|
||||
|
||||
if compress:
|
||||
streamiter = compress_gzip_iter(streamiter)
|
||||
|
Loading…
x
Reference in New Issue
Block a user