mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
mementoindexsource: add 'connection: close' to ensure connection closed after memento timegate query!
io utils: StreamIter() supports custom closer
responseloader: use release_conn() instead of close() to recycle urllib3 connections!
This commit is contained in:
parent
9bda61cab5
commit
1bd8a85a4d
@ -1,13 +1,13 @@
|
|||||||
import zlib
|
import zlib
|
||||||
from contextlib import closing
|
from contextlib import closing, contextmanager
|
||||||
|
|
||||||
from warcio.utils import BUFF_SIZE
|
from warcio.utils import BUFF_SIZE
|
||||||
from tempfile import SpooledTemporaryFile
|
from tempfile import SpooledTemporaryFile
|
||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
#=============================================================================
|
||||||
def StreamIter(stream, header1=None, header2=None, size=BUFF_SIZE):
|
def StreamIter(stream, header1=None, header2=None, size=BUFF_SIZE, closer=closing):
|
||||||
with closing(stream):
|
with closer(stream):
|
||||||
if header1:
|
if header1:
|
||||||
yield header1
|
yield header1
|
||||||
|
|
||||||
@ -21,6 +21,15 @@ def StreamIter(stream, header1=None, header2=None, size=BUFF_SIZE):
|
|||||||
yield buff
|
yield buff
|
||||||
|
|
||||||
|
|
||||||
|
#=============================================================================
|
||||||
|
@contextmanager
|
||||||
|
def call_release_conn(stream):
|
||||||
|
try:
|
||||||
|
yield stream
|
||||||
|
finally:
|
||||||
|
stream.release_conn()
|
||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
#=============================================================================
|
||||||
def chunk_encode_iter(orig_iter):
|
def chunk_encode_iter(orig_iter):
|
||||||
for chunk in orig_iter:
|
for chunk in orig_iter:
|
||||||
|
@ -32,7 +32,6 @@ class BaseIndexSource(object):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
#=============================================================================
|
||||||
class FileIndexSource(BaseIndexSource):
|
class FileIndexSource(BaseIndexSource):
|
||||||
CDX_EXT = ('.cdx', '.cdxj')
|
CDX_EXT = ('.cdx', '.cdxj')
|
||||||
@ -360,10 +359,11 @@ class MementoIndexSource(BaseIndexSource):
|
|||||||
try:
|
try:
|
||||||
headers = self._get_headers(params)
|
headers = self._get_headers(params)
|
||||||
headers['Accept-Datetime'] = accept_dt
|
headers['Accept-Datetime'] = accept_dt
|
||||||
res = self.sesh.head(url, headers=headers, timeout=None)
|
res = self.sesh.head(url, headers=headers)
|
||||||
if res.status_code >= 400:
|
if res.status_code >= 400:
|
||||||
raise NotFoundException(url)
|
raise NotFoundException(url)
|
||||||
except:
|
except Exception as e:
|
||||||
|
print('FAILED:', e)
|
||||||
raise NotFoundException(url)
|
raise NotFoundException(url)
|
||||||
|
|
||||||
links = res.headers.get('Link')
|
links = res.headers.get('Link')
|
||||||
@ -374,7 +374,8 @@ class MementoIndexSource(BaseIndexSource):
|
|||||||
return links
|
return links
|
||||||
|
|
||||||
def _get_headers(self, params):
|
def _get_headers(self, params):
|
||||||
return {}
|
headers = {'Connection': 'close'}
|
||||||
|
return headers
|
||||||
|
|
||||||
def handle_timemap(self, params):
|
def handle_timemap(self, params):
|
||||||
url = res_template(self.timemap_url, params)
|
url = res_template(self.timemap_url, params)
|
||||||
@ -469,11 +470,11 @@ class WBMementoIndexSource(MementoIndexSource):
|
|||||||
return ref_url
|
return ref_url
|
||||||
|
|
||||||
def _get_headers(self, params):
|
def _get_headers(self, params):
|
||||||
|
headers = super(WBMementoIndexSource, self)._get_headers(params)
|
||||||
ref_url = self._get_referrer(params)
|
ref_url = self._get_referrer(params)
|
||||||
if ref_url:
|
if ref_url:
|
||||||
return {'Referer': ref_url}
|
headers['Referer'] = ref_url
|
||||||
else:
|
return headers
|
||||||
return {}
|
|
||||||
|
|
||||||
def _extract_location(self, url, location):
|
def _extract_location(self, url, location):
|
||||||
if not location or not location.startswith(self.prefix):
|
if not location or not location.startswith(self.prefix):
|
||||||
|
@ -8,7 +8,7 @@ from warcio.statusandheaders import StatusAndHeaders, StatusAndHeadersParser
|
|||||||
from pywb.utils.wbexception import LiveResourceException, WbException
|
from pywb.utils.wbexception import LiveResourceException, WbException
|
||||||
|
|
||||||
from pywb.utils.memento import MementoUtils
|
from pywb.utils.memento import MementoUtils
|
||||||
from pywb.utils.io import StreamIter, compress_gzip_iter
|
from pywb.utils.io import StreamIter, compress_gzip_iter, call_release_conn
|
||||||
from pywb.utils.format import ParamFormatter
|
from pywb.utils.format import ParamFormatter
|
||||||
|
|
||||||
from pywb.warcserver.resource.resolvingloader import ResolvingLoader
|
from pywb.warcserver.resource.resolvingloader import ResolvingLoader
|
||||||
@ -63,7 +63,7 @@ class BaseLoader(object):
|
|||||||
if not compress:
|
if not compress:
|
||||||
out_headers['Content-Length'] = other_headers.get('Content-Length')
|
out_headers['Content-Length'] = other_headers.get('Content-Length')
|
||||||
|
|
||||||
return out_headers, StreamIter(stream)
|
return out_headers, StreamIter(stream, closer=call_release_conn)
|
||||||
|
|
||||||
target_uri = warc_headers.get_header('WARC-Target-URI')
|
target_uri = warc_headers.get_header('WARC-Target-URI')
|
||||||
|
|
||||||
@ -85,7 +85,8 @@ class BaseLoader(object):
|
|||||||
|
|
||||||
streamiter = StreamIter(stream,
|
streamiter = StreamIter(stream,
|
||||||
header1=warc_headers_buff,
|
header1=warc_headers_buff,
|
||||||
header2=other_headers)
|
header2=other_headers,
|
||||||
|
closer=call_release_conn)
|
||||||
|
|
||||||
if compress:
|
if compress:
|
||||||
streamiter = compress_gzip_iter(streamiter)
|
streamiter = compress_gzip_iter(streamiter)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user