mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 08:04:49 +01:00
warcserver: define default HTTPAdapter in warcserver.http.default_adapter, for use with both index sources and responseloader
responseloader: use the existing connection pool from the shared HTTPAdapter. Fix tests: call_release_conn() checks whether release_conn() exists before calling it, and otherwise falls back to close().
This commit is contained in:
parent
1bd8a85a4d
commit
dd7c1bd752
@ -27,7 +27,10 @@ def call_release_conn(stream):
|
||||
try:
|
||||
yield stream
|
||||
finally:
|
||||
stream.release_conn()
|
||||
if hasattr(stream, 'release_conn'):
|
||||
stream.release_conn()
|
||||
else:
|
||||
stream.close()
|
||||
|
||||
|
||||
#=============================================================================
|
||||
|
5
pywb/warcserver/http.py
Normal file
5
pywb/warcserver/http.py
Normal file
@ -0,0 +1,5 @@
|
||||
from requests.adapters import HTTPAdapter
|
||||
|
||||
default_adapter = HTTPAdapter(max_retries=3)
|
||||
|
||||
|
@ -5,6 +5,7 @@ from pywb.utils.wbexception import NotFoundException
|
||||
from warcio.timeutils import timestamp_to_http_date, http_date_to_timestamp
|
||||
from warcio.timeutils import timestamp_now, pad_timestamp, PAD_14_DOWN
|
||||
|
||||
from pywb.warcserver.http import default_adapter
|
||||
from pywb.warcserver.index.cdxobject import CDXObject
|
||||
|
||||
from pywb.utils.format import ParamFormatter, res_template
|
||||
@ -31,6 +32,11 @@ class BaseIndexSource(object):
|
||||
else:
|
||||
return None
|
||||
|
||||
def _init_sesh(self):
|
||||
self.sesh = requests.Session()
|
||||
self.sesh.mount('http://', default_adapter)
|
||||
self.sesh.mount('https://', default_adapter)
|
||||
|
||||
|
||||
#=============================================================================
|
||||
class FileIndexSource(BaseIndexSource):
|
||||
@ -96,6 +102,7 @@ class RemoteIndexSource(BaseIndexSource):
|
||||
self.replay_url = replay_url
|
||||
self.url_field = url_field
|
||||
self.closest_limit = closest_limit
|
||||
self._init_sesh()
|
||||
|
||||
def _get_api_url(self, params):
|
||||
api_url = res_template(self.api_url, params)
|
||||
@ -106,8 +113,11 @@ class RemoteIndexSource(BaseIndexSource):
|
||||
|
||||
def load_index(self, params):
|
||||
api_url = self._get_api_url(params)
|
||||
r = requests.get(api_url, timeout=params.get('_timeout'))
|
||||
if r.status_code >= 400:
|
||||
try:
|
||||
r = self.sesh.get(api_url, timeout=params.get('_timeout'))
|
||||
r.raise_for_status()
|
||||
except Exception as e:
|
||||
print('FAILED: ' + str(e))
|
||||
raise NotFoundException(api_url)
|
||||
|
||||
lines = r.content.strip().split(b'\n')
|
||||
@ -317,12 +327,6 @@ class MementoIndexSource(BaseIndexSource):
|
||||
self.replay_url = replay_url
|
||||
self._init_sesh()
|
||||
|
||||
def _init_sesh(self):
|
||||
self.sesh = requests.Session()
|
||||
adapt = requests.adapters.HTTPAdapter(max_retries=3)
|
||||
self.sesh.mount('http://', adapt)
|
||||
self.sesh.mount('https://', adapt)
|
||||
|
||||
def links_to_cdxobject(self, link_header, def_name):
|
||||
results = MementoUtils.parse_links(link_header, def_name)
|
||||
|
||||
@ -360,10 +364,9 @@ class MementoIndexSource(BaseIndexSource):
|
||||
headers = self._get_headers(params)
|
||||
headers['Accept-Datetime'] = accept_dt
|
||||
res = self.sesh.head(url, headers=headers)
|
||||
if res.status_code >= 400:
|
||||
raise NotFoundException(url)
|
||||
res.raise_for_status()
|
||||
except Exception as e:
|
||||
print('FAILED:', e)
|
||||
print('FAILED: ' + str(e))
|
||||
raise NotFoundException(url)
|
||||
|
||||
links = res.headers.get('Link')
|
||||
|
@ -14,6 +14,8 @@ from pywb.utils.format import ParamFormatter
|
||||
from pywb.warcserver.resource.resolvingloader import ResolvingLoader
|
||||
from pywb.warcserver.resource.pathresolvers import DefaultResolverMixin
|
||||
|
||||
from pywb.warcserver.http import default_adapter
|
||||
|
||||
from six.moves.urllib.parse import urlsplit, quote, unquote
|
||||
|
||||
from io import BytesIO
|
||||
@ -26,7 +28,6 @@ import glob
|
||||
import datetime
|
||||
|
||||
from requests.models import PreparedRequest
|
||||
from requests.packages import urllib3
|
||||
|
||||
import six.moves.http_client
|
||||
six.moves.http_client._MAXHEADERS = 10000
|
||||
@ -233,14 +234,14 @@ class LiveWebLoader(BaseLoader):
|
||||
|
||||
UNREWRITE_HEADERS = ('location', 'content-location')
|
||||
|
||||
def __init__(self, forward_proxy_prefix=None):
|
||||
self.num_retries = 3
|
||||
self.num_pools = 10
|
||||
self.num_conn_per_pool = 10
|
||||
def __init__(self, forward_proxy_prefix=None, adapter=None):
|
||||
self.forward_proxy_prefix = forward_proxy_prefix
|
||||
|
||||
self.pool = urllib3.PoolManager(num_pools=self.num_pools,
|
||||
maxsize=self.num_conn_per_pool)
|
||||
if not adapter:
|
||||
adapter = default_adapter
|
||||
|
||||
self.pool = adapter.poolmanager
|
||||
self.max_retries = adapter.max_retries
|
||||
|
||||
def load_resource(self, cdx, params):
|
||||
load_url = cdx.get('load_url')
|
||||
@ -442,7 +443,7 @@ class LiveWebLoader(BaseLoader):
|
||||
assert_same_host=False,
|
||||
preload_content=False,
|
||||
decode_content=False,
|
||||
retries=self.num_retries,
|
||||
retries=self.max_retries,
|
||||
timeout=params.get('_timeout'))
|
||||
|
||||
return upstream_res
|
||||
|
Loading…
x
Reference in New Issue
Block a user