mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
warcserver: support different default adapters, for live web and remote sources
warcserver.http.DefaultAdapters.live_adapter is used if is_live, else DefaultAdapters.remote_adapter is used
tests: fix test to ignore order in dir listing
This commit is contained in:
parent
324a36b5b7
commit
84eb070938
@ -1,5 +1,7 @@
|
||||
from requests.adapters import HTTPAdapter
|
||||
|
||||
default_adapter = HTTPAdapter(max_retries=3)
|
||||
class DefaultAdapters(object):
|
||||
live_adapter = HTTPAdapter(max_retries=3)
|
||||
remote_adapter = HTTPAdapter(pool_connections=8, pool_maxsize=8, pool_block=True)
|
||||
|
||||
|
||||
|
@ -5,7 +5,7 @@ from pywb.utils.wbexception import NotFoundException
|
||||
from warcio.timeutils import timestamp_to_http_date, http_date_to_timestamp
|
||||
from warcio.timeutils import timestamp_now, pad_timestamp, PAD_14_DOWN
|
||||
|
||||
from pywb.warcserver.http import default_adapter
|
||||
from pywb.warcserver.http import DefaultAdapters
|
||||
from pywb.warcserver.index.cdxobject import CDXObject
|
||||
|
||||
from pywb.utils.format import ParamFormatter, res_template
|
||||
@ -32,10 +32,12 @@ class BaseIndexSource(object):
|
||||
else:
|
||||
return None
|
||||
|
||||
def _init_sesh(self):
|
||||
def _init_sesh(self, adapter=None):
|
||||
if not adapter:
|
||||
adapter = DefaultAdapters.remote_adapter
|
||||
self.sesh = requests.Session()
|
||||
self.sesh.mount('http://', default_adapter)
|
||||
self.sesh.mount('https://', default_adapter)
|
||||
self.sesh.mount('http://', adapter)
|
||||
self.sesh.mount('https://', adapter)
|
||||
|
||||
|
||||
#=============================================================================
|
||||
@ -193,7 +195,7 @@ class RemoteIndexSource(BaseIndexSource):
|
||||
class LiveIndexSource(BaseIndexSource):
|
||||
def __init__(self, proxy_url='{url}'):
|
||||
self.proxy_url = proxy_url
|
||||
self._init_sesh()
|
||||
self._init_sesh(DefaultAdapters.live_adapter)
|
||||
|
||||
def load_index(self, params):
|
||||
# no fuzzy match for live resources
|
||||
|
@ -14,7 +14,7 @@ from pywb.utils.format import ParamFormatter
|
||||
from pywb.warcserver.resource.resolvingloader import ResolvingLoader
|
||||
from pywb.warcserver.resource.pathresolvers import DefaultResolverMixin
|
||||
|
||||
from pywb.warcserver.http import default_adapter
|
||||
from pywb.warcserver.http import DefaultAdapters
|
||||
|
||||
from six.moves.urllib.parse import urlsplit, quote, unquote
|
||||
|
||||
@ -237,12 +237,6 @@ class LiveWebLoader(BaseLoader):
|
||||
def __init__(self, forward_proxy_prefix=None, adapter=None):
|
||||
self.forward_proxy_prefix = forward_proxy_prefix
|
||||
|
||||
if not adapter:
|
||||
adapter = default_adapter
|
||||
|
||||
self.pool = adapter.poolmanager
|
||||
self.max_retries = adapter.max_retries
|
||||
|
||||
def load_resource(self, cdx, params):
|
||||
load_url = cdx.get('load_url')
|
||||
if not load_url:
|
||||
@ -407,7 +401,8 @@ class LiveWebLoader(BaseLoader):
|
||||
data, req_headers, params, cdx):
|
||||
|
||||
upstream_res = self._do_request(method, load_url,
|
||||
data, req_headers, params)
|
||||
data, req_headers, params,
|
||||
cdx.get('is_live'))
|
||||
|
||||
if cdx.get('is_live'):
|
||||
return upstream_res
|
||||
@ -428,23 +423,28 @@ class LiveWebLoader(BaseLoader):
|
||||
raise
|
||||
|
||||
load_url = location
|
||||
upstream_res = self._do_request(method, load_url, data, req_headers, params)
|
||||
upstream_res = self._do_request(method, load_url, data,
|
||||
req_headers, params, cdx.get('is_live'))
|
||||
self_redir_count += 1
|
||||
|
||||
return upstream_res
|
||||
|
||||
def _do_request(self, method, load_url, data, req_headers, params):
|
||||
def _do_request(self, method, load_url, data, req_headers, params, is_live):
|
||||
adapter = DefaultAdapters.live_adapter if is_live else DefaultAdapters.remote_adapter
|
||||
pool = adapter.poolmanager
|
||||
max_retries = adapter.max_retries
|
||||
|
||||
try:
|
||||
upstream_res = self.pool.urlopen(method=method,
|
||||
url=load_url,
|
||||
body=data,
|
||||
headers=req_headers,
|
||||
redirect=False,
|
||||
assert_same_host=False,
|
||||
preload_content=False,
|
||||
decode_content=False,
|
||||
retries=self.max_retries,
|
||||
timeout=params.get('_timeout'))
|
||||
upstream_res = pool.urlopen(method=method,
|
||||
url=load_url,
|
||||
body=data,
|
||||
headers=req_headers,
|
||||
redirect=False,
|
||||
assert_same_host=False,
|
||||
preload_content=False,
|
||||
decode_content=False,
|
||||
retries=max_retries,
|
||||
timeout=params.get('_timeout'))
|
||||
|
||||
return upstream_res
|
||||
|
||||
|
@ -53,7 +53,7 @@ class TestWarcServer(TempDirTests, BaseTestClass):
|
||||
assert len(self.loader.list_fixed_routes()) == 13
|
||||
|
||||
def test_list_dynamic(self):
|
||||
assert self.loader.list_dynamic_routes() == ['auto1', 'auto2']
|
||||
assert set(self.loader.list_dynamic_routes()) == set(['auto1', 'auto2'])
|
||||
|
||||
def test_remote_cdx(self):
|
||||
sources = self._get_sources('ait')
|
||||
|
Loading…
x
Reference in New Issue
Block a user