1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

warcserver: support different default adapters, for live web and remote sources

warcserver.http.DefaultAdapters.live_adapter is used when is_live is set; otherwise DefaultAdapters.remote_adapter is used
tests: fix test_list_dynamic to ignore order in dir listing
This commit is contained in:
Ilya Kreymer 2017-07-02 03:58:55 +00:00
parent 324a36b5b7
commit 84eb070938
4 changed files with 31 additions and 27 deletions

View File

@ -1,5 +1,7 @@
from requests.adapters import HTTPAdapter
default_adapter = HTTPAdapter(max_retries=3)
class DefaultAdapters(object):
    """Namespace for the shared default ``requests`` transport adapters.

    Two class-level instances are kept so that live-web fetches and
    requests to remote sources can be configured independently of each
    other.
    """

    # Default adapter for live-web loading; configured with max_retries=3.
    live_adapter = HTTPAdapter(max_retries=3)

    # Default adapter for remote sources: 8 cached pools, up to 8 saved
    # connections per pool, and pool_block=True so callers wait for a free
    # connection instead of opening extra ones.
    remote_adapter = HTTPAdapter(
        pool_block=True,
        pool_connections=8,
        pool_maxsize=8,
    )

View File

@ -5,7 +5,7 @@ from pywb.utils.wbexception import NotFoundException
from warcio.timeutils import timestamp_to_http_date, http_date_to_timestamp
from warcio.timeutils import timestamp_now, pad_timestamp, PAD_14_DOWN
from pywb.warcserver.http import default_adapter
from pywb.warcserver.http import DefaultAdapters
from pywb.warcserver.index.cdxobject import CDXObject
from pywb.utils.format import ParamFormatter, res_template
@ -32,10 +32,12 @@ class BaseIndexSource(object):
else:
return None
def _init_sesh(self):
def _init_sesh(self, adapter=None):
if not adapter:
adapter = DefaultAdapters.remote_adapter
self.sesh = requests.Session()
self.sesh.mount('http://', default_adapter)
self.sesh.mount('https://', default_adapter)
self.sesh.mount('http://', adapter)
self.sesh.mount('https://', adapter)
#=============================================================================
@ -193,7 +195,7 @@ class RemoteIndexSource(BaseIndexSource):
class LiveIndexSource(BaseIndexSource):
def __init__(self, proxy_url='{url}'):
self.proxy_url = proxy_url
self._init_sesh()
self._init_sesh(DefaultAdapters.live_adapter)
def load_index(self, params):
# no fuzzy match for live resources

View File

@ -14,7 +14,7 @@ from pywb.utils.format import ParamFormatter
from pywb.warcserver.resource.resolvingloader import ResolvingLoader
from pywb.warcserver.resource.pathresolvers import DefaultResolverMixin
from pywb.warcserver.http import default_adapter
from pywb.warcserver.http import DefaultAdapters
from six.moves.urllib.parse import urlsplit, quote, unquote
@ -237,12 +237,6 @@ class LiveWebLoader(BaseLoader):
def __init__(self, forward_proxy_prefix=None, adapter=None):
self.forward_proxy_prefix = forward_proxy_prefix
if not adapter:
adapter = default_adapter
self.pool = adapter.poolmanager
self.max_retries = adapter.max_retries
def load_resource(self, cdx, params):
load_url = cdx.get('load_url')
if not load_url:
@ -407,7 +401,8 @@ class LiveWebLoader(BaseLoader):
data, req_headers, params, cdx):
upstream_res = self._do_request(method, load_url,
data, req_headers, params)
data, req_headers, params,
cdx.get('is_live'))
if cdx.get('is_live'):
return upstream_res
@ -428,23 +423,28 @@ class LiveWebLoader(BaseLoader):
raise
load_url = location
upstream_res = self._do_request(method, load_url, data, req_headers, params)
upstream_res = self._do_request(method, load_url, data,
req_headers, params, cdx.get('is_live'))
self_redir_count += 1
return upstream_res
def _do_request(self, method, load_url, data, req_headers, params):
def _do_request(self, method, load_url, data, req_headers, params, is_live):
adapter = DefaultAdapters.live_adapter if is_live else DefaultAdapters.remote_adapter
pool = adapter.poolmanager
max_retries = adapter.max_retries
try:
upstream_res = self.pool.urlopen(method=method,
url=load_url,
body=data,
headers=req_headers,
redirect=False,
assert_same_host=False,
preload_content=False,
decode_content=False,
retries=self.max_retries,
timeout=params.get('_timeout'))
upstream_res = pool.urlopen(method=method,
url=load_url,
body=data,
headers=req_headers,
redirect=False,
assert_same_host=False,
preload_content=False,
decode_content=False,
retries=max_retries,
timeout=params.get('_timeout'))
return upstream_res

View File

@ -53,7 +53,7 @@ class TestWarcServer(TempDirTests, BaseTestClass):
assert len(self.loader.list_fixed_routes()) == 13
def test_list_dynamic(self):
assert self.loader.list_dynamic_routes() == ['auto1', 'auto2']
assert set(self.loader.list_dynamic_routes()) == set(['auto1', 'auto2'])
def test_remote_cdx(self):
sources = self._get_sources('ait')