mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 08:04:49 +01:00
added 'upstream' handler for connecting to another webagg when 'upstream_url' is set
output 'is_live' as string in live index
This commit is contained in:
parent
20ebccc13e
commit
0823ff4bd0
@ -113,7 +113,7 @@ class TestResAgg(object):
|
||||
|
||||
res = to_json_list(resp.text)
|
||||
res[0]['timestamp'] = '2016'
|
||||
assert(res == [{'url': 'http://httpbin.org/get', 'urlkey': 'org,httpbin)/get', 'is_live': True,
|
||||
assert(res == [{'url': 'http://httpbin.org/get', 'urlkey': 'org,httpbin)/get', 'is_live': 'true',
|
||||
'load_url': 'http://httpbin.org/get', 'source': 'live', 'timestamp': '2016'}])
|
||||
|
||||
def test_live_resource(self):
|
||||
|
@ -46,6 +46,9 @@ def list_routes():
|
||||
|
||||
#=============================================================================
|
||||
def err_handler(exc):
|
||||
if bottle.debug:
|
||||
print(exc)
|
||||
traceback.print_exc()
|
||||
response.status = exc.status_code
|
||||
response.content_type = JSON_CT
|
||||
err_msg = json.dumps({'message': exc.body})
|
||||
|
@ -1,4 +1,4 @@
|
||||
from webagg.responseloader import WARCPathLoader, LiveWebLoader
|
||||
from webagg.responseloader import WARCPathLoader, LiveWebLoader, UpstreamProxyLoader
|
||||
from webagg.utils import MementoUtils
|
||||
from pywb.utils.wbexception import BadRequestException, WbException
|
||||
from pywb.utils.wbexception import NotFoundException
|
||||
@ -118,7 +118,8 @@ class ResourceHandler(IndexHandler):
|
||||
class DefaultResourceHandler(ResourceHandler):
|
||||
def __init__(self, index_source, warc_paths=''):
|
||||
loaders = [WARCPathLoader(warc_paths, index_source),
|
||||
LiveWebLoader()
|
||||
UpstreamProxyLoader(),
|
||||
LiveWebLoader(),
|
||||
]
|
||||
super(DefaultResourceHandler, self).__init__(index_source, loaders)
|
||||
|
||||
|
@ -51,9 +51,10 @@ class FileIndexSource(BaseIndexSource):
|
||||
|
||||
#=============================================================================
|
||||
class RemoteIndexSource(BaseIndexSource):
|
||||
def __init__(self, api_url, replay_url):
|
||||
def __init__(self, api_url, replay_url, url_field='load_url'):
|
||||
self.api_url_template = api_url
|
||||
self.replay_url = replay_url
|
||||
self.url_field = url_field
|
||||
|
||||
def load_index(self, params):
|
||||
api_url = res_template(self.api_url_template, params)
|
||||
@ -65,13 +66,19 @@ class RemoteIndexSource(BaseIndexSource):
|
||||
def do_load(lines):
|
||||
for line in lines:
|
||||
cdx = CDXObject(line)
|
||||
cdx['load_url'] = self.replay_url.format(
|
||||
timestamp=cdx['timestamp'],
|
||||
url=cdx['url'])
|
||||
cdx[self.url_field] = self.replay_url.format(
|
||||
timestamp=cdx['timestamp'],
|
||||
url=cdx['url'])
|
||||
yield cdx
|
||||
|
||||
return do_load(lines)
|
||||
|
||||
@staticmethod
def upstream_webagg(base_url):
    """Create a RemoteIndexSource pointed at another webagg instance.

    Two URL templates are derived from *base_url*: one under ``/index``,
    passed as the index API template, and one under ``/resource``, passed
    as the replay template. The source is configured to store the
    formatted replay URL in the ``upstream_url`` field of each index
    entry instead of the default ``load_url``.

    :param base_url: prefix of the upstream webagg; the path suffixes are
        appended to it verbatim.
    :return: a new ``RemoteIndexSource``.
    """
    index_template = base_url + '/index?url={url}'
    resource_template = base_url + '/resource?url={url}&closest={timestamp}'

    return RemoteIndexSource(index_template,
                             resource_template,
                             url_field='upstream_url')
|
||||
|
||||
def __str__(self):
|
||||
return 'remote'
|
||||
|
||||
@ -84,7 +91,7 @@ class LiveIndexSource(BaseIndexSource):
|
||||
cdx['timestamp'] = timestamp_now()
|
||||
cdx['url'] = params['url']
|
||||
cdx['load_url'] = params['url']
|
||||
cdx['is_live'] = True
|
||||
cdx['is_live'] = 'true'
|
||||
def live():
|
||||
yield cdx
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
from webagg.liverec import BaseRecorder
|
||||
from webagg.liverec import request as remote_request
|
||||
from requests import request
|
||||
|
||||
from webagg.utils import MementoUtils
|
||||
|
||||
@ -159,6 +160,40 @@ class HeaderRecorder(BaseRecorder):
|
||||
self.target_ip = ip[0]
|
||||
|
||||
|
||||
#=============================================================================
|
||||
class UpstreamProxyLoader(BaseLoader):
    """Loader that replays the incoming request against an upstream URL.

    It only acts on index entries that carry an ``upstream_url`` value;
    for every other entry ``_load_resource`` returns ``(None, None)``.
    """

    def _load_resource(self, cdx, params):
        """Fetch ``cdx['upstream_url']`` using the incoming request's data.

        :param cdx: index entry; its ``upstream_url`` value, when present
            and non-empty, is the address that gets requested.
        :param params: request parameters. ``_input_req`` is required and
            supplies the method, body and headers to send; ``_timeout`` is
            optional and is passed through as the request timeout.
        :return: ``(None, None)`` if the entry has no ``upstream_url``,
            otherwise the upstream response headers together with a
            ``StreamIter`` wrapping the raw response.
        :raises LiveResourceException: when issuing the upstream request
            raises any exception (the traceback is printed first).
        """
        target_url = cdx.get('upstream_url')
        if not target_url:
            return None, None

        incoming = params['_input_req']

        # Everything is read from the incoming request before the ``try``
        # below, so an error here propagates unchanged rather than being
        # converted into a LiveResourceException.
        http_method = incoming.get_req_method()
        body = incoming.get_req_body()
        forward_headers = incoming.get_req_headers()

        try:
            # The response is streamed and redirects are not followed.
            upstream_res = request(method=http_method,
                                   url=target_url,
                                   headers=forward_headers,
                                   data=body,
                                   timeout=params.get('_timeout'),
                                   allow_redirects=False,
                                   stream=True)
        except Exception:
            import traceback
            traceback.print_exc()
            raise LiveResourceException(target_url)

        return upstream_res.headers, StreamIter(upstream_res.raw)

    def __str__(self):
        """Return the fixed name of this loader."""
        return 'UpstreamProxyLoader'
|
||||
|
||||
|
||||
#=============================================================================
|
||||
class LiveWebLoader(BaseLoader):
|
||||
SKIP_HEADERS = (b'link',
|
||||
|
Loading…
x
Reference in New Issue
Block a user