mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
Added an 'upstream' handler for connecting to another webagg when 'upstream_url' is set
Output 'is_live' as a string in the live index
This commit is contained in:
parent
20ebccc13e
commit
0823ff4bd0
@ -113,7 +113,7 @@ class TestResAgg(object):
|
|||||||
|
|
||||||
res = to_json_list(resp.text)
|
res = to_json_list(resp.text)
|
||||||
res[0]['timestamp'] = '2016'
|
res[0]['timestamp'] = '2016'
|
||||||
assert(res == [{'url': 'http://httpbin.org/get', 'urlkey': 'org,httpbin)/get', 'is_live': True,
|
assert(res == [{'url': 'http://httpbin.org/get', 'urlkey': 'org,httpbin)/get', 'is_live': 'true',
|
||||||
'load_url': 'http://httpbin.org/get', 'source': 'live', 'timestamp': '2016'}])
|
'load_url': 'http://httpbin.org/get', 'source': 'live', 'timestamp': '2016'}])
|
||||||
|
|
||||||
def test_live_resource(self):
|
def test_live_resource(self):
|
||||||
|
@ -46,6 +46,9 @@ def list_routes():
|
|||||||
|
|
||||||
#=============================================================================
|
#=============================================================================
|
||||||
def err_handler(exc):
|
def err_handler(exc):
|
||||||
|
if bottle.debug:
|
||||||
|
print(exc)
|
||||||
|
traceback.print_exc()
|
||||||
response.status = exc.status_code
|
response.status = exc.status_code
|
||||||
response.content_type = JSON_CT
|
response.content_type = JSON_CT
|
||||||
err_msg = json.dumps({'message': exc.body})
|
err_msg = json.dumps({'message': exc.body})
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from webagg.responseloader import WARCPathLoader, LiveWebLoader
|
from webagg.responseloader import WARCPathLoader, LiveWebLoader, UpstreamProxyLoader
|
||||||
from webagg.utils import MementoUtils
|
from webagg.utils import MementoUtils
|
||||||
from pywb.utils.wbexception import BadRequestException, WbException
|
from pywb.utils.wbexception import BadRequestException, WbException
|
||||||
from pywb.utils.wbexception import NotFoundException
|
from pywb.utils.wbexception import NotFoundException
|
||||||
@ -118,7 +118,8 @@ class ResourceHandler(IndexHandler):
|
|||||||
class DefaultResourceHandler(ResourceHandler):
|
class DefaultResourceHandler(ResourceHandler):
|
||||||
def __init__(self, index_source, warc_paths=''):
|
def __init__(self, index_source, warc_paths=''):
|
||||||
loaders = [WARCPathLoader(warc_paths, index_source),
|
loaders = [WARCPathLoader(warc_paths, index_source),
|
||||||
LiveWebLoader()
|
UpstreamProxyLoader(),
|
||||||
|
LiveWebLoader(),
|
||||||
]
|
]
|
||||||
super(DefaultResourceHandler, self).__init__(index_source, loaders)
|
super(DefaultResourceHandler, self).__init__(index_source, loaders)
|
||||||
|
|
||||||
|
@ -51,9 +51,10 @@ class FileIndexSource(BaseIndexSource):
|
|||||||
|
|
||||||
#=============================================================================
|
#=============================================================================
|
||||||
class RemoteIndexSource(BaseIndexSource):
|
class RemoteIndexSource(BaseIndexSource):
|
||||||
def __init__(self, api_url, replay_url):
|
def __init__(self, api_url, replay_url, url_field='load_url'):
|
||||||
self.api_url_template = api_url
|
self.api_url_template = api_url
|
||||||
self.replay_url = replay_url
|
self.replay_url = replay_url
|
||||||
|
self.url_field = url_field
|
||||||
|
|
||||||
def load_index(self, params):
|
def load_index(self, params):
|
||||||
api_url = res_template(self.api_url_template, params)
|
api_url = res_template(self.api_url_template, params)
|
||||||
@ -65,13 +66,19 @@ class RemoteIndexSource(BaseIndexSource):
|
|||||||
def do_load(lines):
|
def do_load(lines):
|
||||||
for line in lines:
|
for line in lines:
|
||||||
cdx = CDXObject(line)
|
cdx = CDXObject(line)
|
||||||
cdx['load_url'] = self.replay_url.format(
|
cdx[self.url_field] = self.replay_url.format(
|
||||||
timestamp=cdx['timestamp'],
|
timestamp=cdx['timestamp'],
|
||||||
url=cdx['url'])
|
url=cdx['url'])
|
||||||
yield cdx
|
yield cdx
|
||||||
|
|
||||||
return do_load(lines)
|
return do_load(lines)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def upstream_webagg(base_url):
|
||||||
|
api_url = base_url + '/index?url={url}'
|
||||||
|
proxy_url = base_url + '/resource?url={url}&closest={timestamp}'
|
||||||
|
return RemoteIndexSource(api_url, proxy_url, 'upstream_url')
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return 'remote'
|
return 'remote'
|
||||||
|
|
||||||
@ -84,7 +91,7 @@ class LiveIndexSource(BaseIndexSource):
|
|||||||
cdx['timestamp'] = timestamp_now()
|
cdx['timestamp'] = timestamp_now()
|
||||||
cdx['url'] = params['url']
|
cdx['url'] = params['url']
|
||||||
cdx['load_url'] = params['url']
|
cdx['load_url'] = params['url']
|
||||||
cdx['is_live'] = True
|
cdx['is_live'] = 'true'
|
||||||
def live():
|
def live():
|
||||||
yield cdx
|
yield cdx
|
||||||
|
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
from webagg.liverec import BaseRecorder
|
from webagg.liverec import BaseRecorder
|
||||||
from webagg.liverec import request as remote_request
|
from webagg.liverec import request as remote_request
|
||||||
|
from requests import request
|
||||||
|
|
||||||
from webagg.utils import MementoUtils
|
from webagg.utils import MementoUtils
|
||||||
|
|
||||||
@ -159,6 +160,40 @@ class HeaderRecorder(BaseRecorder):
|
|||||||
self.target_ip = ip[0]
|
self.target_ip = ip[0]
|
||||||
|
|
||||||
|
|
||||||
|
#=============================================================================
|
||||||
|
class UpstreamProxyLoader(BaseLoader):
|
||||||
|
def _load_resource(self, cdx, params):
|
||||||
|
load_url = cdx.get('upstream_url')
|
||||||
|
if not load_url:
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
input_req = params['_input_req']
|
||||||
|
|
||||||
|
method = input_req.get_req_method()
|
||||||
|
data = input_req.get_req_body()
|
||||||
|
req_headers = input_req.get_req_headers()
|
||||||
|
|
||||||
|
try:
|
||||||
|
upstream_res = request(url=load_url,
|
||||||
|
method=method,
|
||||||
|
stream=True,
|
||||||
|
allow_redirects=False,
|
||||||
|
headers=req_headers,
|
||||||
|
data=data,
|
||||||
|
timeout=params.get('_timeout'))
|
||||||
|
except Exception as e:
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc()
|
||||||
|
raise LiveResourceException(load_url)
|
||||||
|
|
||||||
|
out_headers = upstream_res.headers
|
||||||
|
|
||||||
|
return out_headers, StreamIter(upstream_res.raw)
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return 'UpstreamProxyLoader'
|
||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
#=============================================================================
|
||||||
class LiveWebLoader(BaseLoader):
|
class LiveWebLoader(BaseLoader):
|
||||||
SKIP_HEADERS = (b'link',
|
SKIP_HEADERS = (b'link',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user