# Mirror of https://github.com/webrecorder/pywb.git
# (synced 2025-03-23 06:32:24 +01:00; 114 lines, 3.5 KiB, Python)
from webagg.app import ResAggApp
|
|
|
|
import webtest
|
|
import threading
|
|
|
|
from io import BytesIO
|
|
import requests
|
|
|
|
from webagg.handlers import DefaultResourceHandler
|
|
from webagg.indexsource import LiveIndexSource
|
|
from webagg.proxyindexsource import ProxyMementoIndexSource, UpstreamAggIndexSource
|
|
from webagg.aggregator import SimpleAggregator
|
|
|
|
from wsgiref.simple_server import make_server
|
|
|
|
from pywb.warc.recordloader import ArcWarcRecordLoader
|
|
|
|
|
|
class ServerThreadRunner(object):
    """Serve a WSGI app from a background daemon thread.

    Attributes:
        httpd: the ``wsgiref`` server returned by ``make_server``.
        port: the port number the server socket is actually bound to.
        thread: the daemon thread running ``httpd.serve_forever()``.
    """

    def __init__(self, app):
        """Bind a server for ``app`` and start serving it right away.

        Args:
            app: the WSGI application callable to serve.
        """
        # Port 0 is requested, so the real port has to be read back from
        # the bound socket.
        self.httpd = make_server('', 0, app)
        self.port = self.httpd.socket.getsockname()[1]

        self.thread = threading.Thread(target=self.httpd.serve_forever)
        # Daemon thread: it must never keep the test process alive.
        self.thread.daemon = True
        self.thread.start()

    def stop_thread(self):
        """Stop the serve loop, wait for the thread and close the socket."""
        self.httpd.shutdown()
        self.thread.join()
        # shutdown() only ends serve_forever(); the listening socket stays
        # open until server_close() is called.
        self.httpd.server_close()
|
|
|
|
|
|
# Module-wide ServerThreadRunner; assigned in setup_module() and stopped in
# teardown_module().
server = None
|
|
|
|
|
|
def setup_module():
    """Start the module-wide background server with a single '/live' route.

    The route is handled by a DefaultResourceHandler wrapping a
    SimpleAggregator whose only source is a LiveIndexSource named 'live'.
    The running server is stored in the module-level ``server``.
    """
    global server

    live_app = ResAggApp()
    live_aggregator = SimpleAggregator({'live': LiveIndexSource()})
    live_app.add_route('/live', DefaultResourceHandler(live_aggregator))

    server = ServerThreadRunner(live_app.application)
|
|
|
|
def teardown_module():
    """Stop the module-wide background server."""
    # The module-level ``server`` is only read here, so no ``global``
    # declaration is needed.
    server.stop_thread()
|
|
|
|
|
|
|
|
class TestUpstream(object):
    """Tests for an aggregator app that uses the live server as its upstream.

    Every test gets a fresh in-process app (``self.testapp``) with two
    routes, '/upstream' and '/upstream_opt', both pointing at the '/live'
    route of the module-wide background server (``self.base_url``).
    """

    def setup_method(self, method=None):
        """Build the upstream app before each test.

        Args:
            method: the test method about to run, when the runner passes it;
                unused.
        """
        app = ResAggApp()

        base_url = 'http://localhost:{0}'.format(server.port)
        live_url = base_url + '/live'

        app.add_route('/upstream',
            DefaultResourceHandler(SimpleAggregator(
                {'upstream': UpstreamAggIndexSource(live_url)})
            )
        )

        app.add_route('/upstream_opt',
            DefaultResourceHandler(SimpleAggregator(
                {'upstream_opt': ProxyMementoIndexSource.upstream_resource(live_url)})
            )
        )

        self.base_url = base_url
        self.testapp = webtest.TestApp(app.application)

    # Nose-style name kept as an alias for runners that only look for
    # ``setup``; pytest uses ``setup_method``.
    setup = setup_method

    def _assert_httpbin_record(self, stream):
        """Parse one record from ``stream`` and check it is the httpbin capture.

        Args:
            stream: a binary file-like object positioned at the start of
                the record.
        """
        record = ArcWarcRecordLoader().parse_record_stream(stream, no_record_parse=False)
        assert record.rec_headers.get_header('WARC-Target-URI') == 'http://httpbin.org/get'
        assert record.status_headers.get_header('Date') != ''

    def test_live_paths(self):
        """The background server lists only the '/live' routes."""
        res = requests.get(self.base_url + '/')
        assert set(res.json().keys()) == {'/live/postreq', '/live'}

    def test_upstream_paths(self):
        """The in-process app lists both upstream routes and their postreq variants."""
        res = self.testapp.get('/')
        assert set(res.json.keys()) == {'/upstream/postreq', '/upstream', '/upstream_opt', '/upstream_opt/postreq'}

    def test_live_1(self):
        """Fetch a resource directly from the background server's '/live' route."""
        resp = requests.get(self.base_url + '/live/resource?url=http://httpbin.org/get', stream=True)
        assert resp.headers['WebAgg-Source-Coll'] == 'live'

        # stream=True leaves the body unread so the record is parsed
        # straight from the raw response stream.
        self._assert_httpbin_record(resp.raw)

    def test_upstream_1(self):
        """Fetch the same resource through the '/upstream' route."""
        resp = self.testapp.get('/upstream/resource?url=http://httpbin.org/get')
        assert resp.headers['WebAgg-Source-Coll'] == 'upstream:live'

        self._assert_httpbin_record(BytesIO(resp.body))

    def test_upstream_2(self):
        """Fetch the same resource through the '/upstream_opt' route."""
        resp = self.testapp.get('/upstream_opt/resource?url=http://httpbin.org/get')
        assert resp.headers['WebAgg-Source-Coll'] == 'upstream_opt:live', resp.headers

        self._assert_httpbin_record(BytesIO(resp.body))
|
|
|
|
|
|
|