1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-20 10:49:11 +01:00
pywb/webagg/test/test_upstream.py

75 lines
2.7 KiB
Python

import webtest
from io import BytesIO
from webagg.app import ResAggApp
import requests
from webagg.handlers import DefaultResourceHandler
from webagg.aggregator import SimpleAggregator
from webagg.proxyindexsource import ProxyMementoIndexSource, UpstreamAggIndexSource
from pywb.warc.recordloader import ArcWarcRecordLoader
from .testutils import LiveServerTests, BaseTestClass
class TestUpstream(LiveServerTests, BaseTestClass):
def setup(self):
app = ResAggApp()
base_url = 'http://localhost:{0}'.format(self.server.port)
app.add_route('/upstream',
DefaultResourceHandler(SimpleAggregator(
{'upstream': UpstreamAggIndexSource(base_url + '/live')})
)
)
app.add_route('/upstream_opt',
DefaultResourceHandler(SimpleAggregator(
{'upstream_opt': ProxyMementoIndexSource.upstream_resource(base_url + '/live')})
)
)
self.base_url = base_url
self.testapp = webtest.TestApp(app)
def test_live_paths(self):
res = requests.get(self.base_url + '/')
assert set(res.json().keys()) == {'/live/postreq', '/live'}
def test_upstream_paths(self):
res = self.testapp.get('/')
assert set(res.json.keys()) == {'/upstream/postreq', '/upstream', '/upstream_opt', '/upstream_opt/postreq'}
def test_live_1(self):
resp = requests.get(self.base_url + '/live/resource?url=http://httpbin.org/get', stream=True)
assert resp.headers['WebAgg-Source-Coll'] == 'live'
record = ArcWarcRecordLoader().parse_record_stream(resp.raw, no_record_parse=False)
assert record.rec_headers.get_header('WARC-Target-URI') == 'http://httpbin.org/get'
assert record.status_headers.get_header('Date') != ''
def test_upstream_1(self):
resp = self.testapp.get('/upstream/resource?url=http://httpbin.org/get')
assert resp.headers['WebAgg-Source-Coll'] == 'upstream:live'
raw = BytesIO(resp.body)
record = ArcWarcRecordLoader().parse_record_stream(raw, no_record_parse=False)
assert record.rec_headers.get_header('WARC-Target-URI') == 'http://httpbin.org/get'
assert record.status_headers.get_header('Date') != ''
def test_upstream_2(self):
resp = self.testapp.get('/upstream_opt/resource?url=http://httpbin.org/get')
assert resp.headers['WebAgg-Source-Coll'] == 'upstream_opt:live', resp.headers
raw = BytesIO(resp.body)
record = ArcWarcRecordLoader().parse_record_stream(raw, no_record_parse=False)
assert record.rec_headers.get_header('WARC-Target-URI') == 'http://httpbin.org/get'
assert record.status_headers.get_header('Date') != ''