1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00
pywb/run-tests.py
Ilya Kreymer e4f409b2a4 simplify pywb_init config:
- add defaults dictionary, chain dictionaries rather than copying
 - allow custom classes to be loaded explicitly via yaml
 - for LineReader, assume ungzipped if first decompress fails
 - properly ignore bad local paths
 - add optional reporter object
2014-02-11 14:10:40 -08:00

170 lines
6.5 KiB
Python

import webtest
import pywb.pywb_init
from pywb.indexreader import CDXCaptureResult
class TestWb:
    """Integration tests that drive the pywb WSGI application via ``webtest``.

    Each test issues requests against an app built from ``TEST_CONFIG`` and
    asserts on status codes, content types, redirect targets and fragments
    of the response body.

    NOTE(review): ``setup`` is the nose-style per-test hook name; confirm
    that the test runner in use still invokes it before every test.
    """

    TEST_CONFIG = 'test_config.yaml'

    def setup(self):
        """Create the WSGI app from ``TEST_CONFIG`` and wrap it in a TestApp."""
        import pywb.wbapp

        config = pywb.pywb_init.pywb_config(self.TEST_CONFIG)
        self.app = pywb.wbapp.create_wb_app(config)
        self.testapp = webtest.TestApp(self.app)

    def _assert_basic_response(self, resp, content_type):
        """Assert a 200 response with the given content type and a body."""
        assert resp.status_int == 200
        assert resp.content_type == content_type
        assert resp.content_length > 0

    def _assert_basic_html(self, resp):
        """Assert that ``resp`` is a non-empty 200 ``text/html`` response."""
        self._assert_basic_response(resp, 'text/html')

    def _assert_basic_text(self, resp):
        """Assert that ``resp`` is a non-empty 200 ``text/plain`` response."""
        self._assert_basic_response(resp, 'text/plain')

    def test_home(self):
        """The root page is HTML and mentions the ``/pywb`` route."""
        resp = self.testapp.get('/')
        self._assert_basic_html(resp)
        assert '/pywb' in resp.body

    def test_pywb_root(self):
        """The collection root page is HTML and contains 'Search'."""
        resp = self.testapp.get('/pywb/')
        self._assert_basic_html(resp)
        assert 'Search' in resp.body

    def test_calendar_query(self):
        """A ``*`` query renders one table row per capture plus a header."""
        resp = self.testapp.get('/pywb/*/iana.org')
        self._assert_basic_html(resp)

        # 3 captures + header
        assert len(resp.html.find_all('tr')) == 4

    def test_calendar_query_filtered(self):
        """The same query returns fewer rows on the filtered collection."""
        # unfiltered collection
        resp = self.testapp.get(
            '/pywb/*/http://www.iana.org/_css/2013.1/screen.css')
        self._assert_basic_html(resp)

        # 17 captures + header
        assert len(resp.html.find_all('tr')) == 18

        # filtered collection
        resp = self.testapp.get(
            '/pywb-filt/*/http://www.iana.org/_css/2013.1/screen.css')
        self._assert_basic_html(resp)

        # 1 capture (filtered) + header
        assert len(resp.html.find_all('tr')) == 2

    def test_cdx_query(self):
        """The ``cdx_`` modifier returns plain-text CDX lines."""
        resp = self.testapp.get('/pywb/cdx_/*/http://www.iana.org/')
        self._assert_basic_text(resp)

        assert '20140127171238 http://www.iana.org/ warc/revisit - OSSAPWJ23L56IYVRW3GFEAR4MCJMGPTB' in resp

        # check for 3 cdx lines (strip final newline)
        actual_len = len(str(resp.body).rstrip().split('\n'))
        assert actual_len == 3, actual_len

    def test_replay_1(self):
        """Replaying an exact timestamp yields a rewritten HTML page."""
        resp = self.testapp.get('/pywb/20140127171238/http://www.iana.org/')
        self._assert_basic_html(resp)

        assert 'Mon, Jan 27 2014 17:12:38' in resp.body
        assert 'wb.js' in resp.body
        assert '/pywb/20140127171238/http://www.iana.org/time-zones' in resp.body

    def test_redirect_1(self):
        """A timestamp one second off redirects to the 17:12:38 capture URL."""
        resp = self.testapp.get('/pywb/20140127171237/http://www.iana.org/')
        assert resp.status_int == 302

        location = resp.headers['Location']
        assert location.endswith('/pywb/20140127171238/http://iana.org')

    def test_redirect_replay_2(self):
        """A request without a timestamp redirects, then replays as HTML."""
        resp = self.testapp.get('/pywb/http://example.com/')
        assert resp.status_int == 302

        location = resp.headers['Location']
        assert location.endswith('/20140127171251/http://example.com')

        # follow the redirect and check the replayed page
        resp = resp.follow()
        self._assert_basic_html(resp)
        assert 'Mon, Jan 27 2014 17:12:51' in resp.body
        assert '/pywb/20140127171251/http://www.iana.org/domains/example' in resp.body

    def test_redirect_relative_3(self):
        """Relative requests are redirected using the Referer header."""
        # first two requests should result in same redirect
        target = 'http://localhost:8080/pywb/2014/http://iana.org/_css/2013.1/screen.css'
        referer = [('Referer', 'http://localhost:8080/pywb/2014/http://iana.org/')]

        # without timestamp
        resp = self.testapp.get('/_css/2013.1/screen.css', headers=referer)
        assert resp.status_int == 302
        assert resp.headers['Location'] == target, resp.headers['Location']

        # with timestamp
        resp = self.testapp.get('/2014/_css/2013.1/screen.css',
                                headers=referer)
        assert resp.status_int == 302
        assert resp.headers['Location'] == target, resp.headers['Location']

        # the partial timestamp redirects again, to the full capture timestamp
        resp = resp.follow()
        assert resp.status_int == 302
        assert resp.headers['Location'].endswith(
            '/pywb/20140127171239/http://www.iana.org/_css/2013.1/screen.css')

        resp = resp.follow()
        assert resp.status_int == 200
        assert resp.content_type == 'text/css'

    def test_static_content(self):
        """A file under the static route is served as non-empty CSS."""
        resp = self.testapp.get('/static/test/route/wb.css')
        assert resp.status_int == 200
        assert resp.content_type == 'text/css'
        assert resp.content_length > 0

    def test_proxy_replay(self):
        """Replay through the proxy path.

        'Simulating' proxy by setting REQUEST_URI explicitly to an http://
        url with no SCRIPT_NAME; would be nice to be able to test proxy more.
        """
        proxy_environ = dict(
            REQUEST_URI='http://www.iana.org/domains/idn-tables',
            SCRIPT_NAME='',
        )
        resp = self.testapp.get('/x-ignore-this-x',
                                extra_environ=proxy_environ)
        self._assert_basic_html(resp)

        assert 'Sun, Jan 26 2014 20:11:27' in resp.body
        assert 'wb.js' in resp.body

    def test_proxy_pac(self):
        """The PAC file names the proxy using the request's server host/port."""
        server_environ = dict(SERVER_NAME='pywb-proxy', SERVER_PORT='8080')
        resp = self.testapp.get('/proxy.pac', extra_environ=server_environ)

        assert resp.content_type == 'application/x-ns-proxy-autoconfig'
        assert '"PROXY pywb-proxy:8080"' in resp.body
        assert '"localhost"' in resp.body

    def test_cdx_server_filters(self):
        """Two ``filter`` params together narrow the result to one CDX line."""
        resp = self.testapp.get(
            '/pywb-cdx?url=http://www.iana.org/_css/2013.1/screen.css'
            '&filter=mimetype:warc/revisit&filter=filename:dupes.warc.gz')
        self._assert_basic_text(resp)

        actual_len = len(resp.body.rstrip().split('\n'))
        assert actual_len == 1, actual_len

    def test_cdx_server_advanced(self):
        """Combine collapsing, reversing and revisit resolving."""
        resp = self.testapp.get(
            '/pywb-cdx?url=http://www.iana.org/_css/2013.1/print.css'
            '&collapse_time=11&resolve_revisits=true&reverse=true')

        # convert back to CDXCaptureResult; built as real lists (not map())
        # so len() and the list comparisons below do not depend on map()
        # returning a list
        cdxs = [CDXCaptureResult(line)
                for line in resp.body.rstrip().split('\n')]
        assert len(cdxs) == 3, len(cdxs)

        # verify timestamps
        timestamps = [cdx['timestamp'] for cdx in cdxs]
        assert timestamps == ['20140127171239', '20140126201054', '20140126200625']

        # verify orig filenames (2 revisits, one non)
        origfilenames = [cdx['orig.filename'] for cdx in cdxs]
        assert origfilenames == ['iana.warc.gz', 'iana.warc.gz', '-']

    def test_error(self):
        """A malformed request url yields a 400 with an explanatory message."""
        # status=400 is passed so the request itself is made expecting a 400
        resp = self.testapp.get('/pywb/?abc', status=400)
        assert resp.status_int == 400
        assert 'Bad Request Url: http://?abc' in resp.body
def print_reporter(wbrequest, cdx, response):
    """Reporter callback for replay view: print the request and the cdx.

    :param wbrequest: request object, written to stdout as-is.
    :param cdx: cdx entry, written to stdout as-is.
    :param response: accepted as part of the callback signature; not used.
    """
    # Single-argument print(...) emits the same output whether ``print`` is
    # a statement or a function, so this form is valid on Python 2 and 3.
    print(wbrequest)
    print(cdx)