import webtest

import pywb.pywb_init
from pywb.indexreader import CDXCaptureResult


class TestWb:
    """Integration tests that drive the pywb WSGI app through WebTest.

    The application is built from ``TEST_CONFIG`` before each test and
    exercised via ``self.testapp``.
    """

    TEST_CONFIG = 'test_config.yaml'

    def setup(self):
        """Build the WSGI app from the test config and wrap it in a TestApp."""
        import pywb.wbapp

        config = pywb.pywb_init.pywb_config(self.TEST_CONFIG)
        self.app = pywb.wbapp.create_wb_app(config)
        self.testapp = webtest.TestApp(self.app)

    def _assert_basic_html(self, resp):
        """Assert a non-empty ``200`` response with content type text/html."""
        assert resp.status_int == 200
        assert resp.content_type == 'text/html'
        assert resp.content_length > 0

    def _assert_basic_text(self, resp):
        """Assert a non-empty ``200`` response with content type text/plain."""
        assert resp.status_int == 200
        assert resp.content_type == 'text/plain'
        assert resp.content_length > 0

    @staticmethod
    def _body_lines(resp):
        """Return the response body as a list of lines.

        Trailing whitespace (including the final newline) is stripped first
        so it does not produce a spurious empty last entry.
        """
        return resp.body.rstrip().split('\n')

    def test_home(self):
        """The root page is HTML and references ``/pywb``."""
        resp = self.testapp.get('/')
        self._assert_basic_html(resp)
        assert '/pywb' in resp.body

    def test_pywb_root(self):
        """The ``/pywb/`` page is HTML and contains the text 'Search'."""
        resp = self.testapp.get('/pywb/')
        self._assert_basic_html(resp)
        assert 'Search' in resp.body

    def test_calendar_query(self):
        """A ``*`` query for iana.org renders one table row per capture."""
        resp = self.testapp.get('/pywb/*/iana.org')
        self._assert_basic_html(resp)
        # 3 Captures + header
        assert len(resp.html.find_all('tr')) == 4

    def test_calendar_query_filtered(self):
        """The filtered collection lists fewer captures than the unfiltered one."""
        # unfiltered collection
        resp = self.testapp.get(
            '/pywb/*/http://www.iana.org/_css/2013.1/screen.css')
        self._assert_basic_html(resp)
        # 17 Captures + header
        assert len(resp.html.find_all('tr')) == 18

        # filtered collection
        resp = self.testapp.get(
            '/pywb-filt/*/http://www.iana.org/_css/2013.1/screen.css')
        self._assert_basic_html(resp)
        # 1 Capture (filtered) + header
        assert len(resp.html.find_all('tr')) == 2

    def test_cdx_query(self):
        """The ``cdx_`` query returns plain-text cdx lines for the url."""
        resp = self.testapp.get('/pywb/cdx_/*/http://www.iana.org/')
        self._assert_basic_text(resp)

        assert ('20140127171238 http://www.iana.org/ warc/revisit - '
                'OSSAPWJ23L56IYVRW3GFEAR4MCJMGPTB') in resp
        # check for 3 cdx lines (strip final newline)
        actual_len = len(self._body_lines(resp))
        assert actual_len == 3, actual_len

    def test_replay_1(self):
        """Replaying an exact capture yields HTML with rewritten links."""
        resp = self.testapp.get('/pywb/20140127171238/http://www.iana.org/')
        self._assert_basic_html(resp)

        assert 'Mon, Jan 27 2014 17:12:38' in resp.body
        assert 'wb.js' in resp.body
        assert ('/pywb/20140127171238/http://www.iana.org/time-zones'
                in resp.body)

    def test_redirect_1(self):
        """A request with a different timestamp gets a 302 to another capture url."""
        resp = self.testapp.get('/pywb/20140127171237/http://www.iana.org/')
        assert resp.status_int == 302

        location = resp.headers['Location']
        assert location.endswith('/pywb/20140127171238/http://iana.org')

    def test_redirect_replay_2(self):
        """A timestamp-less request redirects, and the target replays as HTML."""
        resp = self.testapp.get('/pywb/http://example.com/')
        assert resp.status_int == 302

        location = resp.headers['Location']
        assert location.endswith('/20140127171251/http://example.com')
        resp = resp.follow()

        # check the response reached by following the redirect
        self._assert_basic_html(resp)
        assert 'Mon, Jan 27 2014 17:12:51' in resp.body
        assert ('/pywb/20140127171251/http://www.iana.org/domains/example'
                in resp.body)

    def test_static_content(self):
        """Static files are served with their own content type."""
        resp = self.testapp.get('/test-static/wb.css')
        assert resp.status_int == 200
        assert resp.content_type == 'text/css'
        assert resp.content_length > 0

    # XX: Doesn't work as webtest does not support proxy mode
    # need a way to test
    #def test_proxy_replay(self):
        #resp = self.testapp.get('http://www.iana.org/domains/idn-tables')
        #self._assert_basic_html(resp)

        #assert 'Sun, Jan 26 2014 20:11:27' in resp.body
        #assert 'wb.js' in resp.body

    def test_cdx_server_filters(self):
        """Combining two ``filter`` params narrows the result to one cdx line."""
        resp = self.testapp.get(
            '/pywb-cdx?url=http://www.iana.org/_css/2013.1/screen.css'
            '&filter=mimetype:warc/revisit&filter=filename:dupes.warc.gz')
        self._assert_basic_text(resp)

        actual_len = len(self._body_lines(resp))
        assert actual_len == 1, actual_len

    def test_cdx_server_advanced(self):
        """Collapsing, reversing and revisit resolving can be combined."""
        # combine collapsing, reversing and revisit resolving
        resp = self.testapp.get(
            '/pywb-cdx?url=http://www.iana.org/_css/2013.1/print.css'
            '&collapse_time=11&resolve_revisits=true&reverse=true')

        # convert back to CDXCaptureResult
        # (a real list, so len() and list comparison work even where map()
        # would return a lazy iterator)
        cdxs = [CDXCaptureResult(line) for line in self._body_lines(resp)]
        assert len(cdxs) == 3, len(cdxs)

        # verify timestamps
        timestamps = [cdx['timestamp'] for cdx in cdxs]
        assert timestamps == ['20140127171239',
                              '20140126201054',
                              '20140126200625']

        # verify orig filenames (2 revisits, one non)
        origfilenames = [cdx['orig.filename'] for cdx in cdxs]
        assert origfilenames == ['iana.warc.gz', 'iana.warc.gz', '-']

    def test_error(self):
        """A malformed request url produces a 400 with an explanatory message."""
        # status=400 tells WebTest that the error status is expected
        resp = self.testapp.get('/pywb/?abc', status=400)
        assert resp.status_int == 400
        assert 'Bad Request Url: http://?abc' in resp.body