from pytest import raises import webtest import base64 from pywb.webapp.pywb_init import create_wb_router from pywb.framework.wsgi_wrappers import init_app from pywb.cdx.cdxobject import CDXObject from pywb.utils.timeutils import timestamp_now class TestWb: TEST_CONFIG = 'tests/test_config.yaml' def setup(self): #self.app = pywb.wbapp.create_wb_app(pywb.pywb_init.pywb_config()) # save it in self - useful for debugging self.app = init_app(create_wb_router, load_yaml=True, config_file=self.TEST_CONFIG) #self.router = pywb_config(self.TEST_CONFIG) #self.app = create_wb_app(self.router) self.testapp = webtest.TestApp(self.app) def _assert_basic_html(self, resp): assert resp.status_int == 200 assert resp.content_type == 'text/html' assert resp.content_length > 0 def _assert_basic_text(self, resp): assert resp.status_int == 200 assert resp.content_type == 'text/plain' assert resp.content_length > 0 def test_home(self): resp = self.testapp.get('/') self._assert_basic_html(resp) assert '/pywb' in resp.body def test_pywb_root(self): resp = self.testapp.get('/pywb/') self._assert_basic_html(resp) assert 'Search' in resp.body def test_pywb_root_head(self): resp = self.testapp.head('/pywb/') assert resp.content_type == 'text/html' assert resp.status_int == 200 def test_pywb_invalid_path(self): resp = self.testapp.head('/blah/', status=404) assert resp.content_type == 'text/html' assert resp.status_int == 404 def test_calendar_query(self): resp = self.testapp.get('/pywb/*/iana.org') self._assert_basic_html(resp) # 3 Captures + header assert len(resp.html.find_all('tr')) == 4 def test_calendar_query_filtered(self): # unfiltered collection resp = self.testapp.get('/pywb/*/http://www.iana.org/_css/2013.1/screen.css') self._assert_basic_html(resp) # 17 Captures + header assert len(resp.html.find_all('tr')) == 18 # filtered collection resp = self.testapp.get('/pywb-filt/*/http://www.iana.org/_css/2013.1/screen.css') self._assert_basic_html(resp) # 1 Capture (filtered) + header assert len(resp.html.find_all('tr')) == 2 def test_calendar_query_fuzzy_match(self): # fuzzy match removing _= according to standard rules.yaml resp = self.testapp.get('/pywb/*/http://www.iana.org/_css/2013.1/screen.css?_=3141592653') self._assert_basic_html(resp) # 17 Captures + header assert len(resp.html.find_all('tr')) == 18 def test_calendar_not_found(self): # query with no results resp = self.testapp.get('/pywb/*/http://not-exist.example.com') self._assert_basic_html(resp) assert 'No captures found' in resp.body, resp.body assert len(resp.html.find_all('tr')) == 0 def test_cdx_query(self): resp = self.testapp.get('/pywb/cdx_/*/http://www.iana.org/') self._assert_basic_text(resp) assert '20140127171238 http://www.iana.org/ warc/revisit - OSSAPWJ23L56IYVRW3GFEAR4MCJMGPTB' in resp # check for 3 cdx lines (strip final newline) actual_len = len(str(resp.body).rstrip().split('\n')) assert actual_len == 3, actual_len def test_replay_top_frame(self): resp = self.testapp.get('/pywb/20140127171238tf_/http://www.iana.org/') assert '