from .base_config_test import BaseConfigTest from pywb.cdx.cdxobject import CDXObject # ============================================================================ class TestWbIntegration(BaseConfigTest): @classmethod def setup_class(cls): super(TestWbIntegration, cls).setup_class('config_test.yaml') def _assert_basic_html(self, resp): assert resp.status_int == 200 assert resp.content_type == 'text/html' assert resp.content_length > 0 def _assert_basic_text(self, resp): assert resp.status_int == 200 assert resp.content_type == 'text/plain' assert resp.content_length > 0 def test_home(self): resp = self.testapp.get('/') self._assert_basic_html(resp) assert '/pywb' in resp.text def test_pywb_root(self): resp = self.testapp.get('/pywb/') self._assert_basic_html(resp) assert 'Search' in resp.text def test_pywb_root_head(self): resp = self.testapp.head('/pywb/') assert resp.content_type == 'text/html' assert resp.status_int == 200 def test_pywb_invalid_path(self): resp = self.testapp.head('/blah/', status=404) assert resp.content_type == 'text/html' assert resp.status_int == 404 def test_calendar_query(self): resp = self.testapp.get('/pywb/*/iana.org') self._assert_basic_html(resp) # 3 Captures + header assert len(resp.html.find_all('tr')) == 4 def test_calendar_query_2(self): # unfiltered collection resp = self.testapp.get('/pywb/*/http://www.iana.org/_css/2013.1/screen.css') self._assert_basic_html(resp) # 17 Captures + header assert len(resp.html.find_all('tr')) == 18 # filtered collection #resp = self.testapp.get('/pywb-filt/*/http://www.iana.org/_css/2013.1/screen.css') #self._assert_basic_html(resp) # 1 Capture (filtered) + header #assert len(resp.html.find_all('tr')) == 2 def test_calendar_query_fuzzy_match(self): # fuzzy match removing _= according to standard rules.yaml resp = self.testapp.get('/pywb/*/http://www.iana.org/_css/2013.1/screen.css?_=3141592653') self._assert_basic_html(resp) # 17 Captures + header assert len(resp.html.find_all('tr')) == 18 def test_calendar_not_found(self): # query with no results resp = self.testapp.get('/pywb/*/http://not-exist.example.com') self._assert_basic_html(resp) assert 'No captures found' in resp.text, resp.text assert len(resp.html.find_all('tr')) == 0 def _test_cdx_query(self): resp = self.testapp.get('/pywb/cdx_/*/http://www.iana.org/') self._assert_basic_text(resp) assert '20140127171238 http://www.iana.org/ warc/revisit - OSSAPWJ23L56IYVRW3GFEAR4MCJMGPTB' in resp # check for 3 cdx lines (strip final newline) actual_len = len(str(resp.text).rstrip().split('\n')) assert actual_len == 3, actual_len def test_replay_top_frame(self): resp = self.testapp.get('/pywb/20140127171238/http://www.iana.org/') assert '