from .base_config_test import BaseConfigTest, fmod from .memento_fixture import * from warcio.timeutils import timestamp_to_http_date # ============================================================================ class TestMemento(MementoMixin, BaseConfigTest): @classmethod def setup_class(cls): super(TestMemento, cls).setup_class('config_test.yaml') def _assert_memento(self, resp, url, ts, fmod, dt=''): dt = dt or timestamp_to_http_date(ts) links = self.get_links(resp) assert MEMENTO_DATETIME in resp.headers assert resp.headers[MEMENTO_DATETIME] == dt # memento link memento_link = self.make_memento_link(url, ts, dt, fmod) assert memento_link in links # content location assert '/pywb/{1}{0}/{2}'.format(fmod, ts, url) in resp.headers['Content-Location'] # content location part of memento link assert resp.headers['Content-Location'] in memento_link # timegate link assert self.make_timegate_link(url, fmod) in links # timemap link assert self.make_timemap_link(url) in links # original assert self.make_original_link(url) in links # Memento Pattern 2.2 (no redirect, 200 negotiation) def test_memento_top_frame(self): resp = self.testapp.get('/pywb/20140127171238/http://www.iana.org/') # Memento Headers # no vary header assert VARY not in resp.headers assert MEMENTO_DATETIME in resp.headers # memento link dt = 'Mon, 27 Jan 2014 17:12:38 GMT' url = 'http://www.iana.org/' links = self.get_links(resp) assert self.make_memento_link(url, '20140127171238', dt, 'mp_', include_coll=False) in links #timegate link assert self.make_timegate_link(url, 'mp_') in links # Body assert '"20140127171238"' in resp.text assert '"http://www.iana.org/"' in resp.text, resp.text def test_memento_content_replay_exact(self, fmod): resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/', fmod) self._assert_memento(resp, 'http://www.iana.org/', '20140127171238', fmod) assert VARY not in resp.headers # Body assert '"20140127171238"' in resp.text assert 'wombat.js' in resp.text assert 'new _WBWombat' in resp.text, resp.text assert '/pywb/20140127171238{0}/http://www.iana.org/time-zones"'.format(fmod) in resp.text def test_memento_at_timegate_latest(self, fmod): """ TimeGate with no Accept-Datetime header """ fmod_slash = fmod + '/' if fmod else '' resp = self.get('/pywb/{0}http://www.iana.org/_css/2013.1/screen.css', fmod_slash) assert resp.headers[VARY] == 'accept-datetime' self._assert_memento(resp, 'http://www.iana.org/_css/2013.1/screen.css', '20140127171239', fmod) def test_memento_at_timegate(self, fmod): """ TimeGate with Accept-Datetime header, not matching a memento exactly, no redirect """ dt = 'Sun, 26 Jan 2014 20:08:04 GMT' request_dt = 'Sun, 26 Jan 2014 20:08:00 GMT' headers = {ACCEPT_DATETIME: request_dt} fmod_slash = fmod + '/' if fmod else '' resp = self.get('/pywb/{0}http://www.iana.org/_css/2013.1/screen.css', fmod_slash, headers=headers) assert resp.headers[VARY] == 'accept-datetime' self._assert_memento(resp, 'http://www.iana.org/_css/2013.1/screen.css', '20140126200804', fmod, dt) def test_302_memento(self, fmod): """ Memento (capture) of a 302 response """ resp = self.get('/pywb/20140128051539{0}/http://www.iana.org/domains/example', fmod) assert resp.status_int == 302 assert VARY not in resp.headers self._assert_memento(resp, 'http://www.iana.org/domains/example', '20140128051539', fmod) def test_timemap(self): """ Test application/link-format timemap """ resp = self.testapp.get('/pywb/timemap/link/http://example.com?example=1') assert resp.status_int == 200 assert resp.content_type == LINK_FORMAT resp.charset = 'utf-8' exp = """\ ; rel="self"; type="application/link-format"; from="Fri, 03 Jan 2014 03:03:21 GMT", ; rel="timegate", ; rel="original", ; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:21 GMT"; collection="pywb", ; rel="memento"; datetime="Fri, 03 Jan 2014 03:03:41 GMT"; collection="pywb" """ assert exp == resp.text def test_timemap_cdxj(self): """ Test test/x-cdxj timemap """ resp = self.testapp.get('/pywb/timemap/cdxj/http://example.com?example=1') assert resp.status_int == 200 assert resp.content_type == 'text/x-cdxj' resp.charset = 'utf-8' exp = """\ com,example)/?example=1 20140103030321 {"url": "http://example.com?example=1", "mime": "text/html", "status": "200", "digest": "B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A", "redirect": "-", "robotflags": "-", "length": "1043", "offset": "333", "filename": "example.warc.gz", "source": "pywb:example.cdx", "source-coll": "pywb"} com,example)/?example=1 20140103030341 {"url": "http://example.com?example=1", "mime": "warc/revisit", "status": "-", "digest": "B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A", "redirect": "-", "robotflags": "-", "length": "553", "offset": "1864", "filename": "example.warc.gz", "source": "pywb:example.cdx", "source-coll": "pywb"} """ assert exp == resp.text def test_timemap_2(self): """ Test application/link-format timemap total count """ resp = self.testapp.get('/pywb/timemap/link/http://example.com') assert resp.status_int == 200 assert resp.content_type == LINK_FORMAT lines = resp.text.split('\n') assert len(lines) == 7 def test_timemap_error_not_found(self): resp = self.testapp.get('/pywb/timemap/link/http://example.com/x-not-found', status=404) assert resp.body == b'' def test_timemap_error_invalid_format(self): resp = self.testapp.get('/pywb/timemap/foo/http://example.com', status=400) assert resp.json == {'message': 'output=foo not supported'} def test_error_bad_accept_datetime(self): """ 400 response for bad accept_datetime """ headers = {ACCEPT_DATETIME: 'Sun'} resp = self.testapp.get('/pywb/http://www.iana.org/_css/2013.1/screen.css', headers=headers, status=400) assert resp.status_int == 400