2017-02-27 19:07:51 -08:00
|
|
|
from .base_config_test import BaseConfigTest
|
2014-04-02 13:16:54 -07:00
|
|
|
|
2017-02-27 19:07:51 -08:00
|
|
|
from pywb.cdx.cdxobject import CDXObject
|
2014-02-05 10:10:33 -08:00
|
|
|
|
2014-03-02 19:26:06 -08:00
|
|
|
|
2017-02-27 19:07:51 -08:00
|
|
|
# ============================================================================
|
|
|
|
class TestWbIntegration(BaseConfigTest):
    """Integration tests that issue HTTP requests through ``self.testapp``.

    ``setup_class`` hands ``'config_test.yaml'`` to the base class; the
    collection names used in the request paths below (``pywb``,
    ``pywb-cdxj``, ``pywb-fallback``, ``live``, ...) are presumably
    defined by that config -- confirm against the YAML file.

    Methods whose names start with ``_test_`` do not match pytest's default
    ``test*`` naming rule, so they are kept for reference but not collected.
    """

    @classmethod
    def setup_class(cls):
        """Delegate class-level setup to the base class with the test config."""
        config_file = 'config_test.yaml'
        super(TestWbIntegration, cls).setup_class(config_file)

    # ------------------------------------------------------------------
    # shared assertions
    # ------------------------------------------------------------------
    def _assert_basic_html(self, resp):
        """Assert *resp* is a non-empty ``200`` response of type ``text/html``."""
        assert resp.status_int == 200
        assert resp.content_type == 'text/html'
        assert resp.content_length > 0

    def _assert_basic_text(self, resp):
        """Assert *resp* is a non-empty ``200`` response of type ``text/plain``."""
        assert resp.status_int == 200
        assert resp.content_type == 'text/plain'
        assert resp.content_length > 0

    # ------------------------------------------------------------------
    # home / collection root
    # ------------------------------------------------------------------
    def test_home(self):
        """The root page is HTML and mentions ``/pywb``."""
        path = '/'
        resp = self.testapp.get(path)
        self._assert_basic_html(resp)
        assert '/pywb' in resp.text

    def test_pywb_root(self):
        """The ``/pywb/`` page is HTML and contains the word ``Search``."""
        path = '/pywb/'
        resp = self.testapp.get(path)
        self._assert_basic_html(resp)
        assert 'Search' in resp.text

    def test_pywb_root_head(self):
        """A HEAD request for ``/pywb/`` reports ``text/html`` with status 200."""
        path = '/pywb/'
        resp = self.testapp.head(path)
        assert resp.content_type == 'text/html'
        assert resp.status_int == 200

    def test_pywb_invalid_path(self):
        """A HEAD request for an unknown path yields an HTML 404."""
        path = '/blah/'
        resp = self.testapp.head(path, status=404)
        assert resp.content_type == 'text/html'
        assert resp.status_int == 404

    # ------------------------------------------------------------------
    # calendar (capture listing) queries
    # ------------------------------------------------------------------
    def test_calendar_query(self):
        """The ``*`` query for ``iana.org`` renders four table rows."""
        path = '/pywb/*/iana.org'
        resp = self.testapp.get(path)
        self._assert_basic_html(resp)

        # 3 Captures + header
        rows = resp.html.find_all('tr')
        assert len(rows) == 4

    def test_calendar_query_2(self):
        """The ``*`` query for ``screen.css`` renders eighteen table rows."""
        # unfiltered collection
        path = '/pywb/*/http://www.iana.org/_css/2013.1/screen.css'
        resp = self.testapp.get(path)
        self._assert_basic_html(resp)

        # 17 Captures + header
        rows = resp.html.find_all('tr')
        assert len(rows) == 18

        # filtered collection
        #resp = self.testapp.get('/pywb-filt/*/http://www.iana.org/_css/2013.1/screen.css')
        #self._assert_basic_html(resp)
        # 1 Capture (filtered) + header
        #assert len(resp.html.find_all('tr')) == 2

    def test_calendar_query_fuzzy_match(self):
        """Adding a ``?_=...`` query string still yields the same eighteen rows."""
        # fuzzy match removing _= according to standard rules.yaml
        path = '/pywb/*/http://www.iana.org/_css/2013.1/screen.css?_=3141592653'
        resp = self.testapp.get(path)
        self._assert_basic_html(resp)

        # 17 Captures + header
        rows = resp.html.find_all('tr')
        assert len(rows) == 18

    def test_calendar_not_found(self):
        """A query with no results says ``No captures found`` and has no table rows."""
        # query with no results
        path = '/pywb/*/http://not-exist.example.com'
        resp = self.testapp.get(path)
        self._assert_basic_html(resp)
        assert 'No captures found' in resp.text, resp.text

        rows = resp.html.find_all('tr')
        assert len(rows) == 0

    def _test_cdx_query(self):
        """(Disabled) The ``cdx_`` query returns three plain-text cdx lines."""
        path = '/pywb/cdx_/*/http://www.iana.org/'
        resp = self.testapp.get(path)
        self._assert_basic_text(resp)

        expected_line = '20140127171238 http://www.iana.org/ warc/revisit - OSSAPWJ23L56IYVRW3GFEAR4MCJMGPTB'
        assert expected_line in resp

        # check for 3 cdx lines (strip final newline)
        cdx_lines = str(resp.text).rstrip().split('\n')
        actual_len = len(cdx_lines)
        assert actual_len == 3, actual_len

    # ------------------------------------------------------------------
    # replay
    # ------------------------------------------------------------------
    def test_replay_top_frame(self):
        """A replay URL without a modifier returns a page with an iframe and the ``mp_`` URL."""
        path = '/pywb/20140127171238/http://www.iana.org/'
        resp = self.testapp.get(path)

        assert '<iframe ' in resp.text
        assert '/pywb/20140127171238mp_/http://www.iana.org/' in resp.text, resp.text

    def test_replay_content(self):
        """The ``mp_`` replay is HTML containing the timestamp, ``wb.js``, wombat init and a rewritten link."""
        path = '/pywb/20140127171238mp_/http://www.iana.org/'
        resp = self.testapp.get(path)
        self._assert_basic_html(resp)

        assert '"20140127171238"' in resp.text, resp.text
        assert 'wb.js' in resp.text
        assert 'new _WBWombat' in resp.text, resp.text
        assert '/pywb/20140127171238mp_/http://www.iana.org/time-zones"' in resp.text

    #def test_replay_non_frame_content(self):
    #    resp = self.testapp.get('/pywb-nonframe/20140127171238/http://www.iana.org/')
    #    self._assert_basic_html(resp)

    #    assert '"20140127171238"' in resp.text
    #    assert 'wb.js' in resp.text
    #    assert '/pywb-nonframe/20140127171238/http://www.iana.org/time-zones"' in resp.text

    def test_replay_fuzzy_1(self):
        """A ``?_=123`` replay request answers 307 with a Location lacking the query."""
        path = '/pywb/20140127171238mp_/http://www.iana.org/?_=123'
        resp = self.testapp.get(path)
        assert resp.status_int == 307

        location = resp.headers['Location']
        assert location.endswith('/pywb/20140127171238mp_/http://www.iana.org/')

    def test_replay_no_fuzzy_match(self):
        """A ``?foo=bar`` replay request answers 404."""
        path = '/pywb/20140127171238mp_/http://www.iana.org/?foo=bar'
        resp = self.testapp.get(path, status=404)
        assert resp.status_int == 404

    #def test_replay_non_surt(self):
    #    resp = self.testapp.get('/pywb-nosurt/20140103030321/http://example.com?example=1')
    #    self._assert_basic_html(resp)

    #    assert '"20140103030321"' in resp.text
    #    assert 'wb.js' in resp.text
    #    assert '/pywb-nosurt/20140103030321/http://www.iana.org/domains/example' in resp.text

    def test_no_slash_redir_1(self):
        """A host-only URL without trailing slash answers 307 to the slash form."""
        path = '/pywb/20140103030321mp_/http://example.com'
        resp = self.testapp.get(path)
        assert resp.status_int == 307

        location = resp.headers['Location']
        assert location.endswith('/pywb/20140103030321mp_/http://example.com/')

    def test_no_slash_redir_2(self):
        """Same as above, with a query string carried over into the Location."""
        path = '/pywb/20140103030321mp_/http://example.com?example=1'
        resp = self.testapp.get(path)
        assert resp.status_int == 307

        location = resp.headers['Location']
        assert location.endswith('/pywb/20140103030321mp_/http://example.com/?example=1')

    def test_replay_cdxj(self):
        """Replay from the ``pywb-cdxj`` collection returns rewritten HTML."""
        path = '/pywb-cdxj/20140103030321mp_/http://example.com/?example=1'
        resp = self.testapp.get(path)
        self._assert_basic_html(resp)

        assert '"20140103030321"' in resp.text
        assert 'wb.js' in resp.text
        assert '/pywb-cdxj/20140103030321mp_/http://www.iana.org/domains/example' in resp.text

    def test_replay_cdxj_revisit(self):
        """Replay of the ``20140103030341`` capture from ``pywb-cdxj`` returns rewritten HTML."""
        path = '/pywb-cdxj/20140103030341mp_/http://example.com/?example=1'
        resp = self.testapp.get(path)
        self._assert_basic_html(resp)

        assert '"20140103030341"' in resp.text
        assert 'wb.js' in resp.text
        assert '/pywb-cdxj/20140103030341mp_/http://www.iana.org/domains/example' in resp.text

    def test_zero_len_revisit(self):
        """Replay of the ``20140603030341`` capture returns rewritten HTML."""
        path = '/pywb/20140603030341mp_/http://example.com/?example=2'
        resp = self.testapp.get(path)
        self._assert_basic_html(resp)

        assert '"20140603030341"' in resp.text
        assert 'wb.js' in resp.text
        assert '/pywb/20140603030341mp_/http://www.iana.org/domains/example' in resp.text

    def test_replay_url_agnostic_revisit(self):
        """Replay of ``www.example.com`` at ``20130729195151`` returns rewritten HTML."""
        path = '/pywb/20130729195151mp_/http://www.example.com/'
        resp = self.testapp.get(path)
        self._assert_basic_html(resp)

        assert '"20130729195151"' in resp.text
        assert 'wb.js' in resp.text
        assert '/pywb/20130729195151mp_/http://www.iana.org/domains/example"' in resp.text

    def test_video_info_not_found(self):
        """The ``vi_`` path for an unarchived video answers 404."""
        # not actually archived, but ensure video info path is tested
        path = '/pywb/vi_/https://www.youtube.com/watch?v=DjFZyFWSt1M'
        resp = self.testapp.get(path, status=404)
        assert resp.status_int == 404

    def _test_replay_cdx_mod(self):
        """(Disabled) The ``cdx_`` modifier with a timestamp returns 17 plain-text lines."""
        path = '/pywb/20140127171239cdx_/http://www.iana.org/_css/2013.1/print.css'
        resp = self.testapp.get(path)
        self._assert_basic_text(resp)

        lines = resp.text.rstrip().split('\n')
        assert len(lines) == 17

        first_line = lines[0]
        assert first_line.startswith('org,iana)/_css/2013.1/print.css 20140127171239')

    def test_replay_banner_only(self):
        """The ``bn_`` replay includes ``wb.js`` but no wombat and leaves a relative URL as-is."""
        path = '/pywb/20140126201054bn_/http://www.iana.org/domains/reserved'
        resp = self.testapp.get(path)

        # wb.js header insertion
        assert 'wb.js' in resp.text

        # no wombat present
        assert '_WBWombat' not in resp.text

        # url not rewritten
        #assert '"http://www.iana.org/domains/example"' in resp.text
        assert '"/_css/2013.1/screen.css"' in resp.text

    def test_replay_identity_1(self):
        """The ``id_`` replay has no ``wb.js``, is 1270 bytes long and keeps the original URL."""
        path = '/pywb/20140127171251id_/http://example.com/'
        resp = self.testapp.get(path)

        # no wb header insertion
        assert 'wb.js' not in resp.text

        assert resp.content_length == 1270, resp.content_length

        # original unrewritten url present
        assert '"http://www.iana.org/domains/example"' in resp.text

    def _test_replay_range_cache_content(self):
        """(Disabled) A ``bytes=0-200`` range request on ``id_`` replay answers 206 with 201 bytes."""
        path = '/pywb/20140127171250id_/http://example.com'
        headers = [('Range', 'bytes=0-200')]
        resp = self.testapp.get(path, headers=headers)

        assert resp.status_int == 206
        assert resp.headers['Accept-Ranges'] == 'bytes'
        assert resp.headers['Content-Range'] == 'bytes 0-200/1270', resp.headers['Content-Range']
        assert resp.content_length == 201, resp.content_length

        assert 'wb.js' not in resp.text

    def _test_replay_content_ignore_range(self):
        """(Disabled) The ``pywb-norange`` collection answers a range request with the full body."""
        path = '/pywb-norange/20140127171251id_/http://example.com'
        headers = [('Range', 'bytes=0-200')]
        resp = self.testapp.get(path, headers=headers)

        # range request ignored
        assert resp.status_int == 200

        # full response
        assert resp.content_length == 1270, resp.content_length

        # identity, no header insertion
        assert 'wb.js' not in resp.text

    def _test_replay_range_cache_content_bound_end(self):
        """(Disabled) A ``bytes=10-10000`` range is reported as ``10-1269/1270``."""
        path = '/pywb/20140127171251id_/http://example.com'
        headers = [('Range', 'bytes=10-10000')]
        resp = self.testapp.get(path, headers=headers)

        assert resp.status_int == 206
        assert resp.headers['Accept-Ranges'] == 'bytes'
        assert resp.headers['Content-Range'] == 'bytes 10-1269/1270', resp.headers['Content-Range']
        assert resp.content_length == 1260, resp.content_length
        assert len(resp.text) == resp.content_length

        assert 'wb.js' not in resp.text

    def _test_replay_redir_no_cache(self):
        """(Disabled) A range request on a redirecting capture answers 302 with an empty body."""
        path = '/pywb/20140126200927/http://www.iana.org/domains/root/db/'
        headers = [('Range', 'bytes=10-10000')]
        # Range ignored
        resp = self.testapp.get(path, headers=headers)
        assert resp.status_int == 302
        assert resp.content_length == 0

    def test_replay_identity_2_arcgz(self):
        """The ``id_`` replay of ``arc.gz.test.example.com`` has no ``wb.js`` and keeps the original URL."""
        path = '/pywb/20140216050221id_/http://arc.gz.test.example.com/'
        resp = self.testapp.get(path)

        # no wb header insertion
        assert 'wb.js' not in resp.text

        # original unrewritten url present
        assert '"http://www.iana.org/domains/example"' in resp.text

    def test_replay_identity_2_arc(self):
        """The ``id_`` replay of ``arc.test.example.com`` has no ``wb.js`` and keeps the original URL."""
        path = '/pywb/20140216050221id_/http://arc.test.example.com/'
        resp = self.testapp.get(path)

        # no wb header insertion
        assert 'wb.js' not in resp.text

        # original unrewritten url present
        assert '"http://www.iana.org/domains/example"' in resp.text

    def test_replay_content_length_1(self):
        """The ``Content-Length`` header equals the length of the response text."""
        # test larger file, rewritten file (svg!)
        path = '/pywb/20140126200654mp_/http://www.iana.org/_img/2013.1/rir-map.svg'
        resp = self.testapp.get(path)
        assert resp.headers['Content-Length'] == str(len(resp.text))

    def test_replay_css_mod(self):
        """The ``cs_`` replay of ``screen.css`` is a 200 of type ``text/css``."""
        path = '/pywb/20140127171239cs_/http://www.iana.org/_css/2013.1/screen.css'
        resp = self.testapp.get(path)
        assert resp.status_int == 200
        assert resp.content_type == 'text/css'

    def test_replay_js_mod(self):
        """The ``js_`` replay of ``iana.js`` is an empty 200 of type ``application/x-javascript``."""
        # an empty js file
        path = '/pywb/20140126201054js_/http://www.iana.org/_js/2013.1/iana.js'
        resp = self.testapp.get(path)
        assert resp.status_int == 200
        assert resp.content_length == 0
        assert resp.content_type == 'application/x-javascript'

    #def test_redirect_exact(self):
    #    resp = self.testapp.get('/pywb/20140127171237/http://www.iana.org/')
    #    assert resp.status_int == 302

    #    assert resp.headers['Location'].endswith('/pywb/20140127171238/http://iana.org')

    def test_replay_non_exact(self):
        """A near-miss timestamp is served directly (200) and kept in the rewritten links."""
        # non-exact mode, don't redirect to exact capture
        path = '/pywb/20140127171237mp_/http://www.iana.org/'
        resp = self.testapp.get(path)
        assert resp.status_int == 200

        self._assert_basic_html(resp)
        assert '"20140127171237"' in resp.text
        # actual timestamp set in JS
        assert 'timestamp = "20140127171238"' in resp.text
        assert '/pywb/20140127171237mp_/http://www.iana.org/about/' in resp.text

    def test_latest_replay(self):
        """A timestamp-less ``mp_`` request is served with a timestamped ``Content-Location``."""
        path = '/pywb/mp_/http://example.com/'
        resp = self.testapp.get(path)
        self._assert_basic_html(resp)

        content_location = resp.headers['Content-Location']
        assert content_location.endswith('/20140127171251mp_/http://example.com')

        assert '"20140127171251"' in resp.text
        assert '/pywb/mp_/http://www.iana.org/domains/example' in resp.text

    def test_replay_non_latest_content_location_ts(self):
        """The timestamp parsed out of ``Content-Location`` also appears, quoted, in the body."""
        path = '/pywb/mp_/http://example.com/'
        resp = self.testapp.get(path)
        assert resp.status_int == 200

        content_location = resp.headers['Content-Location']
        assert content_location.endswith('/http://example.com')

        # extract the path segment that precedes '/http://' (timestamp + modifier)
        before_url = content_location.rsplit('/http://')[0]
        ts = before_url.rsplit('/', 1)[-1]
        assert ts == '20140127171251mp_'

        # drop the trailing 3-character 'mp_' modifier
        ts = ts[:-3]
        #resp = resp.follow()

        #self._assert_basic_html(resp)

        # ensure the extracted ts is present in the links
        assert '"{0}"'.format(ts) in resp.text
        assert '/pywb/mp_/http://www.iana.org/domains/example' in resp.text

        # ensure ts is current ts
        #assert timestamp_now() >= ts, ts

    def test_refer_redirect(self):
        """A bare path with an archive ``Referer`` answers 307 into the archive, then replays as CSS."""
        # webtest uses Host: localhost:80 by default
        target = 'http://localhost:80/pywb/2014mp_/http://iana.org/_css/2013.1/screen.css'

        referer = ('Referer', 'http://localhost:80/pywb/2014mp_/http://iana.org/')
        resp = self.testapp.get('/_css/2013.1/screen.css', headers=[referer])
        assert resp.status_int == 307
        assert resp.headers['Location'] == target, resp.headers['Location']

        resp = resp.follow()
        assert resp.status_int == 200

        content_location = resp.headers['Content-Location']
        assert content_location.endswith('/pywb/20140127171239mp_/http://www.iana.org/_css/2013.1/screen.css')
        assert resp.content_type == 'text/css'

    def test_non_exact_replay_skip_self_redir(self):
        """A request at ``20140126200927`` is served from the ``20140126200928`` capture."""
        uri = '/pywb/20140126200927mp_/http://www.iana.org/domains/root/db'
        resp = self.testapp.get(uri)
        assert resp.status_int == 200

        content_location = resp.headers['Content-Location']
        assert content_location.endswith('/pywb/20140126200928mp_/http://www.iana.org/domains/root/db')

    #def test_referrer_self_redirect(self):
    #    uri = '/pywb/20140127171239/http://www.iana.org/_css/2013.1/screen.css'
    #    host = 'somehost:8082'
    #    referrer = 'http://' + host + uri

        # capture is normally a 200
    #    resp = self.testapp.get(uri)
    #    assert resp.status_int == 200

        # redirect causes skip of this capture, redirect to next
    #    resp = self.testapp.get(uri, headers = [('Referer', referrer), ('Host', host)], status = 302)
    #    assert resp.status_int == 302

    # ------------------------------------------------------------------
    # fallback to other captures
    # ------------------------------------------------------------------
    def test_not_existant_warc_other_capture(self):
        """A request at ``20140703030321`` is served from the ``20140603030341`` capture."""
        path = '/pywb/20140703030321mp_/http://example.com/?example=2'
        resp = self.testapp.get(path)
        assert resp.status_int == 200

        content_location = resp.headers['Content-Location']
        assert content_location.endswith('/pywb/20140603030341mp_/http://example.com?example=2')

    def test_missing_revisit_other_capture(self):
        """A request at ``20140603030351`` is served from the ``20140603030341`` capture."""
        path = '/pywb/20140603030351mp_/http://example.com/?example=2'
        resp = self.testapp.get(path)
        assert resp.status_int == 200

        content_location = resp.headers['Content-Location']
        assert content_location.endswith('/pywb/20140603030341mp_/http://example.com?example=2')

    def test_not_existant_warc_no_other(self):
        """A request at ``20140703030321`` for ``?example=3`` answers 503."""
        path = '/pywb/20140703030321mp_/http://example.com/?example=3'
        resp = self.testapp.get(path, status=503)
        assert resp.status_int == 503

    def test_missing_revisit_no_other(self):
        """A request at ``20140603030351`` for ``?example=3`` answers 503."""
        path = '/pywb/20140603030351mp_/http://example.com/?example=3'
        resp = self.testapp.get(path, status=503)
        assert resp.status_int == 503

    # ------------------------------------------------------------------
    # live collection
    # ------------------------------------------------------------------
    def test_live_frame(self):
        """A request under ``/live/`` answers 200."""
        path = '/live/http://example.com/?test=test'
        resp = self.testapp.get(path)
        assert resp.status_int == 200

    def _test_live_redir_1(self):
        """(Disabled) A ``*`` query under ``/live/`` answers 302 to the plain live URL."""
        path = '/live/*/http://example.com/?test=test'
        resp = self.testapp.get(path)
        assert resp.status_int == 302

        location = resp.headers['Location']
        assert location.endswith('/live/http://example.com/?test=test')

    def _test_live_redir_2(self):
        """(Disabled) A ``2010-2011`` range under ``/live/`` answers 302 to the plain live URL."""
        path = '/live/2010-2011/http://example.com/?test=test'
        resp = self.testapp.get(path)
        assert resp.status_int == 302

        location = resp.headers['Location']
        assert location.endswith('/live/http://example.com/?test=test')

    def test_live_fallback(self):
        """A request under ``/pywb-fallback/`` answers 200."""
        path = '/pywb-fallback/mp_/http://example.com/?test=test'
        resp = self.testapp.get(path)
        assert resp.status_int == 200

    # ------------------------------------------------------------------
    # POST requests
    # ------------------------------------------------------------------
    def test_post_1(self):
        """A form POST without timestamp answers 200 and the body echoes both fields."""
        path = '/pywb/mp_/httpbin.org/post'
        form = {'foo': 'bar', 'test': 'abc'}
        resp = self.testapp.post(path, form)

        # no redirects for POST, as some browsers (FF) show modal confirmation dialog!
        #assert resp.status_int == 307
        #assert resp.headers['Location'].endswith('/pywb/20140610000859/http://httpbin.org/post')

        # XX webtest doesn't support 307 redirect of post
        #resp = resp.follow()
        #resp = self.testapp.post(resp.headers['Location'], {'foo': 'bar', 'test': 'abc'})

        assert resp.status_int == 200
        assert '"foo": "bar"' in resp.text
        assert '"test": "abc"' in resp.text

    def test_post_2(self):
        """A timestamped form POST answers 200 and the body echoes the ``data`` field."""
        path = '/pywb/20140610001255mp_/http://httpbin.org/post?foo=bar'
        form = {'data': '^'}
        resp = self.testapp.post(path, form)
        assert resp.status_int == 200
        assert '"data": "^"' in resp.text

    def test_post_invalid(self):
        """The same payload sent via ``post_json`` answers 404."""
        # not json
        path = '/pywb/20140610001255mp_/http://httpbin.org/post?foo=bar'
        payload = {'data': '^'}
        resp = self.testapp.post_json(path, payload, status=404)
        assert resp.status_int == 404

    #def test_post_fuzzy_match(self):
    #    resp = self.testapp.post('/pywb/20140610001255mp_/http://httpbin.org/post?foo=bar', {'data': 'x'})
    #    assert resp.status_int == 200
    #    assert '"A": "1"' in resp.text
    #    assert '"B": "[]"' in resp.text
    #    assert '"C": "3"' in resp.text

    def test_post_referer_redirect(self):
        """A POST to a bare path with an archive ``Referer`` answers 307 into the archive."""
        # allowing 307 redirects
        form = {'foo': 'bar', 'test': 'abc'}
        referer = ('Referer', 'http://localhost:80/pywb/2014mp_/http://httpbin.org/foo')
        resp = self.testapp.post('/post', form, headers=[referer])
        assert resp.status_int == 307

        location = resp.headers['Location']
        assert location.endswith('/pywb/2014mp_/http://httpbin.org/post')

    # ------------------------------------------------------------------
    # exclusions / not found / static files
    # ------------------------------------------------------------------
    def _test_excluded_content(self):
        """(Disabled) Replay of ``bookmark_icon.ico`` answers 403 mentioning ``Excluded``."""
        path = '/pywb/mp_/http://www.iana.org/_img/bookmark_icon.ico'
        resp = self.testapp.get(path, status=403)
        assert resp.status_int == 403
        assert 'Excluded' in resp.text

    def test_replay_not_found(self):
        """A HEAD replay request for an unknown host yields an HTML 404."""
        path = '/pywb/mp_/http://not-exist.example.com/'
        resp = self.testapp.head(path, status=404)
        assert resp.content_type == 'text/html'
        assert resp.status_int == 404

    def test_static_content(self):
        """``/static/__pywb/wb.css`` is a non-empty 200 of type ``text/css``."""
        path = '/static/__pywb/wb.css'
        resp = self.testapp.get(path)
        assert resp.status_int == 200
        assert resp.content_type == 'text/css'
        assert resp.content_length > 0

    def test_static_content_filewrapper(self):
        """Same static request with ``wsgi.file_wrapper`` set in the WSGI environ."""
        from wsgiref.util import FileWrapper

        path = '/static/__pywb/wb.css'
        environ = {'wsgi.file_wrapper': FileWrapper}
        resp = self.testapp.get(path, extra_environ=environ)
        assert resp.status_int == 200
        assert resp.content_type == 'text/css'
        assert resp.content_length > 0

    def test_static_not_found(self):
        """A missing static file answers 404."""
        path = '/static/__pywb/notfound.css'
        resp = self.testapp.get(path, status=404)
        assert resp.status_int == 404

    # ------------------------------------------------------------------
    # cdx server
    # ------------------------------------------------------------------
    def _test_cdx_server_filters(self):
        """(Disabled) Two ``filter=`` parameters narrow the cdx output to one line."""
        path = '/pywb-cdx?url=http://www.iana.org/_css/2013.1/screen.css&filter=mime:warc/revisit&filter=filename:dupes.warc.gz'
        resp = self.testapp.get(path)
        self._assert_basic_text(resp)

        cdx_lines = resp.text.rstrip().split('\n')
        actual_len = len(cdx_lines)
        assert actual_len == 1, actual_len

    def _test_cdx_server_advanced(self):
        """(Disabled) Check timestamps and ``orig.filename`` of a collapsed, reversed, revisit-resolved query."""
        # combine collapsing, reversing and revisit resolving
        path = '/pywb-cdx?url=http://www.iana.org/_css/2013.1/print.css&collapseTime=11&resolveRevisits=true&reverse=true'
        resp = self.testapp.get(path)

        # convert back to CDXObject
        raw_lines = resp.body.rstrip().split(b'\n')
        cdxs = [CDXObject(raw) for raw in raw_lines]
        assert len(cdxs) == 3, len(cdxs)

        # verify timestamps
        timestamps = [cdx['timestamp'] for cdx in cdxs]
        assert timestamps == ['20140127171239', '20140126201054', '20140126200625']

        # verify orig filenames (2 revisits, one non)
        origfilenames = [cdx['orig.filename'] for cdx in cdxs]
        assert origfilenames == ['iana.warc.gz', 'iana.warc.gz', '-']

    # surt() no longer errors on this in 0.3b
    #def test_error(self):
    #    resp = self.testapp.get('/pywb/?abc', status = 400)
    #    assert resp.status_int == 400
    #    assert 'Invalid Url: http://?abc' in resp.text

    # ------------------------------------------------------------------
    # collection info
    # ------------------------------------------------------------------
    def test_coll_info_json(self):
        """``/collinfo.json`` is JSON with four ``fixed`` and zero ``dynamic`` entries."""
        path = '/collinfo.json'
        resp = self.testapp.get(path)
        assert resp.content_type == 'application/json'

        info = resp.json
        assert len(info['fixed']) == 4
        assert len(info['dynamic']) == 0

    #def test_invalid_config(self):
    #    with raises(IOError):
    #        init_app(create_wb_router,
    #                 load_yaml=True,
    #                 config_file='x-invalid-x')
|
2014-04-02 13:16:54 -07:00
|
|
|
|
|
|
|
|