2017-05-03 20:05:07 -07:00
|
|
|
from .base_config_test import BaseConfigTest, fmod
|
2014-04-02 13:16:54 -07:00
|
|
|
|
2017-05-23 19:08:29 -07:00
|
|
|
from pywb.warcserver.index.cdxobject import CDXObject
|
2014-02-05 10:10:33 -08:00
|
|
|
|
2014-03-02 19:26:06 -08:00
|
|
|
|
2017-02-27 19:07:51 -08:00
|
|
|
# ============================================================================
|
|
|
|
class TestWbIntegration(BaseConfigTest):
|
|
|
|
@classmethod
def setup_class(cls):
    """Run the parent class setup with the 'config_test.yaml' config file name."""
    super(TestWbIntegration, cls).setup_class('config_test.yaml')
|
2014-01-31 19:41:44 -08:00
|
|
|
|
|
|
|
def test_home(self):
    """GET '/' passes the shared basic-HTML check and contains '/pywb'."""
    resp = self.testapp.get('/')
    self._assert_basic_html(resp)
    assert '/pywb' in resp.text
|
2014-01-31 19:41:44 -08:00
|
|
|
|
|
|
|
def test_pywb_root(self):
    """GET '/pywb/' passes the basic-HTML check, links base.css and contains 'Search'."""
    resp = self.testapp.get('/pywb/')
    self._assert_basic_html(resp)
    assert '<link rel="stylesheet" href="/static/css/base.css"' in resp.text
    assert 'Search' in resp.text
|
2014-01-31 19:41:44 -08:00
|
|
|
|
2014-04-02 13:16:54 -07:00
|
|
|
def test_pywb_root_head(self):
    """HEAD '/pywb/' answers 200 with a 'text/html' content type."""
    resp = self.testapp.head('/pywb/')
    assert resp.content_type == 'text/html'
    assert resp.status_int == 200
|
|
|
|
|
2018-03-02 15:54:27 -08:00
|
|
|
def test_pywb_invalid_collection(self):
    """A request under the unknown collection 'blah' yields an HTML 404 naming it."""
    resp = self.testapp.get('/blah/http://example.com/', status=404)
    assert resp.content_type == 'text/html'
    assert resp.status_int == 404

    assert 'Collection not found: <b>blah</b>' in resp.text
|
|
|
|
|
2014-01-31 19:41:44 -08:00
|
|
|
def test_calendar_query(self):
    """The '*' query for iana.org under '/pywb' passes the basic-HTML check."""
    resp = self.testapp.get('/pywb/*/iana.org')
    self._assert_basic_html(resp)
    # 3 Captures + header
    # (row-count assertion is disabled in the original and kept disabled here)
    #assert len(resp.html.find_all('tr')) == 4
|
2014-01-31 19:41:44 -08:00
|
|
|
|
2017-02-27 19:07:51 -08:00
|
|
|
def test_calendar_query_2(self):
    """The '*' query for screen.css under '/pywb' passes the basic-HTML check.

    The row-count assertion and the whole '/pywb-filt' (filtered collection)
    section are commented out in the original and are kept that way here.
    """
    # unfiltered collection
    resp = self.testapp.get('/pywb/*/http://www.iana.org/_css/2013.1/screen.css')
    self._assert_basic_html(resp)
    # 17 Captures + header
    #assert len(resp.html.find_all('tr')) == 18

    # filtered collection
    #resp = self.testapp.get('/pywb-filt/*/http://www.iana.org/_css/2013.1/screen.css')
    #self._assert_basic_html(resp)
    # 1 Capture (filtered) + header
    #assert len(resp.html.find_all('tr')) == 2
|
2014-02-06 17:28:08 -08:00
|
|
|
|
2017-11-14 20:47:09 -08:00
|
|
|
def test_cdxj_query_fuzzy_match(self):
    """A CDX query for screen.css with an extra '_=' parameter returns 17 lines."""
    # fuzzy match removing _= according to standard rules.yaml
    resp = self.testapp.get('/pywb/cdx?url=http://www.iana.org/_css/2013.1/screen.css%3F_=3141592653')
    assert len(resp.text.rstrip().split('\n')) == 17
|
2014-02-18 14:47:48 -08:00
|
|
|
|
2017-11-14 20:47:09 -08:00
|
|
|
def test_cdxj_query_fuzzy_match_add_slash(self):
    """Same fuzzy CDX query, with a '/' before the query string, returns 17 lines."""
    # fuzzy match removing _= according to standard rules.yaml
    resp = self.testapp.get('/pywb/cdx?url=http://www.iana.org/_css/2013.1/screen.css/%3F_=3141592653')
    # 17 result lines expected. The original comment read "17 Captures +
    # header", which does not match the exact count of 17 asserted below.
    assert len(resp.text.rstrip().split('\n')) == 17
|
2017-11-09 20:45:15 -08:00
|
|
|
|
2017-11-14 20:47:09 -08:00
|
|
|
def test_cdxj_not_found(self):
    """A CDX query for a URL with no results returns an empty body."""
    # query with no results
    resp = self.testapp.get('/pywb/cdx?url=http://not-exist.example.com')
    assert resp.text == ''
|
2015-01-24 12:32:50 -08:00
|
|
|
|
2017-11-14 20:47:09 -08:00
|
|
|
def test_cdxj_query(self):
    """A CDX query for http://www.iana.org/ returns 3 lines including a known capture."""
    resp = self.testapp.get('/pywb/cdx?url=http://www.iana.org/')

    assert 'org,iana)/ 20140126200624 {"url": "http://www.iana.org/", "mime": "text/html", "status": "200", "digest": "OSSAPWJ23L56IYVRW3GFEAR4MCJMGPTB"' in resp.text

    # check for 3 cdx lines (strip final newline)
    assert len(resp.text.rstrip().split('\n')) == 3
|
2014-01-31 19:41:44 -08:00
|
|
|
|
2014-06-14 18:26:19 -07:00
|
|
|
def test_replay_top_frame(self):
    """Replay URL without a modifier returns the frame page markers and no CSP header.

    The body must contain 'new ContentFrame', the quoted timestamp and the
    target URL; the response must not carry 'Content-Security-Policy'.
    """
    resp = self.testapp.get('/pywb/20140127171238/http://www.iana.org/')

    assert 'new ContentFrame' in resp.text
    assert '"20140127171238"' in resp.text
    assert 'http://www.iana.org/' in resp.text, resp.text

    assert 'Content-Security-Policy' not in resp.headers
|
|
|
|
|
2018-03-05 13:08:22 -08:00
|
|
|
def test_replay_content_head(self, fmod):
    """HEAD on the replay URL returns 200 with a missing or empty Content-Length.

    ``fmod`` is handed to ``self.head`` together with the '{0}' URL template
    (presumably the replay modifier substituted into the URL -- confirm
    against the base test class).
    """
    resp = self.head('/pywb/20140127171238{0}/http://www.iana.org/', fmod, status=200)
    assert not resp.headers.get('Content-Length')
|
|
|
|
|
|
|
|
def test_replay_content_head_non_zero_content_length_match(self):
    """HEAD on an 'id_' replay reports the same content_length as the matching GET."""
    resp = self.testapp.get('/pywb/id_/http://www.iana.org/_js/2013.1/jquery.js', status=200)
    length = resp.content_length

    # Content-Length included if non-zero
    resp = self.testapp.head('/pywb/id_/http://www.iana.org/_js/2013.1/jquery.js', status=200)

    #assert resp.headers['Content-Length'] == length
    assert resp.content_length == length
|
|
|
|
|
2017-05-03 20:05:07 -07:00
|
|
|
def test_replay_content(self, fmod):
    """Replayed iana.org page contains the expected client-side markers and CSP header.

    Checks the quoted timestamp, the wombat/transclusions script references,
    the auto-fetch flag, a rewritten link formatted with ``fmod``, the
    framed/unframed markers (which differ when ``fmod == 'mp_'``), and the
    exact Content-Security-Policy header value.
    """
    resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/', fmod)
    self._assert_basic_html(resp)

    assert '"20140127171238"' in resp.text, resp.text
    assert 'wombat.js' in resp.text
    assert 'transclusions.js' in resp.text
    assert '_WBWombatInit' in resp.text, resp.text
    assert 'wbinfo.enable_auto_fetch = false;' in resp.text
    assert '/pywb/20140127171238{0}/http://www.iana.org/time-zones"'.format(fmod) in resp.text

    # Only the 'mp_' modifier is expected to carry the top-window check
    # and report itself as framed.
    if fmod == 'mp_':
        assert 'window == window.top' in resp.text
        assert 'wbinfo.is_framed = true' in resp.text
    else:
        assert 'window == window.top' not in resp.text
        assert 'wbinfo.is_framed = false' in resp.text

    csp = "default-src 'unsafe-eval' 'unsafe-inline' 'self' data: blob: mediastream: ws: wss: ; form-action 'self'"
    assert resp.headers['Content-Security-Policy'] == csp
|
|
|
|
|
2017-11-30 14:13:47 -08:00
|
|
|
def test_replay_resource(self, fmod):
    """Replayed resource.json is served with Content-Type 'application/json'."""
    resp = self.get('/pywb/20171122230223{0}/http://httpbin.org/anything/resource.json', fmod)
    assert resp.headers['Content-Type'] == 'application/json'
|
|
|
|
|
2017-12-07 17:46:50 -08:00
|
|
|
def test_replay_redirect(self, fmod):
    """Replay of /domains/example is a 302 whose Location starts with '/pywb/2014<fmod>/'."""
    resp = self.get('/pywb/2014{0}/http://www.iana.org/domains/example', fmod)
    assert resp.headers['Location'].startswith('/pywb/2014{0}/'.format(fmod))
    assert resp.status_code == 302
|
|
|
|
|
2017-05-03 20:05:07 -07:00
|
|
|
def test_replay_fuzzy_1(self, fmod):
    """Replay with an extra '?_=123' returns 200 and points Content-Location at the 20140126200624 capture."""
    resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/?_=123', fmod)
    assert resp.status_int == 200
    assert resp.headers['Content-Location'].endswith('/pywb/20140126200624{0}/http://www.iana.org/'.format(fmod))
|
2017-03-20 14:41:12 -07:00
|
|
|
|
2017-05-03 20:05:07 -07:00
|
|
|
def test_replay_no_fuzzy_match(self, fmod):
    """Replay with the query '?foo=bar' returns 404."""
    resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/?foo=bar', fmod, status=404)
    assert resp.status_int == 404
|
|
|
|
|
2017-05-03 20:05:07 -07:00
|
|
|
def test_no_slash_redir_1(self, fmod):
    """A host-only URL without trailing slash is answered with a 307 to the slash-terminated URL."""
    resp = self.get('/pywb/20140103030321{0}/http://example.com', fmod)
    assert resp.status_int == 307
    assert resp.headers['Location'].endswith('/pywb/20140103030321{0}/http://example.com/'.format(fmod))
|
2017-04-04 18:10:49 -07:00
|
|
|
|
2017-05-03 20:05:07 -07:00
|
|
|
def test_no_slash_redir_2(self, fmod):
    """As test_no_slash_redir_1, with a query string: the 307 Location inserts '/' before '?'."""
    resp = self.get('/pywb/20140103030321{0}/http://example.com?example=1', fmod)
    assert resp.status_int == 307
    assert resp.headers['Location'].endswith('/pywb/20140103030321{0}/http://example.com/?example=1'.format(fmod))
|
2017-04-04 18:10:49 -07:00
|
|
|
|
2017-05-03 20:05:07 -07:00
|
|
|
def test_replay_cdxj(self, fmod):
    """Replay from the '/pywb-cdxj' collection shows the timestamp, wombat.js and a rewritten link."""
    resp = self.get('/pywb-cdxj/20140103030321{0}/http://example.com/?example=1', fmod)
    self._assert_basic_html(resp)

    assert '"20140103030321"' in resp.text
    assert 'wombat.js' in resp.text
    assert '/pywb-cdxj/20140103030321{0}/http://www.iana.org/domains/example'.format(fmod) in resp.text
|
2015-03-19 11:20:40 -07:00
|
|
|
|
2017-05-03 20:05:07 -07:00
|
|
|
def test_replay_cdxj_revisit(self, fmod):
    """Replay of the 20140103030341 capture from '/pywb-cdxj' shows the timestamp, wombat.js and a rewritten link."""
    resp = self.get('/pywb-cdxj/20140103030341{0}/http://example.com/?example=1', fmod)
    self._assert_basic_html(resp)

    assert '"20140103030341"' in resp.text
    assert 'wombat.js' in resp.text
    assert '/pywb-cdxj/20140103030341{0}/http://www.iana.org/domains/example'.format(fmod) in resp.text
|
2015-03-19 13:29:29 -07:00
|
|
|
|
2017-05-03 20:05:07 -07:00
|
|
|
def test_zero_len_revisit(self, fmod):
    """Replay of the 20140603030341 capture of '?example=2' shows the timestamp, wombat.js and a rewritten link."""
    resp = self.get('/pywb/20140603030341{0}/http://example.com/?example=2', fmod)
    self._assert_basic_html(resp)

    assert '"20140603030341"' in resp.text
    assert 'wombat.js' in resp.text
    assert '/pywb/20140603030341{0}/http://www.iana.org/domains/example'.format(fmod) in resp.text
|
2014-06-25 12:11:26 -07:00
|
|
|
|
2017-05-03 20:05:07 -07:00
|
|
|
def test_replay_url_agnostic_revisit(self, fmod):
    """Replay of www.example.com at 20130729195151 shows the timestamp, wombat.js and a rewritten link."""
    resp = self.get('/pywb/20130729195151{0}/http://www.example.com/', fmod)
    self._assert_basic_html(resp)

    assert '"20130729195151"' in resp.text
    assert 'wombat.js' in resp.text
    assert '/pywb/20130729195151{0}/http://www.iana.org/domains/example"'.format(fmod) in resp.text
|
2014-03-04 20:12:09 +00:00
|
|
|
|
2014-12-23 14:34:59 -08:00
|
|
|
def test_video_info_not_found(self):
    """A 'vi_' request for a YouTube URL returns 404."""
    # not actually archived, but ensure video info path is tested
    resp = self.testapp.get('/pywb/vi_/https://www.youtube.com/watch?v=DjFZyFWSt1M', status=404)
    assert resp.status_int == 404
|
|
|
|
|
2017-05-14 15:10:37 -07:00
|
|
|
def test_replay_banner_only(self):
    """'bn_' replay has no wombat markers, keeps the top-window check and leaves URLs unrewritten."""
    resp = self.testapp.get('/pywb/20140126201054bn_/http://www.iana.org/domains/reserved')

    # wombat.js header not inserted
    assert 'wombat.js' not in resp.text

    # no wombat present
    assert '_WBWombat' not in resp.text

    # top-frame redirect check
    assert 'window == window.top' in resp.text

    # url not rewritten
    #assert '"http://www.iana.org/domains/example"' in resp.text
    assert '"/_css/2013.1/screen.css"' in resp.text
|
2014-07-29 12:20:22 -07:00
|
|
|
|
2014-02-27 18:43:55 -08:00
|
|
|
def test_replay_identity_1(self):
    """'id_' replay of example.com has no wombat.js, a content_length of 1270 and the original URL."""
    resp = self.testapp.get('/pywb/20140127171251id_/http://example.com/')

    # no wb header insertion
    assert 'wombat.js' not in resp.text

    assert resp.content_length == 1270, resp.content_length

    # original unrewritten url present
    assert '"http://www.iana.org/domains/example"' in resp.text
|
2014-03-22 11:30:51 -07:00
|
|
|
|
|
|
|
def test_replay_identity_2_arcgz(self):
    """'id_' replay of arc.gz.test.example.com has no wombat.js and keeps the original URL."""
    resp = self.testapp.get('/pywb/20140216050221id_/http://arc.gz.test.example.com/')

    # no wb header insertion
    assert 'wombat.js' not in resp.text

    # original unrewritten url present
    assert '"http://www.iana.org/domains/example"' in resp.text
|
2014-03-22 11:30:51 -07:00
|
|
|
|
|
|
|
def test_replay_identity_2_arc(self):
    """'id_' replay of arc.test.example.com has no wombat.js and keeps the original URL."""
    resp = self.testapp.get('/pywb/20140216050221id_/http://arc.test.example.com/')

    # no wb header insertion
    assert 'wombat.js' not in resp.text

    # original unrewritten url present
    assert '"http://www.iana.org/domains/example"' in resp.text
|
2014-01-31 19:41:44 -08:00
|
|
|
|
2017-05-03 20:05:07 -07:00
|
|
|
def test_replay_content_length_1(self, fmod):
    """The Content-Length header of the replayed rir-map.svg equals the body size in bytes."""
    # test larger file, rewritten file (svg!)
    resp = self.get('/pywb/20140126200654{0}/http://www.iana.org/_img/2013.1/rir-map.svg', fmod)
    # Content-Length counts bytes, so compare against the raw body rather
    # than the decoded text (character count differs for non-ASCII content).
    assert resp.headers['Content-Length'] == str(len(resp.body))
|
2014-02-20 11:53:08 -08:00
|
|
|
|
2014-05-16 22:43:53 -07:00
|
|
|
def test_replay_css_mod(self):
    """'cs_' replay of screen.css returns 200 with a 'text/css' content type."""
    resp = self.testapp.get('/pywb/20140127171239cs_/http://www.iana.org/_css/2013.1/screen.css')
    assert resp.status_int == 200
    assert resp.content_type == 'text/css'
|
|
|
|
|
2017-10-18 10:39:18 -07:00
|
|
|
def test_replay_js_mod_no_obj_proxy(self):
    """'js_' replay of iana.js with an IE11 User-Agent returns 200, zero length and a JS content type."""
    # an empty js file, (ie11 UA no js obj proxy)
    resp = self.testapp.get('/pywb/20140126201054js_/http://www.iana.org/_js/2013.1/iana.js',
                            headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'})

    assert resp.status_int == 200
    assert resp.content_length == 0
    assert resp.content_type == 'application/x-javascript'
|
2014-01-31 19:41:44 -08:00
|
|
|
|
JS Object Proxy Override System (#224)
* Init commit for Wombat JS Proxies off of https://github.com/ikreymer/pywb/tree/develop
Changes
- cli.py: add import os for os.chdir(self.r.directory)
- frontendapp.py: added initial support for cors requests.
- static_handler.py: add import for NotFoundException
- wbrequestresponse.py: added the intital implementation for cors requests, webrecoder needs this for recording!
- default_rewriter.py: added JSWombatProxyRewriter to default js rewriter class for internal testing
- html_rewriter.py: made JSWombatProxyRewriter to be default js rewriter class for internal testing
- regex_rewriters.py: implemented JSWombatProxyRewriter and JSWombatProxyRewriter to support wombat JS Proxy
- wombat.js: added JS Proxy support
- remove print
* wombat proxy: simplify mixin using 'first_buff'
* js local scope rewrite/proxy work:
- add DefaultHandlerWithJSProxy to enable new proxy rewrite (disabled by default)
- new proxy toggleable with 'js_local_scope_rewrite: true'
- work on integrating john's proxy work
- getAllOwnProps() to generate list of functions that need to be rebound
- remove non-proxy related changes for now, remove angular special cases (for now)
* local scope proxy work:
- add back __WB_pmw() prefix for postMessage
- don't override postMessage() in proxy obj
- MessageEvent resolve proxy to original window obj
* js obj proxy: use local_init() to load local vars from proxy obj
* wombat: js object proxy improvements:
- use same object '_WB_wombat_obj_proxy' on window and document objects
- reuse default_proxy_get() for get operation from window or document
- resolve and Window/Document object to the proxy, eg. if '_WB_wombat_obj_proxy' exists, return that
- override MessageEvent.source to return window proxy object
* obj proxy work:
- window proxy: defineProperty() override calls Reflect.defineProperty on dummy object as well as window to avoid exception
- window proxy: set() also sets on dummy object, and returns false if Reflect.set returns false (eg. altered by Reflect.defineProperty disabled writing)
- add override_prop_to_proxy() to add override to return proxy obj for attribute
- add override for Node.ownerDocument and HTMLElement.parentNode to return document proxy
server side rewrite: generalize local proxy insert, add list for local let overrides
* js obj proxy work:
- add default '__WB_pmw' to self if undefined (for service workers)
- document.origin override
- proxy obj: improved defineProperty override to work with safari
- proxy obj: catch any exception in dummy obj setter
* client-side rewriting:
- proxy obj: catch exception (such as cross-domain access) in own props init
- proxy obj: check for self reference '_WB_wombat_obj_proxy' access to avoid infinite recurse
- rewrite style: add 'cursor' attr for css url rewriting
* content rewriter: if is_ajax(), skip JS proxy obj rewriting also (html rewrite also skipped)
* client-side rewrite: rewrite 'data:text/css' as inline stylesheet when set via setAttribute() on 'href' in link
* client-side document override improvements:
- fix document.domain, document.referrer, forms add document.origin overrides to use only the document object
- init_doc_overrides() called as part of proxy init
- move non-document overrides to main init
rewrite: add rewrite for "Function('return this')" pattern to use proxy obj
* js obj proxy: now a per-collection (and even a per-request) setting 'use_js_obj_prox' (defaults to False)
live-rewrite-server: defaults to enabled js obj proxy
metadata: get_metadata() loads metadata.yaml for config settings for dynamic collections),
or collection config for static collections
warcserver: get_coll_config() returns config for static collection
tests: use custom test dir instead of default 'collections' dir
tests: add basic test for js obj proxy
update to warcio>=1.4.0
* karma tests: update to safari >10
* client-side rewrite:
- ensure wombat.js is ES5 compatible (don't use let)
- check if Proxy obj exists before attempting to init
* js proxy obj: RewriteWithProxyObj uses user-agent to determine if Proxy obj can be supported
content_rewriter: add overridable get_rewriter()
content_rewriter: fix elif -> if in should_rw_content()
tests: update js proxy obj test with different user agents (supported and unsupported)
karma: reset test to safari 9
* compatibility: remove shorthand notation from wombat.js
* js obj proxy: override MutationObserver.observe() to retrieve original object from proxy
wombat.js: cleanup, remove commented out code, label new proxy system functions, bump version to 2.40
2017-08-05 10:37:32 -07:00
|
|
|
def test_replay_js_obj_proxy(self, fmod):
    # Replays the archived jquery.js twice: once with no User-Agent header
    # (basic response checks) and once with a Chrome User-Agent, where the
    # wombat 'let window = ...' assignment is expected in the body.
    jquery_url = '/pywb/20140126200625{0}/http://www.iana.org/_js/2013.1/jquery.js'
    chrome_ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'

    # test js proxy obj with jquery -- no user agent
    plain = self.get(jquery_url, fmod)
    assert plain.status_int == 200
    assert plain.content_length != 0
    assert plain.content_type == 'application/x-javascript'

    # test with Chrome user agent
    with_chrome = self.get(jquery_url, fmod, headers={'User-Agent': chrome_ua})
    assert 'let window = _____WB$wombat$assign$function_____(' in with_chrome.text
|
|
|
|
|
|
|
|
def test_replay_js_ie11_no_obj_proxy(self, fmod):
    # IE11 user-agent, no proxy: the wombat 'let window = ...' assignment
    # must be absent from the replayed jquery.js body.
    ie11_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
    }
    response = self.get(
        '/pywb/20140126200625{0}/http://www.iana.org/_js/2013.1/jquery.js',
        fmod,
        headers=ie11_headers,
    )

    proxy_marker = 'let window = _____WB$wombat$assign$function_____('
    assert proxy_marker not in response.text
|
2014-01-31 19:41:44 -08:00
|
|
|
|
2017-05-03 20:05:07 -07:00
|
|
|
def test_replay_non_exact(self, fmod):
    # non-exact mode, don't redirect to exact capture: the requested
    # timestamp is kept in the page while the JS carries the capture's own.
    response = self.get('/pywb/20140127171237{0}/http://www.iana.org/', fmod)
    assert response.status_int == 200

    self._assert_basic_html(response)

    # requested timestamp is echoed back in the page
    assert '"20140127171237"' in response.text

    # actual timestamp set in JS
    assert 'timestamp = "20140127171238"' in response.text

    # rewritten links keep the requested timestamp and modifier
    about_link = '/pywb/20140127171237{0}/http://www.iana.org/about/'.format(fmod)
    assert about_link in response.text
|
2014-01-31 19:41:44 -08:00
|
|
|
|
2017-05-03 20:05:07 -07:00
|
|
|
def test_latest_replay(self, fmod):
    # Requests example.com without a timestamp and checks that the
    # Content-Location header and page body refer to capture 20140127171251.
    if fmod:
        fmod_slash = fmod + '/'
    else:
        fmod_slash = ''

    response = self.get('/pywb/{0}http://example.com/', fmod_slash)
    self._assert_basic_html(response)

    expected_suffix = '/20140127171251{0}/http://example.com'.format(fmod)
    assert response.headers['Content-Location'].endswith(expected_suffix)

    assert '"20140127171251"' in response.text

    # links in the page stay timestamp-less, carrying only the modifier
    example_link = '/pywb/{0}http://www.iana.org/domains/example'.format(fmod_slash)
    assert example_link in response.text, response.text
|
2014-01-31 19:41:44 -08:00
|
|
|
|
2020-04-30 22:39:47 -07:00
|
|
|
def test_replay_content_bad_status_text(self, fmod):
    # Replays the '/bads' capture and checks that the Content-Length header
    # equals the length of the response text.
    # NOTE(review): the previous comment here read "test larger file,
    # rewritten file (svg!)", which looks copied from another test -- confirm.
    response = self.get('/pywb/20140127171238{0}/https://iana.org/bads', fmod)

    text_length = len(response.text)
    assert response.headers['Content-Length'] == str(text_length)
|
|
|
|
|
2017-05-03 20:05:07 -07:00
|
|
|
def test_replay_non_latest_content_location_ts(self, fmod):
    # Requests example.com without a timestamp, pulls the timestamp segment
    # out of the Content-Location header and checks it against the page.
    fmod_slash = (fmod + '/') if fmod else ''

    response = self.get('/pywb/{0}http://example.com/', fmod_slash)
    assert response.status_int == 200

    location = response.headers['Content-Location']
    assert location.endswith('/http://example.com')

    # extract ts, which should be current time
    # (the path segment immediately before '/http://')
    prefix = location.rsplit('/http://')[0]
    ts = prefix.rpartition('/')[2]
    assert ts == '20140127171251{0}'.format(fmod)

    # drop the modifier so only the bare timestamp remains
    ts = ts.replace(fmod, '') if fmod else ts

    # ensure the current ts is present in the links
    assert '"{0}"'.format(ts) in response.text
    expected_link = '/pywb/{0}http://www.iana.org/domains/example'.format(fmod_slash)
    assert expected_link in response.text

    # ensure ts is current ts
    #assert timestamp_now() >= ts, ts
|
2015-02-17 17:47:30 -08:00
|
|
|
|
2017-05-03 20:05:07 -07:00
|
|
|
def test_refer_redirect(self, fmod):
    # A request for a bare path with a replay Referer should get a 307 to
    # the same path under the referring capture; following it serves CSS.

    # webtest uses Host: localhost:80 by default
    referer = 'http://localhost:80/pywb/2014{0}/http://iana.org/'.format(fmod)
    target = referer + '_css/2013.1/screen.css'

    redirect = self.get('/_css/2013.1/screen.css', fmod, headers=[('Referer', referer)])
    assert redirect.status_int == 307
    assert redirect.headers['Location'] == target, redirect.headers['Location']

    final = redirect.follow()
    assert final.status_int == 200

    expected_suffix = '/pywb/20140127171239{0}/http://www.iana.org/_css/2013.1/screen.css'.format(fmod)
    assert final.headers['Content-Location'].endswith(expected_suffix)
    assert final.content_type == 'text/css'
|
|
|
|
|
2017-05-03 20:05:07 -07:00
|
|
|
def test_non_exact_replay_skip_self_redir(self, fmod):
    # Request at 20140126200927 is expected to be answered with a 200 whose
    # Content-Location points at the 20140126200928 capture.
    response = self.get('/pywb/20140126200927{0}/http://www.iana.org/domains/root/db', fmod)
    assert response.status_int == 200

    expected_suffix = '/pywb/20140126200928{0}/http://www.iana.org/domains/root/db'.format(fmod)
    assert response.headers['Content-Location'].endswith(expected_suffix)
|
2017-11-09 21:22:11 -08:00
|
|
|
|
|
|
|
def test_non_exact_replay_skip_self_redir_slash(self, fmod):
    # Same as the non-slash variant, but the requested url ends in '/';
    # the Content-Location is still expected without the trailing slash.
    response = self.get('/pywb/20140126200927{0}/http://www.iana.org/domains/root/db/', fmod)
    assert response.status_int == 200

    expected_suffix = '/pywb/20140126200928{0}/http://www.iana.org/domains/root/db'.format(fmod)
    assert response.headers['Content-Location'].endswith(expected_suffix)
|
2014-02-23 23:31:54 -08:00
|
|
|
|
2017-05-03 20:05:07 -07:00
|
|
|
def test_not_existant_warc_other_capture(self, fmod):
    # Request at 20140703030321 for ?example=2 should be served (200) from
    # the 20140603030341 capture, as reported by Content-Location.
    response = self.get('/pywb/20140703030321{0}/http://example.com/?example=2', fmod)
    assert response.status_int == 200

    served_from = response.headers['Content-Location']
    assert served_from.endswith('/pywb/20140603030341{0}/http://example.com?example=2'.format(fmod))
|
2014-06-25 12:32:57 -07:00
|
|
|
|
2017-05-03 20:05:07 -07:00
|
|
|
def test_missing_revisit_other_capture(self, fmod):
    # Request at 20140603030351 for ?example=2 should be served (200) from
    # the 20140603030341 capture, as reported by Content-Location.
    response = self.get('/pywb/20140603030351{0}/http://example.com/?example=2', fmod)
    assert response.status_int == 200

    served_from = response.headers['Content-Location']
    assert served_from.endswith('/pywb/20140603030341{0}/http://example.com?example=2'.format(fmod))
|
2014-06-25 12:32:57 -07:00
|
|
|
|
2017-05-03 20:05:07 -07:00
|
|
|
def test_not_existant_warc_no_other(self, fmod):
    # Request at 20140703030321 for ?example=3 is expected to fail with 503.
    url = '/pywb/20140703030321{0}/http://example.com/?example=3'
    response = self.get(url, fmod, status=503)
    assert response.status_int == 503
|
|
|
|
|
2017-05-03 20:05:07 -07:00
|
|
|
def test_missing_revisit_no_other(self, fmod):
    # Request at 20140603030351 for ?example=3 is expected to fail with 503.
    url = '/pywb/20140603030351{0}/http://example.com/?example=3'
    response = self.get(url, fmod, status=503)
    assert response.status_int == 503
|
2014-02-23 23:31:54 -08:00
|
|
|
|
2014-07-20 18:25:47 -07:00
|
|
|
def test_live_frame(self):
    # The /live/ collection should answer a plain url request with 200.
    live_url = '/live/http://example.com/?test=test'
    response = self.testapp.get(live_url)
    assert response.status_int == 200
|
|
|
|
|
2017-02-27 19:07:51 -08:00
|
|
|
def _test_live_redir_1(self):
    # Not collected by pytest's default 'test*' naming (leading underscore).
    # Expects a '*' query on /live/ to 302 to the timestamp-less live url.
    response = self.testapp.get('/live/*/http://example.com/?test=test')
    assert response.status_int == 302

    redirect_to = response.headers['Location']
    assert redirect_to.endswith('/live/http://example.com/?test=test')
|
|
|
|
|
2017-02-27 19:07:51 -08:00
|
|
|
def _test_live_redir_2(self):
    # Not collected by pytest's default 'test*' naming (leading underscore).
    # Expects a '2010-2011' range on /live/ to 302 to the timestamp-less url.
    response = self.testapp.get('/live/2010-2011/http://example.com/?test=test')
    assert response.status_int == 302

    redirect_to = response.headers['Location']
    assert redirect_to.endswith('/live/http://example.com/?test=test')
|
|
|
|
|
2017-05-03 20:05:07 -07:00
|
|
|
def test_live_fallback(self, fmod):
    # The pywb-fallback collection should answer this url with 200.
    if fmod:
        fmod_slash = fmod + '/'
    else:
        fmod_slash = ''

    response = self.get('/pywb-fallback/{0}http://example.com/?test=test', fmod_slash)
    assert response.status_int == 200
|
|
|
|
|
2017-05-03 20:05:07 -07:00
|
|
|
def test_post_1(self, fmod):
    # POSTs two form fields to the archived httpbin.org/post and expects
    # both to appear in the replayed response text.
    fmod_slash = (fmod + '/') if fmod else ''
    form = {'foo': 'bar', 'test': 'abc'}

    response = self.post('/pywb/{0}httpbin.org/post', fmod_slash, form)

    assert response.status_int == 200
    assert '"foo": "bar"' in response.text
    assert '"test": "abc"' in response.text
|
2014-06-10 19:21:46 -07:00
|
|
|
|
2017-05-03 20:05:07 -07:00
|
|
|
def test_post_2(self, fmod):
    # POSTs data='^' to the 20140610001255 httpbin capture and expects the
    # value to appear in the replayed response text.
    post_url = '/pywb/20140610001255{0}/http://httpbin.org/post?foo=bar'
    response = self.post(post_url, fmod, {'data': '^'})

    assert response.status_int == 200
    assert '"data": "^"' in response.text
|
2014-06-10 19:21:46 -07:00
|
|
|
|
2021-04-27 20:52:24 -07:00
|
|
|
def test_post_match_as_json(self, fmod):
    # json also matches same query: same payload as test_post_2, sent via
    # post_json instead of post.
    post_url = '/pywb/20140610001255{0}/http://httpbin.org/post?foo=bar'
    response = self.post_json(post_url, fmod, {'data': '^'})

    assert response.status_int == 200
    assert '"data": "^"' in response.text
|
|
|
|
|
2017-05-03 20:05:07 -07:00
|
|
|
def test_post_invalid(self, fmod):
    # wrong param: posting data='^^' instead of '^' is expected to 404.
    post_url = '/pywb/20140610001255{0}/http://httpbin.org/post?foo=bar'
    response = self.post(post_url, fmod, {'data': '^^'}, status=404)
    assert response.status_int == 404
|
2017-03-14 11:39:36 -07:00
|
|
|
|
2019-09-11 09:03:55 -07:00
|
|
|
def test_post_referer_redirect(self, fmod):
    # allowing 307 redirects: a POST to a bare path with a replay Referer
    # should be redirected under the referring collection/timestamp.
    referer = 'http://localhost:80/pywb/2014{0}/http://httpbin.org/foo'.format(fmod)
    form = {'foo': 'bar', 'test': 'abc'}

    response = self.post('/post', fmod, form, headers=[('Referer', referer)])

    assert response.status_int == 307
    expected_suffix = '/pywb/2014{0}/http://httpbin.org/post'.format(fmod)
    assert response.headers['Location'].endswith(expected_suffix)
|
2014-06-13 16:23:11 -07:00
|
|
|
|
2019-09-11 09:03:55 -07:00
|
|
|
def test_get_referer_redirect(self, fmod):
    # A GET to a bare path with a replay Referer should be 307-redirected
    # under the referring collection/timestamp.
    referer = 'http://localhost:80/pywb/2014{0}/http://httpbin.org/foo'.format(fmod)

    response = self.get('/get', fmod, headers=[('Referer', referer)])

    assert response.status_int == 307
    expected_suffix = '/pywb/2014{0}/http://httpbin.org/get'.format(fmod)
    assert response.headers['Location'].endswith(expected_suffix)
|
|
|
|
|
2017-02-27 19:07:51 -08:00
|
|
|
def _test_excluded_content(self, fmod):
    # Not collected by pytest's default 'test*' naming (leading underscore).
    #
    # 'fmod' is taken as a parameter, like the sibling tests. Previously the
    # body used the name without receiving it, so it resolved to the
    # module-level 'fmod' import instead of a modifier string.
    # NOTE(review): presumably 'fmod' is the pytest fixture imported from
    # base_config_test -- confirm before re-enabling this test.
    fmod_slash = fmod + '/' if fmod else ''

    # the excluded resource is expected to be refused with 403
    resp = self.get('/pywb/{0}http://www.iana.org/_img/bookmark_icon.ico', fmod_slash, status=403)
    assert resp.status_int == 403
    assert 'Excluded' in resp.text
|
2014-02-19 20:20:31 -08:00
|
|
|
|
2017-05-03 20:05:07 -07:00
|
|
|
def test_replay_not_found(self, fmod):
    # A url with no capture should give an HTML 404 page naming the url.
    if fmod:
        fmod_slash = fmod + '/'
    else:
        fmod_slash = ''

    response = self.get('/pywb/{0}http://not-exist.example.com/path?A=B', fmod_slash, status=404)
    assert response.content_type == 'text/html'
    assert response.status_int == 404

    assert 'URL Not Found' in response.text, response.text
    not_found_msg = 'The url <b>http://not-exist.example.com/path?A=B</b> could not be found in this collection.'
    assert not_found_msg in response.text
|
|
|
|
|
2014-02-07 19:32:58 -08:00
|
|
|
def test_static_content(self):
    # The bundled default banner stylesheet should be served as non-empty CSS.
    css_path = '/static/default_banner.css'
    response = self.testapp.get(css_path)

    assert response.status_int == 200
    assert response.content_type == 'text/css'
    assert response.content_length > 0
|
|
|
|
|
2014-05-16 22:17:51 -07:00
|
|
|
def test_static_content_filewrapper(self):
    # Same checks as test_static_content, but with wsgiref's FileWrapper
    # supplied to the app as 'wsgi.file_wrapper' in the WSGI environ.
    from wsgiref.util import FileWrapper

    environ = {'wsgi.file_wrapper': FileWrapper}
    response = self.testapp.get('/static/default_banner.css', extra_environ=environ)

    assert response.status_int == 200
    assert response.content_type == 'text/css'
    assert response.content_length > 0
|
|
|
|
|
2018-01-15 19:54:15 -08:00
|
|
|
def test_static_nested_dir(self):
    # A static file several directories deep should be served non-empty.
    font_path = '/static/fonts/font-awesome/fa-brands-400.eot'
    response = self.testapp.get(font_path)

    assert response.status_int == 200
    assert response.content_length > 0
|
|
|
|
|
2014-05-16 22:17:51 -07:00
|
|
|
def test_static_not_found(self):
    # A missing static file should give a 404 whose text names the file.
    response = self.testapp.get('/static/notfound.css', status=404)
    assert response.status_int == 404

    expected_msg = 'Static file not found: <b>notfound.css</b>'
    assert expected_msg in response.text
|
|
|
|
|
2017-09-06 23:25:30 -07:00
|
|
|
def test_cdx_server_filters(self):
    # With both the mime and filename filters applied, the CDX query is
    # expected to return exactly one line.
    query = '/pywb/cdx?url=http://www.iana.org/_css/2013.1/screen.css&filter=mime:warc/revisit&filter=filename:dupes.warc.gz'
    response = self.testapp.get(query)
    assert response.content_type == 'text/x-cdxj'

    cdx_lines = response.text.rstrip().split('\n')
    line_count = len(cdx_lines)
    assert line_count == 1, line_count
|
2014-01-31 19:41:44 -08:00
|
|
|
|
2017-09-06 23:25:30 -07:00
|
|
|
def test_cdx_server_advanced(self):
    # combine collapsing, reversing and revisit resolving
    query = '/pywb/cdx?url=http://www.iana.org/_css/2013.1/print.css&collapseTime=11&resolveRevisits=true&reverse=true'
    response = self.testapp.get(query)

    # convert back to CDXObject, one per raw line of the response body
    raw_lines = response.body.rstrip().split(b'\n')
    cdxs = [CDXObject(raw) for raw in raw_lines]
    assert len(cdxs) == 3, len(cdxs)

    # verify timestamps
    timestamps = [entry['timestamp'] for entry in cdxs]
    assert timestamps == ['20140127171239', '20140126201054', '20140126200625']

    # verify orig filenames (2 revisits, one non)
    origfilenames = [entry['orig.filename'] for entry in cdxs]
    assert origfilenames == ['iana.warc.gz', 'iana.warc.gz', '-']
|
2014-01-31 19:41:44 -08:00
|
|
|
|
2016-02-23 13:26:53 -08:00
|
|
|
# surt() no longer errors on this in 0.3b
|
|
|
|
#def test_error(self):
|
|
|
|
# resp = self.testapp.get('/pywb/?abc', status = 400)
|
|
|
|
# assert resp.status_int == 400
|
|
|
|
# assert 'Invalid Url: http://?abc' in resp.text
|
2014-02-11 14:10:40 -08:00
|
|
|
|
2015-11-04 15:36:44 -08:00
|
|
|
|
|
|
|
def test_coll_info_json(self):
    # /collinfo.json should be JSON listing 4 'fixed' and 0 'dynamic' entries.
    response = self.testapp.get('/collinfo.json')
    assert response.content_type == 'application/json'

    info = response.json
    fixed_count = len(info['fixed'])
    assert fixed_count == 4
    dynamic_count = len(info['dynamic'])
    assert dynamic_count == 0
|
2015-11-04 15:36:44 -08:00
|
|
|
|
|
|
|
#def test_invalid_config(self):
|
2015-02-25 13:18:32 -08:00
|
|
|
# with raises(IOError):
|
|
|
|
# init_app(create_wb_router,
|
|
|
|
# load_yaml=True,
|
|
|
|
# config_file='x-invalid-x')
|
2014-04-02 13:16:54 -07:00
|
|
|
|
|
|
|
|