mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Misc Fixes for RC5 (#534)
* misc fixes (rc 5):
  - banner: only auto init banner if not in top-frame (check for no-frame mode and replay url is set)
  - index: 'cdx+' fix for use as internal index: if cdx has a warc filename and offset, don't attempt default live web load
  - improved self-redirect: avoid www2 -> www redirect altogether, not just for second redirect
  - tests: update tests for improved self-redirect checking
  - bump version to pywb-2.4.0-rc5
This commit is contained in:
parent
93ce4f6f7a
commit
fa021eebab
@ -306,8 +306,8 @@ This file is part of pywb, https://github.com/webrecorder/pywb
|
||||
// all banners will expose themselves by adding themselves as WBBanner on window
|
||||
window.WBBanner = new DefaultBanner();
|
||||
|
||||
// if in replay frame, init immediately
|
||||
if (window.wbinfo) {
|
||||
// if wbinfo.url is set and not-framed, init banner in content frame
|
||||
if (window.wbinfo && window.wbinfo.url && !window.wbinfo.is_framed) {
|
||||
if (document.readyState === "loading") {
|
||||
document.addEventListener("DOMContentLoaded", function() {
|
||||
window.WBBanner.init();
|
||||
|
@ -1,4 +1,4 @@
|
||||
__version__ = '2.4.0rc4'
|
||||
__version__ = '2.4.0-rc5'
|
||||
|
||||
if __name__ == '__main__':
|
||||
print(__version__)
|
||||
|
@ -139,18 +139,19 @@ class BaseLoader(object):
|
||||
request_url = request_url.split('://', 1)[-1].rstrip('/')
|
||||
|
||||
self_redir = False
|
||||
orig_key = params.get('sr-urlkey') or cdx['urlkey']
|
||||
|
||||
if request_url == location_url:
|
||||
self_redir = True
|
||||
elif params.get('sr-urlkey'):
|
||||
# if new location canonicalized matches old key, also self-redirect
|
||||
if canonicalize(location_url) == params.get('sr-urlkey'):
|
||||
self_redir = True
|
||||
|
||||
# if new location canonicalized matches old key, also self-redirect
|
||||
elif canonicalize(location_url) == orig_key:
|
||||
self_redir = True
|
||||
|
||||
if self_redir:
|
||||
msg = 'Self Redirect {0} -> {1}'
|
||||
msg = msg.format(request_url, location_url)
|
||||
params['sr-urlkey'] = cdx['urlkey']
|
||||
params['sr-urlkey'] = orig_key
|
||||
raise LiveResourceException(msg)
|
||||
|
||||
@staticmethod
|
||||
@ -267,6 +268,9 @@ class LiveWebLoader(BaseLoader):
|
||||
self.socks_proxy = None
|
||||
|
||||
def load_resource(self, cdx, params):
|
||||
if cdx.get('filename') and cdx.get('offset') is not None:
|
||||
return None
|
||||
|
||||
load_url = cdx.get('load_url')
|
||||
if not load_url:
|
||||
return None
|
||||
|
@ -220,8 +220,8 @@ class TestBaseWarcServer(HttpBinLiveTests, MementoOverrideTests, FakeRedisTests,
|
||||
buff = BytesIO(resp.body)
|
||||
record = ArcWarcRecordLoader().parse_record_stream(buff, no_record_parse=False)
|
||||
print(record.http_headers)
|
||||
assert record.http_headers.get_statuscode() == '302'
|
||||
assert record.http_headers.get_header('Location') == 'https://www.iana.org/'
|
||||
assert record.http_headers.get_statuscode() == '200'
|
||||
#assert record.http_headers.get_header('Location') == 'https://www.iana.org/'
|
||||
|
||||
@patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_live'))
|
||||
def test_agg_select_live(self):
|
||||
|
@ -15,7 +15,7 @@ class TestRedirects(CollsDirMixin, BaseConfigTest):
|
||||
def setup_class(cls):
|
||||
super(TestRedirects, cls).setup_class('config_test.yaml')
|
||||
|
||||
def create_redirect_record(self, url, redirect_url, timestamp):
|
||||
def create_redirect_record(self, url, redirect_url, timestamp, status='301'):
|
||||
warc_headers = {}
|
||||
warc_headers['WARC-Date'] = timestamp_to_iso_date(timestamp)
|
||||
|
||||
@ -26,7 +26,7 @@ class TestRedirects(CollsDirMixin, BaseConfigTest):
|
||||
('Location', redirect_url)
|
||||
]
|
||||
|
||||
http_headers = StatusAndHeaders('301 Permanent Redirect', headers_list, protocol='HTTP/1.0')
|
||||
http_headers = StatusAndHeaders(status + ' Redirect', headers_list, protocol='HTTP/1.0')
|
||||
|
||||
rec = self.writer.create_warc_record(url, 'response',
|
||||
payload=BytesIO(payload),
|
||||
@ -140,4 +140,26 @@ class TestRedirects(CollsDirMixin, BaseConfigTest):
|
||||
res = self.get('/redir/20190626101112{0}/http://www.example.com/', fmod, status=200)
|
||||
assert res.text == 'Some Text'
|
||||
|
||||
def test_init_2(self):
|
||||
filename = os.path.join(self.root_dir, 'redir2.warc.gz')
|
||||
with open(filename, 'wb') as fh:
|
||||
self.writer = WARCWriter(fh, gzip=True)
|
||||
|
||||
redirect = self.create_redirect_record('http://www.example.com/path', 'https://www.example.com/path/', '20191003115920')
|
||||
redirect = self.create_redirect_record('https://www.example.com/path/', 'https://www2.example.com/path', '20191003115927', status='302')
|
||||
response = self.create_response_record('https://www2.example.com/path', '20191024125646', 'Some Text')
|
||||
revisit = self.create_revisit_record('https://www2.example.com/path', '20191024125648', 'https://www2.example.com/path', response.rec_headers['WARC-Date'])
|
||||
|
||||
wb_manager(['init', 'redir2'])
|
||||
|
||||
wb_manager(['add', 'redir2', filename])
|
||||
|
||||
assert os.path.isfile(os.path.join(self.root_dir, self.COLLS_DIR, 'redir2', 'indexes', 'index.cdxj'))
|
||||
|
||||
def test_revisit_redirect_skip_self_redir_2(self, fmod):
|
||||
res = self.get('/redir2/20191024125648{0}/http://www2.example.com/path', fmod, status=200)
|
||||
assert res.text == 'Some Text'
|
||||
|
||||
res = self.get('/redir2/20191024125648{0}/https://www.example.com/path', fmod, status=200)
|
||||
assert res.text == 'Some Text'
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user