diff --git a/pywb/static/default_banner.js b/pywb/static/default_banner.js index 34fca130..c1251dc8 100644 --- a/pywb/static/default_banner.js +++ b/pywb/static/default_banner.js @@ -306,8 +306,8 @@ This file is part of pywb, https://github.com/webrecorder/pywb // all banners will expose themselves by adding themselves as WBBanner on window window.WBBanner = new DefaultBanner(); - // if in replay frame, init immediately - if (window.wbinfo) { + // if wbinfo.url is set and not-framed, init banner in content frame + if (window.wbinfo && window.wbinfo.url && !window.wbinfo.is_framed) { if (document.readyState === "loading") { document.addEventListener("DOMContentLoaded", function() { window.WBBanner.init(); diff --git a/pywb/version.py b/pywb/version.py index 23a4653c..8042daa5 100644 --- a/pywb/version.py +++ b/pywb/version.py @@ -1,4 +1,4 @@ -__version__ = '2.4.0rc4' +__version__ = '2.4.0-rc5' if __name__ == '__main__': print(__version__) diff --git a/pywb/warcserver/resource/responseloader.py b/pywb/warcserver/resource/responseloader.py index 7f4aa297..defe906e 100644 --- a/pywb/warcserver/resource/responseloader.py +++ b/pywb/warcserver/resource/responseloader.py @@ -139,18 +139,19 @@ class BaseLoader(object): request_url = request_url.split('://', 1)[-1].rstrip('/') self_redir = False + orig_key = params.get('sr-urlkey') or cdx['urlkey'] if request_url == location_url: self_redir = True - elif params.get('sr-urlkey'): - # if new location canonicalized matches old key, also self-redirect - if canonicalize(location_url) == params.get('sr-urlkey'): - self_redir = True + + # if new location canonicalized matches old key, also self-redirect + elif canonicalize(location_url) == orig_key: + self_redir = True if self_redir: msg = 'Self Redirect {0} -> {1}' msg = msg.format(request_url, location_url) - params['sr-urlkey'] = cdx['urlkey'] + params['sr-urlkey'] = orig_key raise LiveResourceException(msg) @staticmethod @@ -267,6 +268,9 @@ class LiveWebLoader(BaseLoader): self.socks_proxy = None def load_resource(self, cdx, params): + if cdx.get('filename') and cdx.get('offset') is not None: + return None + load_url = cdx.get('load_url') if not load_url: return None diff --git a/pywb/warcserver/test/test_handlers.py b/pywb/warcserver/test/test_handlers.py index 52bf520d..74a4a0a9 100644 --- a/pywb/warcserver/test/test_handlers.py +++ b/pywb/warcserver/test/test_handlers.py @@ -220,8 +220,8 @@ class TestBaseWarcServer(HttpBinLiveTests, MementoOverrideTests, FakeRedisTests, buff = BytesIO(resp.body) record = ArcWarcRecordLoader().parse_record_stream(buff, no_record_parse=False) print(record.http_headers) - assert record.http_headers.get_statuscode() == '302' - assert record.http_headers.get_header('Location') == 'https://www.iana.org/' + assert record.http_headers.get_statuscode() == '200' + #assert record.http_headers.get_header('Location') == 'https://www.iana.org/' @patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_live')) def test_agg_select_live(self): diff --git a/tests/test_redirects.py b/tests/test_redirects.py index 79ce367c..18716373 100644 --- a/tests/test_redirects.py +++ b/tests/test_redirects.py @@ -15,7 +15,7 @@ class TestRedirects(CollsDirMixin, BaseConfigTest): def setup_class(cls): super(TestRedirects, cls).setup_class('config_test.yaml') - def create_redirect_record(self, url, redirect_url, timestamp): + def create_redirect_record(self, url, redirect_url, timestamp, status='301'): warc_headers = {} warc_headers['WARC-Date'] = timestamp_to_iso_date(timestamp) @@ -26,7 +26,7 @@ class TestRedirects(CollsDirMixin, BaseConfigTest): ('Location', redirect_url) ] - http_headers = StatusAndHeaders('301 Permanent Redirect', headers_list, protocol='HTTP/1.0') + http_headers = StatusAndHeaders(status + ' Redirect', headers_list, protocol='HTTP/1.0') rec = self.writer.create_warc_record(url, 'response', payload=BytesIO(payload), @@ -140,4 +140,26 @@ class TestRedirects(CollsDirMixin, BaseConfigTest): res = self.get('/redir/20190626101112{0}/http://www.example.com/', fmod, status=200) assert res.text == 'Some Text' + def test_init_2(self): + filename = os.path.join(self.root_dir, 'redir2.warc.gz') + with open(filename, 'wb') as fh: + self.writer = WARCWriter(fh, gzip=True) + + redirect = self.create_redirect_record('http://www.example.com/path', 'https://www.example.com/path/', '20191003115920') + redirect = self.create_redirect_record('https://www.example.com/path/', 'https://www2.example.com/path', '20191003115927', status='302') + response = self.create_response_record('https://www2.example.com/path', '20191024125646', 'Some Text') + revisit = self.create_revisit_record('https://www2.example.com/path', '20191024125648', 'https://www2.example.com/path', response.rec_headers['WARC-Date']) + + wb_manager(['init', 'redir2']) + + wb_manager(['add', 'redir2', filename]) + + assert os.path.isfile(os.path.join(self.root_dir, self.COLLS_DIR, 'redir2', 'indexes', 'index.cdxj')) + + def test_revisit_redirect_skip_self_redir_2(self, fmod): + res = self.get('/redir2/20191024125648{0}/http://www2.example.com/path', fmod, status=200) + assert res.text == 'Some Text' + + res = self.get('/redir2/20191024125648{0}/https://www.example.com/path', fmod, status=200) + assert res.text == 'Some Text'