From 38b1952d342b35ac030f87d4a2048af27517cc11 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Tue, 25 Jan 2022 19:10:28 -0800 Subject: [PATCH] live route fix: (#692) - when 'redirect_to_exact' is enabled, the top-frame expects a redirect for top-frame, however, live mode does not result in redirect to top-frame, so render live top-frame same as before - tests: ensure top-frame loads correctly for live mode with redirect_to_exact enabled - tests: fix webenact index tests --- pywb/apps/rewriterapp.py | 20 +++++++------ .../warcserver/index/test/test_indexsource.py | 30 ++++++++----------- tests/test_redirect_classic.py | 4 +++ 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/pywb/apps/rewriterapp.py b/pywb/apps/rewriterapp.py index 8123cf95..d8646323 100644 --- a/pywb/apps/rewriterapp.py +++ b/pywb/apps/rewriterapp.py @@ -379,13 +379,11 @@ class RewriterApp(object): response = self.handle_query(environ, wb_url, kwargs, full_prefix) else: - # don't return top-frame response for timegate with exact redirects - if not (is_timegate and redirect_to_exact): - response = self.handle_custom_response(environ, wb_url, - full_prefix, host_prefix, - kwargs) + response = self.handle_custom_response(environ, wb_url, + full_prefix, host_prefix, + kwargs) - keep_frame_response = not kwargs.get('no_timegate_check') and is_timegate and not redirect_to_exact and not is_proxy + keep_frame_response = (not kwargs.get('no_timegate_check') and is_timegate and not is_proxy) or redirect_to_exact if response and not keep_frame_response: @@ -465,8 +463,12 @@ class RewriterApp(object): return self.send_redirect(new_path, url_parts, urlrewriter) + + # only redirect to exact if not live, otherwise set to false + redirect_to_exact = redirect_to_exact and not cdx.get('is_live') + # return top-frame timegate response, with timestamp from cdx - if response and keep_frame_response: + if response and keep_frame_response and (not redirect_to_exact or not is_timegate): no_except_close(r.raw) return self.format_response(response, wb_url, full_prefix, is_timegate, is_proxy, cdx['timestamp']) @@ -487,8 +489,8 @@ class RewriterApp(object): if target_uri != wb_url.url and cdx.get('is_fuzzy') == '1': set_content_loc = True - # if redirect to exact timestamp, bit only if not live - if redirect_to_exact and not cdx.get('is_live'): + # if redirect to exact timestamp (only set if not live) + if redirect_to_exact: if set_content_loc or is_timegate or wb_url.timestamp != cdx.get('timestamp'): new_url = urlrewriter.get_new_url(url=target_uri, timestamp=cdx['timestamp'], diff --git a/pywb/warcserver/index/test/test_indexsource.py b/pywb/warcserver/index/test/test_indexsource.py index 1eeb14ba..161ecf1a 100644 --- a/pywb/warcserver/index/test/test_indexsource.py +++ b/pywb/warcserver/index/test/test_indexsource.py @@ -26,12 +26,12 @@ class TestIndexSources(FakeRedisTests, BaseTestClass): cls.all_sources = { 'file': FileIndexSource(TEST_CDX_PATH + 'iana.cdxj'), 'redis': RedisIndexSource('redis://localhost:6379/2/test:rediscdx'), - 'remote_cdx': RemoteIndexSource('https://webenact.rhizome.org/all/cdx?url={url}', - 'https://webenact.rhizome.org/all/{timestamp}id_/{url}'), + 'remote_cdx': RemoteIndexSource('https://webenact.rhizome.org/excellences-and-perfections/cdx?url={url}', + 'https://webenact.rhizome.org/excellences-and-perfections/{timestamp}id_/{url}'), - 'memento': MementoIndexSource('https://webenact.rhizome.org/all/{url}', - 'https://webenact.rhizome.org/all/timemap/link/{url}', - 'https://webenact.rhizome.org/all/{timestamp}id_/{url}') + 'memento': MementoIndexSource('https://webenact.rhizome.org/excellences-and-perfections/{url}', + 'https://webenact.rhizome.org/excellences-and-perfections/timemap/link/{url}', + 'https://webenact.rhizome.org/excellences-and-perfections/{timestamp}id_/{url}') } @pytest.fixture(params=local_sources) @@ -99,14 +99,10 @@ org,iana)/domains/root/servers 20140126201227 iana.warc.gz""" res, errs = self.query_single_source(remote_source, dict(url=url)) expected = """\ -com,instagram)/amaliaulman 20141014150552 https://webenact.rhizome.org/all/20141014150552id_/http://instagram.com/amaliaulman -com,instagram)/amaliaulman 20141014152101 https://webenact.rhizome.org/all/20141014152101id_/http://instagram.com/amaliaulman -com,instagram)/amaliaulman 20141014155217 https://webenact.rhizome.org/all/20141014155217id_/http://instagram.com/amaliaulman -com,instagram)/amaliaulman 20141014160238 https://webenact.rhizome.org/all/20141014160238id_/http://instagram.com/amaliaulman -com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman -com,instagram)/amaliaulman 20141014163116 https://webenact.rhizome.org/all/20141014163116id_/http://instagram.com/amaliaulman -com,instagram)/amaliaulman 20141014171636 https://webenact.rhizome.org/all/20141014171636id_/http://instagram.com/amaliaulman -com,instagram)/amaliaulman 20141014171954 https://webenact.rhizome.org/all/20141014171954id_/http://instagram.com/amaliaulman""" +com,instagram)/amaliaulman 20141014150552 https://webenact.rhizome.org/excellences-and-perfections/20141014150552id_/http://instagram.com/amaliaulman +com,instagram)/amaliaulman 20141014155217 https://webenact.rhizome.org/excellences-and-perfections/20141014155217id_/http://instagram.com/amaliaulman +com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/excellences-and-perfections/20141014162333id_/http://instagram.com/amaliaulman +com,instagram)/amaliaulman 20141014171636 https://webenact.rhizome.org/excellences-and-perfections/20141014171636id_/http://instagram.com/amaliaulman""" assert(key_ts_res(res, 'load_url') == expected) assert(errs == {}) @@ -117,7 +113,7 @@ com,instagram)/amaliaulman 20141014171954 https://webenact.rhizome.org/all/20141 res, errs = self.query_single_source(remote_source, dict(url=url, closest='20141014162332', limit=1, allowFuzzy='0')) expected = """\ -com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman""" +com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/excellences-and-perfections/20141014162333id_/http://instagram.com/amaliaulman""" assert(key_ts_res(res, 'load_url') == expected) assert(errs == {}) @@ -128,21 +124,21 @@ com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/all/20141 res, errs = self.query_single_source(remote_source, dict(url=url, closest='20141014162332', limit=1)) expected = """\ -com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman""" +com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/excellences-and-perfections/20141014162333id_/http://instagram.com/amaliaulman""" assert(key_ts_res(res, 'load_url') == expected) assert(errs == {}) # Url Match -- Wb Memento def test_remote_closest_wb_memento_loader(self): - replay = 'https://webenact.rhizome.org/all/{timestamp}id_/{url}' + replay = 'https://webenact.rhizome.org/excellences-and-perfections/{timestamp}id_/{url}' source = WBMementoIndexSource(replay, '', replay) url = 'http://instagram.com/amaliaulman' res, errs = self.query_single_source(source, dict(url=url, closest='20141014162332', limit=1)) expected = """\ -com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman""" +com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/excellences-and-perfections/20141014162333id_/http://instagram.com/amaliaulman""" assert(key_ts_res(res, 'load_url') == expected) assert(errs == {}) diff --git a/tests/test_redirect_classic.py b/tests/test_redirect_classic.py index 3eea7a0b..8aecbe82 100644 --- a/tests/test_redirect_classic.py +++ b/tests/test_redirect_classic.py @@ -74,6 +74,10 @@ class TestRedirectClassic(BaseConfigTest): resp = self.get('/live/{0}http://example.com/?test=test', fmod_slash) assert resp.status_int == 200 + def test_live_top_frame(self): + resp = self.testapp.get('/live/http://example.com/?test=test') + assert 'top_url' not in resp.text + def test_replay_limit_cdx(self): resp = self.testapp.get('/pywb/cdx?url=http://www.iana.org/*&output=json') assert resp.content_type == 'text/x-ndjson'