1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

live route fix: (#692)

- when 'redirect_to_exact' is enabled, the top-frame response is expected to be a redirect; however, live mode does not redirect the top-frame, so render the live top-frame the same as before
- tests: ensure top-frame loads correctly for live mode with redirect_to_exact enabled
- tests: fix webenact index tests
This commit is contained in:
Ilya Kreymer 2022-01-25 19:10:28 -08:00 committed by GitHub
parent c42833d4ad
commit 38b1952d34
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 28 additions and 26 deletions

View File

@ -379,13 +379,11 @@ class RewriterApp(object):
response = self.handle_query(environ, wb_url, kwargs, full_prefix)
else:
# don't return top-frame response for timegate with exact redirects
if not (is_timegate and redirect_to_exact):
response = self.handle_custom_response(environ, wb_url,
full_prefix, host_prefix,
kwargs)
keep_frame_response = not kwargs.get('no_timegate_check') and is_timegate and not redirect_to_exact and not is_proxy
keep_frame_response = (not kwargs.get('no_timegate_check') and is_timegate and not is_proxy) or redirect_to_exact
if response and not keep_frame_response:
@ -465,8 +463,12 @@ class RewriterApp(object):
return self.send_redirect(new_path, url_parts, urlrewriter)
# only redirect to exact if not live, otherwise set to false
redirect_to_exact = redirect_to_exact and not cdx.get('is_live')
# return top-frame timegate response, with timestamp from cdx
if response and keep_frame_response:
if response and keep_frame_response and (not redirect_to_exact or not is_timegate):
no_except_close(r.raw)
return self.format_response(response, wb_url, full_prefix, is_timegate, is_proxy, cdx['timestamp'])
@ -487,8 +489,8 @@ class RewriterApp(object):
if target_uri != wb_url.url and cdx.get('is_fuzzy') == '1':
set_content_loc = True
# if redirect to exact timestamp, but only if not live
if redirect_to_exact and not cdx.get('is_live'):
# if redirect to exact timestamp (only set if not live)
if redirect_to_exact:
if set_content_loc or is_timegate or wb_url.timestamp != cdx.get('timestamp'):
new_url = urlrewriter.get_new_url(url=target_uri,
timestamp=cdx['timestamp'],

View File

@ -26,12 +26,12 @@ class TestIndexSources(FakeRedisTests, BaseTestClass):
cls.all_sources = {
'file': FileIndexSource(TEST_CDX_PATH + 'iana.cdxj'),
'redis': RedisIndexSource('redis://localhost:6379/2/test:rediscdx'),
'remote_cdx': RemoteIndexSource('https://webenact.rhizome.org/all/cdx?url={url}',
'https://webenact.rhizome.org/all/{timestamp}id_/{url}'),
'remote_cdx': RemoteIndexSource('https://webenact.rhizome.org/excellences-and-perfections/cdx?url={url}',
'https://webenact.rhizome.org/excellences-and-perfections/{timestamp}id_/{url}'),
'memento': MementoIndexSource('https://webenact.rhizome.org/all/{url}',
'https://webenact.rhizome.org/all/timemap/link/{url}',
'https://webenact.rhizome.org/all/{timestamp}id_/{url}')
'memento': MementoIndexSource('https://webenact.rhizome.org/excellences-and-perfections/{url}',
'https://webenact.rhizome.org/excellences-and-perfections/timemap/link/{url}',
'https://webenact.rhizome.org/excellences-and-perfections/{timestamp}id_/{url}')
}
@pytest.fixture(params=local_sources)
@ -99,14 +99,10 @@ org,iana)/domains/root/servers 20140126201227 iana.warc.gz"""
res, errs = self.query_single_source(remote_source, dict(url=url))
expected = """\
com,instagram)/amaliaulman 20141014150552 https://webenact.rhizome.org/all/20141014150552id_/http://instagram.com/amaliaulman
com,instagram)/amaliaulman 20141014152101 https://webenact.rhizome.org/all/20141014152101id_/http://instagram.com/amaliaulman
com,instagram)/amaliaulman 20141014155217 https://webenact.rhizome.org/all/20141014155217id_/http://instagram.com/amaliaulman
com,instagram)/amaliaulman 20141014160238 https://webenact.rhizome.org/all/20141014160238id_/http://instagram.com/amaliaulman
com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman
com,instagram)/amaliaulman 20141014163116 https://webenact.rhizome.org/all/20141014163116id_/http://instagram.com/amaliaulman
com,instagram)/amaliaulman 20141014171636 https://webenact.rhizome.org/all/20141014171636id_/http://instagram.com/amaliaulman
com,instagram)/amaliaulman 20141014171954 https://webenact.rhizome.org/all/20141014171954id_/http://instagram.com/amaliaulman"""
com,instagram)/amaliaulman 20141014150552 https://webenact.rhizome.org/excellences-and-perfections/20141014150552id_/http://instagram.com/amaliaulman
com,instagram)/amaliaulman 20141014155217 https://webenact.rhizome.org/excellences-and-perfections/20141014155217id_/http://instagram.com/amaliaulman
com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/excellences-and-perfections/20141014162333id_/http://instagram.com/amaliaulman
com,instagram)/amaliaulman 20141014171636 https://webenact.rhizome.org/excellences-and-perfections/20141014171636id_/http://instagram.com/amaliaulman"""
assert(key_ts_res(res, 'load_url') == expected)
assert(errs == {})
@ -117,7 +113,7 @@ com,instagram)/amaliaulman 20141014171954 https://webenact.rhizome.org/all/20141
res, errs = self.query_single_source(remote_source, dict(url=url, closest='20141014162332', limit=1, allowFuzzy='0'))
expected = """\
com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman"""
com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/excellences-and-perfections/20141014162333id_/http://instagram.com/amaliaulman"""
assert(key_ts_res(res, 'load_url') == expected)
assert(errs == {})
@ -128,21 +124,21 @@ com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/all/20141
res, errs = self.query_single_source(remote_source, dict(url=url, closest='20141014162332', limit=1))
expected = """\
com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman"""
com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/excellences-and-perfections/20141014162333id_/http://instagram.com/amaliaulman"""
assert(key_ts_res(res, 'load_url') == expected)
assert(errs == {})
# Url Match -- Wb Memento
def test_remote_closest_wb_memento_loader(self):
replay = 'https://webenact.rhizome.org/all/{timestamp}id_/{url}'
replay = 'https://webenact.rhizome.org/excellences-and-perfections/{timestamp}id_/{url}'
source = WBMementoIndexSource(replay, '', replay)
url = 'http://instagram.com/amaliaulman'
res, errs = self.query_single_source(source, dict(url=url, closest='20141014162332', limit=1))
expected = """\
com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman"""
com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/excellences-and-perfections/20141014162333id_/http://instagram.com/amaliaulman"""
assert(key_ts_res(res, 'load_url') == expected)
assert(errs == {})

View File

@ -74,6 +74,10 @@ class TestRedirectClassic(BaseConfigTest):
resp = self.get('/live/{0}http://example.com/?test=test', fmod_slash)
assert resp.status_int == 200
def test_live_top_frame(self):
# Request a URL through the /live/ route with no timestamp or modifier
# in the path.
resp = self.testapp.get('/live/http://example.com/?test=test')
# The response body must not contain the string 'top_url'.
# NOTE(review): presumably 'top_url' is only emitted in rewritten
# (inner-frame) content, so its absence indicates the top-frame page was
# rendered -- confirm against the templates.
assert 'top_url' not in resp.text
def test_replay_limit_cdx(self):
resp = self.testapp.get('/pywb/cdx?url=http://www.iana.org/*&output=json')
assert resp.content_type == 'text/x-ndjson'