Merge branch 'main' into new-ui-work

2025-03-15 00:03:28 +01:00 · 2022-01-25 23:20:24 -08:00 · 2022-01-25 23:20:24 -08:00 · 08826f886b
commit 08826f886b
parent 581e4601bb 0f05dbde55
9 changed files with 52 additions and 31 deletions
--- a/CHANGES.rst
+++ b/CHANGES.rst
@ -1,3 +1,15 @@
 pywb 2.6.4 changelist
 ~~~~~~~~~~~~~~~~~~~~~
 * wombat.js: actually update to 3.3.6, update built wombat.js
 * Fix live mode when ``redirect_to_exact`` is enabled `#692 <https://github.com/webrecorder/pywb/pull/692>`_
 * Rules: additional fuzzy ignore of facebook query param: `#691 <https://github.com/webrecorder/pywb/pull/691>`_
 * Docs: typo fixes: `#669 <https://github.com/webrecorder/pywb/pull/669>`_, `#670 <https://github.com/webrecorder/pywb/pull/670>`_
 pywb 2.6.3 changelist
 ~~~~~~~~~~~~~~~~~~~~~
--- a/docs/manual/access-control.rst
+++ b/docs/manual/access-control.rst
@ -113,8 +113,8 @@ The available access types are as follows:
 - ``exclude`` - when matched, results are excluded from the index, as if they do not exist. User will receive a 404.
 - ``block`` - when matched, results are not excluded from the index, but access to the actual content is blocked. User will see a 451.
- ``allow`` - full access to the index and the resource, but may be overriden by embargo
+- ``allow`` - full access to the index and the resource, but may be overridden by embargo.
- ``allow_ignore_embargo`` - full access to the index and resource, overriding any embargo settings
+- ``allow_ignore_embargo`` - full access to the index and resource, overriding any embargo settings.
 The difference between ``exclude`` and ``block`` is that when blocked, the user can be notified that access is blocked, while
 with exclude, no trace of the resource is presented to the user.
--- a/pywb/apps/rewriterapp.py
+++ b/pywb/apps/rewriterapp.py
@ -379,13 +379,11 @@ class RewriterApp(object):
                response = self.handle_query(environ, wb_url, kwargs, full_prefix)
            else:
-                # don't return top-frame response for timegate with exact redirects
+                response = self.handle_custom_response(environ, wb_url,
-                if not (is_timegate and redirect_to_exact):
+                                                       full_prefix, host_prefix,
-                    response = self.handle_custom_response(environ, wb_url,
+                                                       kwargs)
                                                           full_prefix, host_prefix,
                                                           kwargs)
-                    keep_frame_response = not kwargs.get('no_timegate_check') and is_timegate and not redirect_to_exact and not is_proxy
+                keep_frame_response = (not kwargs.get('no_timegate_check') and is_timegate and not is_proxy) or redirect_to_exact
        if response and not keep_frame_response:
@ -465,8 +463,12 @@ class RewriterApp(object):
            return self.send_redirect(new_path, url_parts, urlrewriter)
        # only redirect to exact if not live, otherwise set to false
        redirect_to_exact = redirect_to_exact and not cdx.get('is_live')
        # return top-frame timegate response, with timestamp from cdx
-        if response and keep_frame_response:
+        if response and keep_frame_response and (not redirect_to_exact or not is_timegate):
            no_except_close(r.raw)
            return self.format_response(response, wb_url, full_prefix, is_timegate, is_proxy, cdx['timestamp'])
@ -487,8 +489,8 @@ class RewriterApp(object):
        if target_uri != wb_url.url and cdx.get('is_fuzzy') == '1':
            set_content_loc = True
-        # if redirect to exact timestamp, bit only if not live
+        # if redirect to exact timestamp (only set if not live)
-        if redirect_to_exact and not cdx.get('is_live'):
+        if redirect_to_exact:
            if set_content_loc or is_timegate or wb_url.timestamp != cdx.get('timestamp'):
                new_url = urlrewriter.get_new_url(url=target_uri,
                                                  timestamp=cdx['timestamp'],
--- a/pywb/rules.yaml
+++ b/pywb/rules.yaml
@ -50,6 +50,13 @@ default_filters:
        - match: '[?&](\w*(bust|ts)\w*=1[\d]{12,15})(?=&|$)'
          replace: ''
        # remove facebook link ID when pywb urls are shared on facebook
        - match: '[?&](fbclid)=(.*)+(?=&|$)'
          replace: ''
 rules:
    # twitter rules
--- a/pywb/utils/test/test_binsearch.py
+++ b/pywb/utils/test/test_binsearch.py
@ -105,7 +105,7 @@ def test_rev_merge():
    # check reverse merge: verify merging of lists, then reversing
-    # eqauls merging with reverse=True of reversed lists
+    # equals merging with reverse=True of reversed lists
    assert (list(reversed(list(merge(lines1, lines2)))) ==
            list(merge(reversed(lines1), reversed(lines2), reverse=True)))
--- a/pywb/warcserver/index/test/test_indexsource.py
+++ b/pywb/warcserver/index/test/test_indexsource.py
@ -26,12 +26,12 @@ class TestIndexSources(FakeRedisTests, BaseTestClass):
        cls.all_sources = {
            'file': FileIndexSource(TEST_CDX_PATH + 'iana.cdxj'),
            'redis': RedisIndexSource('redis://localhost:6379/2/test:rediscdx'),
-            'remote_cdx': RemoteIndexSource('https://webenact.rhizome.org/all/cdx?url={url}',
+            'remote_cdx': RemoteIndexSource('https://webenact.rhizome.org/excellences-and-perfections/cdx?url={url}',
-                              'https://webenact.rhizome.org/all/{timestamp}id_/{url}'),
+                              'https://webenact.rhizome.org/excellences-and-perfections/{timestamp}id_/{url}'),
-            'memento': MementoIndexSource('https://webenact.rhizome.org/all/{url}',
+            'memento': MementoIndexSource('https://webenact.rhizome.org/excellences-and-perfections/{url}',
-                               'https://webenact.rhizome.org/all/timemap/link/{url}',
+                               'https://webenact.rhizome.org/excellences-and-perfections/timemap/link/{url}',
-                               'https://webenact.rhizome.org/all/{timestamp}id_/{url}')
+                               'https://webenact.rhizome.org/excellences-and-perfections/{timestamp}id_/{url}')
        }
    @pytest.fixture(params=local_sources)
@ -99,14 +99,10 @@ org,iana)/domains/root/servers 20140126201227 iana.warc.gz"""
        res, errs = self.query_single_source(remote_source, dict(url=url))
        expected = """\
-com,instagram)/amaliaulman 20141014150552 https://webenact.rhizome.org/all/20141014150552id_/http://instagram.com/amaliaulman
+com,instagram)/amaliaulman 20141014150552 https://webenact.rhizome.org/excellences-and-perfections/20141014150552id_/http://instagram.com/amaliaulman
-com,instagram)/amaliaulman 20141014152101 https://webenact.rhizome.org/all/20141014152101id_/http://instagram.com/amaliaulman
+com,instagram)/amaliaulman 20141014155217 https://webenact.rhizome.org/excellences-and-perfections/20141014155217id_/http://instagram.com/amaliaulman
-com,instagram)/amaliaulman 20141014155217 https://webenact.rhizome.org/all/20141014155217id_/http://instagram.com/amaliaulman
+com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/excellences-and-perfections/20141014162333id_/http://instagram.com/amaliaulman
-com,instagram)/amaliaulman 20141014160238 https://webenact.rhizome.org/all/20141014160238id_/http://instagram.com/amaliaulman
+com,instagram)/amaliaulman 20141014171636 https://webenact.rhizome.org/excellences-and-perfections/20141014171636id_/http://instagram.com/amaliaulman"""
 com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman
 com,instagram)/amaliaulman 20141014163116 https://webenact.rhizome.org/all/20141014163116id_/http://instagram.com/amaliaulman
 com,instagram)/amaliaulman 20141014171636 https://webenact.rhizome.org/all/20141014171636id_/http://instagram.com/amaliaulman
 com,instagram)/amaliaulman 20141014171954 https://webenact.rhizome.org/all/20141014171954id_/http://instagram.com/amaliaulman"""
        assert(key_ts_res(res, 'load_url') == expected)
        assert(errs == {})
@ -117,7 +113,7 @@ com,instagram)/amaliaulman 20141014171954 https://webenact.rhizome.org/all/20141
        res, errs = self.query_single_source(remote_source, dict(url=url, closest='20141014162332', limit=1, allowFuzzy='0'))
        expected = """\
-com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman"""
+com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/excellences-and-perfections/20141014162333id_/http://instagram.com/amaliaulman"""
        assert(key_ts_res(res, 'load_url') == expected)
        assert(errs == {})
@ -128,21 +124,21 @@ com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/all/20141
        res, errs = self.query_single_source(remote_source, dict(url=url, closest='20141014162332', limit=1))
        expected = """\
-com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman"""
+com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/excellences-and-perfections/20141014162333id_/http://instagram.com/amaliaulman"""
        assert(key_ts_res(res, 'load_url') == expected)
        assert(errs == {})
    # Url Match -- Wb Memento
    def test_remote_closest_wb_memento_loader(self):
-        replay = 'https://webenact.rhizome.org/all/{timestamp}id_/{url}'
+        replay = 'https://webenact.rhizome.org/excellences-and-perfections/{timestamp}id_/{url}'
        source = WBMementoIndexSource(replay, '', replay)
        url = 'http://instagram.com/amaliaulman'
        res, errs = self.query_single_source(source, dict(url=url, closest='20141014162332', limit=1))
        expected = """\
-com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman"""
+com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/excellences-and-perfections/20141014162333id_/http://instagram.com/amaliaulman"""
        assert(key_ts_res(res, 'load_url') == expected)
        assert(errs == {})
--- a/pywb/warcserver/resource/responseloader.py
+++ b/pywb/warcserver/resource/responseloader.py
@ -230,7 +230,7 @@ class WARCPathLoader(DefaultResolverMixin, BaseLoader):
                http_headers_buff = http_headers.to_bytes()
                # if new http_headers_buff is different length,
-                # attempt to adjust content-lenghth on the WARC record
+                # attempt to adjust content-length on the WARC record
                if orig_size and len(http_headers_buff) != orig_size:
                    orig_cl = payload.rec_headers.get_header('Content-Length')
                    if orig_cl:
--- a/pywb/warcserver/resource/test/test_pathresolvers.py
+++ b/pywb/warcserver/resource/test/test_pathresolvers.py
@ -161,7 +161,7 @@ class TestPathIndex(object):
        res = DefaultResolverMixin.make_best_resolver(a_file)
        assert isinstance(res, PathIndexResolver)
-        # a dir -- asume prefix
+        # a dir -- assume prefix
        res = DefaultResolverMixin.make_best_resolver(a_dir)
        assert isinstance(res, PrefixResolver)
--- a/tests/test_redirect_classic.py
+++ b/tests/test_redirect_classic.py
@ -74,6 +74,10 @@ class TestRedirectClassic(BaseConfigTest):
        resp = self.get('/live/{0}http://example.com/?test=test', fmod_slash)
        assert resp.status_int == 200
    def test_live_top_frame(self):
        resp = self.testapp.get('/live/http://example.com/?test=test')
        assert 'top_url' not in resp.text
    def test_replay_limit_cdx(self):
        resp = self.testapp.get('/pywb/cdx?url=http://www.iana.org/*&output=json')
        assert resp.content_type == 'text/x-ndjson'