diff --git a/CHANGES.rst b/CHANGES.rst index 020c35ba..496500a6 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,15 @@ +pywb 2.6.4 changelist +~~~~~~~~~~~~~~~~~~~~~ + +* wombat.js: actually update to 3.3.6, update built wombat.js + +* Fix live mode when ``redirect_to_exact`` is enabled `#692 `_ + +* Rules: additional fuzzy ignore of facebook query param: `#691 `_ + +* Docs: typo fixes: `#669 `_, `#670 `_ + + pywb 2.6.3 changelist ~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/manual/access-control.rst b/docs/manual/access-control.rst index 8b1c5763..03727249 100644 --- a/docs/manual/access-control.rst +++ b/docs/manual/access-control.rst @@ -113,8 +113,8 @@ The available access types are as follows: - ``exclude`` - when matched, results are excluded from the index, as if they do not exist. User will receive a 404. - ``block`` - when matched, results are not excluded from the index, but access to the actual content is blocked. User will see a 451. -- ``allow`` - full access to the index and the resource, but may be overriden by embargo -- ``allow_ignore_embargo`` - full access to the index and resource, overriding any embargo settings +- ``allow`` - full access to the index and the resource, but may be overridden by embargo. +- ``allow_ignore_embargo`` - full access to the index and resource, overriding any embargo settings. The difference between ``exclude`` and ``block`` is that when blocked, the user can be notified that access is blocked, while with exclude, no trace of the resource is presented to the user. 
diff --git a/pywb/apps/rewriterapp.py b/pywb/apps/rewriterapp.py index b334610f..63f16124 100644 --- a/pywb/apps/rewriterapp.py +++ b/pywb/apps/rewriterapp.py @@ -379,13 +379,11 @@ class RewriterApp(object): response = self.handle_query(environ, wb_url, kwargs, full_prefix) else: - # don't return top-frame response for timegate with exact redirects - if not (is_timegate and redirect_to_exact): - response = self.handle_custom_response(environ, wb_url, - full_prefix, host_prefix, - kwargs) + response = self.handle_custom_response(environ, wb_url, + full_prefix, host_prefix, + kwargs) - keep_frame_response = not kwargs.get('no_timegate_check') and is_timegate and not redirect_to_exact and not is_proxy + keep_frame_response = (not kwargs.get('no_timegate_check') and is_timegate and not is_proxy) or redirect_to_exact if response and not keep_frame_response: @@ -465,8 +463,12 @@ class RewriterApp(object): return self.send_redirect(new_path, url_parts, urlrewriter) + + # only redirect to exact if not live, otherwise set to false + redirect_to_exact = redirect_to_exact and not cdx.get('is_live') + # return top-frame timegate response, with timestamp from cdx - if response and keep_frame_response: + if response and keep_frame_response and (not redirect_to_exact or not is_timegate): no_except_close(r.raw) return self.format_response(response, wb_url, full_prefix, is_timegate, is_proxy, cdx['timestamp']) @@ -487,8 +489,8 @@ class RewriterApp(object): if target_uri != wb_url.url and cdx.get('is_fuzzy') == '1': set_content_loc = True - # if redirect to exact timestamp, bit only if not live - if redirect_to_exact and not cdx.get('is_live'): + # if redirect to exact timestamp (only set if not live) + if redirect_to_exact: if set_content_loc or is_timegate or wb_url.timestamp != cdx.get('timestamp'): new_url = urlrewriter.get_new_url(url=target_uri, timestamp=cdx['timestamp'], diff --git a/pywb/rules.yaml b/pywb/rules.yaml index a02ea151..c421c9a4 100644 --- a/pywb/rules.yaml +++ 
b/pywb/rules.yaml @@ -50,6 +50,13 @@ default_filters: - match: '[?&](\w*(bust|ts)\w*=1[\d]{12,15})(?=&|$)' replace: '' + # remove facebook link ID when pywb urls are shared on facebook + - match: '[?&](fbclid)=(.*)+(?=&|$)' + replace: '' + + + + rules: # twitter rules diff --git a/pywb/utils/test/test_binsearch.py b/pywb/utils/test/test_binsearch.py index 18e1cb19..01712dfc 100644 --- a/pywb/utils/test/test_binsearch.py +++ b/pywb/utils/test/test_binsearch.py @@ -105,7 +105,7 @@ def test_rev_merge(): # check reverse merge: verify merging of lists, than reversing - # eqauls merging with reverse=True of reversed lists + # equals merging with reverse=True of reversed lists assert (list(reversed(list(merge(lines1, lines2)))) == list(merge(reversed(lines1), reversed(lines2), reverse=True))) diff --git a/pywb/warcserver/index/test/test_indexsource.py b/pywb/warcserver/index/test/test_indexsource.py index 1eeb14ba..161ecf1a 100644 --- a/pywb/warcserver/index/test/test_indexsource.py +++ b/pywb/warcserver/index/test/test_indexsource.py @@ -26,12 +26,12 @@ class TestIndexSources(FakeRedisTests, BaseTestClass): cls.all_sources = { 'file': FileIndexSource(TEST_CDX_PATH + 'iana.cdxj'), 'redis': RedisIndexSource('redis://localhost:6379/2/test:rediscdx'), - 'remote_cdx': RemoteIndexSource('https://webenact.rhizome.org/all/cdx?url={url}', - 'https://webenact.rhizome.org/all/{timestamp}id_/{url}'), + 'remote_cdx': RemoteIndexSource('https://webenact.rhizome.org/excellences-and-perfections/cdx?url={url}', + 'https://webenact.rhizome.org/excellences-and-perfections/{timestamp}id_/{url}'), - 'memento': MementoIndexSource('https://webenact.rhizome.org/all/{url}', - 'https://webenact.rhizome.org/all/timemap/link/{url}', - 'https://webenact.rhizome.org/all/{timestamp}id_/{url}') + 'memento': MementoIndexSource('https://webenact.rhizome.org/excellences-and-perfections/{url}', + 'https://webenact.rhizome.org/excellences-and-perfections/timemap/link/{url}', + 
'https://webenact.rhizome.org/excellences-and-perfections/{timestamp}id_/{url}') } @pytest.fixture(params=local_sources) @@ -99,14 +99,10 @@ org,iana)/domains/root/servers 20140126201227 iana.warc.gz""" res, errs = self.query_single_source(remote_source, dict(url=url)) expected = """\ -com,instagram)/amaliaulman 20141014150552 https://webenact.rhizome.org/all/20141014150552id_/http://instagram.com/amaliaulman -com,instagram)/amaliaulman 20141014152101 https://webenact.rhizome.org/all/20141014152101id_/http://instagram.com/amaliaulman -com,instagram)/amaliaulman 20141014155217 https://webenact.rhizome.org/all/20141014155217id_/http://instagram.com/amaliaulman -com,instagram)/amaliaulman 20141014160238 https://webenact.rhizome.org/all/20141014160238id_/http://instagram.com/amaliaulman -com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman -com,instagram)/amaliaulman 20141014163116 https://webenact.rhizome.org/all/20141014163116id_/http://instagram.com/amaliaulman -com,instagram)/amaliaulman 20141014171636 https://webenact.rhizome.org/all/20141014171636id_/http://instagram.com/amaliaulman -com,instagram)/amaliaulman 20141014171954 https://webenact.rhizome.org/all/20141014171954id_/http://instagram.com/amaliaulman""" +com,instagram)/amaliaulman 20141014150552 https://webenact.rhizome.org/excellences-and-perfections/20141014150552id_/http://instagram.com/amaliaulman +com,instagram)/amaliaulman 20141014155217 https://webenact.rhizome.org/excellences-and-perfections/20141014155217id_/http://instagram.com/amaliaulman +com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/excellences-and-perfections/20141014162333id_/http://instagram.com/amaliaulman +com,instagram)/amaliaulman 20141014171636 https://webenact.rhizome.org/excellences-and-perfections/20141014171636id_/http://instagram.com/amaliaulman""" assert(key_ts_res(res, 'load_url') == expected) assert(errs == {}) @@ -117,7 +113,7 @@ 
com,instagram)/amaliaulman 20141014171954 https://webenact.rhizome.org/all/20141 res, errs = self.query_single_source(remote_source, dict(url=url, closest='20141014162332', limit=1, allowFuzzy='0')) expected = """\ -com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman""" +com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/excellences-and-perfections/20141014162333id_/http://instagram.com/amaliaulman""" assert(key_ts_res(res, 'load_url') == expected) assert(errs == {}) @@ -128,21 +124,21 @@ com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/all/20141 res, errs = self.query_single_source(remote_source, dict(url=url, closest='20141014162332', limit=1)) expected = """\ -com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman""" +com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/excellences-and-perfections/20141014162333id_/http://instagram.com/amaliaulman""" assert(key_ts_res(res, 'load_url') == expected) assert(errs == {}) # Url Match -- Wb Memento def test_remote_closest_wb_memento_loader(self): - replay = 'https://webenact.rhizome.org/all/{timestamp}id_/{url}' + replay = 'https://webenact.rhizome.org/excellences-and-perfections/{timestamp}id_/{url}' source = WBMementoIndexSource(replay, '', replay) url = 'http://instagram.com/amaliaulman' res, errs = self.query_single_source(source, dict(url=url, closest='20141014162332', limit=1)) expected = """\ -com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/all/20141014162333id_/http://instagram.com/amaliaulman""" +com,instagram)/amaliaulman 20141014162333 https://webenact.rhizome.org/excellences-and-perfections/20141014162333id_/http://instagram.com/amaliaulman""" assert(key_ts_res(res, 'load_url') == expected) assert(errs == {}) diff --git a/pywb/warcserver/resource/responseloader.py 
b/pywb/warcserver/resource/responseloader.py index c576d4b7..4f5c4440 100644 --- a/pywb/warcserver/resource/responseloader.py +++ b/pywb/warcserver/resource/responseloader.py @@ -230,7 +230,7 @@ class WARCPathLoader(DefaultResolverMixin, BaseLoader): http_headers_buff = http_headers.to_bytes() # if new http_headers_buff is different length, - # attempt to adjust content-lenghth on the WARC record + # attempt to adjust content-length on the WARC record if orig_size and len(http_headers_buff) != orig_size: orig_cl = payload.rec_headers.get_header('Content-Length') if orig_cl: diff --git a/pywb/warcserver/resource/test/test_pathresolvers.py b/pywb/warcserver/resource/test/test_pathresolvers.py index a9ab6747..c28fe2ea 100644 --- a/pywb/warcserver/resource/test/test_pathresolvers.py +++ b/pywb/warcserver/resource/test/test_pathresolvers.py @@ -161,7 +161,7 @@ class TestPathIndex(object): res = DefaultResolverMixin.make_best_resolver(a_file) assert isinstance(res, PathIndexResolver) - # a dir -- asume prefix + # a dir -- assume prefix res = DefaultResolverMixin.make_best_resolver(a_dir) assert isinstance(res, PrefixResolver) diff --git a/tests/test_redirect_classic.py b/tests/test_redirect_classic.py index 3eea7a0b..8aecbe82 100644 --- a/tests/test_redirect_classic.py +++ b/tests/test_redirect_classic.py @@ -74,6 +74,10 @@ class TestRedirectClassic(BaseConfigTest): resp = self.get('/live/{0}http://example.com/?test=test', fmod_slash) assert resp.status_int == 200 + def test_live_top_frame(self): + resp = self.testapp.get('/live/http://example.com/?test=test') + assert 'top_url' not in resp.text + def test_replay_limit_cdx(self): resp = self.testapp.get('/pywb/cdx?url=http://www.iana.org/*&output=json') assert resp.content_type == 'text/x-ndjson'