1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

2.6.1 Release Work (#679)

* rules: add custom twitter video rewriting to capture non-chunked twitter video (max bitrate of 5000000)

* autoescaping regression fix: don't escape URL in frame_insert.html, use as is

* html rewriting:
- don't rewrite 'data-' attributes, no longer necessary for best fidelity
- do rewrite <link rel='alternate'> as main page (mp_)
- update html rewriting test

* feature: support customizing the static path used in pywb via 'static_prefix' config option (defaults to 'static')

* update to latest wombat (3.3.4)

* bump to 2.6.1, update CHANGES for 2.6.1
This commit is contained in:
Ilya Kreymer 2021-11-11 22:30:54 -08:00 committed by GitHub
parent 96de80f83e
commit a6be76642a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 65 additions and 18 deletions

View File

@ -1,3 +1,17 @@
pywb 2.6.1 changelist
~~~~~~~~~~~~~~~~~~~~~
* Domain-Specific Rewriting Rules: Rewrite twitter video to capture full videos.
* Disable rewriting ``data-`` attributes, better fidelity without rewriting, fixes `#676 <https://github.com/webrecorder/pywb/pull/676>`_
* Fix regression in autoescaping URL in frame_insert.html
* Feature: ability to set path used to serve static assets (default ``static``) via ``static_prefix`` config option.
* Update to wombat.js 3.3.4 (includes various rewriting fixes)
pywb 2.6.0 changelist
~~~~~~~~~~~~~~~~~~~~~

View File

@ -97,6 +97,8 @@ class RewriterApp(object):
self.enable_memento = self.config.get('enable_memento')
self.static_prefix = self.config.get('static_prefix', 'static')
csp_header = self.config.get('csp-header', self.DEFAULT_CSP)
if csp_header:
self.csp_header = ('Content-Security-Policy', csp_header)
@ -323,8 +325,9 @@ class RewriterApp(object):
rel_prefix = self.get_rel_prefix(environ)
full_prefix = host_prefix + rel_prefix
environ['pywb.host_prefix'] = host_prefix
pywb_static_prefix = host_prefix + environ.get('pywb.app_prefix', '') + environ.get(
'pywb.static_prefix', '/static/')
pywb_static_prefix = host_prefix + environ.get('pywb.app_prefix', '') + '/' + self.static_prefix
environ['pywb.static_prefix'] = pywb_static_prefix
pywb_static_prefix += '/'
is_proxy = ('wsgiprox.proxy_host' in environ)
# if OPTIONS in proxy mode, just generate the proxy response

View File

@ -416,12 +416,6 @@ class HTMLRewriterMixin(StreamingRewriter):
rw_mod = handler.get(attr_name)
attr_value = self._rewrite_url(attr_value, rw_mod)
# special case: data- attrs, conditional rewrite
elif attr_name and attr_value and attr_name.startswith('data-'):
if attr_value.startswith(self.DATA_RW_PROTOCOLS):
rw_mod = 'oe_'
attr_value = self._rewrite_url(attr_value, rw_mod)
# special case: base tag
elif (tag == 'base') and (attr_name == 'href') and attr_value:
rw_mod = handler.get(attr_name)
@ -469,7 +463,7 @@ class HTMLRewriterMixin(StreamingRewriter):
rw_mod = self.PRELOAD_TYPES.get(preload, rw_mod)
# for html imports with an optional as (google exclusive)
elif rel == 'import':
elif rel == 'import' or rel == 'alternate':
rw_mod = 'mp_'
elif rel == 'stylesheet':

View File

@ -86,3 +86,29 @@ def rewrite_fb_dash(string, *args):
string += json.dumps(best_ids)
return string
def rewrite_tw_dash(string, *args):
    """Reduce a Twitter ``video_info`` JSON blob to a single MP4 variant.

    The JSON text is parsed and its ``"variants"`` list is scanned for
    entries whose ``"content_type"`` is ``"video/mp4"``. Among those, the
    entry with the highest ``"bitrate"`` that does not exceed 5,000,000 is
    kept as the only variant and the object is serialized back to JSON.

    This is a best-effort rewrite: if the text is not valid JSON, does not
    have the expected structure, or no variant qualifies, the input is
    returned unchanged.

    :param str string: JSON text containing a ``"variants"`` list.
    :param args: extra positional arguments; accepted and ignored.
    :return: the rewritten JSON text, or ``string`` unchanged.
    :rtype: str
    """
    # Imported locally so the function does not rely on a module-level
    # ``logging`` import being present.
    import logging

    # Upper bound on the bitrate of the variant that may be selected.
    max_bitrate = 5000000

    try:
        data = json.loads(string)

        best_variant = None
        best_bitrate = 0

        for variant in data["variants"]:
            if variant["content_type"] != "video/mp4":
                continue

            bitrate = variant.get("bitrate")
            # Skip missing/zero bitrates; strict '>' keeps the first
            # variant seen when several share the same bitrate.
            if bitrate and best_bitrate < bitrate <= max_bitrate:
                best_variant = variant
                best_bitrate = bitrate

        if best_variant:
            data["variants"] = [best_variant]
            string = json.dumps(data)

    except Exception as e:
        # Deliberately broad: a failed rewrite must never break the
        # response, so report the problem and fall through to returning
        # the original text.
        logging.getLogger(__name__).warning(
            "rewrite_tw_dash: leaving video_info unchanged: %r", e
        )

    return string

View File

@ -322,7 +322,7 @@ class BaseInsertView(object):
kwargs.update(params)
kwargs['env'] = env
kwargs['static_prefix'] = env.get('pywb.host_prefix', '') + env.get('pywb.app_prefix', '') + '/static'
kwargs['static_prefix'] = env.get('pywb.static_prefix')
return template.render(**kwargs)

View File

@ -138,9 +138,9 @@ r"""
>>> parse('<meta http-equiv="Content-Security-Policy" content="default-src http://example.com" />')
<meta http-equiv="Content-Security-Policy" _content="default-src http://example.com"/>
# Custom -data attribs
# Don't rewrite Custom -data attribs
>>> parse('<div data-url="http://example.com/a/b/c.html" data-some-other-value="http://example.com/img.gif">')
<div data-url="/web/20131226101010oe_/http://example.com/a/b/c.html" data-some-other-value="/web/20131226101010oe_/http://example.com/img.gif">
<div data-url="http://example.com/a/b/c.html" data-some-other-value="http://example.com/img.gif">
# param tag -- rewrite conditionally if url
>>> parse('<param value="http://example.com/"/>')

View File

@ -68,6 +68,16 @@ rules:
fuzzy_lookup: '()'
- url_prefix: ['com,twitter,api)/2/', 'com,twitter)/i/api/2/', 'com,twitter)/i/api/graphql/']
rewrite:
js_regexs:
- match: 'video_info":(.*?}]})'
group: 1
function: 'pywb.rewrite.rewrite_dash:rewrite_tw_dash'
# facebook rules
#=================================================================

File diff suppressed because one or more lines are too long

View File

@ -18,8 +18,6 @@ html, body
{{ banner_html }}
{% endautoescape %}
</head>
<body style="margin: 0px; padding: 0px;">
@ -35,3 +33,5 @@ html, body
</script>
</body>
</html>
{% endautoescape %}

View File

@ -1,4 +1,4 @@
__version__ = '2.6.0'
__version__ = '2.6.1'
if __name__ == '__main__':
print(__version__)

View File

@ -111,7 +111,7 @@ class MementoOverrideTests(object):
# Load expected link headers
MementoOverrideTests.link_header_data = None
with open(to_path(get_test_dir() + '/text_content/link_headers.yaml')) as fh:
MementoOverrideTests.link_header_data = yaml.load(fh)
MementoOverrideTests.link_header_data = yaml.load(fh, Loader=yaml.Loader)
MementoOverrideTests.orig_get_timegate_links = MementoIndexSource.get_timegate_links

2
wombat

@ -1 +1 @@
Subproject commit 4edfa768fd20195644e6bff96c52aa0f50baa21f
Subproject commit e0567d5f7bbecd1687d46ee2c7883c0bfd39a6c2