1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

Merge branch 'main' into new-ui-work

This commit is contained in:
Ilya Kreymer 2021-12-13 19:20:25 -08:00
commit d83c92ef8a
14 changed files with 117 additions and 24 deletions

View File

@ -1,3 +1,23 @@
pywb 2.6.2 changelist
~~~~~~~~~~~~~~~~~~~~~
* Fix regression introduced in 2.6.1 where static files were not being loaded correctly. `#678 <https://github.com/webrecorder/pywb/pull/678>`_
pywb 2.6.1 changelist
~~~~~~~~~~~~~~~~~~~~~
* Domain-Specific Rewriting Rules: Rewrite twitter video to capture full videos.
* Disable rewriting ``data-`` attributes, better fidelity without rewriting, fixes `#676 <https://github.com/webrecorder/pywb/pull/676>`_
* Fix regression in autoescaping URL in frame_insert.html
* Feature: ability to set path used to serve static assets (default ``static``) via ``static_prefix`` config option.
* Update wombat.js 3.3.4 (includes various rewriting fixes)
pywb 2.6.0 changelist
~~~~~~~~~~~~~~~~~~~~~

View File

@ -30,7 +30,7 @@ redirect_to_exact: true
# default_locale: en
# uncomment to set available locales
# locales:
# - en
# - es
locales:
- en
- fr

View File

@ -97,6 +97,8 @@ class RewriterApp(object):
self.enable_memento = self.config.get('enable_memento')
self.static_prefix = self.config.get('static_prefix', 'static')
csp_header = self.config.get('csp-header', self.DEFAULT_CSP)
if csp_header:
self.csp_header = ('Content-Security-Policy', csp_header)
@ -323,16 +325,15 @@ class RewriterApp(object):
rel_prefix = self.get_rel_prefix(environ)
full_prefix = host_prefix + rel_prefix
environ['pywb.host_prefix'] = host_prefix
pywb_static_prefix = host_prefix + environ.get('pywb.app_prefix', '') + environ.get(
'pywb.static_prefix', '/static/')
pywb_static_prefix = host_prefix + environ.get('pywb.app_prefix', '') + '/' + self.static_prefix
environ['pywb.static_prefix'] = pywb_static_prefix
pywb_static_prefix += '/'
is_proxy = ('wsgiprox.proxy_host' in environ)
# if OPTIONS in proxy mode, just generate the proxy response
if is_proxy and self.is_preflight(environ):
return WbResponse.options_response(environ)
environ['pywb.host_prefix'] = host_prefix
if self.use_js_obj_proxy:
content_rw = self.js_proxy_rw
else:

View File

@ -331,13 +331,13 @@ are supported.
Some examples:
* Create "example.cdx" index from example.warc.gz
{0} ./cdx/example.cdx ./warcs/example.warc.gz
{0} --output ./cdx/example.cdx ./warcs/example.warc.gz
* Create "combined.cdx", a combined, sorted index of all warcs in ./warcs/
{0} --sort combined.cdx ./warcs/
{0} --sort --output combined.cdx ./warcs/
* Create a sorted cdx per file in ./cdx/ for each archive file in ./warcs/
{0} --sort ./cdx/ ./warcs/
{0} --sort --output ./cdx/ ./warcs/
""".format(os.path.basename(sys.argv[0]))
sort_help = """

View File

@ -416,12 +416,6 @@ class HTMLRewriterMixin(StreamingRewriter):
rw_mod = handler.get(attr_name)
attr_value = self._rewrite_url(attr_value, rw_mod)
# special case: data- attrs, conditional rewrite
elif attr_name and attr_value and attr_name.startswith('data-'):
if attr_value.startswith(self.DATA_RW_PROTOCOLS):
rw_mod = 'oe_'
attr_value = self._rewrite_url(attr_value, rw_mod)
# special case: base tag
elif (tag == 'base') and (attr_name == 'href') and attr_value:
rw_mod = handler.get(attr_name)
@ -469,7 +463,7 @@ class HTMLRewriterMixin(StreamingRewriter):
rw_mod = self.PRELOAD_TYPES.get(preload, rw_mod)
# for html imports with an optional as (google exclusive)
elif rel == 'import':
elif rel == 'import' or rel == 'alternate':
rw_mod = 'mp_'
elif rel == 'stylesheet':

View File

@ -86,3 +86,29 @@ def rewrite_fb_dash(string, *args):
string += json.dumps(best_ids)
return string
def rewrite_tw_dash(string, *args):
    """Reduce a Twitter ``video_info`` JSON blob to its single best MP4 variant.

    ``string`` is parsed as JSON and its ``"variants"`` list is scanned for
    entries whose ``content_type`` is ``video/mp4``. The entry with the
    highest ``bitrate`` that does not exceed 5,000,000 replaces the whole
    list, and the object is serialized back to JSON.

    Malformed variants (non-dict entries, a missing ``content_type``, a
    missing or non-numeric ``bitrate``) are skipped individually, so one bad
    entry no longer prevents a valid MP4 variant from being selected.

    :param string: JSON text containing a ``"variants"`` list.
    :param args: extra positional arguments; ignored.
    :return: the rewritten JSON text, or ``string`` unchanged when no
        suitable variant exists or the input cannot be processed.
    """
    max_bitrate = 5000000

    try:
        data = json.loads(string)

        best_variant = None
        best_bitrate = 0

        for variant in data["variants"]:
            if not isinstance(variant, dict):
                continue

            # .get() so a variant without a content type is simply skipped
            if variant.get("content_type") != "video/mp4":
                continue

            bitrate = variant.get("bitrate")
            if not isinstance(bitrate, (int, float)):
                continue

            if best_bitrate < bitrate <= max_bitrate:
                best_variant = variant
                best_bitrate = bitrate

        if best_variant is not None:
            data["variants"] = [best_variant]
            string = json.dumps(data)

    except Exception as e:
        # Best-effort rewrite: on any failure return the input untouched,
        # but report through logging rather than stdout.
        import logging
        logging.getLogger(__name__).warning('rewrite_tw_dash failed: %s', e)

    return string

View File

@ -322,7 +322,7 @@ class BaseInsertView(object):
kwargs.update(params)
kwargs['env'] = env
kwargs['static_prefix'] = env.get('pywb.host_prefix', '') + env.get('pywb.app_prefix', '') + '/static'
kwargs['static_prefix'] = env.get('pywb.static_prefix', '/static')
return template.render(**kwargs)

View File

@ -138,9 +138,9 @@ r"""
>>> parse('<meta http-equiv="Content-Security-Policy" content="default-src http://example.com" />')
<meta http-equiv="Content-Security-Policy" _content="default-src http://example.com"/>
# Custom -data attribs
# Don't rewrite custom data- attributes
>>> parse('<div data-url="http://example.com/a/b/c.html" data-some-other-value="http://example.com/img.gif">')
<div data-url="/web/20131226101010oe_/http://example.com/a/b/c.html" data-some-other-value="/web/20131226101010oe_/http://example.com/img.gif">
<div data-url="http://example.com/a/b/c.html" data-some-other-value="http://example.com/img.gif">
# param tag -- rewrite conditionally if url
>>> parse('<param value="http://example.com/"/>')

View File

@ -67,6 +67,16 @@ rules:
- url_prefix: 'com,twitter)/i/videos/tweet'
fuzzy_lookup: '()'
- url_prefix: ['com,twitter,api)/2/', 'com,twitter)/i/api/2/', 'com,twitter)/i/api/graphql/']
rewrite:
js_regexs:
- match: 'video_info":(.*?}]})'
group: 1
function: 'pywb.rewrite.rewrite_dash:rewrite_tw_dash'
# facebook rules

41
pywb/static/css/base.css Normal file
View File

@ -0,0 +1,41 @@
/* Header is a flex row with its children spread to opposite ends. */
header {
  /* Legacy prefixed values come first and the standard value last, so a
     browser that accepts more than one form ends up with the standard one. */
  display: -webkit-box;
  display: -moz-box;
  display: -ms-flexbox;
  display: -webkit-flex;
  display: flex;
  -webkit-box-pack: justify;
  -moz-box-pack: justify;
  -ms-flex-pack: justify;
  justify-content: space-between;
}
/* Language selector: absolutely positioned 10px from the top and right
   edges of its containing block. */
header .language-select {
position: absolute;
top: 10px;
right: 10px;
}
/* The list of languages is laid out inline, with default list styling
   (bullets, margin, padding) removed. */
header .language-select ul {
display: inline-block;
list-style-type: none;
margin: 0;
padding: 0;
}
header .language-select ul li {
display: inline-block;
}
/* Insert a " / " separator after every item except the last. */
header .language-select ul li:not(:last-child):after {
content: ' / ';
}
/* Links are underlined only while hovered. */
header .language-select a:link,
header .language-select a:visited,
header .language-select a:active {
text-decoration: none;
}
header .language-select a:hover {
text-decoration: underline;
}

File diff suppressed because one or more lines are too long

View File

@ -18,8 +18,6 @@ html, body
{{ banner_html }}
{% endautoescape %}
</head>
<body style="margin: 0px; padding: 0px;">
@ -35,3 +33,5 @@ html, body
</script>
</body>
</html>
{% endautoescape %}

View File

@ -111,7 +111,7 @@ class MementoOverrideTests(object):
# Load expected link headers
MementoOverrideTests.link_header_data = None
with open(to_path(get_test_dir() + '/text_content/link_headers.yaml')) as fh:
MementoOverrideTests.link_header_data = yaml.load(fh)
MementoOverrideTests.link_header_data = yaml.load(fh, Loader=yaml.Loader)
MementoOverrideTests.orig_get_timegate_links = MementoIndexSource.get_timegate_links

View File

@ -17,6 +17,7 @@ class TestWbIntegration(BaseConfigTest):
def test_pywb_root(self):
resp = self.testapp.get('/pywb/')
self._assert_basic_html(resp)
assert '<link rel="stylesheet" href="/static/css/base.css"' in resp.text
assert 'Search' in resp.text
def test_pywb_root_head(self):