1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

ServiceWorker Rewrite Improvements (#339)

* service worker rewrite work:
- use sw_ modifier to add Service-Worker-Allowed: <domain root>
- if no scope is set, force the scope to the domain root url
- resolve sw url to absolute url

* wombat: don't reinitialize wombat paths if already initialized (e.g. from imported documents)

* service-worker rewrite test: add test to verify sw rewrite is identity, Service-Worker-Allowed header is added
This commit is contained in:
Ilya Kreymer 2018-05-31 08:57:51 -07:00 committed by GitHub
parent bd329aaa76
commit dc1982784e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 29 additions and 6 deletions

View File

@ -381,6 +381,9 @@ class RewriteInfo(object):
def _resolve_text_type(self, text_type):
mod = self.url_rewriter.wburl.mod
if mod == 'sw_':
return None
if text_type == 'css' and mod == 'js_':
text_type = 'css'
@ -446,7 +449,7 @@ class RewriteInfo(object):
return True
def is_url_rw(self):
if self.url_rewriter.wburl.mod in ('id_', 'bn_'):
if self.url_rewriter.wburl.mod in ('id_', 'bn_', 'sw_'):
return False
return True

View File

@ -1,6 +1,7 @@
from warcio.statusandheaders import StatusAndHeaders
from warcio.timeutils import datetime_to_http_date
from datetime import datetime, timedelta
from six.moves.urllib.parse import urlsplit
#=============================================================================
@ -94,6 +95,12 @@ class DefaultHeaderRewriter(object):
else:
new_headers_list.append(new_header)
if self.rwinfo.url_rewriter.wburl.mod == 'sw_':
parts = urlsplit(self.rwinfo.url_rewriter.wburl.url)
new_url = parts.scheme + '://' + parts.netloc + '/'
rw_origin = self.rwinfo.url_rewriter.rewrite(new_url, mod='mp_')
new_headers_list.append(('Service-Worker-Allowed', rw_origin))
return StatusAndHeaders(self.http_headers.statusline,
headers=new_headers_list,
protocol=self.http_headers.protocol)

View File

@ -143,6 +143,18 @@ class TestContentRewriter(object):
exp = 'function() { WB_wombat_location.href = "http://example.com/"; }'
assert b''.join(gen).decode('utf-8') == exp
def test_rewrite_sw_add_headers(self):
headers = {'Content-Type': 'application/x-javascript'}
content = 'function() { location.href = "http://example.com/"; }'
headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701sw_')
assert ('Content-Type', 'application/x-javascript') in headers.headers
assert ('Service-Worker-Allowed', 'http://localhost:8080/prefix/201701mp_/http://example.com/') in headers.headers
exp = 'function() { location.href = "http://example.com/"; }'
assert b''.join(gen).decode('utf-8') == exp
def test_banner_only_no_cookie_rewrite(self):
headers = {'Set-Cookie': 'foo=bar; Expires=Wed, 13 Jan 2021 22:23:01 GMT; Path=/',
'Content-Type': 'text/javascript'}

View File

@ -1361,9 +1361,12 @@ var _WBWombat = function($wbwindow, wbinfo) {
var orig_register = $wbwindow.ServiceWorkerContainer.prototype.register;
$wbwindow.ServiceWorkerContainer.prototype.register = function(scriptURL, options) {
scriptURL = rewrite_url(scriptURL, false, "id_");
scriptURL = new URL(scriptURL, $wbwindow.document.baseURI).href;
scriptURL = rewrite_url(scriptURL, false, "sw_");
if (options && options.scope) {
options.scope = rewrite_url(options.scope, false, "id_");
options.scope = rewrite_url(options.scope, false, "mp_");
} else {
options = {scope: rewrite_url("/", false, "mp_")};
}
return orig_register.call(this, scriptURL, options);
}

View File

@ -38,9 +38,7 @@
if (window && window._WBWombat && !window._wb_wombat) {
window._wb_wombat = new _WBWombat(window, wbinfo);
} else if (window._wb_wombat) {
window._wb_wombat.init_paths(wbinfo);
} else {
} else if (!window._wb_wombat) {
console.warn("_wb_wombat missing!");
}
</script>