1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 06:59:52 +01:00

improved the rewrite_html_full check in wombat: (#398)

- FullHTMLRegex: performs a case insensitive check for <html, <body, <head and <!doctype html>

updated rewrite_elem to:
- rewrite meta tags that deliver CSP policies
- check for additional attributes that could contain un-rewritten URLs (form.style, iframe.style)

Made the check for full HTML into a regex
This commit is contained in:
John Berlin 2018-10-23 18:36:04 -04:00 committed by Ilya Kreymer
parent 82f2dace64
commit cb8b269539

View File

@ -137,6 +137,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
var STYLE_REGEX = /(url\s*\(\s*[\\"']*)([^)'"]+)([\\"']*\s*\))/gi; var STYLE_REGEX = /(url\s*\(\s*[\\"']*)([^)'"]+)([\\"']*\s*\))/gi;
var IMPORT_REGEX = /(@import\s+[\\"']*)([^)'";]+)([\\"']*\s*;?)/gi; var IMPORT_REGEX = /(@import\s+[\\"']*)([^)'";]+)([\\"']*\s*;?)/gi;
var SRCSET_REGEX = /\s*(\S*\s+[\d\.]+[wx]),|(?:\s*,(?:\s+|(?=https?:)))/; var SRCSET_REGEX = /\s*(\S*\s+[\d\.]+[wx]),|(?:\s*,(?:\s+|(?=https?:)))/;
var FullHTMLRegex = /^\s*<(?:html|head|body|!doctype html)/i;
function rwModForElement(elem, attrName) { function rwModForElement(elem, attrName) {
// this function was created to help add in retrial of element attribute rewrite modifiers // this function was created to help add in retrial of element attribute rewrite modifiers
@ -1767,12 +1768,19 @@ var _WBWombat = function($wbwindow, wbinfo) {
return; return;
} }
var changed; var changed = false;
// we use a switch now cause perf and complexity // we use a switch now cause perf and complexity
switch (elem.tagName) { switch (elem.tagName) {
case 'META':
var maybeCSP = wb_getAttribute.call(elem, 'http-equiv');
if (maybeCSP && maybeCSP.toLowerCase() === 'content-security-policy') {
wb_setAttribute.call(elem, 'http-equiv', '_' + maybeCSP);
changed = true;
}
break;
case 'STYLE': case 'STYLE':
var new_content = rewrite_style(elem.textContent); var new_content = rewrite_style(elem.textContent);
if (elem.textContent !== new_content) { if (elem.textContent != new_content) {
elem.textContent = new_content; elem.textContent = new_content;
changed = true; changed = true;
if (wbUseAFWorker && elem.sheet != null) { if (wbUseAFWorker && elem.sheet != null) {
@ -1799,10 +1807,12 @@ var _WBWombat = function($wbwindow, wbinfo) {
break; break;
case 'FORM': case 'FORM':
changed = rewrite_attr(elem, "action", true); changed = rewrite_attr(elem, "action", true);
changed = rewrite_attr(elem, 'style') || changed;
break; break;
case 'IFRAME': case 'IFRAME':
case 'FRAME': case 'FRAME':
changed = rewrite_frame_src(elem, "src"); changed = rewrite_frame_src(elem, "src");
changed = rewrite_attr(elem, 'style') || changed;
break; break;
case 'SCRIPT': case 'SCRIPT':
changed = rewrite_script(elem); changed = rewrite_script(elem);
@ -1813,6 +1823,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
default: default:
if (elem instanceof SVGElement && elem.hasAttribute('filter')) { if (elem instanceof SVGElement && elem.hasAttribute('filter')) {
changed = rewrite_attr(elem, 'filter'); changed = rewrite_attr(elem, 'filter');
changed = rewrite_attr(elem, 'style') || changed;
} else { } else {
changed = rewrite_attr(elem, 'src'); changed = rewrite_attr(elem, 'src');
changed = rewrite_attr(elem, 'srcset') || changed; changed = rewrite_attr(elem, 'srcset') || changed;
@ -1839,7 +1850,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
var write_buff = ""; var write_buff = "";
//============================================ //===========================================
function rewrite_html(string, check_end_tag) { function rewrite_html(string, check_end_tag) {
if (!string) { if (!string) {
return string; return string;
@ -1859,7 +1870,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
string = string.replace(/((id|class)=".*)WB_wombat_([^"]+)/, '$1$3'); string = string.replace(/((id|class)=".*)WB_wombat_([^"]+)/, '$1$3');
} }
if (!$wbwindow.HTMLTemplateElement || starts_with(string, ["<html", "<head", "<body"])) { if (!$wbwindow.HTMLTemplateElement || FullHTMLRegex.test(string)) {
return rewrite_html_full(string, check_end_tag); return rewrite_html_full(string, check_end_tag);
} }