mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
improved the rewrite_html_full check in wombat: (#398)
- FullHTMLRegex: performs a case-insensitive check for <html, <body, <head and <!doctype html>. Updated rewrite_elem to: - rewrite meta tags that deliver CSP policies - check for additional attributes that could contain un-rewritten URLs (form.style, iframe.style). Made the check for full HTML into a regex.
This commit is contained in:
parent
82f2dace64
commit
cb8b269539
@ -137,6 +137,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
var STYLE_REGEX = /(url\s*\(\s*[\\"']*)([^)'"]+)([\\"']*\s*\))/gi;
|
var STYLE_REGEX = /(url\s*\(\s*[\\"']*)([^)'"]+)([\\"']*\s*\))/gi;
|
||||||
var IMPORT_REGEX = /(@import\s+[\\"']*)([^)'";]+)([\\"']*\s*;?)/gi;
|
var IMPORT_REGEX = /(@import\s+[\\"']*)([^)'";]+)([\\"']*\s*;?)/gi;
|
||||||
var SRCSET_REGEX = /\s*(\S*\s+[\d\.]+[wx]),|(?:\s*,(?:\s+|(?=https?:)))/;
|
var SRCSET_REGEX = /\s*(\S*\s+[\d\.]+[wx]),|(?:\s*,(?:\s+|(?=https?:)))/;
|
||||||
|
var FullHTMLRegex = /^\s*<(?:html|head|body|!doctype html)/i;
|
||||||
|
|
||||||
function rwModForElement(elem, attrName) {
|
function rwModForElement(elem, attrName) {
|
||||||
// this function was created to help add in retrial of element attribute rewrite modifiers
|
// this function was created to help add in retrial of element attribute rewrite modifiers
|
||||||
@ -1767,12 +1768,19 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
var changed;
|
var changed = false;
|
||||||
// we use a switch now cause perf and complexity
|
// we use a switch now cause perf and complexity
|
||||||
switch (elem.tagName) {
|
switch (elem.tagName) {
|
||||||
|
case 'META':
|
||||||
|
var maybeCSP = wb_getAttribute.call(elem, 'http-equiv');
|
||||||
|
if (maybeCSP && maybeCSP.toLowerCase() === 'content-security-policy') {
|
||||||
|
wb_setAttribute.call(elem, 'http-equiv', '_' + maybeCSP);
|
||||||
|
changed = true;
|
||||||
|
}
|
||||||
|
break;
|
||||||
case 'STYLE':
|
case 'STYLE':
|
||||||
var new_content = rewrite_style(elem.textContent);
|
var new_content = rewrite_style(elem.textContent);
|
||||||
if (elem.textContent !== new_content) {
|
if (elem.textContent != new_content) {
|
||||||
elem.textContent = new_content;
|
elem.textContent = new_content;
|
||||||
changed = true;
|
changed = true;
|
||||||
if (wbUseAFWorker && elem.sheet != null) {
|
if (wbUseAFWorker && elem.sheet != null) {
|
||||||
@ -1799,10 +1807,12 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
break;
|
break;
|
||||||
case 'FORM':
|
case 'FORM':
|
||||||
changed = rewrite_attr(elem, "action", true);
|
changed = rewrite_attr(elem, "action", true);
|
||||||
|
changed = rewrite_attr(elem, 'style') || changed;
|
||||||
break;
|
break;
|
||||||
case 'IFRAME':
|
case 'IFRAME':
|
||||||
case 'FRAME':
|
case 'FRAME':
|
||||||
changed = rewrite_frame_src(elem, "src");
|
changed = rewrite_frame_src(elem, "src");
|
||||||
|
changed = rewrite_attr(elem, 'style') || changed;
|
||||||
break;
|
break;
|
||||||
case 'SCRIPT':
|
case 'SCRIPT':
|
||||||
changed = rewrite_script(elem);
|
changed = rewrite_script(elem);
|
||||||
@ -1813,6 +1823,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
default:
|
default:
|
||||||
if (elem instanceof SVGElement && elem.hasAttribute('filter')) {
|
if (elem instanceof SVGElement && elem.hasAttribute('filter')) {
|
||||||
changed = rewrite_attr(elem, 'filter');
|
changed = rewrite_attr(elem, 'filter');
|
||||||
|
changed = rewrite_attr(elem, 'style') || changed;
|
||||||
} else {
|
} else {
|
||||||
changed = rewrite_attr(elem, 'src');
|
changed = rewrite_attr(elem, 'src');
|
||||||
changed = rewrite_attr(elem, 'srcset') || changed;
|
changed = rewrite_attr(elem, 'srcset') || changed;
|
||||||
@ -1839,7 +1850,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
|
|
||||||
var write_buff = "";
|
var write_buff = "";
|
||||||
|
|
||||||
//============================================
|
//===========================================
|
||||||
function rewrite_html(string, check_end_tag) {
|
function rewrite_html(string, check_end_tag) {
|
||||||
if (!string) {
|
if (!string) {
|
||||||
return string;
|
return string;
|
||||||
@ -1859,7 +1870,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
|||||||
string = string.replace(/((id|class)=".*)WB_wombat_([^"]+)/, '$1$3');
|
string = string.replace(/((id|class)=".*)WB_wombat_([^"]+)/, '$1$3');
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!$wbwindow.HTMLTemplateElement || starts_with(string, ["<html", "<head", "<body"])) {
|
if (!$wbwindow.HTMLTemplateElement || FullHTMLRegex.test(string)) {
|
||||||
return rewrite_html_full(string, check_end_tag);
|
return rewrite_html_full(string, check_end_tag);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user