diff --git a/pywb/rewrite/content_rewriter.py b/pywb/rewrite/content_rewriter.py
index 7594efdf..8b8d0593 100644
--- a/pywb/rewrite/content_rewriter.py
+++ b/pywb/rewrite/content_rewriter.py
@@ -20,6 +20,13 @@ class BaseContentRewriter(object):
TITLE = re.compile(r'<\s*title\s*>(.*)<\s*\/\s*title\s*>', re.M | re.I | re.S)
+ # set via html_rewriter since it overrides the default one
+ html_unescape = None
+
+ @classmethod
+ def set_unescape(cls, unescape):
+ cls.html_unescape = unescape
+
@classmethod
def _extract_title(cls, gen):
title_res = list(gen)
@@ -31,7 +38,13 @@ class BaseContentRewriter(object):
return
title_res = m.group(1)
- return title_res.strip()
+ title_res = title_res.strip()
+ try:
+ title_res = cls.html_unescape(title_res)
+ except Exception as e:
+ pass
+
+ return title_res
def __init__(self, rules_file, replay_mod=''):
self.rules = []
diff --git a/pywb/rewrite/html_rewriter.py b/pywb/rewrite/html_rewriter.py
index 8c91989e..f3376fc2 100644
--- a/pywb/rewrite/html_rewriter.py
+++ b/pywb/rewrite/html_rewriter.py
@@ -11,7 +11,7 @@ from six.moves.urllib.parse import urljoin, urlsplit, urlunsplit
from pywb.rewrite.url_rewriter import UrlRewriter
from pywb.rewrite.regex_rewriters import JSRewriter, CSSRewriter
-from pywb.rewrite.content_rewriter import StreamingRewriter
+from pywb.rewrite.content_rewriter import StreamingRewriter, BaseContentRewriter
from six import text_type
@@ -20,9 +20,16 @@ import six.moves.html_parser
try:
orig_unescape = six.moves.html_parser.unescape
six.moves.html_parser.unescape = lambda x: x
+ BaseContentRewriter.set_unescape(orig_unescape)
except:
orig_unescape = None
+ @staticmethod
+ def __unescape(x):
+ return HTMLParser().unescape(x)
+
+ BaseContentRewriter.set_unescape(__unescape)
+
try:
import _markupbase as markupbase
diff --git a/pywb/static/wombatProxyMode.js b/pywb/static/wombatProxyMode.js
index 6557ce49..2ee4f844 100644
--- a/pywb/static/wombatProxyMode.js
+++ b/pywb/static/wombatProxyMode.js
@@ -16,4 +16,4 @@ GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with pywb. If not, see .
*/
-(function(){function autobind(clazz){for(var prop,propValue,proto=clazz.__proto__||clazz.constructor.prototype||clazz.prototype,clazzProps=Object.getOwnPropertyNames(proto),len=clazzProps.length,i=0;i source[srcset], picture > source[data-srcset], picture > source[data-src], video > source[srcset], video > source[data-srcset], video > source[data-src], audio > source[srcset], audio > source[data-srcset], audio > source[data-src]",autobind(this),this._init(config,true)):new AutoFetcherProxyMode(wombat,config)}function WombatLite($wbwindow,wbinfo){return this instanceof WombatLite?void(this.wb_info=wbinfo,this.$wbwindow=$wbwindow,this.wb_info.top_host=this.wb_info.top_host||"*",this.wb_info.wombat_opts=this.wb_info.wombat_opts||{},this.WBAutoFetchWorker=null,this.historyCB=null):new WombatLite($wbwindow,wbinfo)}AutoFetcherProxyMode.prototype._init=function(config,first){var afwpm=this,wombat=this.wombat;if(document.readyState==="complete")return this.styleTag=document.createElement("style"),this.styleTag.id="$wrStyleParser$",document.head.appendChild(this.styleTag),void(config.isTop?fetch(config.workerURL).then(function(res){res.text().then(function(text){var blob=new Blob([text],{type:"text/javascript"});afwpm.worker=new wombat.$wbwindow.Worker(URL.createObjectURL(blob),{type:"classic",credentials:"include"}),afwpm.startChecking()}).catch(error=>{console.error("Could not create the backing worker for AutoFetchWorkerProxyMode"),console.error(error)})}):(this.worker={postMessage:function(msg){msg.wb_type||(msg={wb_type:"aaworker",msg:msg}),wombat.$wbwindow.top.postMessage(msg,"*")},terminate:function(){}},this.startChecking()));if(first)var i=setInterval(function(){document.readyState==="complete"&&(afwpm._init(config),clearInterval(i))},1e3)},AutoFetcherProxyMode.prototype.startChecking=function(){for(;this.worker&&this.msgQ.length;)this.postMessage(this.msgQ.shift());this.extractFromLocalDoc(),this.mutationObz=new MutationObserver(this.mutationCB),this.mutationObz.observe(document.documentElement,{characterData:false,characterDataOldValue:false,attributes:true,attributeOldValue:true,subtree:true,childList:true,attributeFilter:["src","srcset"]})},AutoFetcherProxyMode.prototype.terminate=function(){this.worker&&this.worker.terminate()},AutoFetcherProxyMode.prototype.justFetch=function(urls){this.postMessage({type:"fetch-all",values:urls})},AutoFetcherProxyMode.prototype.fetchAsPage=function(url,title){if(url){var headers={"X-Wombat-History-Page":url};if(title){var encodedTitle=encodeURIComponent(title.trim());title&&(headers["X-Wombat-History-Title"]=encodedTitle)}var fetchData={url:url,options:{headers:headers}};this.justFetch([fetchData])}},AutoFetcherProxyMode.prototype.postMessage=function(msg){this.worker?this.worker.postMessage(msg):this.msgQ.push(msg)},AutoFetcherProxyMode.prototype.handleMutatedStyleElem=function(elem,accum,text){var checkNode,baseURI=document.baseURI;if(text){if(!elem.parentNode||elem.parentNode.localName!=="style")return;checkNode=elem.parentNode}else checkNode=elem;try{var extractedMedia=this.extractMediaRules(checkNode.sheet,baseURI);if(extractedMedia.length)return void(accum.media=accum.media.concat(extractedMedia))}catch(e){}!text&&checkNode.href&&accum.deferred.push(this.fetchCSSAndExtract(checkNode.href))},AutoFetcherProxyMode.prototype.handleMutatedElem=function(elem,accum){var baseURI=document.baseURI;if(elem.nodeType===Node.TEXT_NODE)return this.handleMutatedStyleElem(elem,accum,true);switch(elem.localName){case"img":case"video":case"audio":case"source":return this.handleDomElement(elem,baseURI,accum);case"style":return this.handleMutatedStyleElem(elem,accum);case"link":if(elem.rel==="stylesheet"||elem.rel==="preload"&&elem.as==="style")return this.handleMutatedStyleElem(elem,accum);}return this.extractSrcSrcsetFrom(elem,baseURI,accum)},AutoFetcherProxyMode.prototype.mutationCB=function(mutationList,observer){for(var accum={type:"values",srcset:[],src:[],media:[],deferred:[]},i=0;i source[srcset], picture > source[data-srcset], picture > source[data-src], video > source[srcset], video > source[data-srcset], video > source[data-src], audio > source[srcset], audio > source[data-srcset], audio > source[data-src]",autobind(this),this._init(config,true)):new AutoFetcherProxyMode(wombat,config)}function WombatLite($wbwindow,wbinfo){return this instanceof WombatLite?void(this.wb_info=wbinfo,this.$wbwindow=$wbwindow,this.wb_info.top_host=this.wb_info.top_host||"*",this.wb_info.wombat_opts=this.wb_info.wombat_opts||{},this.WBAutoFetchWorker=null,this.historyCB=null):new WombatLite($wbwindow,wbinfo)}AutoFetcherProxyMode.prototype._init=function(config,first){var afwpm=this,wombat=this.wombat;if(document.readyState==="complete")return this.styleTag=document.createElement("style"),this.styleTag.id="$wrStyleParser$",document.head.appendChild(this.styleTag),void(config.isTop?fetch(config.workerURL).then(function(res){res.text().then(function(text){var blob=new Blob([text],{type:"text/javascript"});afwpm.worker=new wombat.$wbwindow.Worker(URL.createObjectURL(blob),{type:"classic",credentials:"include"}),afwpm.startChecking()}).catch(error=>{console.error("Could not create the backing worker for AutoFetchWorkerProxyMode"),console.error(error)})}):(this.worker={postMessage:function(msg){msg.wb_type||(msg={wb_type:"aaworker",msg:msg}),wombat.$wbwindow.top.postMessage(msg,"*")},terminate:function(){}},this.startChecking()));if(first)var i=setInterval(function(){document.readyState==="complete"&&(afwpm._init(config),clearInterval(i))},1e3)},AutoFetcherProxyMode.prototype.startChecking=function(){for(;this.worker&&this.msgQ.length;)this.postMessage(this.msgQ.shift());this.extractFromLocalDoc(),this.mutationObz=new MutationObserver(this.mutationCB),this.mutationObz.observe(document.documentElement,{characterData:false,characterDataOldValue:false,attributes:true,attributeOldValue:true,subtree:true,childList:true,attributeFilter:["src","srcset"]})},AutoFetcherProxyMode.prototype.terminate=function(){this.worker&&this.worker.terminate()},AutoFetcherProxyMode.prototype.justFetch=function(urls){this.postMessage({type:"fetch-all",values:urls})},AutoFetcherProxyMode.prototype.fetchAsPage=function(url,title){if(url){var headers={"X-Wombat-History-Page":url};if(title){var encodedTitle=encodeURIComponent(title.trim());title&&(headers["X-Wombat-History-Title"]=encodedTitle)}var fetchData={url:url,options:{headers:headers}};this.justFetch([fetchData])}},AutoFetcherProxyMode.prototype.postMessage=function(msg){this.worker?this.worker.postMessage(msg):this.msgQ.push(msg)},AutoFetcherProxyMode.prototype.handleMutatedStyleElem=function(elem,accum,text){var checkNode,baseURI=document.baseURI;if(text){if(!elem.parentNode||elem.parentNode.localName!=="style")return;checkNode=elem.parentNode}else checkNode=elem;try{var extractedMedia=this.extractMediaRules(checkNode.sheet,baseURI);if(extractedMedia.length)return void(accum.media=accum.media.concat(extractedMedia))}catch(e){}!text&&checkNode.href&&accum.deferred.push(this.fetchCSSAndExtract(checkNode.href))},AutoFetcherProxyMode.prototype.handleMutatedElem=function(elem,accum){var baseURI=document.baseURI;if(elem.nodeType===Node.TEXT_NODE)return this.handleMutatedStyleElem(elem,accum,true);switch(elem.localName){case"img":case"video":case"audio":case"source":return this.handleDomElement(elem,baseURI,accum);case"style":return this.handleMutatedStyleElem(elem,accum);case"link":if(elem.rel==="stylesheet"||elem.rel==="preload"&&elem.as==="style")return this.handleMutatedStyleElem(elem,accum);}return this.extractSrcSrcsetFrom(elem,baseURI,accum)},AutoFetcherProxyMode.prototype.mutationCB=function(mutationList,observer){for(var accum={type:"values",srcset:[],src:[],media:[],deferred:[]},i=0;iTest'Title