mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Updated html_rewriter.py to correctly handle self-closing <script> elements: (#392)
- added the 'xlink:href' attribute to the script element attributes to rewrite
Updated html_rewriter.py to better handle self-closing tags:
- added a boolean set_parsing_context arg to _rewrite_tag_attrs to indicate whether the parsing context is to be set
- the call to _rewrite_tag_attrs from handle_startendtag now sets set_parsing_context to False
Added a test to test_html_rewriter.py for rewriting SVGScriptElements
This commit is contained in:
parent
1c7badf117
commit
c28e38718c
@ -63,7 +63,7 @@ class HTMLRewriterMixin(StreamingRewriter):
|
||||
'param': {'value': 'oe_'},
|
||||
'q': {'cite': defmod},
|
||||
'ref': {'href': 'oe_'},
|
||||
'script': {'src': 'js_'},
|
||||
'script': {'src': 'js_', 'xlink:href': 'js_'}, # covers both HTML and SVG script tags
|
||||
'source': {'src': 'oe_'},
|
||||
'video': {'src': 'oe_',
|
||||
'poster': 'im_'},
|
||||
@ -310,7 +310,22 @@ class HTMLRewriterMixin(StreamingRewriter):
|
||||
|
||||
return None
|
||||
|
||||
def _rewrite_tag_attrs(self, tag, tag_attrs):
|
||||
def _rewrite_tag_attrs(self, tag, tag_attrs, set_parsing_context=True):
|
||||
"""Rewrite a tags attributes.
|
||||
|
||||
If set_parsing_context is False then the parsing context will not be set.
|
||||
If the head insert has not been added to the HTML being rewritten, there
|
||||
is no parsing context and the tag is not in BEFORE_HEAD_TAGS then the
|
||||
head_insert will be "inserted" and set to None
|
||||
|
||||
:param str tag: The name of the tag to be rewritten
|
||||
:param list[tuple[str, str]] tag_attrs: A list of tuples representing
|
||||
the tags attributes
|
||||
:param bool set_parsing_context: Boolean indicating if the parsing
|
||||
context should be set
|
||||
:return: True
|
||||
:rtype: bool
|
||||
"""
|
||||
# special case: head insertion, before-head tags
|
||||
if (self.head_insert and
|
||||
not self._wb_parse_context
|
||||
@ -318,7 +333,8 @@ class HTMLRewriterMixin(StreamingRewriter):
|
||||
self.out.write(self.head_insert)
|
||||
self.head_insert = None
|
||||
|
||||
self._set_parse_context(tag, tag_attrs)
|
||||
if set_parsing_context:
|
||||
self._set_parse_context(tag, tag_attrs)
|
||||
|
||||
# attr rewriting
|
||||
handler = self.rewrite_tags.get(tag)
|
||||
@ -604,7 +620,7 @@ class HTMLRewriter(HTMLRewriterMixin, HTMLParser):
|
||||
self.out.write('>')
|
||||
|
||||
def handle_startendtag(self, tag, attrs):
|
||||
self._rewrite_tag_attrs(tag, attrs)
|
||||
self._rewrite_tag_attrs(tag, attrs, False)
|
||||
|
||||
if tag != 'head' or not self._rewrite_head(True):
|
||||
self.out.write('/>')
|
||||
|
@ -223,6 +223,14 @@ r"""
|
||||
>>> parse('<script>window.location = "http://example.com/a/b/c.html"</sc>')
|
||||
<script>window.WB_wombat_location = "/web/20131226101010/http://example.com/a/b/c.html"</sc></script>
|
||||
|
||||
# SVG Script tag
|
||||
>>> parse('<script xlink:href="/js/scripts.js"/>')
|
||||
<script xlink:href="/web/20131226101010js_/http://example.com/js/scripts.js"/>
|
||||
|
||||
# SVG Script tag with other elements
|
||||
>>> parse('<svg><defs><script xlink:href="/js/scripts.js"/><defs/><title>I\'m a title tag in svg!</title></svg>')
|
||||
<svg><defs><script xlink:href="/web/20131226101010js_/http://example.com/js/scripts.js"/><defs/><title>I'm a title tag in svg!</title></svg>
|
||||
|
||||
>>> parse('<script>/*<![CDATA[*/window.location = "http://example.com/a/b/c.html;/*]]>*/"</script>')
|
||||
<script>/*<![CDATA[*/window.WB_wombat_location = "/web/20131226101010/http://example.com/a/b/c.html;/*]]>*/"</script>
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user