mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
tests: add parse_comment test for html_rewriter
This commit is contained in:
parent
8ae692d630
commit
effd618bb3
@ -159,6 +159,14 @@ ur"""
|
|||||||
>>> parse('<!DOCTYPE html>Some Text without any tags <!-- comments -->', head_insert = '<script>load_stuff();</script>')
|
>>> parse('<!DOCTYPE html>Some Text without any tags <!-- comments -->', head_insert = '<script>load_stuff();</script>')
|
||||||
<!DOCTYPE html>Some Text without any tags <!-- comments --><script>load_stuff();</script>
|
<!DOCTYPE html>Some Text without any tags <!-- comments --><script>load_stuff();</script>
|
||||||
|
|
||||||
|
# no parse comments
|
||||||
|
>>> parse('<html><!-- <a href="/foo.html"> --></html>')
|
||||||
|
<html><!-- <a href="/foo.html"> --></html>
|
||||||
|
|
||||||
|
# with parse comments
|
||||||
|
>>> parse('<html><!-- <a href="/foo.html"> --></html>', parse_comments=True)
|
||||||
|
<html><!-- <a href="/web/20131226101010/http://example.com/foo.html"> --></html>
|
||||||
|
|
||||||
# rel=canonical: rewrite (default)
|
# rel=canonical: rewrite (default)
|
||||||
>>> parse('<link rel=canonical href="http://example.com/">')
|
>>> parse('<link rel=canonical href="http://example.com/">')
|
||||||
<link rel="canonical" href="/web/20131226101010oe_/http://example.com/">
|
<link rel="canonical" href="/web/20131226101010oe_/http://example.com/">
|
||||||
@ -236,8 +244,10 @@ urlrewriter_pencode = new_rewriter(rewrite_opts=dict(punycode_links=True))
|
|||||||
no_base_canon_rewriter = new_rewriter(rewrite_opts=dict(rewrite_rel_canon=False,
|
no_base_canon_rewriter = new_rewriter(rewrite_opts=dict(rewrite_rel_canon=False,
|
||||||
rewrite_base=False))
|
rewrite_base=False))
|
||||||
|
|
||||||
def parse(data, head_insert=None, urlrewriter=urlrewriter):
|
def parse(data, head_insert=None, urlrewriter=urlrewriter, parse_comments=False):
|
||||||
parser = HTMLRewriter(urlrewriter, head_insert = head_insert, url = ORIGINAL_URL)
|
parser = HTMLRewriter(urlrewriter, head_insert=head_insert,
|
||||||
|
url=ORIGINAL_URL,
|
||||||
|
parse_comments=parse_comments)
|
||||||
|
|
||||||
if isinstance(data, unicode):
|
if isinstance(data, unicode):
|
||||||
data = data.encode('utf-8')
|
data = data.encode('utf-8')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user