1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

tests: add parse_comments test for html_rewriter

This commit is contained in:
Ilya Kreymer 2016-03-10 10:10:51 -08:00
parent 8ae692d630
commit effd618bb3

View File

@ -159,6 +159,14 @@ ur"""
>>> parse('<!DOCTYPE html>Some Text without any tags <!-- comments -->', head_insert = '<script>load_stuff();</script>')
<!DOCTYPE html>Some Text without any tags <!-- comments --><script>load_stuff();</script>
# no parse comments
>>> parse('<html><!-- <a href="/foo.html"> --></html>')
<html><!-- <a href="/foo.html"> --></html>
# with parse comments
>>> parse('<html><!-- <a href="/foo.html"> --></html>', parse_comments=True)
<html><!-- <a href="/web/20131226101010/http://example.com/foo.html"> --></html>
# rel=canonical: rewrite (default)
>>> parse('<link rel=canonical href="http://example.com/">')
<link rel="canonical" href="/web/20131226101010oe_/http://example.com/">
@ -236,8 +244,10 @@ urlrewriter_pencode = new_rewriter(rewrite_opts=dict(punycode_links=True))
# Rewriter built with the rewrite_rel_canon and rewrite_base options both set to False.
# NOTE(review): presumably this disables rewriting of rel=canonical links and <base> tags;
# confirm against new_rewriter / the rewriter's option handling, which is not visible here.
no_base_canon_rewriter = new_rewriter(rewrite_opts=dict(rewrite_rel_canon=False,
rewrite_base=False))
def parse(data, head_insert=None, urlrewriter=urlrewriter):
parser = HTMLRewriter(urlrewriter, head_insert = head_insert, url = ORIGINAL_URL)
def parse(data, head_insert=None, urlrewriter=urlrewriter, parse_comments=False):
parser = HTMLRewriter(urlrewriter, head_insert=head_insert,
url=ORIGINAL_URL,
parse_comments=parse_comments)
if isinstance(data, unicode):
data = data.encode('utf-8')