mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
tests: add parse_comment test for html_rewriter
This commit is contained in:
parent
8ae692d630
commit
effd618bb3
@ -159,6 +159,14 @@ ur"""
|
||||
>>> parse('<!DOCTYPE html>Some Text without any tags <!-- comments -->', head_insert = '<script>load_stuff();</script>')
|
||||
<!DOCTYPE html>Some Text without any tags <!-- comments --><script>load_stuff();</script>
|
||||
|
||||
# parse_comments not set (defaults to False): URLs inside HTML comments are left untouched
|
||||
>>> parse('<html><!-- <a href="/foo.html"> --></html>')
|
||||
<html><!-- <a href="/foo.html"> --></html>
|
||||
|
||||
# parse_comments=True: URLs inside HTML comments are rewritten
|
||||
>>> parse('<html><!-- <a href="/foo.html"> --></html>', parse_comments=True)
|
||||
<html><!-- <a href="/web/20131226101010/http://example.com/foo.html"> --></html>
|
||||
|
||||
# rel=canonical: rewrite (default)
|
||||
>>> parse('<link rel=canonical href="http://example.com/">')
|
||||
<link rel="canonical" href="/web/20131226101010oe_/http://example.com/">
|
||||
@ -236,8 +244,10 @@ urlrewriter_pencode = new_rewriter(rewrite_opts=dict(punycode_links=True))
|
||||
no_base_canon_rewriter = new_rewriter(rewrite_opts=dict(rewrite_rel_canon=False,
|
||||
rewrite_base=False))
|
||||
|
||||
def parse(data, head_insert=None, urlrewriter=urlrewriter):
|
||||
parser = HTMLRewriter(urlrewriter, head_insert = head_insert, url = ORIGINAL_URL)
|
||||
def parse(data, head_insert=None, urlrewriter=urlrewriter, parse_comments=False):
|
||||
parser = HTMLRewriter(urlrewriter, head_insert=head_insert,
|
||||
url=ORIGINAL_URL,
|
||||
parse_comments=parse_comments)
|
||||
|
||||
if isinstance(data, unicode):
|
||||
data = data.encode('utf-8')
|
||||
|
Loading…
x
Reference in New Issue
Block a user