tests: add parse_comment test for html_rewriter

2025-03-15 00:03:28 +01:00 · 2016-03-10 10:10:51 -08:00 · 2016-03-10 10:10:51 -08:00 · effd618bb3
commit effd618bb3
parent 8ae692d630
1 changed file with 12 additions and 2 deletions
--- a/pywb/rewrite/test/test_html_rewriter.py
+++ b/pywb/rewrite/test/test_html_rewriter.py
@@ -159,6 +159,14 @@ ur"""
 >>> parse('<!DOCTYPE html>Some Text without any tags <!-- comments -->', head_insert = '<script>load_stuff();</script>')
 <!DOCTYPE html>Some Text without any tags <!-- comments --><script>load_stuff();</script>

+# no parse comments
+>>> parse('<html><!-- <a href="/foo.html"> --></html>')
+<html><!-- <a href="/foo.html"> --></html>
+
+# with parse comments
+>>> parse('<html><!-- <a href="/foo.html"> --></html>', parse_comments=True)
+<html><!-- <a href="/web/20131226101010/http://example.com/foo.html"> --></html>
+
 # rel=canonical: rewrite (default)
 >>> parse('<link rel=canonical href="http://example.com/">')
 <link rel="canonical" href="/web/20131226101010oe_/http://example.com/">
@@ -236,8 +244,10 @@ urlrewriter_pencode = new_rewriter(rewrite_opts=dict(punycode_links=True))
 no_base_canon_rewriter = new_rewriter(rewrite_opts=dict(rewrite_rel_canon=False,
                                                        rewrite_base=False))

-def parse(data, head_insert=None, urlrewriter=urlrewriter):
-    parser = HTMLRewriter(urlrewriter, head_insert = head_insert, url = ORIGINAL_URL)
+def parse(data, head_insert=None, urlrewriter=urlrewriter, parse_comments=False):
+    parser = HTMLRewriter(urlrewriter, head_insert=head_insert,
+                          url=ORIGINAL_URL,
+                          parse_comments=parse_comments)

    if isinstance(data, unicode):
        data = data.encode('utf-8')