diff --git a/pywb/rewrite/lxml_html_rewriter.py b/pywb/rewrite/lxml_html_rewriter.py
new file mode 100644
index 00000000..b245d055
--- /dev/null
+++ b/pywb/rewrite/lxml_html_rewriter.py
@@ -0,0 +1,94 @@
+import lxml.etree
+import cgi
+import re
+
+from regex_rewriters import JSRewriter, CSSRewriter
+from url_rewriter import UrlRewriter
+from html_rewriter import HTMLRewriterMixin
+
+
+#=================================================================
+class LXMLHTMLRewriter(HTMLRewriterMixin):
+ END_HTML = re.compile(r'\s*html\s*>', re.IGNORECASE)
+
+ def __init__(self, url_rewriter,
+              head_insert=None,
+              js_rewriter_class=JSRewriter,
+              css_rewriter_class=CSSRewriter):
+     """Initialise the parent class, then build the lxml parser and target.
+
+     All four arguments are forwarded unchanged, in this order, to the
+     parent initialiser reached through super().
+     """
+     super(LXMLHTMLRewriter, self).__init__(
+         url_rewriter, head_insert, js_rewriter_class, css_rewriter_class)
+
+     # Keep PIs, blank text, comments and CDATA; recover from broken markup.
+     parser_options = dict(remove_pis=False, remove_blank_text=False,
+                           remove_comments=False, strip_cdata=False,
+                           compact=True, recover=True)
+     # The target is handed this rewriter and is attached to the parser.
+     self.target = RewriterTarget(self)
+     self.parser = lxml.etree.HTMLParser(target=self.target, **parser_options)
+
+ def feed(self, string):
+ string = self.END_HTML.sub(u'', string)
+ #string = string.replace(u'