diff --git a/.travis.yml b/.travis.yml
index 5d6a7359..65fd91ab 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -36,6 +36,9 @@ after_success:
- codecov
matrix:
+ allow_failures:
+ - env: WR_TEST=yes
+
exclude:
- env: WR_TEST=yes
python: "2.7"
diff --git a/pywb/rewrite/content_rewriter.py b/pywb/rewrite/content_rewriter.py
index 6887959e..8c98f2fa 100644
--- a/pywb/rewrite/content_rewriter.py
+++ b/pywb/rewrite/content_rewriter.py
@@ -347,7 +347,7 @@ class StreamingRewriter(object):
# ============================================================================
class RewriteInfo(object):
- TAG_REGEX = re.compile(b'^\s*\<')
+ TAG_REGEX = re.compile(b'^(\xef\xbb\xbf)?\s*\<')
TAG_REGEX2 = re.compile(b'^.*<\w+[\s>]')
JSON_REGEX = re.compile(b'^\s*[{[][{"]') # if it starts with this then highly likely not HTML
diff --git a/pywb/rewrite/html_rewriter.py b/pywb/rewrite/html_rewriter.py
index 41be3364..91dbf986 100644
--- a/pywb/rewrite/html_rewriter.py
+++ b/pywb/rewrite/html_rewriter.py
@@ -13,6 +13,8 @@ from pywb.rewrite.regex_rewriters import JSRewriter, CSSRewriter
from pywb.rewrite.content_rewriter import StreamingRewriter
+from six import text_type
+
import six.moves.html_parser
try:
@@ -21,7 +23,15 @@ try:
except:
orig_unescape = None
-from six import text_type
+
+try:
+ import _markupbase as markupbase
+except:
+ import markupbase as markupbase
+
+# Ensure the invalid conditional ending ']-->' is treated as closing
+# the declaration, the same as ']>'.
+markupbase._msmarkedsectionclose = re.compile(r']\s*-{0,2}>')
#=================================================================
diff --git a/pywb/rewrite/test/test_html_rewriter.py b/pywb/rewrite/test/test_html_rewriter.py
index 371867a8..74bc1a99 100644
--- a/pywb/rewrite/test/test_html_rewriter.py
+++ b/pywb/rewrite/test/test_html_rewriter.py
@@ -286,6 +286,10 @@ r"""
>>> parse('Some Text without any tags ', head_insert = '')
Some Text without any tags
+# UTF-8 BOM
+>>> parse('\ufeffSome Text without any tags ', head_insert = '')
+\ufeffSome Text without any tags
+
# no parse comments
>>> parse('')
@@ -395,6 +399,13 @@ r"""
>>> parse('', js_proxy=True)
+# IE conditional
+>>> parse('