From da0623fbbb378e761421ceb4a05dae31407b3657 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 26 Mar 2014 14:05:02 -0700 Subject: [PATCH] lxml: ensure lxml support is optional: if not available, use_lxml_parser() will return false and doctests/pytest collection won't test the lxml parser --- pywb/rewrite/lxml_html_rewriter.py | 8 +++++++- pywb/rewrite/rewriterules.py | 13 ++++++++----- pywb/rewrite/test/test_lxml_html_rewriter.py | 17 +++++++++++------ 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/pywb/rewrite/lxml_html_rewriter.py b/pywb/rewrite/lxml_html_rewriter.py index ef809cf8..3a8e05dd 100644 --- a/pywb/rewrite/lxml_html_rewriter.py +++ b/pywb/rewrite/lxml_html_rewriter.py @@ -1,4 +1,10 @@ -import lxml.etree +try: + import lxml.etree + LXML_SUPPORTED = True +except ImportError: + LXML_SUPPORTED = False + pass + import cgi import re diff --git a/pywb/rewrite/rewriterules.py b/pywb/rewrite/rewriterules.py index 007e925a..083bf129 100644 --- a/pywb/rewrite/rewriterules.py +++ b/pywb/rewrite/rewriterules.py @@ -13,14 +13,17 @@ HTML = HTMLRewriter #================================================================= def use_lxml_parser(): - try: - import logging - from lxml_html_rewriter import LXMLHTMLRewriter + import logging + from lxml_html_rewriter import LXMLHTMLRewriter, LXML_SUPPORTED + + if LXML_SUPPORTED: global HTML HTML = LXMLHTMLRewriter logging.debug('Using LXML Parser') - except ImportError: - logging.debug('Error Loading LXML Parser') + return True + else: + logging.debug('LXML Parser not available') + return False #================================================================= diff --git a/pywb/rewrite/test/test_lxml_html_rewriter.py b/pywb/rewrite/test/test_lxml_html_rewriter.py index 2d6df8f7..6beb7c7c 100644 --- a/pywb/rewrite/test/test_lxml_html_rewriter.py +++ b/pywb/rewrite/test/test_lxml_html_rewriter.py @@ -114,10 +114,7 @@ ur""" from pywb.rewrite.url_rewriter import UrlRewriter -try: - from pywb.rewrite.lxml_html_rewriter import LXMLHTMLRewriter -except ImportError: - pass +from pywb.rewrite.lxml_html_rewriter import LXMLHTMLRewriter, LXML_SUPPORTED urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html', '/web/') @@ -127,5 +124,13 @@ def parse(data, head_insert=None): print parser.rewrite(data) + parser.close() if __name__ == "__main__": - import doctest - doctest.testmod() + if LXML_SUPPORTED: + import doctest + doctest.testmod() +else: + # skip if not supported and lxml not available + if not LXML_SUPPORTED: + import pytest + lxml = pytest.importorskip('lxml.etree') + +