diff --git a/pywb/rewrite/html_rewriter.py b/pywb/rewrite/html_rewriter.py index 4ec3e530..2c5a18b1 100644 --- a/pywb/rewrite/html_rewriter.py +++ b/pywb/rewrite/html_rewriter.py @@ -169,7 +169,9 @@ class HTMLRewriterMixin(object): else: # special case: base tag if (tag == 'base') and (attr_name == 'href') and attr_value: - self.url_rewriter.set_base_url(attr_value) + #self.url_rewriter.set_base_url(attr_value) + self.url_rewriter = (self.url_rewriter. + rebase_rewriter(attr_value)) rw_mod = handler.get(attr_name) if rw_mod is not None: diff --git a/pywb/rewrite/lxml_parser.py b/pywb/rewrite/lxml_parser.py index 6c9e13c9..4956c872 100644 --- a/pywb/rewrite/lxml_parser.py +++ b/pywb/rewrite/lxml_parser.py @@ -1,18 +1,18 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -import lxml.html +#import lxml.html import lxml.etree import cgi +import re from regex_rewriters import JSRewriter, CSSRewriter from url_rewriter import UrlRewriter from html_rewriter import HTMLRewriterMixin -from StringIO import StringIO class LXMLHTMLRewriter(HTMLRewriterMixin): - r""" + ur""" >>> parse('Text')
Text @@ -45,7 +45,7 @@ class LXMLHTMLRewriter(HTMLRewriterMixin): # Unicode - #>>> parse('испытание') + >>> parse('испытание') испытание # Meta tag @@ -106,6 +106,8 @@ class LXMLHTMLRewriter(HTMLRewriterMixin):