diff --git a/pywb/rewrite/html_rewriter.py b/pywb/rewrite/html_rewriter.py
index 7c9020be..50629514 100644
--- a/pywb/rewrite/html_rewriter.py
+++ b/pywb/rewrite/html_rewriter.py
@@ -5,7 +5,7 @@ import sys
import re
from HTMLParser import HTMLParser, HTMLParseError
-from urlparse import urlsplit, urlunsplit
+from urlparse import urljoin, urlsplit, urlunsplit
from url_rewriter import UrlRewriter
from regex_rewriters import JSRewriter, CSSRewriter
@@ -276,9 +276,18 @@ class HTMLRewriterMixin(object):
# special case: if rewrite_canon not set,
# don't rewrite rel=canonical
elif tag == 'link' and attr_name == 'href':
- if (self.opts.get('rewrite_rel_canon', True) or
- not self.has_attr(tag_attrs, ('rel', 'canonical'))):
- rw_mod = handler.get(attr_name)
+ rw_mod = handler.get(attr_name)
+
+ if self.has_attr(tag_attrs, ('rel', 'canonical')):
+ if self.opts.get('rewrite_rel_canon', True):
+ attr_value = self._rewrite_url(attr_value, rw_mod)
+ else:
+ # resolve relative rel=canonical URLs so that they
+ # refer to the same absolute URL as on the original
+ # page (see https://github.com/hypothesis/via/issues/65
+ # for context)
+ attr_value = urljoin(self.orig_url, attr_value)
+ else:
attr_value = self._rewrite_url(attr_value, rw_mod)
# special case: meta tag
diff --git a/pywb/rewrite/test/test_html_rewriter.py b/pywb/rewrite/test/test_html_rewriter.py
index 820124f0..0ceface3 100644
--- a/pywb/rewrite/test/test_html_rewriter.py
+++ b/pywb/rewrite/test/test_html_rewriter.py
@@ -158,8 +158,12 @@ ur"""
# rel=canonical: no_rewrite
->>> parse('', urlrewriter=no_base_canon_rewriter)
-
+>>> parse('', urlrewriter=no_base_canon_rewriter)
+
+
+# rel=canonical: no_rewrite
+>>> parse('', urlrewriter=no_base_canon_rewriter)
+
# doctype
>>> parse('')
@@ -210,26 +214,24 @@ from pywb.rewrite.html_rewriter import HTMLRewriter
import pprint
import urllib
-urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html',
- '/web/',
- rewrite_opts=dict(punycode_links=False))
+ORIGINAL_URL = 'http://example.com/some/path/index.html'
-full_path_urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html',
- 'http://localhost:80/web/',
- rewrite_opts=dict(punycode_links=False))
+def new_rewriter(prefix='/web/', rewrite_opts=dict()):
+ PROXY_PATH = '20131226101010/{0}'.format(ORIGINAL_URL)
+ return UrlRewriter(PROXY_PATH, prefix, rewrite_opts=rewrite_opts)
-urlrewriter_pencode = UrlRewriter('20131226101010/http://example.com/some/path/index.html',
- '/web/',
- rewrite_opts=dict(punycode_links=True))
+urlrewriter = new_rewriter(rewrite_opts=dict(punycode_links=False))
+full_path_urlrewriter = new_rewriter(prefix='http://localhost:80/web/',
+ rewrite_opts=dict(punycode_links=False))
-no_base_canon_rewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html',
- '/web/',
- rewrite_opts=dict(rewrite_rel_canon=False,
- rewrite_base=False))
+urlrewriter_pencode = new_rewriter(rewrite_opts=dict(punycode_links=True))
+
+no_base_canon_rewriter = new_rewriter(rewrite_opts=dict(rewrite_rel_canon=False,
+ rewrite_base=False))
def parse(data, head_insert=None, urlrewriter=urlrewriter):
- parser = HTMLRewriter(urlrewriter, head_insert = head_insert)
+ parser = HTMLRewriter(urlrewriter, head_insert = head_insert, url = ORIGINAL_URL)
if isinstance(data, unicode):
data = data.encode('utf-8')