diff --git a/pywb/framework/wbrequestresponse.py b/pywb/framework/wbrequestresponse.py index 8cbabc49..808563ea 100644 --- a/pywb/framework/wbrequestresponse.py +++ b/pywb/framework/wbrequestresponse.py @@ -78,6 +78,8 @@ class WbRequest(object): rel_prefix, env.get('SCRIPT_NAME', '/'), cookie_scope) + + self.urlrewriter.deprefix_url() else: # no wb_url, just store blank wb_url self.wb_url = None diff --git a/pywb/rewrite/test/test_url_rewriter.py b/pywb/rewrite/test/test_url_rewriter.py index 7d3d1e41..be0ca7da 100644 --- a/pywb/rewrite/test/test_url_rewriter.py +++ b/pywb/rewrite/test/test_url_rewriter.py @@ -74,6 +74,18 @@ >>> UrlRewriter('2013id_/example.com/file/path/blah.html', '/123/').get_new_url(timestamp='20131024') '/123/20131024id_/http://example.com/file/path/blah.html' +# deprefix tests +>>> do_deprefix('2013id_/http://example.com/file/path/blah.html?param=http://localhost:8080/pywb/20141226/http://example.com/', '/pywb/', 'http://localhost:8080/pywb/') +'http://example.com/file/path/blah.html?param=http://example.com/' + +>>> do_deprefix('2013id_/http://example.com/file/path/blah.html?param=http://localhost:8080/pywb/if_/https://example.com/filename.html', '/pywb/', 'http://localhost:8080/pywb/') +'http://example.com/file/path/blah.html?param=https://example.com/filename.html' + +>>> do_deprefix('2013id_/http://example.com/file/path/blah.html?param=http://localhost:8080/pywb/https://example.com/filename.html', '/pywb/', 'http://localhost:8080/pywb/') +'http://example.com/file/path/blah.html?param=https://example.com/filename.html' + +>>> do_deprefix('http://example.com/file.html?param=http://localhost:8080/pywb/https%3A//example.com/filename.html&other=value&a=b&param2=http://localhost:8080/pywb/http://test.example.com', '/pywb/', 'http://localhost:8080/pywb/') +'http://example.com/file.html?param=https://example.com/filename.html&other=value&a=b&param2=http://test.example.com' # HttpsUrlRewriter tests >>> HttpsUrlRewriter('http://example.com/', 
None).rewrite('https://example.com/abc') @@ -86,13 +98,22 @@ from pywb.rewrite.url_rewriter import UrlRewriter, HttpsUrlRewriter - +import urllib def do_rewrite(rel_url, base_url, prefix, mod=None, full_prefix=None): rewriter = UrlRewriter(base_url, prefix, full_prefix=full_prefix) return rewriter.rewrite(rel_url, mod) +def do_deprefix(url, rel_prefix, full_prefix): + encoded = urllib.quote_plus(full_prefix) + url = url.replace(full_prefix, encoded) + + rewriter = UrlRewriter(url, rel_prefix, full_prefix) + url = rewriter.deprefix_url() + return urllib.unquote_plus(url) + + if __name__ == "__main__": import doctest doctest.testmod() diff --git a/pywb/rewrite/url_rewriter.py b/pywb/rewrite/url_rewriter.py index 5fc3a18e..61a48e50 100644 --- a/pywb/rewrite/url_rewriter.py +++ b/pywb/rewrite/url_rewriter.py @@ -1,4 +1,3 @@ -import copy import urlparse from wburl import WbUrl @@ -88,6 +87,9 @@ class UrlRewriter(object): cls = get_cookie_rewriter(scope) return cls(self) + def deprefix_url(self): + return self.wburl.deprefix_url(self.full_prefix) + def __repr__(self): return "UrlRewriter('{0}', '{1}')".format(self.wburl, self.prefix) @@ -150,3 +152,6 @@ class HttpsUrlRewriter(UrlRewriter): def get_cookie_rewriter(self, scope=None): return None + + def deprefix_url(self): + return self.wburl.url diff --git a/pywb/rewrite/wburl.py b/pywb/rewrite/wburl.py index de789ac0..91d36455 100644 --- a/pywb/rewrite/wburl.py +++ b/pywb/rewrite/wburl.py @@ -39,7 +39,7 @@ wayback url format. """ import re - +import urllib #================================================================= class BaseWbUrl(object): @@ -149,6 +149,14 @@ class WbUrl(BaseWbUrl): self.timestamp = timestamp self.type = self.REPLAY + + def deprefix_url(self, prefix): + prefix = urllib.quote_plus(prefix) + rex_query = '=' + re.escape(prefix) + '([0-9])*([\w]{2}_)?/?' 
+ new_url = re.sub(rex_query, '=', self.url) + self.url = new_url + return self.url + # Str Representation # ==================== def to_str(self, **overrides):