mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
rewrite: add 'deprefix' support to remove wburl prefix from any query
params
This commit is contained in:
parent
037cf35eb8
commit
c9273ee5ed
@ -78,6 +78,8 @@ class WbRequest(object):
|
||||
rel_prefix,
|
||||
env.get('SCRIPT_NAME', '/'),
|
||||
cookie_scope)
|
||||
|
||||
self.urlrewriter.deprefix_url()
|
||||
else:
|
||||
# no wb_url, just store blank wb_url
|
||||
self.wb_url = None
|
||||
|
@ -74,6 +74,18 @@
|
||||
>>> UrlRewriter('2013id_/example.com/file/path/blah.html', '/123/').get_new_url(timestamp='20131024')
|
||||
'/123/20131024id_/http://example.com/file/path/blah.html'
|
||||
|
||||
# deprefix tests
|
||||
>>> do_deprefix('2013id_/http://example.com/file/path/blah.html?param=http://localhost:8080/pywb/20141226/http://example.com/', '/pywb/', 'http://localhost:8080/pywb/')
|
||||
'http://example.com/file/path/blah.html?param=http://example.com/'
|
||||
|
||||
>>> do_deprefix('2013id_/http://example.com/file/path/blah.html?param=http://localhost:8080/pywb/if_/https://example.com/filename.html', '/pywb/', 'http://localhost:8080/pywb/')
|
||||
'http://example.com/file/path/blah.html?param=https://example.com/filename.html'
|
||||
|
||||
>>> do_deprefix('2013id_/http://example.com/file/path/blah.html?param=http://localhost:8080/pywb/https://example.com/filename.html', '/pywb/', 'http://localhost:8080/pywb/')
|
||||
'http://example.com/file/path/blah.html?param=https://example.com/filename.html'
|
||||
|
||||
>>> do_deprefix('http://example.com/file.html?param=http://localhost:8080/pywb/https%3A//example.com/filename.html&other=value&a=b&param2=http://localhost:8080/pywb/http://test.example.com', '/pywb/', 'http://localhost:8080/pywb/')
|
||||
'http://example.com/file.html?param=https://example.com/filename.html&other=value&a=b&param2=http://test.example.com'
|
||||
|
||||
# HttpsUrlRewriter tests
|
||||
>>> HttpsUrlRewriter('http://example.com/', None).rewrite('https://example.com/abc')
|
||||
@ -86,13 +98,22 @@
|
||||
|
||||
|
||||
from pywb.rewrite.url_rewriter import UrlRewriter, HttpsUrlRewriter
|
||||
|
||||
import urllib
|
||||
|
||||
def do_rewrite(rel_url, base_url, prefix, mod=None, full_prefix=None):
    """Doctest helper: rewrite ``rel_url`` via a freshly built UrlRewriter.

    The rewriter is constructed from ``base_url``, ``prefix`` and the
    optional ``full_prefix``; ``mod`` is passed straight to ``rewrite``.
    """
    return UrlRewriter(base_url, prefix,
                       full_prefix=full_prefix).rewrite(rel_url, mod)
|
||||
|
||||
|
||||
def do_deprefix(url, rel_prefix, full_prefix):
    """Doctest helper: run ``UrlRewriter.deprefix_url`` on ``url``.

    Every literal occurrence of ``full_prefix`` in ``url`` is first
    replaced by its ``quote_plus``-encoded form, a UrlRewriter is built
    from the result, and the de-prefixed url it returns is decoded again
    with ``unquote_plus`` before being handed back.
    """
    quoted_url = url.replace(full_prefix, urllib.quote_plus(full_prefix))
    stripped = UrlRewriter(quoted_url, rel_prefix, full_prefix).deprefix_url()
    return urllib.unquote_plus(stripped)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import doctest
|
||||
doctest.testmod()
|
||||
|
@ -1,4 +1,3 @@
|
||||
import copy
|
||||
import urlparse
|
||||
|
||||
from wburl import WbUrl
|
||||
@ -88,6 +87,9 @@ class UrlRewriter(object):
|
||||
cls = get_cookie_rewriter(scope)
|
||||
return cls(self)
|
||||
|
||||
def deprefix_url(self):
    """Delegate to ``self.wburl.deprefix_url`` using this rewriter's
    ``full_prefix`` and return whatever it returns.
    """
    wburl = self.wburl
    return wburl.deprefix_url(self.full_prefix)
|
||||
|
||||
def __repr__(self):
    """Return a constructor-style string showing ``wburl`` and ``prefix``."""
    template = "UrlRewriter('{0}', '{1}')"
    return template.format(self.wburl, self.prefix)
|
||||
|
||||
@ -150,3 +152,6 @@ class HttpsUrlRewriter(UrlRewriter):
|
||||
|
||||
def get_cookie_rewriter(self, scope=None):
    """Return ``None``: no cookie rewriter is supplied, whatever ``scope`` is."""
    return None
|
||||
|
||||
def deprefix_url(self):
    """Return ``self.wburl.url`` unchanged; no prefix stripping is done here."""
    wburl = self.wburl
    return wburl.url
|
||||
|
@ -39,7 +39,7 @@ wayback url format.
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
import urllib
|
||||
|
||||
#=================================================================
|
||||
class BaseWbUrl(object):
|
||||
@ -149,6 +149,14 @@ class WbUrl(BaseWbUrl):
|
||||
self.timestamp = timestamp
|
||||
self.type = self.REPLAY
|
||||
|
||||
|
||||
def deprefix_url(self, prefix):
    """Strip the percent-encoded ``prefix`` from query values in ``self.url``.

    ``prefix`` is encoded with ``urllib.quote_plus``; every
    ``=<encoded prefix>`` occurrence in ``self.url`` -- together with any
    digits, an optional two-word-character ``xx_`` modifier and one
    optional ``/`` that directly follow it -- is collapsed to a bare ``=``.

    :param prefix: the prefix to remove. An empty or ``None`` prefix
        leaves ``self.url`` untouched.
    :returns: the resulting ``self.url`` (the attribute is updated in place).
    """
    # Without a prefix the pattern would degrade to '=[0-9]*...' and eat
    # the leading digits of every query value; None cannot be quoted at all.
    if not prefix:
        return self.url

    # NOTE: only the percent-encoded form of the prefix is matched here.
    quoted_prefix = urllib.quote_plus(prefix)

    # Raw string so the regex escape \w is never read as a string escape.
    rex_query = '=' + re.escape(quoted_prefix) + r'([0-9])*([\w]{2}_)?/?'

    self.url = re.sub(rex_query, '=', self.url)
    return self.url
|
||||
|
||||
# Str Representation
|
||||
# ====================
|
||||
def to_str(self, **overrides):
|
||||
|
Loading…
x
Reference in New Issue
Block a user