mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
rewrite: add 'deprefix' support to remove wburl prefix from any query
params
This commit is contained in:
parent
037cf35eb8
commit
c9273ee5ed
@ -78,6 +78,8 @@ class WbRequest(object):
|
|||||||
rel_prefix,
|
rel_prefix,
|
||||||
env.get('SCRIPT_NAME', '/'),
|
env.get('SCRIPT_NAME', '/'),
|
||||||
cookie_scope)
|
cookie_scope)
|
||||||
|
|
||||||
|
self.urlrewriter.deprefix_url()
|
||||||
else:
|
else:
|
||||||
# no wb_url, just store blank wb_url
|
# no wb_url, just store blank wb_url
|
||||||
self.wb_url = None
|
self.wb_url = None
|
||||||
|
@ -74,6 +74,18 @@
|
|||||||
>>> UrlRewriter('2013id_/example.com/file/path/blah.html', '/123/').get_new_url(timestamp='20131024')
|
>>> UrlRewriter('2013id_/example.com/file/path/blah.html', '/123/').get_new_url(timestamp='20131024')
|
||||||
'/123/20131024id_/http://example.com/file/path/blah.html'
|
'/123/20131024id_/http://example.com/file/path/blah.html'
|
||||||
|
|
||||||
|
# deprefix tests
|
||||||
|
>>> do_deprefix('2013id_/http://example.com/file/path/blah.html?param=http://localhost:8080/pywb/20141226/http://example.com/', '/pywb/', 'http://localhost:8080/pywb/')
|
||||||
|
'http://example.com/file/path/blah.html?param=http://example.com/'
|
||||||
|
|
||||||
|
>>> do_deprefix('2013id_/http://example.com/file/path/blah.html?param=http://localhost:8080/pywb/if_/https://example.com/filename.html', '/pywb/', 'http://localhost:8080/pywb/')
|
||||||
|
'http://example.com/file/path/blah.html?param=https://example.com/filename.html'
|
||||||
|
|
||||||
|
>>> do_deprefix('2013id_/http://example.com/file/path/blah.html?param=http://localhost:8080/pywb/https://example.com/filename.html', '/pywb/', 'http://localhost:8080/pywb/')
|
||||||
|
'http://example.com/file/path/blah.html?param=https://example.com/filename.html'
|
||||||
|
|
||||||
|
>>> do_deprefix('http://example.com/file.html?param=http://localhost:8080/pywb/https%3A//example.com/filename.html&other=value&a=b&param2=http://localhost:8080/pywb/http://test.example.com', '/pywb/', 'http://localhost:8080/pywb/')
|
||||||
|
'http://example.com/file.html?param=https://example.com/filename.html&other=value&a=b&param2=http://test.example.com'
|
||||||
|
|
||||||
# HttpsUrlRewriter tests
|
# HttpsUrlRewriter tests
|
||||||
>>> HttpsUrlRewriter('http://example.com/', None).rewrite('https://example.com/abc')
|
>>> HttpsUrlRewriter('http://example.com/', None).rewrite('https://example.com/abc')
|
||||||
@ -86,13 +98,22 @@
|
|||||||
|
|
||||||
|
|
||||||
from pywb.rewrite.url_rewriter import UrlRewriter, HttpsUrlRewriter
|
from pywb.rewrite.url_rewriter import UrlRewriter, HttpsUrlRewriter
|
||||||
|
import urllib
|
||||||
|
|
||||||
def do_rewrite(rel_url, base_url, prefix, mod=None, full_prefix=None):
    """Doctest helper: rewrite ``rel_url`` relative to ``base_url``.

    Builds ``UrlRewriter(base_url, prefix, full_prefix=full_prefix)`` and
    returns the result of calling its ``rewrite(rel_url, mod)``.
    """
    return UrlRewriter(
        base_url, prefix, full_prefix=full_prefix
    ).rewrite(rel_url, mod)
|
||||||
|
|
||||||
|
|
||||||
|
def do_deprefix(url, rel_prefix, full_prefix):
    """Doctest helper: run ``UrlRewriter.deprefix_url`` on ``url``.

    Every occurrence of ``full_prefix`` inside ``url`` is first replaced
    by its ``quote_plus``-encoded form, a ``UrlRewriter`` is built from
    the result, and the value returned by its ``deprefix_url()`` is
    passed through ``unquote_plus`` before being returned.
    """
    quoted_prefix = urllib.quote_plus(full_prefix)
    prepared_url = url.replace(full_prefix, quoted_prefix)

    stripped = UrlRewriter(prepared_url, rel_prefix, full_prefix).deprefix_url()
    return urllib.unquote_plus(stripped)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # When executed as a script, run the doctests found in this module.
    from doctest import testmod
    testmod()
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
import copy
|
|
||||||
import urlparse
|
import urlparse
|
||||||
|
|
||||||
from wburl import WbUrl
|
from wburl import WbUrl
|
||||||
@ -88,6 +87,9 @@ class UrlRewriter(object):
|
|||||||
cls = get_cookie_rewriter(scope)
|
cls = get_cookie_rewriter(scope)
|
||||||
return cls(self)
|
return cls(self)
|
||||||
|
|
||||||
|
def deprefix_url(self):
    """Delegate to ``self.wburl.deprefix_url`` with ``self.full_prefix``.

    Returns whatever the wrapped wburl's ``deprefix_url`` returns.
    """
    target = self.wburl
    return target.deprefix_url(self.full_prefix)
|
||||||
|
|
||||||
def __repr__(self):
    """Return ``UrlRewriter('<wburl>', '<prefix>')`` built from this instance."""
    template = "UrlRewriter('{0}', '{1}')"
    return template.format(self.wburl, self.prefix)
|
||||||
|
|
||||||
@ -150,3 +152,6 @@ class HttpsUrlRewriter(UrlRewriter):
|
|||||||
|
|
||||||
def get_cookie_rewriter(self, scope=None):
    """Return ``None`` regardless of ``scope``."""
    return None
|
||||||
|
|
||||||
|
def deprefix_url(self):
    """Return the ``url`` attribute of ``self.wburl`` as-is.

    No prefix stripping is performed here.
    """
    wburl = self.wburl
    return wburl.url
|
||||||
|
@ -39,7 +39,7 @@ wayback url format.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import urllib
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class BaseWbUrl(object):
|
class BaseWbUrl(object):
|
||||||
@ -149,6 +149,14 @@ class WbUrl(BaseWbUrl):
|
|||||||
self.timestamp = timestamp
|
self.timestamp = timestamp
|
||||||
self.type = self.REPLAY
|
self.type = self.REPLAY
|
||||||
|
|
||||||
|
|
||||||
|
def deprefix_url(self, prefix):
    """Strip an embedded wayback prefix from query parameter values.

    Searches ``self.url`` for ``=<quoted prefix>``, where the quoted
    prefix is the ``quote_plus``-encoded form of ``prefix``, optionally
    followed by digits (a timestamp), a two-character modifier ending in
    ``_`` (such as ``id_`` or ``if_``) and a single ``/``. Each match is
    reduced to a bare ``=``. The result is stored back on ``self.url``
    and returned.

    :param prefix: full prefix to remove, for example
        ``http://localhost:8080/pywb/``. A false-y prefix (``None`` or
        ``''``) leaves ``self.url`` untouched.
    :return: the (possibly updated) ``self.url``
    """
    # Without a prefix the pattern would degrade to '=[0-9]*...' and eat
    # the leading digits of ordinary query values; None cannot be quoted.
    if not prefix:
        return self.url

    # Local import so the method works with both the Python 2 and the
    # Python 3 location of quote_plus.
    try:
        from urllib import quote_plus
    except ImportError:
        from urllib.parse import quote_plus

    quoted_prefix = quote_plus(prefix)

    # Raw string: '\w' inside a plain literal is an invalid escape sequence.
    rex_query = '=' + re.escape(quoted_prefix) + r'[0-9]*(?:\w{2}_)?/?'

    self.url = re.sub(rex_query, '=', self.url)
    return self.url
|
||||||
|
|
||||||
# Str Representation
|
# Str Representation
|
||||||
# ====================
|
# ====================
|
||||||
def to_str(self, **overrides):
|
def to_str(self, **overrides):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user