mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
rewrite deprefix: improve query deprefix to also test url-encoded params, closes #119
This commit is contained in:
parent
d2e17bf505
commit
93d49ae24b
@ -114,6 +114,14 @@
|
|||||||
>>> do_deprefix('http://example.com/file.html?param=http://localhost:8080/pywb/https%3A//example.com/filename.html&other=value&a=b&param2=http://localhost:8080/pywb/http://test.example.com', '/pywb/', 'http://localhost:8080/pywb/')
|
>>> do_deprefix('http://example.com/file.html?param=http://localhost:8080/pywb/https%3A//example.com/filename.html&other=value&a=b&param2=http://localhost:8080/pywb/http://test.example.com', '/pywb/', 'http://localhost:8080/pywb/')
|
||||||
'http://example.com/file.html?param=https://example.com/filename.html&other=value&a=b&param2=http://test.example.com'
|
'http://example.com/file.html?param=https://example.com/filename.html&other=value&a=b&param2=http://test.example.com'
|
||||||
|
|
||||||
|
# urlencoded
|
||||||
|
>>> do_deprefix('http://example.com/file.html?foo=bar&url=' + urllib.quote_plus('http://localhost:8080/pywb/http://example.com/filename.html') + '&foo2=bar2', '/pywb/', 'http://localhost:8080/pywb/')
|
||||||
|
'http://example.com/file.html?foo=bar&url=http://example.com/filename.html&foo2=bar2'
|
||||||
|
|
||||||
|
# with extra path
|
||||||
|
>>> do_deprefix('http://example.com/file.html?foo=bar&url=' + urllib.quote_plus('http://localhost:8080/pywb/extra/path/http://example.com/filename.html') + '&foo2=bar2', '/pywb/', 'http://localhost:8080/pywb/')
|
||||||
|
'http://example.com/file.html?foo=bar&url=http://example.com/filename.html&foo2=bar2'
|
||||||
|
|
||||||
# HttpsUrlRewriter tests
|
# HttpsUrlRewriter tests
|
||||||
>>> httpsrewriter = HttpsUrlRewriter('http://example.com/', None)
|
>>> httpsrewriter = HttpsUrlRewriter('http://example.com/', None)
|
||||||
>>> httpsrewriter.rewrite('https://example.com/abc')
|
>>> httpsrewriter.rewrite('https://example.com/abc')
|
||||||
|
@ -243,6 +243,7 @@ class WbUrl(BaseWbUrl):
|
|||||||
self.timestamp = res[0]
|
self.timestamp = res[0]
|
||||||
self.mod = res[1]
|
self.mod = res[1]
|
||||||
self.url = res[2]
|
self.url = res[2]
|
||||||
|
|
||||||
if self.timestamp:
|
if self.timestamp:
|
||||||
self.type = self.REPLAY
|
self.type = self.REPLAY
|
||||||
else:
|
else:
|
||||||
@ -256,8 +257,11 @@ class WbUrl(BaseWbUrl):
|
|||||||
|
|
||||||
def deprefix_url(self, prefix):
|
def deprefix_url(self, prefix):
|
||||||
rex_query = '=' + re.escape(prefix) + '([0-9])*([\w]{2}_)?/?'
|
rex_query = '=' + re.escape(prefix) + '([0-9])*([\w]{2}_)?/?'
|
||||||
new_url = re.sub(rex_query, '=', self.url)
|
self.url = re.sub(rex_query, '=', self.url)
|
||||||
self.url = new_url
|
|
||||||
|
rex_query = '=(' + urllib.quote_plus(prefix) + '.*?)((?:https?%3A)?%2F%2F[^&]+)'
|
||||||
|
self.url = re.sub(rex_query, '=\\2', self.url)
|
||||||
|
|
||||||
return self.url
|
return self.url
|
||||||
|
|
||||||
def get_url(self, url=None):
|
def get_url(self, url=None):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user