mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 08:04:49 +01:00
rewrite deprefix: improve query deprefix to also test url-encoded params, closes #119
This commit is contained in:
parent
d2e17bf505
commit
93d49ae24b
@ -114,6 +114,14 @@
|
||||
>>> do_deprefix('http://example.com/file.html?param=http://localhost:8080/pywb/https%3A//example.com/filename.html&other=value&a=b&param2=http://localhost:8080/pywb/http://test.example.com', '/pywb/', 'http://localhost:8080/pywb/')
|
||||
'http://example.com/file.html?param=https://example.com/filename.html&other=value&a=b&param2=http://test.example.com'
|
||||
|
||||
# urlencoded
|
||||
>>> do_deprefix('http://example.com/file.html?foo=bar&url=' + urllib.quote_plus('http://localhost:8080/pywb/http://example.com/filename.html') + '&foo2=bar2', '/pywb/', 'http://localhost:8080/pywb/')
|
||||
'http://example.com/file.html?foo=bar&url=http://example.com/filename.html&foo2=bar2'
|
||||
|
||||
# with extra path
|
||||
>>> do_deprefix('http://example.com/file.html?foo=bar&url=' + urllib.quote_plus('http://localhost:8080/pywb/extra/path/http://example.com/filename.html') + '&foo2=bar2', '/pywb/', 'http://localhost:8080/pywb/')
|
||||
'http://example.com/file.html?foo=bar&url=http://example.com/filename.html&foo2=bar2'
|
||||
|
||||
# HttpsUrlRewriter tests
|
||||
>>> httpsrewriter = HttpsUrlRewriter('http://example.com/', None)
|
||||
>>> httpsrewriter.rewrite('https://example.com/abc')
|
||||
|
@ -243,6 +243,7 @@ class WbUrl(BaseWbUrl):
|
||||
self.timestamp = res[0]
|
||||
self.mod = res[1]
|
||||
self.url = res[2]
|
||||
|
||||
if self.timestamp:
|
||||
self.type = self.REPLAY
|
||||
else:
|
||||
@ -256,8 +257,11 @@ class WbUrl(BaseWbUrl):
|
||||
|
||||
def deprefix_url(self, prefix):
    """Remove ``prefix`` from query-parameter values embedded in ``self.url``.

    Two passes are made over ``self.url`` and the result is stored back
    on ``self.url``:

    1. Plain form: every ``=<prefix>`` is reduced to ``=``, together with
       any directly following run of digits, an optional two-character
       token ending in ``_`` and an optional ``/`` (presumably the
       timestamp and modifier part of a rewritten url -- confirm against
       the callers).
    2. Url-encoded form: every ``=<quote_plus(prefix)>`` is removed along
       with everything up to the next encoded ``//`` (optionally preceded
       by an encoded ``http:`` / ``https:`` scheme).  The remainder of
       that parameter value is kept as-is, i.e. still percent-encoded.

    :param prefix: the prefix string to strip from parameter values.
    :return: the updated ``self.url``.
    """
    # urllib.quote_plus only exists on Python 2; prefer the Python 3
    # location and fall back so that the method works on both.
    try:
        from urllib.parse import quote_plus
    except ImportError:  # Python 2
        from urllib import quote_plus

    # Raw string: '\w' in a normal literal is an invalid escape sequence.
    rex_query = '=' + re.escape(prefix) + r'([0-9])*([\w]{2}_)?/?'
    self.url = re.sub(rex_query, '=', self.url)

    # The encoded prefix may contain '.' or '+', which must be matched
    # literally rather than interpreted as regex metacharacters.
    encoded_prefix = re.escape(quote_plus(prefix))
    rex_query = '=(' + encoded_prefix + '.*?)((?:https?%3A)?%2F%2F[^&]+)'
    self.url = re.sub(rex_query, '=\\2', self.url)

    return self.url
|
||||
|
||||
def get_url(self, url=None):
|
||||
|
Loading…
x
Reference in New Issue
Block a user