mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
rewrite: fix WbUrl parsing for URLs that start with a digit, e.g. 1234.example.com
split latest replay URL from timestamped replay regex; add additional rewrite tests
This commit is contained in:
parent
6acac67d3c
commit
4aa6512b05
@ -5,6 +5,7 @@ omit =
|
||||
*.html
|
||||
*.js
|
||||
*.css
|
||||
pywb/__init__.py
|
||||
|
||||
[report]
|
||||
exclude_lines =
|
||||
|
@ -20,6 +20,18 @@ ur"""
|
||||
>>> repr(WbUrl('cs_/example.com'))
|
||||
"('latest_replay', '', 'cs_', 'http://example.com', 'cs_/http://example.com')"
|
||||
|
||||
>>> repr(WbUrl('im_/20130102.org'))
|
||||
"('latest_replay', '', 'im_', 'http://20130102.org', 'im_/http://20130102.org')"
|
||||
|
||||
>>> repr(WbUrl('20130102.example.com'))
|
||||
"('latest_replay', '', '', 'http://20130102.example.com', 'http://20130102.example.com')"
|
||||
|
||||
>>> repr(WbUrl('20130102.org/1'))
|
||||
"('latest_replay', '', '', 'http://20130102.org/1', 'http://20130102.org/1')"
|
||||
|
||||
>>> repr(WbUrl('20130102/1.com'))
|
||||
"('replay', '20130102', '', 'http://1.com', '20130102/http://1.com')"
|
||||
|
||||
>>> repr(WbUrl('https://example.com/xyz'))
|
||||
"('latest_replay', '', '', 'https://example.com/xyz', 'https://example.com/xyz')"
|
||||
|
||||
|
@ -86,7 +86,8 @@ class WbUrl(BaseWbUrl):
|
||||
# Regexs
|
||||
# ======================
|
||||
QUERY_REGEX = re.compile('^(?:([\w\-:]+)/)?(\d*)(?:-(\d+))?\*/?(.+)$')
|
||||
REPLAY_REGEX = re.compile('^(\d*)([a-z]+_)?/{0,3}(.+)$')
|
||||
REPLAY_REGEX = re.compile('^(\d*)([a-z]+_)?/{1,3}(.+)$')
|
||||
#LATEST_REPLAY_REGEX = re.compile('^\w_)')
|
||||
|
||||
DEFAULT_SCHEME = 'http://'
|
||||
|
||||
@ -221,7 +222,14 @@ class WbUrl(BaseWbUrl):
|
||||
def _init_replay(self, url):
|
||||
replay = self.REPLAY_REGEX.match(url)
|
||||
if not replay:
|
||||
return None
|
||||
if not url:
|
||||
return None
|
||||
|
||||
self.timestamp = ''
|
||||
self.mod = ''
|
||||
self.url = url
|
||||
self.type = self.LATEST_REPLAY
|
||||
return True
|
||||
|
||||
res = replay.groups('')
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user