1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

rewrite: fix WbUrl parsing for URLs that start with a digit, e.g. 1234.example.com

split latest-replay URL handling out of the timestamped replay regex
add additional rewrite tests
This commit is contained in:
Ilya Kreymer 2015-03-23 15:38:10 -07:00
parent 6acac67d3c
commit 4aa6512b05
3 changed files with 24 additions and 3 deletions

View File

@ -5,6 +5,7 @@ omit =
*.html
*.js
*.css
pywb/__init__.py
[report]
exclude_lines =

View File

@ -20,6 +20,18 @@ ur"""
>>> repr(WbUrl('cs_/example.com'))
"('latest_replay', '', 'cs_', 'http://example.com', 'cs_/http://example.com')"
>>> repr(WbUrl('im_/20130102.org'))
"('latest_replay', '', 'im_', 'http://20130102.org', 'im_/http://20130102.org')"
>>> repr(WbUrl('20130102.example.com'))
"('latest_replay', '', '', 'http://20130102.example.com', 'http://20130102.example.com')"
>>> repr(WbUrl('20130102.org/1'))
"('latest_replay', '', '', 'http://20130102.org/1', 'http://20130102.org/1')"
>>> repr(WbUrl('20130102/1.com'))
"('replay', '20130102', '', 'http://1.com', '20130102/http://1.com')"
>>> repr(WbUrl('https://example.com/xyz'))
"('latest_replay', '', '', 'https://example.com/xyz', 'https://example.com/xyz')"

View File

@ -86,7 +86,8 @@ class WbUrl(BaseWbUrl):
# Regexs
# ======================
QUERY_REGEX = re.compile('^(?:([\w\-:]+)/)?(\d*)(?:-(\d+))?\*/?(.+)$')
REPLAY_REGEX = re.compile('^(\d*)([a-z]+_)?/{0,3}(.+)$')
REPLAY_REGEX = re.compile('^(\d*)([a-z]+_)?/{1,3}(.+)$')
#LATEST_REPLAY_REGEX = re.compile('^\w_)')
DEFAULT_SCHEME = 'http://'
@ -221,8 +222,15 @@ class WbUrl(BaseWbUrl):
def _init_replay(self, url):
replay = self.REPLAY_REGEX.match(url)
if not replay:
if not url:
return None
self.timestamp = ''
self.mod = ''
self.url = url
self.type = self.LATEST_REPLAY
return True
res = replay.groups('')
self.timestamp = res[0]