mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
wburl: split into BaseWbUrl and WbUrl for better extensibility
This commit is contained in:
parent
21e885b78a
commit
d702b299ae
@ -1,9 +1,5 @@
|
|||||||
#!/usr/bin/python
|
#!/usr/bin/python
|
||||||
|
|
||||||
import re
|
|
||||||
import rfc3987
|
|
||||||
|
|
||||||
# WbUrl : wb archival url representation for WB
|
|
||||||
"""
|
"""
|
||||||
WbUrl represents the standard wayback archival url format.
|
WbUrl represents the standard wayback archival url format.
|
||||||
A regular url is a subset of the WbUrl (latest replay).
|
A regular url is a subset of the WbUrl (latest replay).
|
||||||
@ -34,9 +30,38 @@ replay form:
|
|||||||
|
|
||||||
latest_replay: (no timestamp)
|
latest_replay: (no timestamp)
|
||||||
http://example.com
|
http://example.com
|
||||||
|
|
||||||
|
Additionally, the BaseWbUrl provides the base components
|
||||||
|
(url, timestamp, end_timestamp, modifier, type) which
|
||||||
|
can be used to provide a custom representation of the
|
||||||
|
wayback url format.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
class WbUrl:
|
import re
|
||||||
|
import rfc3987
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
class BaseWbUrl(object):
    """Plain container for the base components of a wayback url.

    Holds the five components (url, modifier, timestamp, end timestamp
    and type) without doing any parsing, so that other code can build a
    custom representation of the wayback url format on top of it.
    """

    # Names of the wayback url kinds.
    # NOTE(review): presumably these are the values stored in ``type``;
    # confirm against the code that assigns it.
    QUERY = 'query'
    URL_QUERY = 'url_query'
    REPLAY = 'replay'
    LATEST_REPLAY = 'latest_replay'

    def __init__(self, url='', mod='',
                 timestamp='', end_timestamp='', type=None):
        """Store the given components unchanged on the instance.

        :param url: the url component (default: empty string)
        :param mod: the modifier component (default: empty string)
        :param timestamp: the timestamp component (default: empty string)
        :param end_timestamp: the end timestamp component
                              (default: empty string)
        :param type: the url type (default: None). The parameter name
                     shadows the ``type`` builtin inside this method only;
                     it is kept so keyword callers continue to work.
        """
        self.url = url
        self.timestamp = timestamp
        self.end_timestamp = end_timestamp
        self.mod = mod
        self.type = type
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
class WbUrl(BaseWbUrl):
|
||||||
"""
|
"""
|
||||||
# Replay Urls
|
# Replay Urls
|
||||||
# ======================
|
# ======================
|
||||||
@ -107,22 +132,14 @@ class WbUrl:
|
|||||||
QUERY_REGEX = re.compile('^(?:([\w\-:]+)/)?(\d*)(?:-(\d+))?\*/?(.*)$')
|
QUERY_REGEX = re.compile('^(?:([\w\-:]+)/)?(\d*)(?:-(\d+))?\*/?(.*)$')
|
||||||
REPLAY_REGEX = re.compile('^(\d*)([a-z]+_)?/{0,3}(.*)$')
|
REPLAY_REGEX = re.compile('^(\d*)([a-z]+_)?/{0,3}(.*)$')
|
||||||
|
|
||||||
QUERY = 'query'
|
|
||||||
URL_QUERY = 'url_query'
|
|
||||||
REPLAY = 'replay'
|
|
||||||
LATEST_REPLAY = 'latest_replay'
|
|
||||||
|
|
||||||
DEFAULT_SCHEME = 'http://'
|
DEFAULT_SCHEME = 'http://'
|
||||||
# ======================
|
# ======================
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, url):
|
def __init__(self, url):
|
||||||
|
super(WbUrl, self).__init__()
|
||||||
|
|
||||||
self.original_url = url
|
self.original_url = url
|
||||||
self.type = None
|
|
||||||
self.url = ''
|
|
||||||
self.timestamp = ''
|
|
||||||
self.end_timestamp = ''
|
|
||||||
self.mod = ''
|
|
||||||
|
|
||||||
if not any (f(url) for f in [self._init_query, self._init_replay]):
|
if not any (f(url) for f in [self._init_query, self._init_replay]):
|
||||||
raise Exception('Invalid WbUrl: ', url)
|
raise Exception('Invalid WbUrl: ', url)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user