mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 08:04:49 +01:00
cdx: CDXQuery now takes a params dict instead of **params
CDXObject comparison now uses the (cached) to_json() output
This commit is contained in:
parent
57991fd0cf
commit
0dff388e4e
@ -107,6 +107,7 @@ class CDXObject(OrderedDict):
|
||||
|
||||
cdxline = cdxline.rstrip()
|
||||
self._from_json = False
|
||||
self._cached_json = None
|
||||
|
||||
# Allows for filling the fields later or in a custom way
|
||||
if not cdxline:
|
||||
@ -157,6 +158,9 @@ class CDXObject(OrderedDict):
|
||||
# force regen on next __str__ call
|
||||
self.cdxline = None
|
||||
|
||||
# force regen on next to_json() call
|
||||
self._cached_json = None
|
||||
|
||||
def is_revisit(self):
|
||||
"""return ``True`` if this record is a revisit record."""
|
||||
return (self.get(MIMETYPE) == 'warc/revisit' or
|
||||
@ -174,7 +178,7 @@ class CDXObject(OrderedDict):
|
||||
return str(self) + '\n'
|
||||
|
||||
try:
|
||||
result = ' '.join(self[x] for x in fields) + '\n'
|
||||
result = ' '.join(str(self[x]) for x in fields) + '\n'
|
||||
except KeyError as ke:
|
||||
msg = 'Invalid field "{0}" found in fields= argument'
|
||||
msg = msg.format(ke.message)
|
||||
@ -182,7 +186,6 @@ class CDXObject(OrderedDict):
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def to_json(self, fields=None):
|
||||
return self.conv_to_json(self, fields)
|
||||
|
||||
@ -213,7 +216,7 @@ class CDXObject(OrderedDict):
|
||||
return self.cdxline.decode('utf-8')
|
||||
|
||||
if not self._from_json:
|
||||
return ' '.join(val for n, val in six.iteritems(self))
|
||||
return ' '.join(str(val) for val in six.itervalues(self))
|
||||
else:
|
||||
return json_encode(self)
|
||||
|
||||
@ -223,7 +226,13 @@ class CDXObject(OrderedDict):
|
||||
return prefix + self.conv_to_json(dupe, fields)
|
||||
|
||||
def __lt__(self, other):
|
||||
return str(self) < str(other)
|
||||
if not self._cached_json:
|
||||
self._cached_json = self.to_json()
|
||||
|
||||
if not other._cached_json:
|
||||
other._cached_json = other.to_json()
|
||||
|
||||
return self._cached_json < other._cached_json
|
||||
|
||||
|
||||
#=================================================================
|
||||
|
@ -69,7 +69,7 @@ class BaseCDXServer(object):
|
||||
|
||||
def load_cdx(self, **params):
|
||||
params['_url_canon'] = self.url_canon
|
||||
query = CDXQuery(**params)
|
||||
query = CDXQuery(params)
|
||||
|
||||
#key, end_key = self._calc_search_keys(query)
|
||||
#query.set_key(key, end_key)
|
||||
|
@ -60,8 +60,8 @@ class RemoteCDXSource(CDXSource):
|
||||
remote_query = query
|
||||
else:
|
||||
# Only send url and matchType to remote
|
||||
remote_query = CDXQuery(url=query.url,
|
||||
matchType=query.match_type)
|
||||
remote_query = CDXQuery(dict(url=query.url,
|
||||
matchType=query.match_type))
|
||||
|
||||
urlparams = remote_query.urlencode()
|
||||
|
||||
|
@ -5,8 +5,8 @@ from pywb.utils.canonicalize import calc_search_range
|
||||
|
||||
#=================================================================
|
||||
class CDXQuery(object):
|
||||
def __init__(self, **kwargs):
|
||||
self.params = kwargs
|
||||
def __init__(self, params):
|
||||
self.params = params
|
||||
url = self.url
|
||||
if not self.params.get('matchType'):
|
||||
if url.startswith('*.'):
|
||||
@ -18,7 +18,7 @@ class CDXQuery(object):
|
||||
else:
|
||||
self.params['matchType'] = 'exact'
|
||||
|
||||
start, end = calc_search_range(url=url,
|
||||
start, end = calc_search_range(url=self.url,
|
||||
match_type=self.params['matchType'],
|
||||
url_canon=self.params.get('_url_canon'))
|
||||
|
||||
|
@ -29,7 +29,7 @@ def lazy_cdx_load(**params):
|
||||
params['custom_ops'] = [raise_access_exception]
|
||||
|
||||
cdx_iter = cdx_load(['bogus ignored'],
|
||||
CDXQuery(**params),
|
||||
CDXQuery(params),
|
||||
process=True)
|
||||
|
||||
# exception happens on first access attempt
|
||||
|
Loading…
x
Reference in New Issue
Block a user