1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 08:04:49 +01:00

cdx: CDXQuery now takes a params dict instead of **params

CDXObject comparison (__lt__) now compares cached to_json() output instead of str()
This commit is contained in:
Ilya Kreymer 2016-02-23 01:36:39 -08:00
parent 57991fd0cf
commit 0dff388e4e
5 changed files with 20 additions and 11 deletions

View File

@ -107,6 +107,7 @@ class CDXObject(OrderedDict):
cdxline = cdxline.rstrip()
self._from_json = False
self._cached_json = None
# Allows for filling the fields later or in a custom way
if not cdxline:
@ -157,6 +158,9 @@ class CDXObject(OrderedDict):
# force regen on next __str__ call
self.cdxline = None
# force regen on next to_json() call
self._cached_json = None
def is_revisit(self):
"""return ``True`` if this record is a revisit record."""
return (self.get(MIMETYPE) == 'warc/revisit' or
@ -174,7 +178,7 @@ class CDXObject(OrderedDict):
return str(self) + '\n'
try:
result = ' '.join(self[x] for x in fields) + '\n'
result = ' '.join(str(self[x]) for x in fields) + '\n'
except KeyError as ke:
msg = 'Invalid field "{0}" found in fields= argument'
msg = msg.format(ke.message)
@ -182,7 +186,6 @@ class CDXObject(OrderedDict):
return result
def to_json(self, fields=None):
return self.conv_to_json(self, fields)
@ -213,7 +216,7 @@ class CDXObject(OrderedDict):
return self.cdxline.decode('utf-8')
if not self._from_json:
return ' '.join(val for n, val in six.iteritems(self))
return ' '.join(str(val) for val in six.itervalues(self))
else:
return json_encode(self)
@ -223,7 +226,13 @@ class CDXObject(OrderedDict):
return prefix + self.conv_to_json(dupe, fields)
def __lt__(self, other):
return str(self) < str(other)
if not self._cached_json:
self._cached_json = self.to_json()
if not other._cached_json:
other._cached_json = other.to_json()
return self._cached_json < other._cached_json
#=================================================================

View File

@ -69,7 +69,7 @@ class BaseCDXServer(object):
def load_cdx(self, **params):
params['_url_canon'] = self.url_canon
query = CDXQuery(**params)
query = CDXQuery(params)
#key, end_key = self._calc_search_keys(query)
#query.set_key(key, end_key)

View File

@ -60,8 +60,8 @@ class RemoteCDXSource(CDXSource):
remote_query = query
else:
# Only send url and matchType to remote
remote_query = CDXQuery(url=query.url,
matchType=query.match_type)
remote_query = CDXQuery(dict(url=query.url,
matchType=query.match_type))
urlparams = remote_query.urlencode()

View File

@ -5,8 +5,8 @@ from pywb.utils.canonicalize import calc_search_range
#=================================================================
class CDXQuery(object):
def __init__(self, **kwargs):
self.params = kwargs
def __init__(self, params):
self.params = params
url = self.url
if not self.params.get('matchType'):
if url.startswith('*.'):
@ -18,7 +18,7 @@ class CDXQuery(object):
else:
self.params['matchType'] = 'exact'
start, end = calc_search_range(url=url,
start, end = calc_search_range(url=self.url,
match_type=self.params['matchType'],
url_canon=self.params.get('_url_canon'))

View File

@ -29,7 +29,7 @@ def lazy_cdx_load(**params):
params['custom_ops'] = [raise_access_exception]
cdx_iter = cdx_load(['bogus ignored'],
CDXQuery(**params),
CDXQuery(params),
process=True)
# exception happens on first access attempt