mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
cdx: CDXQuery takes params dict not **params
CDXObject comparison using to_json()
This commit is contained in:
parent
57991fd0cf
commit
0dff388e4e
@ -107,6 +107,7 @@ class CDXObject(OrderedDict):
|
|||||||
|
|
||||||
cdxline = cdxline.rstrip()
|
cdxline = cdxline.rstrip()
|
||||||
self._from_json = False
|
self._from_json = False
|
||||||
|
self._cached_json = None
|
||||||
|
|
||||||
# Allows for filling the fields later or in a custom way
|
# Allows for filling the fields later or in a custom way
|
||||||
if not cdxline:
|
if not cdxline:
|
||||||
@ -157,6 +158,9 @@ class CDXObject(OrderedDict):
|
|||||||
# force regen on next __str__ call
|
# force regen on next __str__ call
|
||||||
self.cdxline = None
|
self.cdxline = None
|
||||||
|
|
||||||
|
# force regen on next to_json() call
|
||||||
|
self._cached_json = None
|
||||||
|
|
||||||
def is_revisit(self):
|
def is_revisit(self):
|
||||||
"""return ``True`` if this record is a revisit record."""
|
"""return ``True`` if this record is a revisit record."""
|
||||||
return (self.get(MIMETYPE) == 'warc/revisit' or
|
return (self.get(MIMETYPE) == 'warc/revisit' or
|
||||||
@ -174,7 +178,7 @@ class CDXObject(OrderedDict):
|
|||||||
return str(self) + '\n'
|
return str(self) + '\n'
|
||||||
|
|
||||||
try:
|
try:
|
||||||
result = ' '.join(self[x] for x in fields) + '\n'
|
result = ' '.join(str(self[x]) for x in fields) + '\n'
|
||||||
except KeyError as ke:
|
except KeyError as ke:
|
||||||
msg = 'Invalid field "{0}" found in fields= argument'
|
msg = 'Invalid field "{0}" found in fields= argument'
|
||||||
msg = msg.format(ke.message)
|
msg = msg.format(ke.message)
|
||||||
@ -182,7 +186,6 @@ class CDXObject(OrderedDict):
|
|||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
def to_json(self, fields=None):
|
def to_json(self, fields=None):
|
||||||
return self.conv_to_json(self, fields)
|
return self.conv_to_json(self, fields)
|
||||||
|
|
||||||
@ -213,7 +216,7 @@ class CDXObject(OrderedDict):
|
|||||||
return self.cdxline.decode('utf-8')
|
return self.cdxline.decode('utf-8')
|
||||||
|
|
||||||
if not self._from_json:
|
if not self._from_json:
|
||||||
return ' '.join(val for n, val in six.iteritems(self))
|
return ' '.join(str(val) for val in six.itervalues(self))
|
||||||
else:
|
else:
|
||||||
return json_encode(self)
|
return json_encode(self)
|
||||||
|
|
||||||
@ -223,7 +226,13 @@ class CDXObject(OrderedDict):
|
|||||||
return prefix + self.conv_to_json(dupe, fields)
|
return prefix + self.conv_to_json(dupe, fields)
|
||||||
|
|
||||||
def __lt__(self, other):
|
def __lt__(self, other):
|
||||||
return str(self) < str(other)
|
if not self._cached_json:
|
||||||
|
self._cached_json = self.to_json()
|
||||||
|
|
||||||
|
if not other._cached_json:
|
||||||
|
other._cached_json = other.to_json()
|
||||||
|
|
||||||
|
return self._cached_json < other._cached_json
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
|
@ -69,7 +69,7 @@ class BaseCDXServer(object):
|
|||||||
|
|
||||||
def load_cdx(self, **params):
|
def load_cdx(self, **params):
|
||||||
params['_url_canon'] = self.url_canon
|
params['_url_canon'] = self.url_canon
|
||||||
query = CDXQuery(**params)
|
query = CDXQuery(params)
|
||||||
|
|
||||||
#key, end_key = self._calc_search_keys(query)
|
#key, end_key = self._calc_search_keys(query)
|
||||||
#query.set_key(key, end_key)
|
#query.set_key(key, end_key)
|
||||||
|
@ -60,8 +60,8 @@ class RemoteCDXSource(CDXSource):
|
|||||||
remote_query = query
|
remote_query = query
|
||||||
else:
|
else:
|
||||||
# Only send url and matchType to remote
|
# Only send url and matchType to remote
|
||||||
remote_query = CDXQuery(url=query.url,
|
remote_query = CDXQuery(dict(url=query.url,
|
||||||
matchType=query.match_type)
|
matchType=query.match_type))
|
||||||
|
|
||||||
urlparams = remote_query.urlencode()
|
urlparams = remote_query.urlencode()
|
||||||
|
|
||||||
|
@ -5,8 +5,8 @@ from pywb.utils.canonicalize import calc_search_range
|
|||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class CDXQuery(object):
|
class CDXQuery(object):
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, params):
|
||||||
self.params = kwargs
|
self.params = params
|
||||||
url = self.url
|
url = self.url
|
||||||
if not self.params.get('matchType'):
|
if not self.params.get('matchType'):
|
||||||
if url.startswith('*.'):
|
if url.startswith('*.'):
|
||||||
@ -18,7 +18,7 @@ class CDXQuery(object):
|
|||||||
else:
|
else:
|
||||||
self.params['matchType'] = 'exact'
|
self.params['matchType'] = 'exact'
|
||||||
|
|
||||||
start, end = calc_search_range(url=url,
|
start, end = calc_search_range(url=self.url,
|
||||||
match_type=self.params['matchType'],
|
match_type=self.params['matchType'],
|
||||||
url_canon=self.params.get('_url_canon'))
|
url_canon=self.params.get('_url_canon'))
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ def lazy_cdx_load(**params):
|
|||||||
params['custom_ops'] = [raise_access_exception]
|
params['custom_ops'] = [raise_access_exception]
|
||||||
|
|
||||||
cdx_iter = cdx_load(['bogus ignored'],
|
cdx_iter = cdx_load(['bogus ignored'],
|
||||||
CDXQuery(**params),
|
CDXQuery(params),
|
||||||
process=True)
|
process=True)
|
||||||
|
|
||||||
# exception happens on first access attempt
|
# exception happens on first access attempt
|
||||||
|
Loading…
x
Reference in New Issue
Block a user