mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Fix bug with dedup_info date encoding
This commit is contained in:
parent
59e995ccdf
commit
a0821575b4
@ -209,7 +209,7 @@ class CdxServerDedup(object):
|
|||||||
logger = logging.getLogger("warcprox.dedup.CdxServerDedup")
|
logger = logging.getLogger("warcprox.dedup.CdxServerDedup")
|
||||||
http_pool = urllib3.PoolManager()
|
http_pool = urllib3.PoolManager()
|
||||||
|
|
||||||
def __init__(self, cdx_url="https://web.archive.org/cdx/search/cdx",
|
def __init__(self, cdx_url="https://web.archive.org/cdx/search",
|
||||||
options=warcprox.Options()):
|
options=warcprox.Options()):
|
||||||
self.cdx_url = cdx_url
|
self.cdx_url = cdx_url
|
||||||
self.options = options
|
self.options = options
|
||||||
@ -237,7 +237,7 @@ class CdxServerDedup(object):
|
|||||||
u = url.decode("utf-8") if isinstance(url, bytes) else url
|
u = url.decode("utf-8") if isinstance(url, bytes) else url
|
||||||
try:
|
try:
|
||||||
result = self.http_pool.request('GET', self.cdx_url, fields=dict(
|
result = self.http_pool.request('GET', self.cdx_url, fields=dict(
|
||||||
url=u, fl="timestamp,digest", limit=-1))
|
url=u, fl="timestamp,digest", limit=-10))
|
||||||
assert result.status == 200
|
assert result.status == 200
|
||||||
if isinstance(digest_key, bytes):
|
if isinstance(digest_key, bytes):
|
||||||
dkey = digest_key
|
dkey = digest_key
|
||||||
@ -249,8 +249,8 @@ class CdxServerDedup(object):
|
|||||||
(cdx_ts, cdx_digest) = line.split(b' ')
|
(cdx_ts, cdx_digest) = line.split(b' ')
|
||||||
if cdx_digest == dkey:
|
if cdx_digest == dkey:
|
||||||
dt = datetime(*_split_timestamp(cdx_ts.decode('ascii')))
|
dt = datetime(*_split_timestamp(cdx_ts.decode('ascii')))
|
||||||
return dict(url=url,
|
date = dt.strftime('%Y-%m-%dT%H:%M:%SZ').encode('utf-8')
|
||||||
date=dt.strftime('%Y-%m-%dT%H:%M:%SZ'))
|
return dict(url=url, date=date)
|
||||||
except (HTTPError, AssertionError, ValueError) as exc:
|
except (HTTPError, AssertionError, ValueError) as exc:
|
||||||
self.logger.error('CdxServerDedup request failed for url=%s %s',
|
self.logger.error('CdxServerDedup request failed for url=%s %s',
|
||||||
url, exc)
|
url, exc)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user