mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
handle old dedup entries missing "warc_id"
This commit is contained in:
parent
422672408a
commit
2bec9db7df
@ -143,8 +143,9 @@ class RethinkCapturesDedup:
|
|||||||
dedup_info = {
|
dedup_info = {
|
||||||
"url": entry["url"].encode("utf-8"),
|
"url": entry["url"].encode("utf-8"),
|
||||||
"date": entry["timestamp"].strftime("%Y-%m-%dT%H:%M:%SZ").encode("utf-8"),
|
"date": entry["timestamp"].strftime("%Y-%m-%dT%H:%M:%SZ").encode("utf-8"),
|
||||||
"id": entry["warc_id"].encode("utf-8")
|
|
||||||
}
|
}
|
||||||
|
if "warc_id" in entry:
|
||||||
|
dedup_info["id"] = entry["warc_id"].encode("utf-8")
|
||||||
return dedup_info
|
return dedup_info
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
Loading…
x
Reference in New Issue
Block a user