mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
handle old dedup entries missing "warc_id"
This commit is contained in:
parent
422672408a
commit
2bec9db7df
@@ -143,8 +143,9 @@ class RethinkCapturesDedup:
|
||||
dedup_info = {
|
||||
"url": entry["url"].encode("utf-8"),
|
||||
"date": entry["timestamp"].strftime("%Y-%m-%dT%H:%M:%SZ").encode("utf-8"),
|
||||
"id": entry["warc_id"].encode("utf-8")
|
||||
}
|
||||
if "warc_id" in entry:
|
||||
dedup_info["id"] = entry["warc_id"].encode("utf-8")
|
||||
return dedup_info
|
||||
else:
|
||||
return None
|
||||
|
Loading…
x
Reference in New Issue
Block a user