mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
fix typos
This commit is contained in:
parent
1dc7de7dd8
commit
419e5bc536
@ -117,7 +117,7 @@ class WarcWriterProcessor(warcprox.BaseStandardPostfetchProcessor):
|
|||||||
and "ait-job-id" in recorded_url.warcprox_meta["metadata"]
|
and "ait-job-id" in recorded_url.warcprox_meta["metadata"]
|
||||||
):
|
):
|
||||||
crawl_id = recorded_url.warcprox_meta["metadata"]["ait-job-id"]
|
crawl_id = recorded_url.warcprox_meta["metadata"]["ait-job-id"]
|
||||||
if recorded_url.payload_digest in revisits[crawl_id]:
|
if recorded_url.payload_digest in self.revisits[crawl_id]:
|
||||||
self.logger.info(
|
self.logger.info(
|
||||||
"Found duplicate revisit, skipping: %s, hash: %s",
|
"Found duplicate revisit, skipping: %s, hash: %s",
|
||||||
recorded_url.url,
|
recorded_url.url,
|
||||||
@ -125,7 +125,7 @@ class WarcWriterProcessor(warcprox.BaseStandardPostfetchProcessor):
|
|||||||
)
|
)
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
revisits[crawl_id].add(recorded_url.payload_digest)
|
self.revisits[crawl_id].add(recorded_url.payload_digest)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user