mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Filter out warc/revisit records in CdxServerDedup
This commit is contained in:
parent
202d664f39
commit
f77aef9110
@@ -215,7 +215,8 @@ class CdxServerDedup(object):
         u = url.decode("utf-8") if isinstance(url, bytes) else url
         try:
             result = self.http_pool.request('GET', self.cdx_url, fields=dict(
-                url=u, fl="timestamp,digest", limit=-10))
+                url=u, fl="timestamp,digest", filter="!mimetype:warc/revisit",
+                limit=-10))
             assert result.status == 200
             if isinstance(digest_key, bytes):
                 dkey = digest_key
Loading…
x
Reference in New Issue
Block a user