mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Filter out warc/revisit records in CdxServerDedup
This commit is contained in:
parent
202d664f39
commit
f77aef9110
@@ -215,7 +215,8 @@ class CdxServerDedup(object):
|
|||||||
u = url.decode("utf-8") if isinstance(url, bytes) else url
|
u = url.decode("utf-8") if isinstance(url, bytes) else url
|
||||||
try:
|
try:
|
||||||
result = self.http_pool.request('GET', self.cdx_url, fields=dict(
|
result = self.http_pool.request('GET', self.cdx_url, fields=dict(
|
||||||
url=u, fl="timestamp,digest", limit=-10))
|
url=u, fl="timestamp,digest", filter="!mimetype:warc/revisit",
|
||||||
|
limit=-10))
|
||||||
assert result.status == 200
|
assert result.status == 200
|
||||||
if isinstance(digest_key, bytes):
|
if isinstance(digest_key, bytes):
|
||||||
dkey = digest_key
|
dkey = digest_key
|
||||||
|
Loading…
x
Reference in New Issue
Block a user