From 6beb19dc16bb60fa228f271c9eb9de29db203c64 Mon Sep 17 00:00:00 2001 From: Vangelis Banos Date: Wed, 25 Oct 2017 20:28:56 +0000 Subject: [PATCH] Expand comment with limit=-1 explanation --- warcprox/dedup.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/warcprox/dedup.py b/warcprox/dedup.py index 46f3c40..e70f5f9 100644 --- a/warcprox/dedup.py +++ b/warcprox/dedup.py @@ -206,6 +206,10 @@ class CdxServerDedup(object): computed on the original content, after decoding Content-Encoding and Transfer-Encoding, if any), if they match, write a revisit record. + Get only the last item (limit=-1) because Wayback Machine has a special + performance optimisation to handle that. limit < 0 is very inefficient + in general. Maybe it could be configurable in the future. + :param digest_key: b'sha1:' (prefix is optional). Example: b'sha1:B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A' :param url: Target URL string