Pass the lookup params to every dupe policy as a second call argument.
SkipDupePolicy now returns 'skip' only when the matched cdx url equals
params['url'] and returns 'write' otherwise; the other two policies accept
the new argument without using it.
NOTE(review): blank context lines and indentation were restored from the hunk
line counts after the patch had been collapsed onto one line; confirm against
the target files before applying.

diff --git a/recorder/filters.py b/recorder/filters.py
index b2ffc65f..c9ab74ee 100644
--- a/recorder/filters.py
+++ b/recorder/filters.py
@@ -23,20 +23,23 @@ class ExcludeSpecificHeaders(object):
 # Revisit Policy
 # ============================================================================
 class WriteRevisitDupePolicy(object):
-    def __call__(self, cdx):
+    def __call__(self, cdx, params):
         dt = timestamp_to_datetime(cdx['timestamp'])
         return ('revisit', cdx['url'], datetime_to_iso_date(dt))
 
 
 # ============================================================================
 class SkipDupePolicy(object):
-    def __call__(self, cdx):
-        return 'skip'
+    def __call__(self, cdx, params):
+        if cdx['url'] == params['url']:
+            return 'skip'
+        else:
+            return 'write'
 
 
 # ============================================================================
 class WriteDupePolicy(object):
-    def __call__(self, cdx):
+    def __call__(self, cdx, params):
         return 'write'
 
 
diff --git a/recorder/redisindexer.py b/recorder/redisindexer.py
index 886cb62a..c3fa1c93 100644
--- a/recorder/redisindexer.py
+++ b/recorder/redisindexer.py
@@ -73,7 +73,7 @@ class WritableRedisIndexer(RedisIndexSource):
         cdx_iter, errs = self.cdx_lookup(params)
 
         for cdx in cdx_iter:
-            res = self.dupe_policy(cdx)
+            res = self.dupe_policy(cdx, params)
             if res:
                 return res
 