1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 08:04:49 +01:00

recorder: SkipDupePolicy only skips if url is an exact match (not just by urlkey)

This commit is contained in:
Ilya Kreymer 2016-04-07 10:44:05 -07:00
parent f4cc143dc7
commit 00bdddd1e9
2 changed files with 8 additions and 5 deletions

View File

@@ -23,20 +23,23 @@ class ExcludeSpecificHeaders(object):
# Revisit Policy
# ============================================================================
class WriteRevisitDupePolicy(object):
def __call__(self, cdx):
def __call__(self, cdx, params):
dt = timestamp_to_datetime(cdx['timestamp'])
return ('revisit', cdx['url'], datetime_to_iso_date(dt))
# ============================================================================
class SkipDupePolicy(object):
def __call__(self, cdx):
return 'skip'
def __call__(self, cdx, params):
if cdx['url'] == params['url']:
return 'skip'
else:
return 'write'
# ============================================================================
class WriteDupePolicy(object):
def __call__(self, cdx):
def __call__(self, cdx, params):
return 'write'

View File

@@ -73,7 +73,7 @@ class WritableRedisIndexer(RedisIndexSource):
cdx_iter, errs = self.cdx_lookup(params)
for cdx in cdx_iter:
res = self.dupe_policy(cdx)
res = self.dupe_policy(cdx, params)
if res:
return res