Use DedupableMixin in RethinkCapturesDedup

Note that, before this change, no payload_size check was performed here at all.
This commit is contained in:
Vangelis Banos 2018-04-24 17:06:56 +00:00
parent 9dac806ca1
commit 255d359ad4

View File

@ -34,6 +34,7 @@ import threading
import datetime
import doublethink
import rethinkdb as r
from warcprox.dedup import DedupableMixin
class RethinkCaptures:
"""Inserts in batches every 0.5 seconds"""
@ -215,10 +216,11 @@ class RethinkCaptures:
if self._timer:
self._timer.join()
class RethinkCapturesDedup(warcprox.dedup.DedupDb):
class RethinkCapturesDedup(warcprox.dedup.DedupDb, DedupableMixin):
logger = logging.getLogger("warcprox.dedup.RethinkCapturesDedup")
def __init__(self, options=warcprox.Options()):
DedupableMixin.__init__(self, options)
self.captures_db = RethinkCaptures(options=options)
self.options = options
@ -251,5 +253,6 @@ class RethinkCapturesDedup(warcprox.dedup.DedupDb):
self.captures_db.close()
def notify(self, recorded_url, records):
self.captures_db.notify(recorded_url, records)
if (records and records[0].type == b'response'
and self.should_dedup(recorded_url)):
self.captures_db.notify(recorded_url, records)