mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Use DedupableMixin in RethinkCapturesDedup
I note that we didn't do any payload_size check at all here.
This commit is contained in:
parent
9dac806ca1
commit
255d359ad4
@ -34,6 +34,7 @@ import threading
|
||||
import datetime
|
||||
import doublethink
|
||||
import rethinkdb as r
|
||||
from warcprox.dedup import DedupableMixin
|
||||
|
||||
class RethinkCaptures:
|
||||
"""Inserts in batches every 0.5 seconds"""
|
||||
@ -215,10 +216,11 @@ class RethinkCaptures:
|
||||
if self._timer:
|
||||
self._timer.join()
|
||||
|
||||
class RethinkCapturesDedup(warcprox.dedup.DedupDb):
|
||||
class RethinkCapturesDedup(warcprox.dedup.DedupDb, DedupableMixin):
|
||||
logger = logging.getLogger("warcprox.dedup.RethinkCapturesDedup")
|
||||
|
||||
def __init__(self, options=warcprox.Options()):
|
||||
DedupableMixin.__init__(self, options)
|
||||
self.captures_db = RethinkCaptures(options=options)
|
||||
self.options = options
|
||||
|
||||
@ -251,5 +253,6 @@ class RethinkCapturesDedup(warcprox.dedup.DedupDb):
|
||||
self.captures_db.close()
|
||||
|
||||
def notify(self, recorded_url, records):
|
||||
self.captures_db.notify(recorded_url, records)
|
||||
|
||||
if (records and records[0].type == b'response'
|
||||
and self.should_dedup(recorded_url)):
|
||||
self.captures_db.notify(recorded_url, records)
|
||||
|
Loading…
x
Reference in New Issue
Block a user