mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Use DedupableMixin in RethinkCapturesDedup
I note that we didn't do any payload_size check at all here.
This commit is contained in:
parent
9dac806ca1
commit
255d359ad4
@@ -34,6 +34,7 @@ import threading
|
|||||||
import datetime
|
import datetime
|
||||||
import doublethink
|
import doublethink
|
||||||
import rethinkdb as r
|
import rethinkdb as r
|
||||||
|
from warcprox.dedup import DedupableMixin
|
||||||
|
|
||||||
class RethinkCaptures:
|
class RethinkCaptures:
|
||||||
"""Inserts in batches every 0.5 seconds"""
|
"""Inserts in batches every 0.5 seconds"""
|
||||||
@@ -215,10 +216,11 @@ class RethinkCaptures:
|
|||||||
if self._timer:
|
if self._timer:
|
||||||
self._timer.join()
|
self._timer.join()
|
||||||
|
|
||||||
class RethinkCapturesDedup(warcprox.dedup.DedupDb):
|
class RethinkCapturesDedup(warcprox.dedup.DedupDb, DedupableMixin):
|
||||||
logger = logging.getLogger("warcprox.dedup.RethinkCapturesDedup")
|
logger = logging.getLogger("warcprox.dedup.RethinkCapturesDedup")
|
||||||
|
|
||||||
def __init__(self, options=warcprox.Options()):
|
def __init__(self, options=warcprox.Options()):
|
||||||
|
DedupableMixin.__init__(self, options)
|
||||||
self.captures_db = RethinkCaptures(options=options)
|
self.captures_db = RethinkCaptures(options=options)
|
||||||
self.options = options
|
self.options = options
|
||||||
|
|
||||||
@@ -251,5 +253,6 @@ class RethinkCapturesDedup(warcprox.dedup.DedupDb):
|
|||||||
self.captures_db.close()
|
self.captures_db.close()
|
||||||
|
|
||||||
def notify(self, recorded_url, records):
|
def notify(self, recorded_url, records):
|
||||||
self.captures_db.notify(recorded_url, records)
|
if (records and records[0].type == b'response'
|
||||||
|
and self.should_dedup(recorded_url)):
|
||||||
|
self.captures_db.notify(recorded_url, records)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user