mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
use kafka-python 1.0 recommended api; use kafka capture feed specified in warcprox-meta header, if any
This commit is contained in:
parent
ee3ee5d621
commit
89f965d1d3
2
setup.py
2
setup.py
@ -19,7 +19,7 @@ class PyTest(TestCommand):
|
||||
deps = [
|
||||
'certauth>=1.1.0',
|
||||
'warctools',
|
||||
'kafka-python',
|
||||
'kafka-python>=1.0.1',
|
||||
'surt>=0.3b4',
|
||||
'rethinkstuff',
|
||||
'PySocks',
|
||||
|
@ -9,8 +9,8 @@ class CaptureFeed:
|
||||
|
||||
def __init__(self, broker_list, topic):
|
||||
self.broker_list = broker_list
|
||||
self.topic = topic.encode('utf-8')
|
||||
self._producer = kafka.SimpleProducer(kafka.KafkaClient(broker_list))
|
||||
self.topic = topic
|
||||
self._producer = kafka.KafkaProducer(bootstrap_servers=broker_list)
|
||||
|
||||
def notify(self, recorded_url, records):
|
||||
if records[0].type not in (b'revisit', b'response'):
|
||||
@ -50,7 +50,9 @@ class CaptureFeed:
|
||||
for (k,v) in recorded_url.warcprox_meta['capture-feed-extra-fields'].items():
|
||||
d[k] = v
|
||||
|
||||
msg = json.dumps(d, separators=(',', ':')).encode('utf-8')
|
||||
self.logger.debug('feeding kafka %s', msg)
|
||||
self._producer.send_messages(self.topic, msg)
|
||||
topic = recorded_url.warcprox_meta.get('capture-feed-topic', self.topic)
|
||||
|
||||
msg = json.dumps(d, separators=(',', ':')).encode('utf-8')
|
||||
self.logger.debug('feeding kafka topic=%s msg=%s', repr(topic), msg)
|
||||
self._producer.send(topic, msg)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user