use kafka-python 1.0 recommended api; use kafka capture feed specified in warcprox-meta header, if any

This commit is contained in:
Noah Levitt 2016-03-03 18:58:52 +00:00
parent ee3ee5d621
commit 89f965d1d3
2 changed files with 9 additions and 7 deletions

View File

@ -19,7 +19,7 @@ class PyTest(TestCommand):
deps = [ deps = [
'certauth>=1.1.0', 'certauth>=1.1.0',
'warctools', 'warctools',
'kafka-python', 'kafka-python>=1.0.1',
'surt>=0.3b4', 'surt>=0.3b4',
'rethinkstuff', 'rethinkstuff',
'PySocks', 'PySocks',

View File

@ -9,8 +9,8 @@ class CaptureFeed:
def __init__(self, broker_list, topic): def __init__(self, broker_list, topic):
self.broker_list = broker_list self.broker_list = broker_list
self.topic = topic.encode('utf-8') self.topic = topic
self._producer = kafka.SimpleProducer(kafka.KafkaClient(broker_list)) self._producer = kafka.KafkaProducer(bootstrap_servers=broker_list)
def notify(self, recorded_url, records): def notify(self, recorded_url, records):
if records[0].type not in (b'revisit', b'response'): if records[0].type not in (b'revisit', b'response'):
@ -50,7 +50,9 @@ class CaptureFeed:
for (k,v) in recorded_url.warcprox_meta['capture-feed-extra-fields'].items(): for (k,v) in recorded_url.warcprox_meta['capture-feed-extra-fields'].items():
d[k] = v d[k] = v
msg = json.dumps(d, separators=(',', ':')).encode('utf-8') topic = recorded_url.warcprox_meta.get('capture-feed-topic', self.topic)
self.logger.debug('feeding kafka %s', msg)
self._producer.send_messages(self.topic, msg) msg = json.dumps(d, separators=(',', ':')).encode('utf-8')
self.logger.debug('feeding kafka topic=%s msg=%s', repr(topic), msg)
self._producer.send(topic, msg)