slightly less incomplete work on new postfetch processor chain

Noah Levitt 2018-01-12 14:58:26 -08:00
parent c715eaba4e
commit bd25991a0d
5 changed files with 146 additions and 240 deletions

warcprox/__init__.py

@@ -19,8 +19,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
 USA.
 """
+import sys
 import datetime
 import threading
+import time
+import logging
 from argparse import Namespace as _Namespace
 from pkg_resources import get_distribution as _get_distribution
 
 __version__ = _get_distribution('warcprox').version
@@ -28,6 +31,7 @@ try:
     import queue
 except ImportError:
     import Queue as queue
+
 def digest_str(hash_obj, base32=False):
     import base64
     return hash_obj.name.encode('utf-8') + b':' + (
@@ -93,15 +97,17 @@ class RequestBlockedByRule(Exception):
         return "%s: %s" % (self.__class__.__name__, self.msg)
 
 class BasePostfetchProcessor(threading.Thread):
-    def __init__(self, inq, outq, profile=False):
+    logger = logging.getLogger("warcprox.BasePostfetchProcessor")
+
+    def __init__(self, inq, outq, options=Options()):
         threading.Thread.__init__(self, name='???')
         self.inq = inq
         self.outq = outq
+        self.options = options
         self.stop = threading.Event()
-        self.profile = profile
 
     def run(self):
-        if self.profile:
+        if self.options.profile:
             import cProfile
             self.profiler = cProfile.Profile()
             self.profiler.enable()
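The new constructor contract means every chain member is just a thread built as processor(inq, outq, options). A minimal sketch of a custom processor under that contract (NoopCounter is a hypothetical name; it assumes BaseStandardPostfetchProcessor's run loop pulls each RecordedUrl from inq, hands it to _process_url(), and forwards it to outq when outq is not None):

    import warcprox

    class NoopCounter(warcprox.BaseStandardPostfetchProcessor):
        def __init__(self, inq, outq, options=warcprox.Options()):
            warcprox.BaseStandardPostfetchProcessor.__init__(self, inq, outq, options)
            self.count = 0

        def _process_url(self, recorded_url):
            # the base class handles all the queue plumbing
            self.count += 1

    # wiring, mirroring what the controller does:
    # processor = NoopCounter(inq, outq, options)
    # processor.start()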
@@ -186,9 +192,15 @@ class ListenerPostfetchProcessor(BaseStandardPostfetchProcessor):
     def _process_url(self, recorded_url):
         return self.listener.notify(recorded_url, recorded_url.warc_records)
 
+    # @classmethod
+    # def wrap(cls, listener, inq, outq, profile=False):
+    #     if listener:
+    #         return cls(listener, inq, outq, profile)
+    #     else:
+    #         return None
+
 # monkey-patch log levels TRACE and NOTICE
 TRACE = 5
-import logging
 def _logger_trace(self, msg, *args, **kwargs):
     if self.isEnabledFor(TRACE):
         self._log(TRACE, msg, args, **kwargs)
@@ -197,7 +209,6 @@
 logging.trace = logging.root.trace
 logging.addLevelName(TRACE, 'TRACE')
 NOTICE = (logging.INFO + logging.WARN) // 2
-import logging
 def _logger_notice(self, msg, *args, **kwargs):
     if self.isEnabledFor(NOTICE):
         self._log(NOTICE, msg, args, **kwargs)
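Since the patches run at import time, any code that imports warcprox can use the extra levels like the stock ones. An illustration (assuming the patch also attaches trace() and notice() to logging.Logger, which the visible logging.trace = logging.root.trace line implies happens just off-screen):

    import logging
    import warcprox  # importing warcprox installs TRACE and NOTICE

    logging.basicConfig(level=5)  # TRACE == 5, one notch below DEBUG (10)
    logger = logging.getLogger('example')
    logger.trace('very chatty diagnostic')
    logger.notice('noteworthy, but not a warning')  # NOTICE == (20 + 30) // 2 == 25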

warcprox/controller.py

@@ -32,6 +32,7 @@ import gc
 import datetime
 import warcprox
 import certauth
+import functools
 
 class Factory:
     @staticmethod
@@ -60,7 +61,8 @@ class Factory:
             logging.info('statistics tracking disabled')
             stats_db = None
         else:
-            stats_db = warcprox.stats.StatsDb(options.stats_db_file, options=options)
+            stats_db = warcprox.stats.StatsDb(
+                    options.stats_db_file, options=options)
         return stats_db
 
     # @staticmethod
@@ -70,6 +72,10 @@ class Factory:
     #             options.cacert, args.certs_dir, ca_name=ca_name)
     #     return ca
 
+    @staticmethod
+    def warc_writer(inq, outq, options):
+        return warcprox.writerthread.WarcWriterThread(inq, outq, options)
+
     @staticmethod
     def playback_proxy(options):
         if options.playback_port is not None:
@@ -86,48 +92,32 @@
     def crawl_logger(options):
         if options.crawl_log_dir:
             return warcprox.crawl_log.CrawlLogger(
-                    options.crawl_log_dir, options=options))
+                    options.crawl_log_dir, options=options)
         else:
             return None
 
     @staticmethod
-    def plugin(qualname, inq, outq):
+    def plugin(qualname):
         try:
             (module_name, class_name) = qualname.rsplit('.', 1)
             module_ = importlib.import_module(module_name)
             class_ = getattr(module_, class_name)
-            instance = class_()
+            listener = class_()
             plugin.notify # make sure it has this method
-            return instance
+            return plugin
         except Exception as e:
             logging.fatal('problem with plugin class %r: %s', qualname, e)
             sys.exit(1)
 
-    # @staticmethod
-    # def plugins(options):
-    #     plugins = []
-    #     for qualname in options.plugins or []:
-    #         try:
-    #             (module_name, class_name) = qualname.rsplit('.', 1)
-    #             module_ = importlib.import_module(module_name)
-    #             class_ = getattr(module_, class_name)
-    #             plugin = class_()
-    #             plugin.notify # make sure it has this method
-    #             plugins.append(plugin)
-    #         except Exception as e:
-    #             logging.fatal('problem with plugin class %r: %s', qualname, e)
-    #             sys.exit(1)
-    #     return plugins
-
-    # @staticmethod
-    # def service_registry(options):
-    #     if options.rethinkdb_services_url:
-    #         parsed = doublethink.parse_rethinkdb_url(
-    #                 options.rethinkdb_services_url)
-    #         rr = doublethink.Rethinker(servers=parsed.hosts, db=parsed.database)
-    #         return doublethink.ServiceRegistry(rr, table=parsed.table)
-    #     else:
-    #         return None
+    @staticmethod
+    def service_registry(options):
+        if options.rethinkdb_services_url:
+            parsed = doublethink.parse_rethinkdb_url(
+                    options.rethinkdb_services_url)
+            rr = doublethink.Rethinker(servers=parsed.hosts, db=parsed.database)
+            return doublethink.ServiceRegistry(rr, table=parsed.table)
+        else:
+            return None
 
 class WarcproxController(object):
     logger = logging.getLogger("warcprox.controller.WarcproxController")
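Note that Factory.plugin() as committed assigns listener = class_() but then touches plugin.notify and returns plugin, which would raise NameError if actually called; fitting for a commit titled "slightly less incomplete". A sketch of the apparent intent, matching the old commented-out Factory.plugins() body (load_plugin is a hypothetical stand-in name):

    import importlib

    def load_plugin(qualname):
        module_name, class_name = qualname.rsplit('.', 1)
        module_ = importlib.import_module(module_name)
        class_ = getattr(module_, class_name)
        plugin = class_()
        plugin.notify  # duck-type check: instance must have a notify() method
        return plugin

    # e.g. load_plugin('mypkg.MyListener') yields an object whose
    # notify(recorded_url, warc_records) ListenerPostfetchProcessor will call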
@@ -148,64 +138,54 @@
         self.proxy = warcprox.warcproxy.WarcProxy(options=options)
-        self.build_postfetch_chain(proxy.recorded_url_q)
-
-        # if warc_writer_threads is not None:
-        #     self.warc_writer_threads = warc_writer_threads
-        # else:
-        #     self.warc_writer_threads = [
-        #             warcprox.writerthread.WarcWriterThread(
-        #                 name='WarcWriterThread%03d' % i,
-        #                 recorded_url_q=self.proxy.recorded_url_q,
-        #                 listeners=[self.proxy.running_stats], options=options)
-        #             for i in range(int(self.proxy.max_threads ** 0.5))]
-        # self.playback_proxy = playback_proxy
-        # self.service_registry = service_registry
+        self.build_postfetch_chain(self.proxy.recorded_url_q)
+
+        self.service_registry = Factory.service_registry(options)
 
     def build_postfetch_chain(self, inq):
-        outq = warcprox.TimestampedQueue(maxsize=self.options.queue_size)
-
-        def maybe_add_to_chain(processor_init):
-            processor = processor_init(inq, outq, self.options)
-            if processor:
+        constructors = []
+
+        self.dedup_db = Factory.dedup_db(self.options)
+        if self.dedup_db:
+            constructors.append(self.dedup_db.loader)
+
+        constructors.append(Factory.warc_writer)
+
+        if self.dedup_db:
+            constructors.append(self.dedup_db.storer)
+
+        stats_db = Factory.stats_db(self.options)
+        if stats_db:
+            constructors.append(functools.partial(
+                warcprox.ListenerPostfetchProcessor, stats_db))
+
+        self.playback_proxy = Factory.playback_proxy(self.options)
+        if self.playback_proxy:
+            constructors.append(functools.partial(
+                warcprox.ListenerPostfetchProcessor,
+                self.playback_proxy.playback_index_db))
+
+        crawl_logger = Factory.crawl_logger(self.options)
+        if crawl_logger:
+            constructors.append(functools.partial(
+                warcprox.ListenerPostfetchProcessor, crawl_logger))
+
+        for qualname in self.options.plugins or []:
+            plugin = Factory.plugin(qualname)
+            constructors.append(functools.partial(
+                warcprox.ListenerPostfetchProcessor, plugin))
+
+        self._postfetch_chain = []
+        for i, constructor in enumerate(constructors):
+            if i != len(constructors) - 1:
+                outq = warcprox.TimestampedQueue(
+                        maxsize=self.options.queue_size)
+            else:
+                outq = None
+            processor = constructor(inq, outq, self.options)
             self._postfetch_chain.append(processor)
             inq = outq
-            outq = warcprox.TimestampedQueue(maxsize=self.options.queue_size)
-
-        self.dedup_db = Factory.dedup_db(options)
-
-        # dedup loader
-        if self.dedup_db:
-            maybe_add_to_chain(self.dedup_db.loader)
-
-        # warc writer
-        maybe_add_to_chain(Factory.warc_writer)
-
-        # dedup storer
-        if self.dedup_db:
-            maybe_add_to_chain(self.dedup_db.storer)
-
-        # playback index storer
-        # XXX XXX XXX FIXME
-        # self.playback_proxy = Factory.playback_proxy(options)
-        # if self.playback_proxy:
-        #     maybe_add_to_chain()
-        #     outq = warcprox.TimestampedQueue(maxsize=self.options.queue_size)
-        #     processor = self.playback_proxy.playback_index_db(inq, outq)
-        #     self._postfetch_chain.append(processor)
-        #     inq = outq
-
-        # stats db
-        maybe_add_to_chain(Factory.stats_db)
-
-        # crawl logger
-        maybe_add_to_chain(Factory.crawl_logger)
-
-        for qualname in self.options.plugins:
-            maybe_add_to_chain(
-                    lambda inq, outq, options: Factory.plugin(qualname, inq, outq))
-
-        # self.plugins = Factory.plugins(options)
 
     def debug_mem(self):
         self.logger.info("self.proxy.recorded_url_q.qsize()=%s", self.proxy.recorded_url_q.qsize())
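The wiring pattern deserves a gloss: every entry in constructors is a callable of shape constructor(inq, outq, options), whether a bound method (self.dedup_db.loader), a plain factory (Factory.warc_writer), or a functools.partial pre-binding a listener to ListenerPostfetchProcessor. Queues are then threaded hand over hand, and only the last stage gets outq=None. Distilled into a standalone sketch (plain queue.Queue standing in for warcprox.TimestampedQueue):

    import queue

    def build_chain(constructors, inq, options, maxsize=1000):
        chain = []
        for i, constructor in enumerate(constructors):
            last = i == len(constructors) - 1
            outq = None if last else queue.Queue(maxsize=maxsize)
            chain.append(constructor(inq, outq, options))
            inq = outq  # this stage's output feeds the next stage's input
        return chain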
@@ -293,26 +273,14 @@
                 self.logger.info('warcprox is already running')
                 return
 
-            if self.proxy.stats_db:
-                self.proxy.stats_db.start()
-
             self.proxy_thread = threading.Thread(
                     target=self.proxy.serve_forever, name='ProxyThread')
             self.proxy_thread.start()
 
-            assert(all(
-                wwt.dedup_db is self.warc_writer_threads[0].dedup_db
-                for wwt in self.warc_writer_threads))
-            if any((t.dedup_db for t in self.warc_writer_threads)):
-                self.warc_writer_threads[0].dedup_db.start()
-
-            for wwt in self.warc_writer_threads:
-                wwt.start()
-
-            if self.playback_proxy is not None:
-                self.playback_proxy_thread = threading.Thread(
-                    target=self.playback_proxy.serve_forever,
-                    name='PlaybackProxyThread')
-                self.playback_proxy_thread.start()
+            for processor in self._postfetch_chain:
+                # logging.info('starting postfetch processor %r', processor)
+                processor.start()
+                logging.info('started postfetch processor %r', processor)
 
     def shutdown(self):
         with self._start_stop_lock:
@@ -320,30 +288,34 @@
                 self.logger.info('warcprox is not running')
                 return
 
-            for wwt in self.warc_writer_threads:
-                wwt.stop.set()
+            # for wwt in self.warc_writer_threads:
+            #     wwt.stop.set()
+            for processor in self._postfetch_chain:
+                processor.stop.set()
 
             self.proxy.shutdown()
             self.proxy.server_close()
 
-            if self.playback_proxy is not None:
-                self.playback_proxy.shutdown()
-                self.playback_proxy.server_close()
-                if self.playback_proxy.playback_index_db is not None:
-                    self.playback_proxy.playback_index_db.close()
-
-            # wait for threads to finish
-            for wwt in self.warc_writer_threads:
-                wwt.join()
-            if self.proxy.stats_db:
-                self.proxy.stats_db.stop()
-            self.proxy_thread.join()
-            if self.playback_proxy is not None:
-                self.playback_proxy_thread.join()
-            if self.service_registry and hasattr(self, "status_info"):
-                self.service_registry.unregister(self.status_info["id"])
+            for processor in self._postfetch_chain:
+                processor.join()
+
+            # if self.playback_proxy is not None:
+            #     self.playback_proxy.shutdown()
+            #     self.playback_proxy.server_close()
+            #     if self.playback_proxy.playback_index_db is not None:
+            #         self.playback_proxy.playback_index_db.close()
+
+            # # wait for threads to finish
+            # for wwt in self.warc_writer_threads:
+            #     wwt.join()
+            # if self.proxy.stats_db:
+            #     self.proxy.stats_db.stop()
+            # self.proxy_thread.join()
+            # if self.playback_proxy is not None:
+            #     self.playback_proxy_thread.join()
+            # if self.service_registry and hasattr(self, "status_info"):
+            #     self.service_registry.unregister(self.status_info["id"])
 
     def run_until_shutdown(self):
         """

warcprox/dedup.py

@@ -35,6 +35,14 @@ from urllib3.exceptions import HTTPError
 
 urllib3.disable_warnings()
 
+class DedupLoader(warcprox.BaseStandardPostfetchProcessor):
+    def __init__(self, dedup_db, inq, outq, base32=False, profile=False):
+        warcprox.BaseStandardPostfetchProcessor.__init__(self, inq, outq, profile)
+        self.dedup_db = dedup_db
+        self.base32 = base32
+
+    def _process_url(self, recorded_url):
+        decorate_with_dedup_info(self.dedup_db, recorded_url, self.base32)
+
 class DedupDb(object):
     logger = logging.getLogger("warcprox.dedup.DedupDb")
@@ -61,6 +69,12 @@ class DedupDb(object):
         conn.commit()
         conn.close()
 
+    def loader(self, inq, outq, profile=False):
+        return DedupLoader(self, inq, outq, self.options.base32, profile)
+
+    def storer(self, inq, outq, profile=False):
+        return warcprox.ListenerPostfetchProcessor(self, inq, outq, profile)
+
     def save(self, digest_key, response_record, bucket=""):
         record_id = response_record.get_header(warctools.WarcRecord.ID).decode('latin1')
         url = response_record.get_header(warctools.WarcRecord.URL).decode('latin1')
@@ -106,20 +120,20 @@
         else:
             self.save(digest_key, records[0])
 
 def decorate_with_dedup_info(dedup_db, recorded_url, base32=False):
     if (recorded_url.response_recorder
             and recorded_url.payload_digest
             and recorded_url.response_recorder.payload_size() > 0):
         digest_key = warcprox.digest_str(recorded_url.payload_digest, base32)
         if recorded_url.warcprox_meta and "captures-bucket" in recorded_url.warcprox_meta:
-            recorded_url.dedup_info = dedup_db.lookup(digest_key, recorded_url.warcprox_meta["captures-bucket"],
+            recorded_url.dedup_info = dedup_db.lookup(
+                    digest_key, recorded_url.warcprox_meta["captures-bucket"],
                     recorded_url.url)
         else:
-            recorded_url.dedup_info = dedup_db.lookup(digest_key,
-                    url=recorded_url.url)
+            recorded_url.dedup_info = dedup_db.lookup(
+                    digest_key, url=recorded_url.url)
 
-class RethinkDedupDb:
+class RethinkDedupDb(DedupDb):
     logger = logging.getLogger("warcprox.dedup.RethinkDedupDb")
 
     def __init__(self, options=warcprox.Options()):
@@ -181,7 +195,7 @@ class RethinkDedupDb:
         else:
             self.save(digest_key, records[0])
 
-class CdxServerDedup(object):
+class CdxServerDedup(DedupDb):
     """Query a CDX server to perform deduplication.
     """
     logger = logging.getLogger("warcprox.dedup.CdxServerDedup")
@@ -244,7 +258,7 @@
     """
     pass
 
-class TroughDedupDb(object):
+class TroughDedupDb(DedupDb):
     '''
     https://github.com/internetarchive/trough
     '''
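Stage order is the whole point of DedupLoader: it has to run before the warc writer so that recorded_url.dedup_info can influence whether a revisit record gets written, while the storer (a ListenerPostfetchProcessor wrapping the db) runs after the writer, once recorded_url.warc_records exists. Every variant keys on the same digest string; for reference, a digest_key like those passed to lookup() and save() can be built with digest_str from warcprox/__init__.py (payload bytes invented):

    import hashlib
    import warcprox

    h = hashlib.sha1(b'example payload bytes')
    key = warcprox.digest_str(h, base32=True)
    # -> b'sha1:<base32 digest>', the key format DedupDb stores and looks up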

warcprox/main.py

@@ -198,12 +198,12 @@ def dump_state(signum=None, frame=None):
             'dumping state (caught signal %s)\n%s',
             signum, '\n'.join(state_strs))
 
-def init_controller(args):
+def parse_args(argv):
     '''
-    Creates a warcprox.controller.WarcproxController configured according to
-    the supplied arguments (normally the result of parse_args(sys.argv)).
+    Parses command line arguments with argparse.
     '''
-    options = warcprox.Options(**vars(args))
+    arg_parser = _build_arg_parser(prog=os.path.basename(argv[0]))
+    args = arg_parser.parse_args(args=argv[1:])
 
     try:
         hashlib.new(args.digest_algorithm)
@@ -211,19 +211,6 @@ def init_controller(args):
         logging.fatal(e)
         exit(1)
 
-    controller = warcprox.controller.WarcproxController(
-            proxy, warc_writer_threads, playback_proxy,
-            service_registry=svcreg, options=options)
-
-    return controller
-
-def parse_args(argv):
-    '''
-    Parses command line arguments with argparse.
-    '''
-    arg_parser = _build_arg_parser(prog=os.path.basename(argv[0]))
-    args = arg_parser.parse_args(args=argv[1:])
-
     return args
 
 def main(argv=None):
@@ -249,7 +236,8 @@ def main(argv=None):
     # see https://github.com/pyca/cryptography/issues/2911
     cryptography.hazmat.backends.openssl.backend.activate_builtin_random()
 
-    controller = init_controller(args)
+    options = warcprox.Options(**vars(args))
+    controller = warcprox.controller.WarcproxController(options)
 
     signal.signal(signal.SIGTERM, lambda a,b: controller.stop.set())
     signal.signal(signal.SIGINT, lambda a,b: controller.stop.set())
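The entry point now reduces to: parse arguments, wrap them in Options, and let WarcproxController build its own proxy and postfetch chain. In miniature (start() is a hypothetical wrapper; main() above presumably goes on to call controller.run_until_shutdown()):

    import warcprox
    import warcprox.controller

    def start(argv):
        args = parse_args(argv)  # parse_args as defined above
        options = warcprox.Options(**vars(args))
        return warcprox.controller.WarcproxController(options)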

warcprox/writerthread.py

@@ -2,7 +2,7 @@
 warcprox/writerthread.py - warc writer thread, reads from the recorded url
 queue, writes warc records, runs final tasks after warc records are written
 
-Copyright (C) 2013-2017 Internet Archive
+Copyright (C) 2013-2018 Internet Archive
 
 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
@@ -28,44 +28,34 @@ except ImportError:
     import Queue as queue
 
 import logging
-import threading
 import time
-from datetime import datetime
-from hanzo import warctools
 import warcprox
-import sys
 
-class WarcWriterThread(threading.Thread):
-    logger = logging.getLogger("warcprox.warcproxwriter.WarcWriterThread")
+class WarcWriterThread(warcprox.BaseStandardPostfetchProcessor):
+    logger = logging.getLogger("warcprox.writerthread.WarcWriterThread")
 
-    def __init__(
-            self, recorded_url_q, name='WarcWriterThread', writer_pool=None,
-            dedup_db=None, listeners=[], options=warcprox.Options()):
-        """recorded_url_q is a queue.Queue of warcprox.warcprox.RecordedUrl."""
-        threading.Thread.__init__(self, name=name)
-        self.recorded_url_q = recorded_url_q
-        self.stop = threading.Event()
-        if writer_pool:
-            self.writer_pool = writer_pool
-        else:
-            self.writer_pool = warcprox.writer.WarcWriterPool()
-        self.dedup_db = dedup_db
-        self.listeners = listeners
+    def __init__(self, inq, outq, options=warcprox.Options()):
+        warcprox.BaseStandardPostfetchProcessor.__init__(
+                self, inq, outq, options=options)
         self.options = options
-        self.idle = None
+        self.writer_pool = warcprox.writer.WarcWriterPool(options)
         self.method_filter = set(method.upper() for method in self.options.method_filter or [])
 
-    def run(self):
-        if self.options.profile:
-            import cProfile
-            self.profiler = cProfile.Profile()
-            self.profiler.enable()
-            self._run()
-            self.profiler.disable()
-        else:
-            self._run()
+    def _get_process_put(self):
+        try:
+            warcprox.BaseStandardPostfetchProcessor._get_process_put(self)
+        finally:
+            self.writer_pool.maybe_idle_rollover()
+
+    def _process_url(self, recorded_url):
+        if self._should_archive(recorded_url):
+            records = self.writer_pool.write_records(recorded_url)
+            recorded_url.warc_records = records
+            self._log(recorded_url, records)
+        # try to release resources in a timely fashion
+        if recorded_url.response_recorder and recorded_url.response_recorder.tempfile:
+            recorded_url.response_recorder.tempfile.close()
+
+    _ALWAYS_ACCEPT = {'WARCPROX_WRITE_RECORD'}
 
     def _filter_accepts(self, recorded_url):
         if not self.method_filter:
             return True
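Because the writer now records its output on the url itself (recorded_url.warc_records = records), downstream stages can consume written records without being coupled to the writer. Any object with a notify() method can ride the chain via ListenerPostfetchProcessor; a hypothetical listener:

    class RecordCounter:
        '''Toy listener: counts warc records as they flow past.'''
        def __init__(self):
            self.count = 0

        def notify(self, recorded_url, warc_records):
            self.count += len(warc_records or [])

    # wired in the same way as stats_db or crawl_logger above:
    # constructors.append(functools.partial(
    #         warcprox.ListenerPostfetchProcessor, RecordCounter()))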
@@ -81,68 +71,9 @@
         # special warc name prefix '-' means "don't archive"
         return prefix != '-' and self._filter_accepts(recorded_url)
 
-    def _run(self):
-        self.name = '%s(tid=%s)'% (self.name, warcprox.gettid())
-        while not self.stop.is_set():
-            try:
-                while True:
-                    try:
-                        if self.stop.is_set():
-                            qsize = self.recorded_url_q.qsize()
-                            if qsize % 50 == 0:
-                                self.logger.info("%s urls left to write", qsize)
-                        recorded_url = self.recorded_url_q.get(block=True, timeout=0.5)
-                        records = []
-                        self.idle = None
-                        if self._should_archive(recorded_url):
-                            if self.dedup_db:
-                                warcprox.dedup.decorate_with_dedup_info(self.dedup_db,
-                                        recorded_url, base32=self.options.base32)
-                            records = self.writer_pool.write_records(recorded_url)
-                        self._final_tasks(recorded_url, records)
-                        # try to release resources in a timely fashion
-                        if recorded_url.response_recorder and recorded_url.response_recorder.tempfile:
-                            recorded_url.response_recorder.tempfile.close()
-                    except queue.Empty:
-                        if self.stop.is_set():
-                            break
-                        self.idle = time.time()
-                    finally:
-                        self.writer_pool.maybe_idle_rollover()
-
-                self.logger.info('WarcWriterThread shutting down')
-                self._shutdown()
-            except Exception as e:
-                if isinstance(e, OSError) and e.errno == 28:
-                    # OSError: [Errno 28] No space left on device
-                    self.logger.critical(
-                            'shutting down due to fatal problem: %s: %s',
-                            e.__class__.__name__, e)
-                    self._shutdown()
-                    sys.exit(1)
-
-                self.logger.critical(
-                        'WarcWriterThread will try to continue after unexpected '
-                        'error', exc_info=True)
-                time.sleep(0.5)
-
-    def _shutdown(self):
-        self.writer_pool.close_writers()
-        for listener in self.listeners:
-            if hasattr(listener, 'stop'):
-                try:
-                    listener.stop()
-                except:
-                    self.logger.error(
-                            '%s raised exception', listener.stop, exc_info=True)
-
-    # closest thing we have to heritrix crawl log at the moment
     def _log(self, recorded_url, records):
         try:
-            payload_digest = records[0].get_header(warctools.WarcRecord.PAYLOAD_DIGEST).decode("utf-8")
+            payload_digest = records[0].get_header('WARC-Payload-Digest').decode("utf-8")
         except:
             payload_digest = "-"
@@ -156,13 +87,3 @@
                 recorded_url.method, recorded_url.url.decode("utf-8"),
                 recorded_url.mimetype, recorded_url.size, payload_digest,
                 type_, filename, offset)
-
-    def _final_tasks(self, recorded_url, records):
-        if self.listeners:
-            for listener in self.listeners:
-                try:
-                    listener.notify(recorded_url, records)
-                except:
-                    self.logger.error('%s raised exception',
-                            listener.notify, exc_info=True)
-        self._log(recorded_url, records)
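With _final_tasks gone, listener fan-out lives in the chain and the writer's remaining job is deciding what to archive and logging what it wrote. The surviving gate, restated as a standalone sketch (the real _should_archive reads the warc name prefix out of warcprox-meta, which is off-screen here, and _filter_accepts also always passes WARCPROX_WRITE_RECORD via _ALWAYS_ACCEPT):

    def should_archive(prefix, method, method_filter=frozenset()):
        # special warc name prefix '-' means "don't archive"
        if prefix == '-':
            return False
        # an empty method filter accepts everything
        return not method_filter or method.upper() in method_filter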