diff --git a/setup.py b/setup.py
index 90a7719..912d7ed 100755
--- a/setup.py
+++ b/setup.py
@@ -51,7 +51,7 @@ except:
 
 setuptools.setup(
         name='warcprox',
-        version='2.2.1b2.dev117',
+        version='2.2.1b2.dev118',
         description='WARC writing MITM HTTP/S proxy',
         url='https://github.com/internetarchive/warcprox',
         author='Noah Levitt',
diff --git a/warcprox/controller.py b/warcprox/controller.py
index 217f519..fbf9d8a 100644
--- a/warcprox/controller.py
+++ b/warcprox/controller.py
@@ -44,7 +44,7 @@ class WarcproxController(object):
         Create warcprox controller.
 
         If supplied, `proxy` should be an instance of WarcProxy, and
-        `warc_writer_threads` should be an list of WarcWriterThread instances.
+        `warc_writer_threads` should be a list of WarcWriterThread instances.
         If not supplied, they are created with default values.
 
         If supplied, playback_proxy should be an instance of PlaybackProxy. If
@@ -266,11 +266,9 @@ class WarcproxController(object):
             self.shutdown()
 
     def _dump_profiling(self):
-        import pstats
-        import tempfile
-        import os
-        import io
+        import pstats, tempfile, os, io
         with tempfile.TemporaryDirectory() as tmpdir:
+            # proxy threads
             files = []
             for th_id, profiler in self.proxy.profilers.items():
                 file = os.path.join(tmpdir, '%s.dat' % th_id)
@@ -285,3 +283,19 @@ class WarcproxController(object):
                     'aggregate performance profile of %s proxy threads:\n%s',
                     len(files), buf.getvalue())
 
+
+            # warc writer threads
+            files = []
+            for wwt in self.warc_writer_threads:
+                file = os.path.join(tmpdir, '%s.dat' % wwt.ident)
+                wwt.profiler.dump_stats(file)
+                files.append(file)
+
+            buf = io.StringIO()
+            stats = pstats.Stats(*files, stream=buf)
+            stats.sort_stats('cumulative')
+            stats.print_stats(0.1)
+            self.logger.notice(
+                    'aggregate performance profile of %s warc writer threads:\n%s',
+                    len(self.warc_writer_threads), buf.getvalue())
+
diff --git a/warcprox/writerthread.py b/warcprox/writerthread.py
index 1041a30..8138eec 100644
--- a/warcprox/writerthread.py
+++ b/warcprox/writerthread.py
@@ -56,23 +56,15 @@ class WarcWriterThread(threading.Thread):
         self.idle = None
         self.method_filter = set(method.upper() for method in self.options.method_filter or [])
 
-    def run(self):
         if self.options.profile:
             import cProfile
-            import pstats
-            import io
-            profiler = cProfile.Profile()
+            self.profiler = cProfile.Profile()
 
-            profiler.enable()
+    def run(self):
+        if self.options.profile:
+            self.profiler.enable()
             self._run()
-            profiler.disable()
-
-            buf = io.StringIO()
-            stats = pstats.Stats(profiler, stream=buf)
-            stats.sort_stats('cumulative')
-            stats.print_stats(0.1)
-            self.logger.notice(
-                    '%s performance profile:\n%s', self, buf.getvalue())
+            self.profiler.disable()
         else:
             self._run()
 