diff --git a/setup.py b/setup.py index 7b449d6..4acbd14 100755 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ except: setuptools.setup( name='warcprox', - version='2.1b1.dev67', + version='2.1b1.dev68', description='WARC writing MITM HTTP/S proxy', url='https://github.com/internetarchive/warcprox', author='Noah Levitt', diff --git a/tests/test_warcprox.py b/tests/test_warcprox.py index c0d57a5..547e579 100755 --- a/tests/test_warcprox.py +++ b/tests/test_warcprox.py @@ -1328,6 +1328,34 @@ def test_timestamped_queue(): time.sleep(1) assert q.seconds_behind() > 1 +def test_controller_with_defaults(): + # tests some initialization code that we rarely touch otherwise + controller = warcprox.controller.WarcproxController() + assert controller.proxy + assert not controller.proxy_thread + assert not controller.playback_proxy + assert not controller.playback_proxy_thread + assert controller.warc_writer_thread + assert controller.proxy.RequestHandlerClass == warcprox.warcproxy.WarcProxyHandler + assert controller.proxy.ca + assert controller.proxy.digest_algorithm == 'sha1' + assert controller.proxy.pool + assert controller.proxy.recorded_url_q + assert controller.proxy.server_address == ('127.0.0.1', 8000) + assert controller.proxy.server_port == 8000 + assert controller.warc_writer_thread.recorded_url_q + assert controller.warc_writer_thread.recorded_url_q is controller.proxy.recorded_url_q + assert controller.warc_writer_thread.writer_pool + assert controller.warc_writer_thread.writer_pool.default_warc_writer + assert controller.warc_writer_thread.writer_pool.default_warc_writer.directory == './warcs' + assert controller.warc_writer_thread.writer_pool.default_warc_writer.rollover_idle_time is None + assert controller.warc_writer_thread.writer_pool.default_warc_writer.rollover_size == 1000000000 + assert controller.warc_writer_thread.writer_pool.default_warc_writer.prefix == 'warcprox' + assert controller.warc_writer_thread.writer_pool.default_warc_writer.gzip is False + assert controller.warc_writer_thread.writer_pool.default_warc_writer.record_builder + assert not controller.warc_writer_thread.writer_pool.default_warc_writer.record_builder.base32 + assert controller.warc_writer_thread.writer_pool.default_warc_writer.record_builder.digest_algorithm == 'sha1' + if __name__ == '__main__': pytest.main() diff --git a/warcprox/controller.py b/warcprox/controller.py index 8f56b86..3ed2b8c 100644 --- a/warcprox/controller.py +++ b/warcprox/controller.py @@ -53,12 +53,13 @@ class WarcproxController(object): if proxy is not None: self.proxy = proxy else: - self.proxy = warcprox.warcprox.WarcProxy() + self.proxy = warcprox.warcproxy.WarcProxy(options=options) if warc_writer_thread is not None: self.warc_writer_thread = warc_writer_thread else: - self.warc_writer_thread = warcprox.warcwriter.WarcWriterThread(recorded_url_q=self.proxy.recorded_url_q) + self.warc_writer_thread = warcprox.writerthread.WarcWriterThread( + recorded_url_q=self.proxy.recorded_url_q) self.proxy_thread = None self.playback_proxy_thread = None diff --git a/warcprox/mitmproxy.py b/warcprox/mitmproxy.py index e15ed3c..5f592e4 100644 --- a/warcprox/mitmproxy.py +++ b/warcprox/mitmproxy.py @@ -438,6 +438,9 @@ class PooledMixIn(socketserver.ThreadingMixIn): This override is necessary for the size of the thread pool to act as a cap on the number of open file handles. + + N.b. this method blocks if necessary, even though it's called from + `_handle_request_noblock`. ''' # neither threading.Condition Queue.not_empty nor Queue.not_full do # what we need here, right? diff --git a/warcprox/writerthread.py b/warcprox/writerthread.py index 34a2b4e..a845e37 100644 --- a/warcprox/writerthread.py +++ b/warcprox/writerthread.py @@ -2,7 +2,7 @@ warcprox/writerthread.py - warc writer thread, reads from the recorded url queue, writes warc records, runs final tasks after warc records are written -Copyright (C) 2013-2016 Internet Archive +Copyright (C) 2013-2017 Internet Archive This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License @@ -44,7 +44,9 @@ import sys class WarcWriterThread(threading.Thread): logger = logging.getLogger("warcprox.warcproxwriter.WarcWriterThread") - def __init__(self, recorded_url_q=None, writer_pool=None, dedup_db=None, listeners=None, options=warcprox.Options()): + def __init__( + self, recorded_url_q=None, writer_pool=None, dedup_db=None, + listeners=None, options=warcprox.Options()): """recorded_url_q is a queue.Queue of warcprox.warcprox.RecordedUrl.""" threading.Thread.__init__(self, name='WarcWriterThread') self.recorded_url_q = recorded_url_q @@ -52,7 +54,7 @@ class WarcWriterThread(threading.Thread): if writer_pool: self.writer_pool = writer_pool else: - self.writer_pool = WarcWriterPool() + self.writer_pool = warcprox.writer.WarcWriterPool() self.dedup_db = dedup_db self.listeners = listeners self.options = options