From 47731c61c13520349b565151ede6f028f3789f5a Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Wed, 8 Jan 2020 14:05:04 -0800 Subject: [PATCH 1/3] tests need trough --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 10d902e..5211f07 100644 --- a/.travis.yml +++ b/.travis.yml @@ -35,7 +35,7 @@ before_install: - ping -c2 trough install: -- pip install . pytest requests warcio mock +- pip install .[trough] pytest requests warcio mock before_script: - docker exec trough bash -c 'while ! test -e /tmp/trough-read.out ; do sleep 0.5 ; done' || true From de9219e64652c17f0989c3ca27c35fb6e0901b22 Mon Sep 17 00:00:00 2001 From: Noah Levitt Date: Thu, 6 Feb 2020 10:10:53 -0800 Subject: [PATCH 2/3] require more recent urllib3 to avoid this error: https://github.com/internetarchive/warcprox/issues/148 2020-01-28 14:42:44,851 2023 ERROR MitmProxyHandler(tid=2037,started=2020-01-28T20:42:44.834551,client=127.0.0.1:49100) warcprox.warcprox.WarcProxyHandler.do_COMMAND(mitmproxy.py:442) problem processing request 'GET / HTTP/1.1': TypeError("connection_from_host() got an unexpected keyword argument 'pool_kwargs'",) Traceback (most recent call last): File "/usr/local/lib/python3.5/dist-packages/warcprox/mitmproxy.py", line 413, in do_COMMAND self._connect_to_remote_server() File "/usr/local/lib/python3.5/dist-packages/warcprox/warcproxy.py", line 189, in _connect_to_remote_server return warcprox.mitmproxy.MitmProxyHandler._connect_to_remote_server(self) File "/usr/local/lib/python3.5/dist-packages/warcprox/mitmproxy.py", line 277, in _connect_to_remote_server pool_kwargs={'maxsize': 12, 'timeout': self._socket_timeout}) TypeError: connection_from_host() got an unexpected keyword argument 'pool_kwargs' --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 96089bb..c8306b8 100755 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ deps = [ 'warctools>=4.10.0', 'urlcanon>=0.3.0', 
'doublethink>=0.2.0.dev87', - 'urllib3>=1.14', + 'urllib3>=1.23', 'requests>=2.0.1', 'PySocks>=1.6.8', 'cryptography>=2.3', From 89e6745274d8470624c2e3c147060f31b3621e9a Mon Sep 17 00:00:00 2001 From: Vangelis Banos Date: Wed, 8 Jul 2020 16:48:05 +0000 Subject: [PATCH 3/3] Handle RuntimeError Sometimes, when warcprox runs for several days under load, it freezes and the last error in the log is: ``` WARNING:warcprox.warcproxy.WarcProxy:exception processing request from ('207.241.225.241', 40738) Traceback (most recent call last): File "/usr/lib/python3.7/socketserver.py", line 316, in _handle_request_noblock self.process_request(request, client_address) File "/opt/spn2/lib/python3.7/site-packages/warcprox/mitmproxy.py", line 641, in process_request self.process_request_thread, request, client_address) File "/usr/lib/python3.7/concurrent/futures/thread.py", line 172, in submit self._adjust_thread_count() File "/usr/lib/python3.7/concurrent/futures/thread.py", line 193, in _adjust_thread_count t.start() File "/usr/lib/python3.7/threading.py", line 852, in start _start_new_thread(self._bootstrap, ()) RuntimeError: can't start new thread ``` The process still appears to be running, but it doesn't respond to any connection, not even `status` requests. We handle this exception and allow warcprox to continue operating. 
--- warcprox/mitmproxy.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/warcprox/mitmproxy.py b/warcprox/mitmproxy.py index 6b32a40..88f6c1e 100644 --- a/warcprox/mitmproxy.py +++ b/warcprox/mitmproxy.py @@ -637,13 +637,17 @@ class PooledMixIn(socketserver.ThreadingMixIn): def process_request(self, request, client_address): self.active_requests[request] = doublethink.utcnow() - future = self.pool.submit( - self.process_request_thread, request, client_address) - future.add_done_callback( - lambda f: self.active_requests.pop(request, None)) - if future.done(): - # avoid theoretical timing issue, in case process_request_thread - # managed to finish before future.add_done_callback() ran + try: + future = self.pool.submit( + self.process_request_thread, request, client_address) + future.add_done_callback( + lambda f: self.active_requests.pop(request, None)) + if future.done(): + # avoid theoretical timing issue, in case process_request_thread + # managed to finish before future.add_done_callback() ran + self.active_requests.pop(request, None) + except RuntimeError as exc: + self.logger.error("Error processing request %s", str(exc)) self.active_requests.pop(request, None) def get_request(self):