Mirror of https://github.com/internetarchive/warcprox.git (synced 2025-01-18 13:22:09 +01:00)
resetting to Jul 1 updates
This commit is contained in:
parent: 20789e4edb
commit: 8f10fce93a
4
setup.py
4
setup.py
@@ -2,7 +2,7 @@
|
||||
'''
|
||||
setup.py - setuptools installation configuration for warcprox
|
||||
|
||||
Copyright (C) 2013-2022 Internet Archive
|
||||
Copyright (C) 2013-2021 Internet Archive
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
@@ -44,7 +44,7 @@ except:
|
||||
|
||||
setuptools.setup(
|
||||
name='warcprox',
|
||||
version='2.4.31-qa-2',
|
||||
version='2.4.29',
|
||||
description='WARC writing MITM HTTP/S proxy',
|
||||
url='https://github.com/internetarchive/warcprox',
|
||||
author='Noah Levitt',
|
||||
|
@@ -1730,7 +1730,6 @@ def test_load_plugin():
|
||||
assert isinstance(
|
||||
controller._postfetch_chain[-4].listener,
|
||||
warcprox.stats.RunningStats)
|
||||
# MyEarlyPlugin
|
||||
assert isinstance(
|
||||
controller._postfetch_chain[0],
|
||||
EarlyPlugin)
|
||||
|
@@ -166,9 +166,8 @@ class WarcproxController(object):
|
||||
with processor.inq.mutex:
|
||||
l = list(processor.inq.queue)
|
||||
for recorded_url in l:
|
||||
if recorded_url.timestamp:
|
||||
if not earliest or (recorded_url.timestamp < earliest):
|
||||
earliest = recorded_url.timestamp
|
||||
if earliest is None or recorded_url.timestamp < earliest:
|
||||
earliest = recorded_url.timestamp
|
||||
return earliest
|
||||
|
||||
def postfetch_status(self):
|
||||
|
@@ -596,18 +596,15 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
||||
'bytes exceeded for URL %s',
|
||||
self._max_resource_size, self.url)
|
||||
break
|
||||
elif time.time() - start > 3 * 60 * 60:
|
||||
if not 'content-length' in self.headers:
|
||||
prox_rec_res.truncated = b'time'
|
||||
self._remote_server_conn.sock.shutdown(socket.SHUT_RDWR)
|
||||
self._remote_server_conn.sock.close()
|
||||
self.logger.info(
|
||||
'reached hard timeout of 3 hours fetching url '
|
||||
'without content-length: %s', self.url)
|
||||
break
|
||||
else:
|
||||
self.logger.info(
|
||||
'long-running fetch for URL %s', self.url)
|
||||
elif (not 'content-length' in self.headers
|
||||
and time.time() - start > 3 * 60 * 60):
|
||||
prox_rec_res.truncated = b'time'
|
||||
self._remote_server_conn.sock.shutdown(socket.SHUT_RDWR)
|
||||
self._remote_server_conn.sock.close()
|
||||
self.logger.info(
|
||||
'reached hard timeout of 3 hours fetching url '
|
||||
'without content-length: %s', self.url)
|
||||
break
|
||||
|
||||
self.log_request(prox_rec_res.status, prox_rec_res.recorder.len)
|
||||
# Let's close off the remote end. If remote connection is fine,
|
||||
@@ -675,17 +672,13 @@ class PooledMixIn(socketserver.ThreadingMixIn):
|
||||
|
||||
def process_request(self, request, client_address):
|
||||
self.active_requests[request] = doublethink.utcnow()
|
||||
try:
|
||||
future = self.pool.submit(
|
||||
self.process_request_thread, request, client_address)
|
||||
future.add_done_callback(
|
||||
lambda f: self.active_requests.pop(request, None))
|
||||
if future.done():
|
||||
# avoid theoretical timing issue, in case process_request_thread
|
||||
# managed to finish before future.add_done_callback() ran
|
||||
self.active_requests.pop(request, None)
|
||||
except RuntimeError as exc:
|
||||
self.logger.error("Error processing request %s", str(exc))
|
||||
future = self.pool.submit(
|
||||
self.process_request_thread, request, client_address)
|
||||
future.add_done_callback(
|
||||
lambda f: self.active_requests.pop(request, None))
|
||||
if future.done():
|
||||
# avoid theoretical timing issue, in case process_request_thread
|
||||
# managed to finish before future.add_done_callback() ran
|
||||
self.active_requests.pop(request, None)
|
||||
|
||||
def get_request(self):
|
||||
|
@@ -46,7 +46,6 @@ import tempfile
|
||||
import hashlib
|
||||
import doublethink
|
||||
import re
|
||||
import zlib
|
||||
|
||||
class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
|
||||
'''
|
||||
@@ -176,9 +175,6 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
|
||||
warcprox_meta = json.loads(self.headers['Warcprox-Meta'])
|
||||
self._security_check(warcprox_meta)
|
||||
self._enforce_limits(warcprox_meta)
|
||||
if 'compressed_blocks' in warcprox_meta:
|
||||
warcprox_meta['blocks'] = json.loads(zlib.decompress(warcprox_meta['compressed_blocks']).decode())
|
||||
del warcprox_meta['compressed_blocks']
|
||||
self._enforce_blocks(warcprox_meta)
|
||||
|
||||
def _connect_to_remote_server(self):
|
||||
|
Loading…
x
Reference in New Issue
Block a user