mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
resetting to Jul 1 updates
This commit is contained in:
parent
20789e4edb
commit
8f10fce93a
4
setup.py
4
setup.py
@ -2,7 +2,7 @@
|
|||||||
'''
|
'''
|
||||||
setup.py - setuptools installation configuration for warcprox
|
setup.py - setuptools installation configuration for warcprox
|
||||||
|
|
||||||
Copyright (C) 2013-2022 Internet Archive
|
Copyright (C) 2013-2021 Internet Archive
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or
|
This program is free software; you can redistribute it and/or
|
||||||
modify it under the terms of the GNU General Public License
|
modify it under the terms of the GNU General Public License
|
||||||
@ -44,7 +44,7 @@ except:
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='warcprox',
|
name='warcprox',
|
||||||
version='2.4.31-qa-2',
|
version='2.4.29',
|
||||||
description='WARC writing MITM HTTP/S proxy',
|
description='WARC writing MITM HTTP/S proxy',
|
||||||
url='https://github.com/internetarchive/warcprox',
|
url='https://github.com/internetarchive/warcprox',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
@ -1730,7 +1730,6 @@ def test_load_plugin():
|
|||||||
assert isinstance(
|
assert isinstance(
|
||||||
controller._postfetch_chain[-4].listener,
|
controller._postfetch_chain[-4].listener,
|
||||||
warcprox.stats.RunningStats)
|
warcprox.stats.RunningStats)
|
||||||
# MyEarlyPlugin
|
|
||||||
assert isinstance(
|
assert isinstance(
|
||||||
controller._postfetch_chain[0],
|
controller._postfetch_chain[0],
|
||||||
EarlyPlugin)
|
EarlyPlugin)
|
||||||
|
@ -166,9 +166,8 @@ class WarcproxController(object):
|
|||||||
with processor.inq.mutex:
|
with processor.inq.mutex:
|
||||||
l = list(processor.inq.queue)
|
l = list(processor.inq.queue)
|
||||||
for recorded_url in l:
|
for recorded_url in l:
|
||||||
if recorded_url.timestamp:
|
if earliest is None or recorded_url.timestamp < earliest:
|
||||||
if not earliest or (recorded_url.timestamp < earliest):
|
earliest = recorded_url.timestamp
|
||||||
earliest = recorded_url.timestamp
|
|
||||||
return earliest
|
return earliest
|
||||||
|
|
||||||
def postfetch_status(self):
|
def postfetch_status(self):
|
||||||
|
@ -596,18 +596,15 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
|||||||
'bytes exceeded for URL %s',
|
'bytes exceeded for URL %s',
|
||||||
self._max_resource_size, self.url)
|
self._max_resource_size, self.url)
|
||||||
break
|
break
|
||||||
elif time.time() - start > 3 * 60 * 60:
|
elif (not 'content-length' in self.headers
|
||||||
if not 'content-length' in self.headers:
|
and time.time() - start > 3 * 60 * 60):
|
||||||
prox_rec_res.truncated = b'time'
|
prox_rec_res.truncated = b'time'
|
||||||
self._remote_server_conn.sock.shutdown(socket.SHUT_RDWR)
|
self._remote_server_conn.sock.shutdown(socket.SHUT_RDWR)
|
||||||
self._remote_server_conn.sock.close()
|
self._remote_server_conn.sock.close()
|
||||||
self.logger.info(
|
self.logger.info(
|
||||||
'reached hard timeout of 3 hours fetching url '
|
'reached hard timeout of 3 hours fetching url '
|
||||||
'without content-length: %s', self.url)
|
'without content-length: %s', self.url)
|
||||||
break
|
break
|
||||||
else:
|
|
||||||
self.logger.info(
|
|
||||||
'long-running fetch for URL %s', self.url)
|
|
||||||
|
|
||||||
self.log_request(prox_rec_res.status, prox_rec_res.recorder.len)
|
self.log_request(prox_rec_res.status, prox_rec_res.recorder.len)
|
||||||
# Let's close off the remote end. If remote connection is fine,
|
# Let's close off the remote end. If remote connection is fine,
|
||||||
@ -675,17 +672,13 @@ class PooledMixIn(socketserver.ThreadingMixIn):
|
|||||||
|
|
||||||
def process_request(self, request, client_address):
|
def process_request(self, request, client_address):
|
||||||
self.active_requests[request] = doublethink.utcnow()
|
self.active_requests[request] = doublethink.utcnow()
|
||||||
try:
|
future = self.pool.submit(
|
||||||
future = self.pool.submit(
|
self.process_request_thread, request, client_address)
|
||||||
self.process_request_thread, request, client_address)
|
future.add_done_callback(
|
||||||
future.add_done_callback(
|
lambda f: self.active_requests.pop(request, None))
|
||||||
lambda f: self.active_requests.pop(request, None))
|
if future.done():
|
||||||
if future.done():
|
# avoid theoretical timing issue, in case process_request_thread
|
||||||
# avoid theoretical timing issue, in case process_request_thread
|
# managed to finish before future.add_done_callback() ran
|
||||||
# managed to finish before future.add_done_callback() ran
|
|
||||||
self.active_requests.pop(request, None)
|
|
||||||
except RuntimeError as exc:
|
|
||||||
self.logger.error("Error processing request %s", str(exc))
|
|
||||||
self.active_requests.pop(request, None)
|
self.active_requests.pop(request, None)
|
||||||
|
|
||||||
def get_request(self):
|
def get_request(self):
|
||||||
|
@ -46,7 +46,6 @@ import tempfile
|
|||||||
import hashlib
|
import hashlib
|
||||||
import doublethink
|
import doublethink
|
||||||
import re
|
import re
|
||||||
import zlib
|
|
||||||
|
|
||||||
class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
|
class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
|
||||||
'''
|
'''
|
||||||
@ -176,9 +175,6 @@ class WarcProxyHandler(warcprox.mitmproxy.MitmProxyHandler):
|
|||||||
warcprox_meta = json.loads(self.headers['Warcprox-Meta'])
|
warcprox_meta = json.loads(self.headers['Warcprox-Meta'])
|
||||||
self._security_check(warcprox_meta)
|
self._security_check(warcprox_meta)
|
||||||
self._enforce_limits(warcprox_meta)
|
self._enforce_limits(warcprox_meta)
|
||||||
if 'compressed_blocks' in warcprox_meta:
|
|
||||||
warcprox_meta['blocks'] = json.loads(zlib.decompress(warcprox_meta['compressed_blocks']).decode())
|
|
||||||
del warcprox_meta['compressed_blocks']
|
|
||||||
self._enforce_blocks(warcprox_meta)
|
self._enforce_blocks(warcprox_meta)
|
||||||
|
|
||||||
def _connect_to_remote_server(self):
|
def _connect_to_remote_server(self):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user