mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
new option --onion-tor-socks-proxy, host:port of tor socks proxy, used only to connect to .onion sites
This commit is contained in:
parent
fb58244c4f
commit
00dc9eed84
@ -12,6 +12,7 @@ addons:
|
||||
packages:
|
||||
- python-gdbm
|
||||
- python3-gdbm
|
||||
- tor
|
||||
|
||||
services:
|
||||
- docker
|
||||
|
11
setup.py
11
setup.py
@ -3,7 +3,7 @@
|
||||
|
||||
from setuptools.command.test import test as TestCommand
|
||||
import sys
|
||||
import setuptools
|
||||
import setuptools
|
||||
|
||||
# special class needed to support the dump-anydbm tests, which are written with pytest
|
||||
class PyTest(TestCommand):
|
||||
@ -17,7 +17,14 @@ class PyTest(TestCommand):
|
||||
errno = pytest.main(self.test_args)
|
||||
sys.exit(errno)
|
||||
|
||||
deps = ['certauth>=1.1.0', 'warctools', 'kafka-python', 'surt==0.3b2', 'rethinkstuff']
|
||||
deps = [
|
||||
'certauth>=1.1.0',
|
||||
'warctools',
|
||||
'kafka-python',
|
||||
'surt==0.3b2',
|
||||
'rethinkstuff',
|
||||
'PySocks',
|
||||
]
|
||||
try:
|
||||
import concurrent.futures
|
||||
except:
|
||||
|
@ -20,3 +20,5 @@ RUN mkdir -vp /etc/service/rethinkdb \
|
||||
RUN apt-get -y install python-virtualenv git
|
||||
RUN apt-get -y install python-gdbm python3-gdbm libpython2.7-dev libpython3.4-dev libffi-dev libssl-dev
|
||||
RUN pip install devpi-client
|
||||
RUN apt-get -y install tor # starts tor socks proxy on port 9050
|
||||
|
||||
|
@ -43,6 +43,8 @@ def parse_args():
|
||||
arg_parser.add_argument('--certs-dir', dest='certs_dir',
|
||||
default='./{0}-warcprox-ca'.format(socket.gethostname()),
|
||||
help='where to store and load generated certificates')
|
||||
arg_parser.add_argument('--onion-tor-socks-proxy', dest='onion_tor_socks_proxy',
|
||||
default=None, help='host:port of tor socks proxy, used only to connect to .onion sites')
|
||||
arg_parser.add_argument('--version', action='version',
|
||||
version="warcprox {}".format(warcprox.__version__))
|
||||
arg_parser.add_argument('-v', '--verbose', dest='verbose', action='store_true')
|
||||
|
@ -272,7 +272,8 @@ def warcprox_(request, captures_db, dedup_db, stats_db, service_registry):
|
||||
|
||||
recorded_url_q = queue.Queue()
|
||||
|
||||
options = warcprox.Options(port=0, playback_port=0)
|
||||
options = warcprox.Options(port=0, playback_port=0,
|
||||
onion_tor_socks_proxy='localhost:9050')
|
||||
proxy = warcprox.warcproxy.WarcProxy(ca=ca, recorded_url_q=recorded_url_q,
|
||||
stats_db=stats_db, options=options)
|
||||
options.port = proxy.server_port
|
||||
@ -696,6 +697,18 @@ def test_dedup_buckets(https_daemon, http_daemon, warcprox_, archiving_proxies,
|
||||
finally:
|
||||
fh.close()
|
||||
|
||||
# XXX this test relies on a tor proxy running at localhost:9050 with a working
|
||||
# connection to the internet, and relies on a third party site (facebook) being
|
||||
# up and behaving a certain way
|
||||
def test_tor_onion(archiving_proxies):
    """Request a .onion site over http and then https via `archiving_proxies`.

    Redirects are not followed and certificate verification is disabled.
    The plain-http request is expected to return status 302 and the https
    request status 200.
    """
    expected_status_by_url = (
        ('http://www.facebookcorewwwi.onion/', 302),
        ('https://www.facebookcorewwwi.onion/', 200),
    )
    # Order matters only in that it mirrors the original: http first, then
    # https; each response is checked before the next request is issued.
    for url, expected_status in expected_status_by_url:
        response = requests.get(
                url, proxies=archiving_proxies, verify=False,
                allow_redirects=False)
        assert response.status_code == expected_status
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main()
|
||||
|
||||
|
@ -84,6 +84,8 @@ def _build_arg_parser(prog=os.path.basename(sys.argv[0])):
|
||||
help=argparse.SUPPRESS)
|
||||
arg_parser.add_argument('--profile', action='store_true', default=False,
|
||||
help=argparse.SUPPRESS)
|
||||
arg_parser.add_argument('--onion-tor-socks-proxy', dest='onion_tor_socks_proxy',
|
||||
default=None, help='host:port of tor socks proxy, used only to connect to .onion sites')
|
||||
arg_parser.add_argument('--version', action='version',
|
||||
version="warcprox {}".format(warcprox.__version__))
|
||||
arg_parser.add_argument('-v', '--verbose', dest='verbose', action='store_true')
|
||||
|
@ -16,6 +16,7 @@ import ssl
|
||||
import warcprox
|
||||
import threading
|
||||
import datetime
|
||||
import socks
|
||||
|
||||
class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
||||
logger = logging.getLogger("warcprox.mitmproxy.MitmProxyHandler")
|
||||
@ -51,7 +52,18 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
||||
|
||||
def _connect_to_host(self):
|
||||
# Connect to destination
|
||||
self._proxy_sock = socket.socket()
|
||||
if self.onion_tor_socks_proxy_host and self.hostname.lower().endswith('.onion'):
|
||||
self.logger.info("using tor socks proxy at %s:%s to connect to %s",
|
||||
self.onion_tor_socks_proxy_host,
|
||||
self.onion_tor_socks_proxy_port or 1080,
|
||||
self.hostname)
|
||||
self._proxy_sock = socks.socksocket()
|
||||
self._proxy_sock.set_proxy(socks.SOCKS5,
|
||||
addr=self.onion_tor_socks_proxy_host,
|
||||
port=self.onion_tor_socks_proxy_port, rdns=True)
|
||||
else:
|
||||
self._proxy_sock = socket.socket()
|
||||
|
||||
self._proxy_sock.settimeout(60) # XXX what value should this have?
|
||||
self._proxy_sock.connect((self.hostname, int(self.port)))
|
||||
|
||||
|
@ -350,6 +350,16 @@ class SingleThreadedWarcProxy(http_server.HTTPServer):
|
||||
|
||||
def __init__(self, ca=None, recorded_url_q=None, stats_db=None, options=warcprox.Options()):
|
||||
server_address = (options.address or 'localhost', options.port if options.port is not None else 8000)
|
||||
|
||||
if options.onion_tor_socks_proxy:
|
||||
try:
|
||||
host, port = options.onion_tor_socks_proxy.split(':')
|
||||
WarcProxyHandler.onion_tor_socks_proxy_host = host
|
||||
WarcProxyHandler.onion_tor_socks_proxy_port = int(port)
|
||||
except ValueError:
|
||||
WarcProxyHandler.onion_tor_socks_proxy_host = options.onion_tor_socks_proxy
|
||||
WarcProxyHandler.onion_tor_socks_proxy_port = None
|
||||
|
||||
http_server.HTTPServer.__init__(self, server_address, WarcProxyHandler, bind_and_activate=True)
|
||||
|
||||
self.digest_algorithm = options.digest_algorithm or 'sha1'
|
||||
|
Loading…
x
Reference in New Issue
Block a user