new option --onion-tor-socks-proxy, host:port of tor socks proxy, used only to connect to .onion sites

This commit is contained in:
Noah Levitt 2015-11-13 01:17:35 +00:00
parent fb58244c4f
commit 00dc9eed84
8 changed files with 53 additions and 4 deletions

View File

@ -12,6 +12,7 @@ addons:
packages:
- python-gdbm
- python3-gdbm
- tor
services:
- docker

View File

@ -3,7 +3,7 @@
from setuptools.command.test import test as TestCommand
import sys
import setuptools
import setuptools
# custom test command class, needed so that the pytest-based dump-anydbm tests can be run
class PyTest(TestCommand):
@ -17,7 +17,14 @@ class PyTest(TestCommand):
errno = pytest.main(self.test_args)
sys.exit(errno)
deps = ['certauth>=1.1.0', 'warctools', 'kafka-python', 'surt==0.3b2', 'rethinkstuff']
deps = [
'certauth>=1.1.0',
'warctools',
'kafka-python',
'surt==0.3b2',
'rethinkstuff',
'PySocks',
]
try:
import concurrent.futures
except:

View File

@ -20,3 +20,5 @@ RUN mkdir -vp /etc/service/rethinkdb \
RUN apt-get -y install python-virtualenv git
RUN apt-get -y install python-gdbm python3-gdbm libpython2.7-dev libpython3.4-dev libffi-dev libssl-dev
RUN pip install devpi-client
RUN apt-get -y install tor # starts tor socks proxy on port 9050

View File

@ -43,6 +43,8 @@ def parse_args():
arg_parser.add_argument('--certs-dir', dest='certs_dir',
default='./{0}-warcprox-ca'.format(socket.gethostname()),
help='where to store and load generated certificates')
arg_parser.add_argument('--onion-tor-socks-proxy', dest='onion_tor_socks_proxy',
default=None, help='host:port of tor socks proxy, used only to connect to .onion sites')
arg_parser.add_argument('--version', action='version',
version="warcprox {}".format(warcprox.__version__))
arg_parser.add_argument('-v', '--verbose', dest='verbose', action='store_true')

View File

@ -272,7 +272,8 @@ def warcprox_(request, captures_db, dedup_db, stats_db, service_registry):
recorded_url_q = queue.Queue()
options = warcprox.Options(port=0, playback_port=0)
options = warcprox.Options(port=0, playback_port=0,
onion_tor_socks_proxy='localhost:9050')
proxy = warcprox.warcproxy.WarcProxy(ca=ca, recorded_url_q=recorded_url_q,
stats_db=stats_db, options=options)
options.port = proxy.server_port
@ -696,6 +697,18 @@ def test_dedup_buckets(https_daemon, http_daemon, warcprox_, archiving_proxies,
finally:
fh.close()
# XXX this test relies on a tor proxy running at localhost:9050 with a working
# connection to the internet, and relies on a third party site (facebook) being
# up and behaving a certain way
def test_tor_onion(archiving_proxies):
    """Fetch a .onion site through the archiving proxy over http and https.

    ``archiving_proxies`` is handed to requests as its ``proxies`` mapping.
    Redirects are not followed, so the status code of the first response is
    what gets checked: 302 for the plain-http url, 200 for the https url.
    """
    # requests waits indefinitely when no timeout is given; bound the wait so
    # that a stalled connection fails this test instead of hanging the run.
    timeout = 120  # seconds

    response = requests.get('http://www.facebookcorewwwi.onion/',
            proxies=archiving_proxies, verify=False, allow_redirects=False,
            timeout=timeout)
    assert response.status_code == 302

    response = requests.get('https://www.facebookcorewwwi.onion/',
            proxies=archiving_proxies, verify=False, allow_redirects=False,
            timeout=timeout)
    assert response.status_code == 200
if __name__ == '__main__':
pytest.main()

View File

@ -84,6 +84,8 @@ def _build_arg_parser(prog=os.path.basename(sys.argv[0])):
help=argparse.SUPPRESS)
arg_parser.add_argument('--profile', action='store_true', default=False,
help=argparse.SUPPRESS)
arg_parser.add_argument('--onion-tor-socks-proxy', dest='onion_tor_socks_proxy',
default=None, help='host:port of tor socks proxy, used only to connect to .onion sites')
arg_parser.add_argument('--version', action='version',
version="warcprox {}".format(warcprox.__version__))
arg_parser.add_argument('-v', '--verbose', dest='verbose', action='store_true')

View File

@ -16,6 +16,7 @@ import ssl
import warcprox
import threading
import datetime
import socks
class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
logger = logging.getLogger("warcprox.mitmproxy.MitmProxyHandler")
@ -51,7 +52,18 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
def _connect_to_host(self):
# Connect to destination
self._proxy_sock = socket.socket()
if self.onion_tor_socks_proxy_host and self.hostname.lower().endswith('.onion'):
self.logger.info("using tor socks proxy at %s:%s to connect to %s",
self.onion_tor_socks_proxy_host,
self.onion_tor_socks_proxy_port or 1080,
self.hostname)
self._proxy_sock = socks.socksocket()
self._proxy_sock.set_proxy(socks.SOCKS5,
addr=self.onion_tor_socks_proxy_host,
port=self.onion_tor_socks_proxy_port, rdns=True)
else:
self._proxy_sock = socket.socket()
self._proxy_sock.settimeout(60) # XXX what value should this have?
self._proxy_sock.connect((self.hostname, int(self.port)))

View File

@ -350,6 +350,16 @@ class SingleThreadedWarcProxy(http_server.HTTPServer):
def __init__(self, ca=None, recorded_url_q=None, stats_db=None, options=warcprox.Options()):
server_address = (options.address or 'localhost', options.port if options.port is not None else 8000)
if options.onion_tor_socks_proxy:
try:
host, port = options.onion_tor_socks_proxy.split(':')
WarcProxyHandler.onion_tor_socks_proxy_host = host
WarcProxyHandler.onion_tor_socks_proxy_port = int(port)
except ValueError:
WarcProxyHandler.onion_tor_socks_proxy_host = options.onion_tor_socks_proxy
WarcProxyHandler.onion_tor_socks_proxy_port = None
http_server.HTTPServer.__init__(self, server_address, WarcProxyHandler, bind_and_activate=True)
self.digest_algorithm = options.digest_algorithm or 'sha1'