mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
New option --onion-tor-socks-proxy: host:port of a Tor SOCKS proxy, used only to connect to .onion sites
This commit is contained in:
parent
fb58244c4f
commit
00dc9eed84
@ -12,6 +12,7 @@ addons:
|
|||||||
packages:
|
packages:
|
||||||
- python-gdbm
|
- python-gdbm
|
||||||
- python3-gdbm
|
- python3-gdbm
|
||||||
|
- tor
|
||||||
|
|
||||||
services:
|
services:
|
||||||
- docker
|
- docker
|
||||||
|
11
setup.py
11
setup.py
@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
from setuptools.command.test import test as TestCommand
|
from setuptools.command.test import test as TestCommand
|
||||||
import sys
|
import sys
|
||||||
import setuptools
|
import setuptools
|
||||||
|
|
||||||
# special class needs to be added to support the pytest written dump-anydbm tests
|
# special class needs to be added to support the pytest written dump-anydbm tests
|
||||||
class PyTest(TestCommand):
|
class PyTest(TestCommand):
|
||||||
@ -17,7 +17,14 @@ class PyTest(TestCommand):
|
|||||||
errno = pytest.main(self.test_args)
|
errno = pytest.main(self.test_args)
|
||||||
sys.exit(errno)
|
sys.exit(errno)
|
||||||
|
|
||||||
deps = ['certauth>=1.1.0', 'warctools', 'kafka-python', 'surt==0.3b2', 'rethinkstuff']
|
deps = [
|
||||||
|
'certauth>=1.1.0',
|
||||||
|
'warctools',
|
||||||
|
'kafka-python',
|
||||||
|
'surt==0.3b2',
|
||||||
|
'rethinkstuff',
|
||||||
|
'PySocks',
|
||||||
|
]
|
||||||
try:
|
try:
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
except:
|
except:
|
||||||
|
@ -20,3 +20,5 @@ RUN mkdir -vp /etc/service/rethinkdb \
|
|||||||
RUN apt-get -y install python-virtualenv git
|
RUN apt-get -y install python-virtualenv git
|
||||||
RUN apt-get -y install python-gdbm python3-gdbm libpython2.7-dev libpython3.4-dev libffi-dev libssl-dev
|
RUN apt-get -y install python-gdbm python3-gdbm libpython2.7-dev libpython3.4-dev libffi-dev libssl-dev
|
||||||
RUN pip install devpi-client
|
RUN pip install devpi-client
|
||||||
|
RUN apt-get -y install tor # starts tor socks proxy on port 9050
|
||||||
|
|
||||||
|
@ -43,6 +43,8 @@ def parse_args():
|
|||||||
arg_parser.add_argument('--certs-dir', dest='certs_dir',
|
arg_parser.add_argument('--certs-dir', dest='certs_dir',
|
||||||
default='./{0}-warcprox-ca'.format(socket.gethostname()),
|
default='./{0}-warcprox-ca'.format(socket.gethostname()),
|
||||||
help='where to store and load generated certificates')
|
help='where to store and load generated certificates')
|
||||||
|
arg_parser.add_argument('--onion-tor-socks-proxy', dest='onion_tor_socks_proxy',
|
||||||
|
default=None, help='host:port of tor socks proxy, used only to connect to .onion sites')
|
||||||
arg_parser.add_argument('--version', action='version',
|
arg_parser.add_argument('--version', action='version',
|
||||||
version="warcprox {}".format(warcprox.__version__))
|
version="warcprox {}".format(warcprox.__version__))
|
||||||
arg_parser.add_argument('-v', '--verbose', dest='verbose', action='store_true')
|
arg_parser.add_argument('-v', '--verbose', dest='verbose', action='store_true')
|
||||||
|
@ -272,7 +272,8 @@ def warcprox_(request, captures_db, dedup_db, stats_db, service_registry):
|
|||||||
|
|
||||||
recorded_url_q = queue.Queue()
|
recorded_url_q = queue.Queue()
|
||||||
|
|
||||||
options = warcprox.Options(port=0, playback_port=0)
|
options = warcprox.Options(port=0, playback_port=0,
|
||||||
|
onion_tor_socks_proxy='localhost:9050')
|
||||||
proxy = warcprox.warcproxy.WarcProxy(ca=ca, recorded_url_q=recorded_url_q,
|
proxy = warcprox.warcproxy.WarcProxy(ca=ca, recorded_url_q=recorded_url_q,
|
||||||
stats_db=stats_db, options=options)
|
stats_db=stats_db, options=options)
|
||||||
options.port = proxy.server_port
|
options.port = proxy.server_port
|
||||||
@ -696,6 +697,18 @@ def test_dedup_buckets(https_daemon, http_daemon, warcprox_, archiving_proxies,
|
|||||||
finally:
|
finally:
|
||||||
fh.close()
|
fh.close()
|
||||||
|
|
||||||
|
# XXX this test relies on a tor proxy running at localhost:9050 with a working
|
||||||
|
# connection to the internet, and relies on a third party site (facebook) being
|
||||||
|
# up and behaving a certain way
|
||||||
|
def test_tor_onion(archiving_proxies):
    """Fetch a .onion site over http and https through the archiving proxy.

    The plain-http request is expected to answer with a 302 and the https
    request with a 200; redirects are not followed and certificate
    verification is disabled for both requests.
    """
    # (url, expected status code) pairs, checked in order
    expectations = (
        ('http://www.facebookcorewwwi.onion/', 302),
        ('https://www.facebookcorewwwi.onion/', 200),
    )
    for url, expected_status in expectations:
        response = requests.get(
                url, proxies=archiving_proxies, verify=False,
                allow_redirects=False)
        assert response.status_code == expected_status
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
pytest.main()
|
pytest.main()
|
||||||
|
|
||||||
|
@ -84,6 +84,8 @@ def _build_arg_parser(prog=os.path.basename(sys.argv[0])):
|
|||||||
help=argparse.SUPPRESS)
|
help=argparse.SUPPRESS)
|
||||||
arg_parser.add_argument('--profile', action='store_true', default=False,
|
arg_parser.add_argument('--profile', action='store_true', default=False,
|
||||||
help=argparse.SUPPRESS)
|
help=argparse.SUPPRESS)
|
||||||
|
arg_parser.add_argument('--onion-tor-socks-proxy', dest='onion_tor_socks_proxy',
|
||||||
|
default=None, help='host:port of tor socks proxy, used only to connect to .onion sites')
|
||||||
arg_parser.add_argument('--version', action='version',
|
arg_parser.add_argument('--version', action='version',
|
||||||
version="warcprox {}".format(warcprox.__version__))
|
version="warcprox {}".format(warcprox.__version__))
|
||||||
arg_parser.add_argument('-v', '--verbose', dest='verbose', action='store_true')
|
arg_parser.add_argument('-v', '--verbose', dest='verbose', action='store_true')
|
||||||
|
@ -16,6 +16,7 @@ import ssl
|
|||||||
import warcprox
|
import warcprox
|
||||||
import threading
|
import threading
|
||||||
import datetime
|
import datetime
|
||||||
|
import socks
|
||||||
|
|
||||||
class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
||||||
logger = logging.getLogger("warcprox.mitmproxy.MitmProxyHandler")
|
logger = logging.getLogger("warcprox.mitmproxy.MitmProxyHandler")
|
||||||
@ -51,7 +52,18 @@ class MitmProxyHandler(http_server.BaseHTTPRequestHandler):
|
|||||||
|
|
||||||
def _connect_to_host(self):
|
def _connect_to_host(self):
|
||||||
# Connect to destination
|
# Connect to destination
|
||||||
self._proxy_sock = socket.socket()
|
if self.onion_tor_socks_proxy_host and self.hostname.lower().endswith('.onion'):
|
||||||
|
self.logger.info("using tor socks proxy at %s:%s to connect to %s",
|
||||||
|
self.onion_tor_socks_proxy_host,
|
||||||
|
self.onion_tor_socks_proxy_port or 1080,
|
||||||
|
self.hostname)
|
||||||
|
self._proxy_sock = socks.socksocket()
|
||||||
|
self._proxy_sock.set_proxy(socks.SOCKS5,
|
||||||
|
addr=self.onion_tor_socks_proxy_host,
|
||||||
|
port=self.onion_tor_socks_proxy_port, rdns=True)
|
||||||
|
else:
|
||||||
|
self._proxy_sock = socket.socket()
|
||||||
|
|
||||||
self._proxy_sock.settimeout(60) # XXX what value should this have?
|
self._proxy_sock.settimeout(60) # XXX what value should this have?
|
||||||
self._proxy_sock.connect((self.hostname, int(self.port)))
|
self._proxy_sock.connect((self.hostname, int(self.port)))
|
||||||
|
|
||||||
|
@ -350,6 +350,16 @@ class SingleThreadedWarcProxy(http_server.HTTPServer):
|
|||||||
|
|
||||||
def __init__(self, ca=None, recorded_url_q=None, stats_db=None, options=warcprox.Options()):
|
def __init__(self, ca=None, recorded_url_q=None, stats_db=None, options=warcprox.Options()):
|
||||||
server_address = (options.address or 'localhost', options.port if options.port is not None else 8000)
|
server_address = (options.address or 'localhost', options.port if options.port is not None else 8000)
|
||||||
|
|
||||||
|
if options.onion_tor_socks_proxy:
|
||||||
|
try:
|
||||||
|
host, port = options.onion_tor_socks_proxy.split(':')
|
||||||
|
WarcProxyHandler.onion_tor_socks_proxy_host = host
|
||||||
|
WarcProxyHandler.onion_tor_socks_proxy_port = int(port)
|
||||||
|
except ValueError:
|
||||||
|
WarcProxyHandler.onion_tor_socks_proxy_host = options.onion_tor_socks_proxy
|
||||||
|
WarcProxyHandler.onion_tor_socks_proxy_port = None
|
||||||
|
|
||||||
http_server.HTTPServer.__init__(self, server_address, WarcProxyHandler, bind_and_activate=True)
|
http_server.HTTPServer.__init__(self, server_address, WarcProxyHandler, bind_and_activate=True)
|
||||||
|
|
||||||
self.digest_algorithm = options.digest_algorithm or 'sha1'
|
self.digest_algorithm = options.digest_algorithm or 'sha1'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user