diff --git a/docs/manual/configuring.rst b/docs/manual/configuring.rst index 30bd7708..e5922948 100644 --- a/docs/manual/configuring.rst +++ b/docs/manual/configuring.rst @@ -555,3 +555,15 @@ To enable the previous behavior, add to config:: enable_flash_video_rewrite: true The system may be revamped in the future and enabled by default, but for now, it is provided "as-is" for compatibility reasons. + +Verify SSL Certificates +----------------------- + +By default, the SSL certificates of websites are not verified. To enable verification, add the following to the config:: + + certificates: + cert_reqs: 'CERT_REQUIRED' + ca_cert_dir: '/etc/ssl/certs' + +``ca_cert_dir`` can optionally point to a directory containing the CA certificates that you trust. Most Linux distributions provide CA certificates via a package called ``ca-certificates``. +If ``ca_cert_dir`` is omitted, Python's default system CA certificates are used. diff --git a/pywb/warcserver/http.py b/pywb/warcserver/http.py index 68e8711a..3bb35a9b 100644 --- a/pywb/warcserver/http.py +++ b/pywb/warcserver/http.py @@ -17,8 +17,10 @@ class PywbHttpAdapter(HTTPAdapter): until a better solution is found """ - # todo: allow configuring this later? 
- cert_reqs = 'CERT_NONE' + def __init__(self, cert_reqs='CERT_NONE', ca_cert_dir=None, **init_kwargs): + self.cert_reqs = cert_reqs + self.ca_cert_dir = ca_cert_dir + return super(PywbHttpAdapter, self).__init__(**init_kwargs) def init_poolmanager( self, connections, maxsize, block=DEFAULT_POOLBLOCK, **pool_kwargs @@ -32,11 +34,13 @@ class PywbHttpAdapter(HTTPAdapter): block=block, strict=True, cert_reqs=self.cert_reqs, + ca_cert_dir=self.ca_cert_dir, **pool_kwargs ) def proxy_manager_for(self, proxy, **proxy_kwargs): proxy_kwargs['cert_reqs'] = self.cert_reqs + proxy_kwargs['ca_cert_dir'] = self.ca_cert_dir return super(PywbHttpAdapter, self).proxy_manager_for(proxy, **proxy_kwargs) diff --git a/pywb/warcserver/warcserver.py b/pywb/warcserver/warcserver.py index d417c0fa..622aba2f 100644 --- a/pywb/warcserver/warcserver.py +++ b/pywb/warcserver/warcserver.py @@ -2,6 +2,9 @@ from pywb.utils.loaders import load_yaml_config, load_overlay_config from pywb.warcserver.basewarcserver import BaseWarcServer +from pywb.warcserver.http import PywbHttpAdapter, DefaultAdapters +from urllib3.util.retry import Retry + from pywb.warcserver.index.aggregator import CacheDirectoryIndexSource, RedisMultiKeyIndexSource from pywb.warcserver.index.aggregator import GeventTimeoutAggregator, SimpleAggregator @@ -70,6 +73,15 @@ class WarcServer(BaseWarcServer): self.rules_file = self.config.get('rules_file', '') + if 'certificates' in self.config: + certs_config = self.config['certificates'] + DefaultAdapters.live_adapter = PywbHttpAdapter(max_retries=Retry(3), + cert_reqs=certs_config.get('cert_reqs', 'CERT_NONE'), + ca_cert_dir=certs_config.get('ca_cert_dir')) + DefaultAdapters.remote_adapter = PywbHttpAdapter(max_retries=Retry(3), + cert_reqs=certs_config.get('cert_reqs', 'CERT_NONE'), + ca_cert_dir=certs_config.get('ca_cert_dir')) + self.auto_handler = None if self.config.get('enable_auto_colls', True): diff --git a/tests/config_test_cert_req.yaml b/tests/config_test_cert_req.yaml new 
file mode 100644 index 00000000..e4505bfb --- /dev/null +++ b/tests/config_test_cert_req.yaml @@ -0,0 +1,7 @@ +debug: true + +collections: + live: $live + +certificates: + cert_reqs: 'CERT_REQUIRED' diff --git a/tests/test_cert_req.py b/tests/test_cert_req.py new file mode 100644 index 00000000..e7eac821 --- /dev/null +++ b/tests/test_cert_req.py @@ -0,0 +1,17 @@ +from .base_config_test import BaseConfigTest + +# ============================================================================ +class TestCertReq(BaseConfigTest): + @classmethod + def setup_class(cls): + super(TestCertReq, cls).setup_class('config_test_cert_req.yaml') + + def test_expired_cert(self): + resp = self.testapp.get('/live/mp_/https://expired.badssl.com/', status='*') + + assert resp.status_int == 400 + + def test_good_cert(self): + resp = self.testapp.get('/live/mp_/https://www.google.com/', status='*') + + assert resp.status_int >= 200 and resp.status_int < 400