1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

Add support for verifying ssl certificates (#596)

* Add support for verifying ssl certificates

Signed-off-by: Lukas Straub <lukasstraub2@web.de>

* Add documentation for new certificate configuration options

Signed-off-by: Lukas Straub <lukasstraub2@web.de>

* Add test to check the verification of ssl certificates

Signed-off-by: Lukas Straub <lukasstraub2@web.de>
This commit is contained in:
Lukey3332 2021-01-26 21:41:26 +01:00 committed by GitHub
parent b66608c5f3
commit f628b40e02
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 54 additions and 2 deletions

View File

@ -555,3 +555,15 @@ To enable the previous behavior, add to config::
enable_flash_video_rewrite: true
The system may be revamped in the future and enabled by default, but for now, it is provided "as-is" for compatibility reasons.
Verify SSL-Certificates
-----------------------
By default, SSL-Certificates of websites are not verified. To enable verification, add the following to the config::
certificates:
cert_reqs: 'CERT_REQUIRED'
ca_cert_dir: '/etc/ssl/certs'
``ca_cert_dir`` can optionally point to a directory containing the CA certificates that you trust. Most Linux distributions provide CA certificates via a package called ``ca-certificates``.
If omitted, the default system CA certificates used by Python are used.

View File

@ -17,8 +17,10 @@ class PywbHttpAdapter(HTTPAdapter):
until a better solution is found
"""
# todo: allow configuring this later?
cert_reqs = 'CERT_NONE'
def __init__(self, cert_reqs='CERT_NONE', ca_cert_dir=None, **init_kwargs):
    """Create an adapter with configurable certificate verification.

    :param str cert_reqs: certificate requirement handed to the connection
        pool; defaults to ``'CERT_NONE'`` (no verification), with
        ``'CERT_REQUIRED'`` being the documented way to enable it
    :param ca_cert_dir: optional directory of trusted CA certificates,
        handed to the connection pool as-is (``None`` by default)
    :param init_kwargs: forwarded unchanged to the parent initializer
    """
    # Keep these assignments ahead of the parent call: init_poolmanager()
    # reads both attributes.
    # NOTE(review): requests' HTTPAdapter.__init__ is expected to invoke
    # init_poolmanager() during construction -- confirm against the
    # installed requests version before reordering.
    self.cert_reqs = cert_reqs
    self.ca_cert_dir = ca_cert_dir
    # An initializer has nothing meaningful to return, so delegate without
    # ``return``.
    super(PywbHttpAdapter, self).__init__(**init_kwargs)
def init_poolmanager(
self, connections, maxsize, block=DEFAULT_POOLBLOCK, **pool_kwargs
@ -32,11 +34,13 @@ class PywbHttpAdapter(HTTPAdapter):
block=block,
strict=True,
cert_reqs=self.cert_reqs,
ca_cert_dir=self.ca_cert_dir,
**pool_kwargs
)
def proxy_manager_for(self, proxy, **proxy_kwargs):
    """Return the parent's proxy manager, forcing this adapter's cert settings.

    Any ``cert_reqs`` or ``ca_cert_dir`` supplied by the caller is replaced
    by the values stored on this adapter before delegating.

    :param proxy: proxy passed through to the parent implementation
    :param proxy_kwargs: extra keyword arguments for the parent implementation
    """
    proxy_kwargs.update(
        cert_reqs=self.cert_reqs,
        ca_cert_dir=self.ca_cert_dir,
    )
    parent = super(PywbHttpAdapter, self)
    return parent.proxy_manager_for(proxy, **proxy_kwargs)

View File

@ -2,6 +2,9 @@ from pywb.utils.loaders import load_yaml_config, load_overlay_config
from pywb.warcserver.basewarcserver import BaseWarcServer
from pywb.warcserver.http import PywbHttpAdapter, DefaultAdapters
from urllib3.util.retry import Retry
from pywb.warcserver.index.aggregator import CacheDirectoryIndexSource, RedisMultiKeyIndexSource
from pywb.warcserver.index.aggregator import GeventTimeoutAggregator, SimpleAggregator
@ -70,6 +73,15 @@ class WarcServer(BaseWarcServer):
self.rules_file = self.config.get('rules_file', '')
if 'certificates' in self.config:
certs_config = self.config['certificates']
DefaultAdapters.live_adapter = PywbHttpAdapter(max_retries=Retry(3),
cert_reqs=certs_config.get('cert_reqs', 'CERT_NONE'),
ca_cert_dir=certs_config.get('ca_cert_dir'))
DefaultAdapters.remote_adapter = PywbHttpAdapter(max_retries=Retry(3),
cert_reqs=certs_config.get('cert_reqs', 'CERT_NONE'),
ca_cert_dir=certs_config.get('ca_cert_dir'))
self.auto_handler = None
if self.config.get('enable_auto_colls', True):

View File

@ -0,0 +1,7 @@
debug: true
collections:
live: $live
certificates:
cert_reqs: 'CERT_REQUIRED'

17
tests/test_cert_req.py Normal file
View File

@ -0,0 +1,17 @@
from .base_config_test import BaseConfigTest
# ============================================================================
class TestCertReq(BaseConfigTest):
    """Live-collection requests made with ``config_test_cert_req.yaml``."""

    @classmethod
    def setup_class(cls):
        """Set up the test app from the certificate-verification config."""
        config_name = 'config_test_cert_req.yaml'
        super(TestCertReq, cls).setup_class(config_name)

    def test_expired_cert(self):
        """A host with an expired certificate is answered with HTTP 400."""
        url = '/live/mp_/https://expired.badssl.com/'
        response = self.testapp.get(url, status='*')
        assert response.status_int == 400

    def test_good_cert(self):
        """A host with a valid certificate yields a 2xx or 3xx status."""
        url = '/live/mp_/https://www.google.com/'
        response = self.testapp.get(url, status='*')
        assert 200 <= response.status_int < 400