mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Merge branch 'https-proxy' into develop
Merge readme and changelist from 0.5.3
This commit is contained in:
commit
c251840141
@ -1,3 +1,11 @@
|
||||
pywb 0.6.0 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* HTTPS Proxy Support!
|
||||
|
||||
* Revamped HTTP/S system: proxy collection and capture time switching via cookie!
|
||||
|
||||
|
||||
pywb 0.5.3 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
* better framed replay for non-html content -- include live rewrite timestamp via temp 'pywb.timestamp' cookie, updating banner of iframe load. All timestamp formatting moved to client-side for better customization.
|
||||
@ -6,6 +14,7 @@ pywb 0.5.3 changelist
|
||||
|
||||
* banner-only rewrite mode (via 'bn_' modifier) to support only banner insertion with no rewriting, server-side or client-side.
|
||||
|
||||
|
||||
pywb 0.5.1 changelist
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
minor fixes:
|
||||
|
@ -1,11 +1,11 @@
|
||||
PyWb 0.5.3
|
||||
PyWb 0.6.0
|
||||
==========
|
||||
|
||||
.. image:: https://travis-ci.org/ikreymer/pywb.png?branch=develop
|
||||
.. image:: https://travis-ci.org/ikreymer/pywb.png?branch=https-proxy
|
||||
:target: https://travis-ci.org/ikreymer/pywb
|
||||
|
||||
.. image:: https://coveralls.io/repos/ikreymer/pywb/badge.png?branch=develop
|
||||
:target: https://coveralls.io/r/ikreymer/pywb?branch=develop
|
||||
.. image:: https://coveralls.io/repos/ikreymer/pywb/badge.png?branch=https-proxy
|
||||
:target: https://coveralls.io/r/ikreymer/pywb?branch=https-proxy
|
||||
|
||||
pywb is a python implementation of web archival replay tools, sometimes also known as 'Wayback Machine'.
|
||||
|
||||
@ -21,6 +21,7 @@ This README contains a basic overview of using pywb. After reading this intro, c
|
||||
|
||||
* `pywb-samples <https://github.com/ikreymer/pywb-samples>`_ provides additional archive samples with difficult-to-replay content.
|
||||
|
||||
* `pywb-proxy-demo <https://github.com/ikreymer/pywb-proxy-demo>`_ showcases the revamped HTTP/S proxy replay system (available from pywb 0.6.0)
|
||||
|
||||
The following deployed applications use pywb:
|
||||
|
||||
|
@ -109,3 +109,6 @@ enable_memento: true
|
||||
|
||||
# Replay content in an iframe
|
||||
framed_replay: true
|
||||
|
||||
debug_echo_env: True
|
||||
|
||||
|
228
pywb/framework/certauth.py
Normal file
228
pywb/framework/certauth.py
Normal file
@ -0,0 +1,228 @@
|
||||
import logging
|
||||
import os
|
||||
from OpenSSL import crypto
|
||||
from OpenSSL.SSL import FILETYPE_PEM
|
||||
import random
|
||||
from argparse import ArgumentParser
|
||||
|
||||
|
||||
#=================================================================
|
||||
# Duration of 100 years
|
||||
CERT_DURATION = 100 * 365 * 24 * 60 * 60
|
||||
|
||||
CERTS_DIR = './pywb-certs/'
|
||||
|
||||
CERT_NAME = 'pywb https proxy replay CA'
|
||||
|
||||
CERT_CA_FILE = './pywb-ca.pem'
|
||||
|
||||
|
||||
#=================================================================
|
||||
class CertificateAuthority(object):
    """
    Utility class for signing individual certificate
    with a root cert.

    Static generate_ca_root() method for creating the root cert

    All certs saved on filesystem. Individual certs are stored
    in specified certs_dir and reused if previously created.
    """

    # Digest used for every signature made by this class. SHA-1 signed
    # certificates are rejected by current TLS clients, so SHA-256 is used.
    SIGN_DIGEST = 'sha256'

    def __init__(self, ca_file, certs_dir):
        """Load an existing root cert/key pair and prepare the host-cert dir.

        :param ca_file: path to the root .pem file (private key +
            certificate); falls back to CERT_CA_FILE when empty.
        :param certs_dir: directory for per-host .pem files; falls back to
            CERTS_DIR when empty. Created if it does not exist.
        """
        if not ca_file:
            ca_file = CERT_CA_FILE

        if not certs_dir:
            certs_dir = CERTS_DIR

        self.ca_file = ca_file
        self.certs_dir = certs_dir

        # read previously created root cert
        self.cert, self.key = self.read_pem(ca_file)

        if not os.path.exists(certs_dir):
            # makedirs (not mkdir) so a nested certs_dir also works
            os.makedirs(certs_dir)

    def get_cert_for_host(self, host, overwrite=False, wildcard=False):
        """Return ``(created, filename)`` for the .pem file of ``host``.

        An existing file is reused (``created`` is False) unless
        ``overwrite`` is set; otherwise a new cert signed by the root is
        generated and written to ``<certs_dir>/<host>.pem``.
        """
        host_filename = os.path.join(self.certs_dir, host) + '.pem'

        if not overwrite and os.path.exists(host_filename):
            return False, host_filename

        self.generate_host_cert(host, self.cert, self.key, host_filename,
                                wildcard)

        return True, host_filename

    def get_root_PKCS12(self):
        """Export the root certificate and key as a PKCS#12 blob."""
        # NOTE(review): the export bundles the CA *private key* together
        # with the certificate -- confirm this is intended wherever the
        # result is offered for download.
        p12 = crypto.PKCS12()
        p12.set_certificate(self.cert)
        p12.set_privatekey(self.key)
        return p12.export()

    @staticmethod
    def _make_cert(certname):
        """Create an unsigned X509 cert with CN ``certname``.

        The cert gets a random 64-bit serial number and is valid from now
        for CERT_DURATION seconds.
        """
        cert = crypto.X509()
        cert.set_version(2)
        cert.set_serial_number(random.randint(0, 2 ** 64 - 1))
        cert.get_subject().CN = certname

        cert.gmtime_adj_notBefore(0)
        cert.gmtime_adj_notAfter(CERT_DURATION)
        return cert

    @staticmethod
    def generate_ca_root(ca_file, certname=None, overwrite=False):
        """Create (or load) the self-signed root certificate.

        :param ca_file: path of the root .pem file; CERT_CA_FILE if empty.
        :param certname: CN of the root cert; CERT_NAME if empty.
        :param overwrite: regenerate even if ``ca_file`` already exists.
        :return: ``(created, cert, key)``; ``created`` is False when an
            existing file was loaded instead of generated.
        """
        if not certname:
            certname = CERT_NAME

        if not ca_file:
            ca_file = CERT_CA_FILE

        if not overwrite and os.path.exists(ca_file):
            cert, key = CertificateAuthority.read_pem(ca_file)
            return False, cert, key

        # Generate key
        key = crypto.PKey()
        key.generate_key(crypto.TYPE_RSA, 2048)

        # Generate cert
        cert = CertificateAuthority._make_cert(certname)

        # self-signed: issuer is the cert's own subject
        cert.set_issuer(cert.get_subject())
        cert.set_pubkey(key)
        cert.add_extensions([
            crypto.X509Extension(b"basicConstraints",
                                 True,
                                 b"CA:TRUE, pathlen:0"),

            crypto.X509Extension(b"keyUsage",
                                 True,
                                 b"keyCertSign, cRLSign"),

            crypto.X509Extension(b"subjectKeyIdentifier",
                                 False,
                                 b"hash",
                                 subject=cert),
        ])
        cert.sign(key, CertificateAuthority.SIGN_DIGEST)

        # Write cert + key
        CertificateAuthority.write_pem(ca_file, cert, key)
        return True, cert, key

    @staticmethod
    def generate_host_cert(host, root_cert, root_key, host_filename,
                           wildcard=False):
        """Generate a cert for ``host`` signed by the given root.

        With ``wildcard`` set, a subjectAltName extension covering both
        ``host`` and ``*.host`` is added. The new key and cert are written
        to ``host_filename`` and returned as ``(cert, key)``.
        """
        # Generate key
        key = crypto.PKey()
        key.generate_key(crypto.TYPE_RSA, 2048)

        # Generate CSR
        req = crypto.X509Req()
        req.get_subject().CN = host
        req.set_pubkey(key)
        req.sign(key, CertificateAuthority.SIGN_DIGEST)

        # Generate Cert
        cert = CertificateAuthority._make_cert(host)

        cert.set_issuer(root_cert.get_subject())
        cert.set_pubkey(req.get_pubkey())

        if wildcard:
            DNS = 'DNS:'
            alt_hosts = [DNS + host,
                         DNS + '*.' + host]

            alt_hosts = ', '.join(alt_hosts)

            cert.add_extensions([
                crypto.X509Extension('subjectAltName',
                                     False,
                                     alt_hosts)])

        cert.sign(root_key, CertificateAuthority.SIGN_DIGEST)

        # Write cert + key
        CertificateAuthority.write_pem(host_filename, cert, key)
        return cert, key

    @staticmethod
    def write_pem(filename, cert, key):
        """Write ``key`` followed by ``cert`` to ``filename`` in PEM form."""
        with open(filename, 'wb+') as f:
            f.write(crypto.dump_privatekey(FILETYPE_PEM, key))

            f.write(crypto.dump_certificate(FILETYPE_PEM, cert))

    @staticmethod
    def read_pem(filename):
        """Load ``(cert, key)`` from a PEM file holding both."""
        # Both loaders are handed the whole file contents, so read it once.
        with open(filename, 'r') as f:
            pem_data = f.read()

        cert = crypto.load_certificate(FILETYPE_PEM, pem_data)
        key = crypto.load_privatekey(FILETYPE_PEM, pem_data)

        return cert, key
|
||||
|
||||
|
||||
#=================================================================
|
||||
def main():
    """Command-line entry point for creating certificates.

    Without ``-r/--use-root`` a new root certificate is written to
    ``output_pem_file``. With ``-r`` the positional argument is passed as
    the host to CertificateAuthority.get_cert_for_host(), which creates a
    cert signed by the given root inside the certs dir.
    """
    parser = ArgumentParser(description='Cert Auth Cert Maker')

    parser.add_argument('output_pem_file', help='path to cert .pem file')

    parser.add_argument('-r', '--use-root',
                        help=('use specified root cert (.pem file) ' +
                              'to create signed cert'))

    parser.add_argument('-n', '--name', action='store', default=CERT_NAME,
                        help='name for root certificate')

    parser.add_argument('-d', '--certs-dir', default=CERTS_DIR)

    parser.add_argument('-f', '--force', action='store_true')

    parser.add_argument('-w', '--wildcard_cert', action='store_true',
                        help='add wildcard SAN to host: *.<host>, <host>')

    result = parser.parse_args()

    overwrite = result.force

    # Create a new signed certificate using specified root
    if result.use_root:
        # argparse stores '--wildcard_cert' under 'wildcard_cert'
        wildcard = result.wildcard_cert

        # the constructor only accepts ca_file and certs_dir
        ca = CertificateAuthority(ca_file=result.use_root,
                                  certs_dir=result.certs_dir)

        created, host_filename = ca.get_cert_for_host(result.output_pem_file,
                                                      overwrite, wildcard)

        if created:
            print('Created new cert "' + host_filename +
                  '" signed by root cert ' +
                  result.use_root)
        else:
            print('Cert "' + host_filename + '" already exists,' +
                  ' use -f to overwrite')

    # Create new root certificate
    else:
        created = CertificateAuthority.generate_ca_root(
            result.output_pem_file,
            result.name,
            overwrite)[0]

        if created:
            print('Created new root cert: "' + result.output_pem_file + '"')
        else:
            print('Root cert "' + result.output_pem_file +
                  '" already exists,' +
                  ' use -f to overwrite')
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@ -4,8 +4,17 @@ from archivalrouter import ArchivalRouter
|
||||
import urlparse
|
||||
import base64
|
||||
|
||||
import socket
|
||||
import ssl
|
||||
|
||||
from pywb.rewrite.url_rewriter import HttpsUrlRewriter
|
||||
from pywb.utils.statusandheaders import StatusAndHeaders
|
||||
from pywb.utils.wbexception import BadRequestException
|
||||
|
||||
from pywb.utils.bufferedreaders import BufferedReader
|
||||
|
||||
from certauth import CertificateAuthority
|
||||
|
||||
from proxy_resolvers import ProxyAuthResolver, CookieResolver
|
||||
|
||||
|
||||
#=================================================================
|
||||
@ -44,8 +53,17 @@ class ProxyRouter(object):
|
||||
for more details.
|
||||
"""
|
||||
|
||||
PAC_PATH = '/proxy.pac'
|
||||
BLOCK_SIZE = 4096
|
||||
DEF_MAGIC_NAME = 'pywb.proxy'
|
||||
|
||||
CERT_DL_PEM = '/pywb-ca.pem'
|
||||
CERT_DL_P12 = '/pywb-ca.p12'
|
||||
|
||||
EXTRA_HEADERS = {'cache-control': 'no-cache',
|
||||
'p3p': 'CP="NOI ADM DEV COM NAV OUR STP"'}
|
||||
|
||||
def __init__(self, routes, **kwargs):
|
||||
self.routes = routes
|
||||
self.hostpaths = kwargs.get('hostpaths')
|
||||
|
||||
self.error_view = kwargs.get('error_view')
|
||||
@ -54,61 +72,124 @@ class ProxyRouter(object):
|
||||
if proxy_options:
|
||||
proxy_options = proxy_options.get('proxy_options', {})
|
||||
|
||||
self.auth_msg = proxy_options.get('auth_msg',
|
||||
'Please enter name of a collection to use for proxy mode')
|
||||
self.magic_name = proxy_options.get('magic_name')
|
||||
if not self.magic_name:
|
||||
self.magic_name = self.DEF_MAGIC_NAME
|
||||
proxy_options['magic_name'] = self.magic_name
|
||||
|
||||
self.use_default_coll = proxy_options.get('use_default_coll', True)
|
||||
self.extra_headers = proxy_options.get('extra_headers')
|
||||
if not self.extra_headers:
|
||||
self.extra_headers = self.EXTRA_HEADERS
|
||||
proxy_options['extra_headers'] = self.extra_headers
|
||||
|
||||
if proxy_options.get('cookie_resolver'):
|
||||
self.resolver = CookieResolver(routes, proxy_options)
|
||||
else:
|
||||
self.resolver = ProxyAuthResolver(routes, proxy_options)
|
||||
|
||||
self.unaltered = proxy_options.get('unaltered_replay', False)
|
||||
|
||||
self.proxy_pac_path = proxy_options.get('pac_path', self.PAC_PATH)
|
||||
|
||||
|
||||
if not proxy_options.get('enable_https_proxy'):
|
||||
self.ca = None
|
||||
self.proxy_cert_dl_view = None
|
||||
return
|
||||
|
||||
# HTTPS Only Options
|
||||
ca_file = proxy_options.get('root_ca_file')
|
||||
|
||||
# attempt to create the root_ca_file if it doesn't exist
|
||||
# (generally recommended to create this separately)
|
||||
certname = proxy_options.get('root_ca_name')
|
||||
CertificateAuthority.generate_ca_root(certname, ca_file)
|
||||
|
||||
certs_dir = proxy_options.get('certs_dir')
|
||||
self.ca = CertificateAuthority(ca_file=ca_file,
|
||||
certs_dir=certs_dir)
|
||||
|
||||
self.proxy_cert_dl_view = proxy_options.get('proxy_cert_download_view')
|
||||
|
||||
def __call__(self, env):
|
||||
url = env['REL_REQUEST_URI']
|
||||
is_https = (env['REQUEST_METHOD'] == 'CONNECT')
|
||||
|
||||
if url.endswith('/proxy.pac'):
|
||||
return self.make_pac_response(env)
|
||||
# for non-https requests, check pac path and non-proxy urls
|
||||
if not is_https:
|
||||
url = env['REL_REQUEST_URI']
|
||||
|
||||
if not url.startswith('http://'):
|
||||
return None
|
||||
if url == self.proxy_pac_path:
|
||||
return self.make_pac_response(env)
|
||||
|
||||
proxy_auth = env.get('HTTP_PROXY_AUTHORIZATION')
|
||||
if not url.startswith(('http://', 'https://')):
|
||||
return None
|
||||
|
||||
env['pywb.proxy_scheme'] = 'http'
|
||||
|
||||
route = None
|
||||
coll = None
|
||||
matcher = None
|
||||
response = None
|
||||
ts = None
|
||||
|
||||
if proxy_auth:
|
||||
proxy_coll = self.read_basic_auth_coll(proxy_auth)
|
||||
# check resolver, for pre connect resolve
|
||||
if self.resolver.pre_connect:
|
||||
route, coll, matcher, ts, response = self.resolver.resolve(env)
|
||||
if response:
|
||||
return response
|
||||
|
||||
if not proxy_coll:
|
||||
return self.proxy_auth_coll_response()
|
||||
# do connect, then get updated url
|
||||
if is_https:
|
||||
response = self.handle_connect(env)
|
||||
if response:
|
||||
return response
|
||||
|
||||
proxy_coll = '/' + proxy_coll + '/'
|
||||
|
||||
for r in self.routes:
|
||||
matcher, c = r.is_handling(proxy_coll)
|
||||
if matcher:
|
||||
route = r
|
||||
coll = c
|
||||
break
|
||||
|
||||
if not route:
|
||||
return self.proxy_auth_coll_response()
|
||||
|
||||
# if 'use_default_coll' or only one collection, use that
|
||||
# for proxy mode
|
||||
elif self.use_default_coll or len(self.routes) == 1:
|
||||
route = self.routes[0]
|
||||
coll = self.routes[0].regex.pattern
|
||||
|
||||
# otherwise, require proxy auth 407 to select collection
|
||||
url = env['REL_REQUEST_URI']
|
||||
else:
|
||||
return self.proxy_auth_coll_response()
|
||||
parts = urlparse.urlsplit(env['REL_REQUEST_URI'])
|
||||
hostport = parts.netloc.split(':', 1)
|
||||
env['pywb.proxy_host'] = hostport[0]
|
||||
env['pywb.proxy_port'] = hostport[1] if len(hostport) == 2 else ''
|
||||
env['pywb.proxy_req_uri'] = parts.path
|
||||
if parts.query:
|
||||
env['pywb.proxy_req_uri'] += '?' + parts.query
|
||||
|
||||
env['pywb_proxy_magic'] = self.magic_name
|
||||
|
||||
# route (static) and other resources to archival replay
|
||||
if env['pywb.proxy_host'] == self.magic_name:
|
||||
env['REL_REQUEST_URI'] = env['pywb.proxy_req_uri']
|
||||
|
||||
# special case for proxy install
|
||||
response = self.handle_cert_install(env)
|
||||
if response:
|
||||
return response
|
||||
|
||||
return None
|
||||
|
||||
# check resolver, post connect
|
||||
if not self.resolver.pre_connect:
|
||||
route, coll, matcher, ts, response = self.resolver.resolve(env)
|
||||
if response:
|
||||
return response
|
||||
|
||||
host_prefix = env['pywb.proxy_scheme'] + '://' + self.magic_name
|
||||
rel_prefix = ''
|
||||
|
||||
# special case for proxy calendar
|
||||
if (env['pywb.proxy_host'] == 'query.' + self.magic_name):
|
||||
url = env['pywb.proxy_req_uri'][1:]
|
||||
rel_prefix = '/'
|
||||
|
||||
if ts is not None:
|
||||
url = ts + '/' + url
|
||||
|
||||
wbrequest = route.request_class(env,
|
||||
request_uri=url,
|
||||
wb_url_str=url,
|
||||
coll=coll,
|
||||
host_prefix=self.hostpaths[0],
|
||||
host_prefix=host_prefix,
|
||||
rel_prefix=rel_prefix,
|
||||
wburl_class=route.handler.get_wburl_type(),
|
||||
urlrewriter_class=HttpsUrlRewriter,
|
||||
use_abs_prefix=False,
|
||||
@ -119,13 +200,170 @@ class ProxyRouter(object):
|
||||
|
||||
if self.unaltered:
|
||||
wbrequest.wb_url.mod = 'id_'
|
||||
elif is_https:
|
||||
wbrequest.wb_url.mod = 'bn_'
|
||||
|
||||
return route.handler(wbrequest)
|
||||
response = route.handler(wbrequest)
|
||||
|
||||
if wbrequest.wb_url and wbrequest.wb_url.is_replay():
|
||||
response.status_headers.replace_headers(self.extra_headers)
|
||||
|
||||
return response
|
||||
|
||||
def get_request_socket(self, env):
|
||||
if not self.ca:
|
||||
return None
|
||||
|
||||
sock = None
|
||||
|
||||
if env.get('uwsgi.version'):
|
||||
try:
|
||||
import uwsgi
|
||||
fd = uwsgi.connection_fd()
|
||||
conn = socket.fromfd(fd, socket.AF_INET, socket.SOCK_STREAM)
|
||||
sock = socket.socket(_sock=conn)
|
||||
except Exception:
|
||||
pass
|
||||
elif env.get('gunicorn.socket'):
|
||||
sock = env['gunicorn.socket']
|
||||
|
||||
if not sock:
|
||||
# attempt to find socket from wsgi.input
|
||||
input_ = env.get('wsgi.input')
|
||||
if input_ and hasattr(input_, '_sock'):
|
||||
sock = socket.socket(_sock=input_._sock)
|
||||
|
||||
return sock
|
||||
|
||||
def handle_connect(self, env):
|
||||
sock = self.get_request_socket(env)
|
||||
if not sock:
|
||||
return WbResponse.text_response('HTTPS Proxy Not Supported',
|
||||
'405 HTTPS Proxy Not Supported')
|
||||
|
||||
sock.send('HTTP/1.0 200 Connection Established\r\n')
|
||||
sock.send('Server: pywb proxy\r\n')
|
||||
sock.send('\r\n')
|
||||
|
||||
hostname, port = env['REL_REQUEST_URI'].split(':')
|
||||
cert_host = hostname
|
||||
|
||||
host_parts = hostname.split('.', 1)
|
||||
if len(host_parts) == 2 and '.' in host_parts[1]:
|
||||
cert_host = host_parts[1]
|
||||
|
||||
created, certfile = self.ca.get_cert_for_host(cert_host,
|
||||
wildcard=True)
|
||||
|
||||
try:
|
||||
ssl_sock = ssl.wrap_socket(sock,
|
||||
server_side=True,
|
||||
certfile=certfile,
|
||||
ciphers="ALL",
|
||||
suppress_ragged_eofs=False,
|
||||
#ssl_version=ssl.PROTOCOL_TLSv1)
|
||||
ssl_version=ssl.PROTOCOL_SSLv23)
|
||||
env['pywb.proxy_ssl_sock'] = ssl_sock
|
||||
|
||||
buffreader = BufferedReader(ssl_sock, block_size=self.BLOCK_SIZE)
|
||||
|
||||
statusline = buffreader.readline().rstrip()
|
||||
|
||||
except Exception as se:
|
||||
raise BadRequestException(se.message)
|
||||
|
||||
statusparts = statusline.split(' ')
|
||||
|
||||
if len(statusparts) < 3:
|
||||
raise BadRequestException('Invalid Proxy Request: ' + statusline)
|
||||
|
||||
env['REQUEST_METHOD'] = statusparts[0]
|
||||
env['REL_REQUEST_URI'] = ('https://' +
|
||||
env['REL_REQUEST_URI'].replace(':443', '') +
|
||||
statusparts[1])
|
||||
|
||||
env['SERVER_PROTOCOL'] = statusparts[2].strip()
|
||||
|
||||
env['pywb.proxy_scheme'] = 'https'
|
||||
|
||||
env['pywb.proxy_host'] = hostname
|
||||
env['pywb.proxy_port'] = port
|
||||
env['pywb.proxy_req_uri'] = statusparts[1]
|
||||
|
||||
queryparts = env['REL_REQUEST_URI'].split('?', 1)
|
||||
env['PATH_INFO'] = queryparts[0]
|
||||
env['QUERY_STRING'] = queryparts[1] if len(queryparts) > 1 else ''
|
||||
|
||||
while True:
|
||||
line = buffreader.readline()
|
||||
if line:
|
||||
line = line.rstrip()
|
||||
|
||||
if not line:
|
||||
break
|
||||
|
||||
parts = line.split(':', 1)
|
||||
if len(parts) < 2:
|
||||
continue
|
||||
|
||||
name = parts[0].strip()
|
||||
value = parts[1].strip()
|
||||
|
||||
name = name.replace('-', '_').upper()
|
||||
|
||||
if not name in ('CONTENT_LENGTH', 'CONTENT_TYPE'):
|
||||
name = 'HTTP_' + name
|
||||
|
||||
env[name] = value
|
||||
|
||||
remain = buffreader.rem_length()
|
||||
if remain > 0:
|
||||
remainder = buffreader.read(self.BLOCK_SIZE)
|
||||
env['wsgi.input'] = BufferedReader(ssl_sock,
|
||||
block_size=self.BLOCK_SIZE,
|
||||
starting_data=remainder)
|
||||
|
||||
def handle_cert_install(self, env):
|
||||
if env['pywb.proxy_req_uri'] in ('/', '/index.html', '/index.html'):
|
||||
available = (self.ca is not None)
|
||||
|
||||
if self.proxy_cert_dl_view:
|
||||
return (self.proxy_cert_dl_view.
|
||||
render_response(available=available,
|
||||
pem_path=self.CERT_DL_PEM,
|
||||
p12_path=self.CERT_DL_P12))
|
||||
else:
|
||||
return None
|
||||
|
||||
elif env['pywb.proxy_req_uri'] == self.CERT_DL_PEM:
|
||||
if not self.ca:
|
||||
return None
|
||||
|
||||
buff = ''
|
||||
with open(self.ca.ca_file) as fh:
|
||||
buff = fh.read()
|
||||
|
||||
content_type = 'application/x-x509-ca-cert'
|
||||
|
||||
return WbResponse.text_response(buff,
|
||||
content_type=content_type)
|
||||
|
||||
elif env['pywb.proxy_req_uri'] == self.CERT_DL_P12:
|
||||
if not self.ca:
|
||||
return None
|
||||
|
||||
buff = self.ca.get_root_PKCS12()
|
||||
|
||||
content_type = 'application/x-pkcs12'
|
||||
|
||||
return WbResponse.text_response(buff,
|
||||
content_type=content_type)
|
||||
else:
|
||||
return None
|
||||
|
||||
# Proxy Auto-Config (PAC) script for the proxy
|
||||
def make_pac_response(self, env):
|
||||
import os
|
||||
hostname = os.environ.get('PYWB_HOST_NAME')
|
||||
hostname = env.get('HTTP_HOST')
|
||||
if not hostname:
|
||||
server_hostport = env['SERVER_NAME'] + ':' + env['SERVER_PORT']
|
||||
hostonly = env['SERVER_NAME']
|
||||
@ -143,33 +381,8 @@ class ProxyRouter(object):
|
||||
|
||||
buff += direct.format(hostonly)
|
||||
|
||||
#buff += '\n return "PROXY {0}";\n}}\n'.format(self.hostpaths[0])
|
||||
buff += '\n return "PROXY {0}";\n}}\n'.format(server_hostport)
|
||||
|
||||
content_type = 'application/x-ns-proxy-autoconfig'
|
||||
|
||||
return WbResponse.text_response(buff, content_type=content_type)
|
||||
|
||||
def proxy_auth_coll_response(self):
|
||||
proxy_msg = 'Basic realm="{0}"'.format(self.auth_msg)
|
||||
|
||||
headers = [('Content-Type', 'text/plain'),
|
||||
('Proxy-Authenticate', proxy_msg)]
|
||||
|
||||
status_headers = StatusAndHeaders('407 Proxy Authentication', headers)
|
||||
|
||||
value = self.auth_msg
|
||||
|
||||
return WbResponse(status_headers, value=[value])
|
||||
|
||||
@staticmethod
|
||||
def read_basic_auth_coll(value):
|
||||
parts = value.split(' ')
|
||||
if parts[0].lower() != 'basic':
|
||||
return ''
|
||||
|
||||
if len(parts) != 2:
|
||||
return ''
|
||||
|
||||
user_pass = base64.b64decode(parts[1])
|
||||
return user_pass.split(':')[0]
|
||||
|
340
pywb/framework/proxy_resolvers.py
Normal file
340
pywb/framework/proxy_resolvers.py
Normal file
@ -0,0 +1,340 @@
|
||||
from wbrequestresponse import WbResponse, WbRequest
|
||||
from pywb.utils.statusandheaders import StatusAndHeaders
|
||||
from pywb.rewrite.wburl import WbUrl
|
||||
|
||||
import urlparse
|
||||
import base64
|
||||
import os
|
||||
|
||||
try:
|
||||
import uwsgi
|
||||
uwsgi_cache = True
|
||||
except ImportError:
|
||||
uwsgi_cache = False
|
||||
|
||||
|
||||
#=================================================================
|
||||
class UwsgiCache(object):
    """Mapping-style wrapper that forwards to the uwsgi cache functions."""

    def __contains__(self, key):
        """Forward ``key in cache`` to uwsgi.cache_exists."""
        return uwsgi.cache_exists(key)

    def __getitem__(self, key):
        """Forward ``cache[key]`` to uwsgi.cache_get."""
        return uwsgi.cache_get(key)

    def __setitem__(self, key, new_value):
        """Forward ``cache[key] = new_value`` to uwsgi.cache_update."""
        uwsgi.cache_update(key, new_value)

    def __delitem__(self, key):
        """Forward ``del cache[key]`` to uwsgi.cache_del."""
        uwsgi.cache_del(key)
|
||||
|
||||
|
||||
#=================================================================
|
||||
class BaseCollResolver(object):
    """Base class for picking the route/collection of a proxy request.

    This class calls ``get_proxy_coll_ts(env)`` and
    ``select_coll_response(env)`` but does not define them.
    """

    def __init__(self, routes, config):
        self.routes = routes
        self.pre_connect = config.get('pre_connect', False)
        self.use_default_coll = config.get('use_default_coll', True)

    def resolve(self, env):
        """Return ``(route, coll, matcher, ts, response)`` for ``env``.

        When no collection can be chosen, the first four items are None
        and ``response`` is the collection-selection response; otherwise
        ``response`` is None.
        """
        requested, ts = self.get_proxy_coll_ts(env)

        # an empty string means the collection could not be parsed
        if requested == '':
            return None, None, None, None, self.select_coll_response(env)

        # a string-valued 'use_default_coll' names the fallback collection
        if requested is None and isinstance(self.use_default_coll, str):
            requested = self.use_default_coll

        if requested:
            coll_path = '/' + requested + '/'

            for candidate in self.routes:
                matcher, coll = candidate.is_handling(coll_path)
                if matcher:
                    return candidate, coll, matcher, ts, None

            # no route handles the requested collection
            return None, None, None, None, self.select_coll_response(env)

        # nothing requested: use the first route if defaults are enabled
        # or it is the only route
        if self.use_default_coll == True or len(self.routes) == 1:
            first_route = self.routes[0]
            return first_route, first_route.path, None, ts, None

        # otherwise, ask the client to select a collection
        return None, None, None, None, self.select_coll_response(env)
|
||||
|
||||
|
||||
#=================================================================
|
||||
class ProxyAuthResolver(BaseCollResolver):
    """Resolve the collection from the Proxy-Authorization header.

    The user name of the HTTP Basic proxy credentials is used as the
    collection name. When a collection must be selected, a 407 response
    carrying a Proxy-Authenticate challenge is returned.
    """

    DEFAULT_MSG = 'Please enter name of a collection to use with proxy mode'

    def __init__(self, routes, config):
        # proxy auth is resolved before any CONNECT handling
        config['pre_connect'] = True
        super(ProxyAuthResolver, self).__init__(routes, config)
        self.auth_msg = config.get('auth_msg', self.DEFAULT_MSG)

    def get_proxy_coll_ts(self, env):
        """Return ``(coll, None)``; ``coll`` is None without an auth header
        and '' when the header cannot be parsed."""
        proxy_auth = env.get('HTTP_PROXY_AUTHORIZATION')

        if not proxy_auth:
            return None, None

        proxy_coll = self.read_basic_auth_coll(proxy_auth)
        return proxy_coll, None

    def select_coll_response(self, env):
        """Build the 407 response asking for Basic proxy credentials."""
        proxy_msg = 'Basic realm="{0}"'.format(self.auth_msg)

        headers = [('Content-Type', 'text/plain'),
                   ('Proxy-Authenticate', proxy_msg)]

        status_headers = StatusAndHeaders('407 Proxy Authentication', headers)

        value = self.auth_msg

        return WbResponse(status_headers, value=[value])

    @staticmethod
    def read_basic_auth_coll(value):
        """Extract the user name from a ``Basic <base64>`` header value.

        Returns '' when the value is not a well-formed Basic credential,
        including when the base64 payload cannot be decoded, so that the
        caller answers with the selection response instead of failing.
        """
        parts = value.split(' ')
        if parts[0].lower() != 'basic':
            return ''

        if len(parts) != 2:
            return ''

        try:
            user_pass = base64.b64decode(parts[1])
            # b64decode yields bytes where str is a text type
            if not isinstance(user_pass, str):
                user_pass = user_pass.decode('utf-8')
        except (TypeError, ValueError):
            # malformed base64 (TypeError or binascii.Error) or bytes
            # that are not valid text
            return ''

        return user_pass.split(':')[0]
|
||||
|
||||
|
||||
#=================================================================
|
||||
# Experimental CookieResolver
|
||||
class CookieResolver(BaseCollResolver): # pragma: no cover
|
||||
def __init__(self, routes, config):
|
||||
config['pre_connect'] = False
|
||||
super(CookieResolver, self).__init__(routes, config)
|
||||
self.magic_name = config['magic_name']
|
||||
self.sethost_prefix = '-sethost.' + self.magic_name + '.'
|
||||
self.set_prefix = '-set.' + self.magic_name
|
||||
|
||||
self.cookie_name = config.get('cookie_name', '__pywb_coll')
|
||||
self.proxy_select_view = config.get('proxy_select_view')
|
||||
|
||||
self.extra_headers = config.get('extra_headers')
|
||||
|
||||
if uwsgi_cache:
|
||||
self.cache = UwsgiCache()
|
||||
else:
|
||||
self.cache = {}
|
||||
|
||||
def get_proxy_coll_ts(self, env):
|
||||
coll, ts, sesh_id = self.get_coll(env)
|
||||
return coll, ts
|
||||
|
||||
def select_coll_response(self, env):
|
||||
return self.make_magic_response('auto',
|
||||
env['REL_REQUEST_URI'],
|
||||
env)
|
||||
|
||||
def resolve(self, env):
|
||||
server_name = env['pywb.proxy_host']
|
||||
|
||||
if ('.' + self.magic_name) in server_name:
|
||||
response = self.handle_magic_page(env)
|
||||
if response:
|
||||
return None, None, None, None, response
|
||||
|
||||
return super(CookieResolver, self).resolve(env)
|
||||
|
||||
def handle_magic_page(self, env):
|
||||
request_url = env['REL_REQUEST_URI']
|
||||
parts = urlparse.urlsplit(request_url)
|
||||
server_name = env['pywb.proxy_host']
|
||||
|
||||
path_url = parts.path[1:]
|
||||
if parts.query:
|
||||
path_url += '?' + parts.query
|
||||
|
||||
if server_name.startswith('auto'):
|
||||
coll, ts, sesh_id = self.get_coll(env)
|
||||
|
||||
if coll:
|
||||
return self.make_sethost_cookie_response(sesh_id, path_url, env)
|
||||
else:
|
||||
return self.make_magic_response('select', path_url, env)
|
||||
|
||||
elif server_name.startswith('query.'):
|
||||
wb_url = WbUrl(path_url)
|
||||
|
||||
# only dealing with specific timestamp setting
|
||||
if wb_url.is_query():
|
||||
return None
|
||||
|
||||
coll, ts, sesh_id = self.get_coll(env)
|
||||
if not coll:
|
||||
return self.make_magic_response('select', path_url, env)
|
||||
|
||||
self.set_ts(sesh_id, wb_url.timestamp)
|
||||
return self.make_redir_response(wb_url.url)
|
||||
|
||||
elif server_name.endswith(self.set_prefix):
|
||||
old_sesh_id = self.extract_client_cookie(env, self.cookie_name)
|
||||
sesh_id = self.create_renew_sesh_id(old_sesh_id)
|
||||
|
||||
if sesh_id != old_sesh_id:
|
||||
headers = self.make_cookie_headers(sesh_id, self.magic_name)
|
||||
else:
|
||||
headers = None
|
||||
|
||||
coll = server_name[:-len(self.set_prefix)]
|
||||
|
||||
# set sesh value
|
||||
self.set_coll(sesh_id, coll)
|
||||
|
||||
return self.make_sethost_cookie_response(sesh_id, path_url, env,
|
||||
headers=headers)
|
||||
|
||||
elif self.sethost_prefix in server_name:
|
||||
inx = server_name.find(self.sethost_prefix)
|
||||
sesh_id = server_name[:inx]
|
||||
|
||||
domain = server_name[inx + len(self.sethost_prefix):]
|
||||
|
||||
headers = self.make_cookie_headers(sesh_id, domain)
|
||||
|
||||
full_url = env['pywb.proxy_scheme'] + '://' + domain
|
||||
full_url += '/' + path_url
|
||||
return self.make_redir_response(full_url, headers=headers)
|
||||
|
||||
elif 'select.' in server_name:
|
||||
if not self.proxy_select_view:
|
||||
return WbResponse.text_response('select text for ' + path_url)
|
||||
|
||||
coll, ts, sesh_id = self.get_coll(env)
|
||||
|
||||
#scheme = env['pywb.proxy_scheme'] + '://'
|
||||
route_temp = '-set.' + self.magic_name + '/' + path_url
|
||||
|
||||
try:
|
||||
return (self.proxy_select_view.
|
||||
render_response(routes=self.routes,
|
||||
route_temp=route_temp,
|
||||
coll=coll,
|
||||
url=path_url))
|
||||
except Exception as exc:
|
||||
raise
|
||||
|
||||
#else:
|
||||
# msg = 'Invalid Magic Path: ' + url
|
||||
# print msg
|
||||
# return WbResponse.text_response(msg, status='404 Not Found')
|
||||
|
||||
def make_cookie_headers(self, sesh_id, domain):
|
||||
cookie_val = '{0}={1}; Path=/; Domain=.{2}; HttpOnly'
|
||||
cookie_val = cookie_val.format(self.cookie_name, sesh_id, domain)
|
||||
headers = [('Set-Cookie', cookie_val)]
|
||||
return headers
|
||||
|
||||
def make_sethost_cookie_response(self, sesh_id, path_url,
|
||||
env, headers=None):
|
||||
if '://' not in path_url:
|
||||
path_url = 'http://' + path_url
|
||||
|
||||
path_parts = urlparse.urlsplit(path_url)
|
||||
|
||||
new_url = path_parts.path[1:]
|
||||
if path_parts.query:
|
||||
new_url += '?' + path_parts.query
|
||||
|
||||
return self.make_magic_response(sesh_id + '-sethost', new_url, env,
|
||||
suffix=path_parts.netloc,
|
||||
headers=headers)
|
||||
|
||||
|
||||
def make_magic_response(self, prefix, url, env,
|
||||
suffix=None, headers=None):
|
||||
full_url = env['pywb.proxy_scheme'] + '://' + prefix + '.'
|
||||
full_url += self.magic_name
|
||||
if suffix:
|
||||
full_url += '.' + suffix
|
||||
full_url += '/' + url
|
||||
return self.make_redir_response(full_url, headers=headers)
|
||||
|
||||
def set_coll(self, sesh_id, coll):
|
||||
self.cache[sesh_id + ':c'] = coll
|
||||
|
||||
def set_ts(self, sesh_id, ts):
|
||||
if ts:
|
||||
self.cache[sesh_id + ':t'] = ts
|
||||
# this ensures that omitting timestamp will reset to latest
|
||||
# capture by deleting the cache entry
|
||||
else:
|
||||
del self.cache[sesh_id + ':t']
|
||||
|
||||
def get_coll(self, env):
|
||||
sesh_id = self.extract_client_cookie(env, self.cookie_name)
|
||||
|
||||
coll = None
|
||||
ts = None
|
||||
if sesh_id:
|
||||
coll = self.cache[sesh_id + ':c']
|
||||
try:
|
||||
ts = self.cache[sesh_id + ':t']
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
return coll, ts, sesh_id
|
||||
|
||||
def create_renew_sesh_id(self, sesh_id, force=False):
|
||||
#if sesh_id in self.cache and not force:
|
||||
if sesh_id and ((sesh_id + ':c') in self.cache) and not force:
|
||||
return sesh_id
|
||||
|
||||
sesh_id = base64.b32encode(os.urandom(5)).lower()
|
||||
return sesh_id
|
||||
|
||||
def make_redir_response(self, url, headers=None):
    """
    Build a redirect response to url via WbResponse.redir_response().

    Each (name, value) pair of self.extra_headers, when set, is appended
    to headers; a non-empty list passed by the caller is extended in place.
    """
    headers = headers or []

    if self.extra_headers:
        headers.extend(self.extra_headers.iteritems())

    return WbResponse.redir_response(url, headers=headers)
|
||||
|
||||
@staticmethod
|
||||
def extract_client_cookie(env, cookie_name):
    """
    Return the value of the cookie named cookie_name, or None.

    The cookie is read from env['HTTP_COOKIE']. The header is split into
    ';'-separated 'name=value' pairs and the name must match cookie_name
    exactly, so a cookie whose name or value merely contains cookie_name
    is not picked up. Everything after the first '=' is the value, with
    surrounding whitespace stripped. None is returned when the header is
    missing or empty, or when no pair with that name is present.
    """
    cookie_header = env.get('HTTP_COOKIE')
    if not cookie_header:
        return None

    for pair in cookie_header.split(';'):
        name, sep, value = pair.partition('=')
        # a bare token without '=' carries no value and is skipped
        if sep and name.strip() == cookie_name:
            return value.strip()

    return None
|
@ -50,6 +50,42 @@ class WSGIApp(object):
|
||||
|
||||
# Top-level wsgi application
|
||||
def __call__(self, env, start_response):
    """
    WSGI entry point: CONNECT requests are passed to handle_connect(),
    every other request method to handle_methods().
    """
    if env['REQUEST_METHOD'] != 'CONNECT':
        return self.handle_methods(env, start_response)

    return self.handle_connect(env, start_response)
|
||||
|
||||
def handle_connect(self, env, start_response):
    """
    Handle a CONNECT request, replying over the proxy ssl socket if set.

    The request is processed by handle_methods(). If
    env['pywb.proxy_ssl_sock'] is set afterwards, the status line, headers
    and body are written directly to that socket, the socket is closed,
    and the WSGI server is given the same status line with no headers and
    an empty body. Otherwise the iterable from handle_methods() is
    returned unchanged.

    The socket and the response iterable are closed even if writing the
    body fails.
    """
    def ssl_start_response(statusline, headers):
        ssl_sock = env.get('pywb.proxy_ssl_sock')
        if not ssl_sock:
            start_response(statusline, headers)
            return

        # kept so the real start_response can be called with it once the
        # body has been written to the socket
        env['pywb.proxy_statusline'] = statusline

        ssl_sock.write('HTTP/1.1 ' + statusline + '\r\n')
        for name, value in headers:
            ssl_sock.write(name + ': ' + value + '\r\n')

    resp_iter = self.handle_methods(env, ssl_start_response)

    ssl_sock = env.get('pywb.proxy_ssl_sock')
    if not ssl_sock:
        return resp_iter

    try:
        # blank line ending the header block written by ssl_start_response
        ssl_sock.write('\r\n')

        for obj in resp_iter:
            if obj:
                ssl_sock.write(obj)
    finally:
        try:
            # the body is consumed here rather than by the WSGI server,
            # so closing the iterable is this method's responsibility
            close = getattr(resp_iter, 'close', None)
            if close:
                close()
        finally:
            ssl_sock.close()

    start_response(env['pywb.proxy_statusline'], [])

    return []
|
||||
|
||||
def handle_methods(self, env, start_response):
|
||||
if env.get('SCRIPT_NAME') or not env.get('REQUEST_URI'):
|
||||
env['REL_REQUEST_URI'] = rel_request_uri(env)
|
||||
else:
|
||||
@ -89,22 +125,29 @@ class WSGIApp(object):
|
||||
else:
|
||||
err_url = None
|
||||
|
||||
try:
|
||||
err_msg = exc.message.encode('utf-8')
|
||||
except Exception:
|
||||
err_msg = exc.message
|
||||
err_url = ''
|
||||
|
||||
if print_trace:
|
||||
import traceback
|
||||
err_details = traceback.format_exc(exc)
|
||||
print err_details
|
||||
else:
|
||||
logging.info(str(exc))
|
||||
logging.info(err_msg)
|
||||
err_details = None
|
||||
|
||||
if error_view:
|
||||
return error_view.render_response(exc_type=type(exc).__name__,
|
||||
err_msg=str(exc),
|
||||
err_msg=err_msg,
|
||||
err_details=err_details,
|
||||
status=status,
|
||||
env=env,
|
||||
err_url=err_url)
|
||||
else:
|
||||
return WbResponse.text_response(status + ' Error: ' + str(exc),
|
||||
return WbResponse.text_response(status + ' Error: ' + err_msg,
|
||||
status=status)
|
||||
|
||||
#=================================================================
|
||||
@ -145,6 +188,10 @@ def init_app(init_func, load_yaml=True, config_file=None, config={}):
|
||||
def start_wsgi_server(the_app, name, default_port=None): # pragma: no cover
|
||||
from wsgiref.simple_server import make_server
|
||||
|
||||
# disable is_hop_by_hop restrictions
|
||||
import wsgiref.handlers
|
||||
wsgiref.handlers.is_hop_by_hop = lambda x: False
|
||||
|
||||
port = the_app.port
|
||||
|
||||
if not port:
|
||||
|
@ -37,7 +37,8 @@ class HeaderRewriter:
|
||||
|
||||
ENCODING_HEADERS = ['content-encoding']
|
||||
|
||||
REMOVE_HEADERS = ['transfer-encoding']
|
||||
REMOVE_HEADERS = ['transfer-encoding', 'content-security-policy',
|
||||
'strict-transport-security']
|
||||
|
||||
PROXY_NO_REWRITE_HEADERS = ['content-length']
|
||||
|
||||
@ -90,7 +91,10 @@ class HeaderRewriter:
|
||||
new_headers = []
|
||||
removed_header_dict = {}
|
||||
|
||||
cookie_rewriter = urlrewriter.get_cookie_rewriter()
|
||||
if urlrewriter:
|
||||
cookie_rewriter = urlrewriter.get_cookie_rewriter()
|
||||
else:
|
||||
cookie_rewriter = None
|
||||
|
||||
for (name, value) in headers:
|
||||
|
||||
@ -99,7 +103,7 @@ class HeaderRewriter:
|
||||
if lowername in self.PROXY_HEADERS:
|
||||
new_headers.append((name, value))
|
||||
|
||||
elif lowername in self.URL_REWRITE_HEADERS:
|
||||
elif urlrewriter and lowername in self.URL_REWRITE_HEADERS:
|
||||
new_headers.append((name, urlrewriter.rewrite(value)))
|
||||
|
||||
elif lowername in self.ENCODING_HEADERS:
|
||||
@ -109,7 +113,8 @@ class HeaderRewriter:
|
||||
new_headers.append((name, value))
|
||||
|
||||
elif lowername in self.REMOVE_HEADERS:
|
||||
removed_header_dict[lowername] = value
|
||||
removed_header_dict[lowername] = value
|
||||
new_headers.append((self.header_prefix + name, value))
|
||||
|
||||
elif (lowername in self.PROXY_NO_REWRITE_HEADERS and
|
||||
not content_rewritten):
|
||||
@ -120,7 +125,9 @@ class HeaderRewriter:
|
||||
cookie_list = cookie_rewriter.rewrite(value)
|
||||
new_headers.extend(cookie_list)
|
||||
|
||||
else:
|
||||
elif urlrewriter:
|
||||
new_headers.append((self.header_prefix + name, value))
|
||||
else:
|
||||
new_headers.append((name, value))
|
||||
|
||||
return (new_headers, removed_header_dict)
|
||||
|
@ -69,6 +69,10 @@ class RewriteContent:
|
||||
status_headers, stream = self.sanitize_content(headers, stream)
|
||||
return (status_headers, self.stream_to_gen(stream), False)
|
||||
|
||||
|
||||
if wb_url.is_banner_only:
|
||||
urlrewriter = None
|
||||
|
||||
(rewritten_headers, stream) = self.rewrite_headers(urlrewriter,
|
||||
headers,
|
||||
stream)
|
||||
|
@ -40,17 +40,19 @@ HTTP Headers Rewriting
|
||||
'removed_header_dict': {'content-encoding': 'gzip',
|
||||
'transfer-encoding': 'chunked'},
|
||||
'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [ ('X-Archive-Orig-Content-Length', '199999'),
|
||||
('Content-Type', 'text/javascript')]),
|
||||
('Content-Type', 'text/javascript'),
|
||||
('X-Archive-Orig-Transfer-Encoding', 'chunked')]),
|
||||
'text_type': 'js'}
|
||||
|
||||
# Binary -- transfer-encoding removed
|
||||
# Binary -- transfer-encoding rewritten
|
||||
>>> _test_headers([('Content-Length', '200000'), ('Content-Type', 'image/png'), ('Set-Cookie', 'foo=bar; Path=/;'), ('Content-Encoding', 'gzip'), ('Transfer-Encoding', 'chunked')])
|
||||
{'charset': None,
|
||||
'removed_header_dict': {'transfer-encoding': 'chunked'},
|
||||
'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [ ('Content-Length', '200000'),
|
||||
('Content-Type', 'image/png'),
|
||||
('Set-Cookie', 'foo=bar; Path=/web/20131010/http://example.com/'),
|
||||
('Content-Encoding', 'gzip')]),
|
||||
('Content-Encoding', 'gzip'),
|
||||
('X-Archive-Orig-Transfer-Encoding', 'chunked')]),
|
||||
'text_type': None}
|
||||
|
||||
"""
|
||||
|
@ -142,7 +142,7 @@ class HttpsUrlRewriter(UrlRewriter):
|
||||
else:
|
||||
return url
|
||||
|
||||
def get_timestamp_url(self, timestamp, url):
|
||||
def get_timestamp_url(self, timestamp, url=''):
|
||||
return url
|
||||
|
||||
def get_abs_url(self, url=''):
|
||||
|
@ -72,6 +72,14 @@ function init_banner() {
|
||||
}
|
||||
|
||||
text += "<b id='_wb_capture_info'>" + capture_str + "</b>";
|
||||
|
||||
if (wbinfo.proxy_magic && wbinfo.url) {
|
||||
var select_url = wbinfo.proxy_magic + "/" + wbinfo.url;
|
||||
var query_url = wbinfo.proxy_magic + "/*/" + wbinfo.url;
|
||||
text += ' <a href="//query.' + query_url + '">All Capture Times</a>';
|
||||
text += '<br/>'
|
||||
text += 'From collection <b>"' + wbinfo.coll + '"</b> <a href="//select.' + select_url + '">All Collections</a>';
|
||||
}
|
||||
|
||||
banner.innerHTML = text;
|
||||
|
||||
|
@ -9,3 +9,10 @@
|
||||
</pre>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
{% if env.pywb_proxy_magic and err_url and status == '404 Not Found' %}
|
||||
<p>
|
||||
<a href="//select.{{ env.pywb_proxy_magic }}/{{ err_url }}">Try Different Collection</a>
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
{% if rule.js_rewrite_location and include_wombat %}
|
||||
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wombat.js'> </script>
|
||||
<script>
|
||||
{% set urlsplit = cdx['original'] | urlsplit %}
|
||||
{% set urlsplit = cdx.original | urlsplit %}
|
||||
WB_wombat_init("{{ wbrequest.wb_prefix}}",
|
||||
"{{ cdx['timestamp'] if include_ts else ''}}",
|
||||
"{{ urlsplit.scheme }}",
|
||||
@ -12,13 +12,15 @@
|
||||
{% endif %}
|
||||
<script>
|
||||
wbinfo = {}
|
||||
wbinfo.url = "{{ cdx.original }}";
|
||||
wbinfo.timestamp = "{{ cdx.timestamp }}";
|
||||
wbinfo.prefix = "{{ wbrequest.wb_prefix }}";
|
||||
wbinfo.is_embed = {{"true" if wbrequest.wb_url.is_embed else "false"}};
|
||||
wbinfo.is_frame_mp = {{"true" if wbrequest.wb_url.mod == 'mp_' else "false"}};
|
||||
wbinfo.canon_url = "{{ canon_url }}";
|
||||
wbinfo.is_live = {{ "true" if cdx.is_live else "false" }};
|
||||
wbinfo.is_proxy_mode = {{ "true" if wbrequest.options.is_proxy else "false" }};
|
||||
wbinfo.coll = "{{ wbrequest.coll }}";
|
||||
wbinfo.proxy_magic = "{{ wbrequest.env.pywb_proxy_magic }}";
|
||||
</script>
|
||||
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wb.js'> </script>
|
||||
<link rel='stylesheet' href='{{ wbrequest.host_prefix }}/{{ static_path }}/wb.css'/>
|
||||
|
14
pywb/ui/proxy_cert_download.html
Normal file
14
pywb/ui/proxy_cert_download.html
Normal file
@ -0,0 +1,14 @@
|
||||
<h2>HTTPS Certificate For PyWb Web Archive Replay</h2>
|
||||
{% if not available %}
|
||||
<p>Sorry, HTTPS support is not configured for this proxy. However, the proxy should work in HTTP mode.</p>
|
||||
{% else %}
|
||||
<p>Download for all platforms (except Windows):</p>
|
||||
<p><b><a href="{{ pem_path }}">Download Certificate (All except Windows)</a></b></p>
|
||||
|
||||
<p>(If you see the <i>Already Installed</i> message, then no further action is necessary and you may start browsing!)</p>
|
||||
{% endif %}
|
||||
|
||||
<p>Download for Windows platforms:</p>
|
||||
<p><b><a href="{{ p12_path }}">Download Certificate (Windows Only)</a></b></p>
|
||||
|
||||
|
25
pywb/ui/proxy_select.html
Normal file
25
pywb/ui/proxy_select.html
Normal file
@ -0,0 +1,25 @@
|
||||
<html>
|
||||
<body>
|
||||
<h2>Pywb Proxy Collection Selector</h2>
|
||||
{% if coll %}
|
||||
<p>
|
||||
Current collection is: <b>{{ coll }}</b>
|
||||
</p>
|
||||
{% else %}
|
||||
<p>You have attempted to load the url <b>{{ url }}</b>, but there are multiple collections available.</p>
|
||||
{% endif %}
|
||||
|
||||
<p>Please select which collection you would like to use (You will be redirected back to <b>{{ url }}</b>):
|
||||
</p>
|
||||
|
||||
<ul>
|
||||
{% for route in routes %}
|
||||
{% if route.path and route | is_wb_handler %}
|
||||
<li><a href="//{{ route.path }}{{ route_temp }}">{{ route.path }}</a></li>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
</ul>
|
||||
|
||||
<p>(Once selected, you will not be prompted again, however you can return to this page to switch collections.)</p>
|
||||
</body>
|
||||
</html>
|
@ -3,6 +3,7 @@ Representation and parsing of HTTP-style status + headers
|
||||
"""
|
||||
|
||||
import pprint
|
||||
from copy import copy
|
||||
|
||||
|
||||
#=================================================================
|
||||
@ -44,9 +45,26 @@ class StatusAndHeaders(object):
|
||||
self.headers.append((name, value))
|
||||
return None
|
||||
|
||||
def replace_headers(self, header_dict):
    """
    replace all headers in header_dict that already exist
    add any remaining headers

    Header names are matched case-insensitively on both sides, so a key
    such as 'Content-Type' replaces an existing 'content-type' header
    instead of being appended as a duplicate. A replaced header keeps its
    existing name; an added header uses the name given in header_dict.
    When a name occurs several times in self.headers, only the last
    occurrence is replaced. header_dict itself is not modified.
    """
    # lower-cased name -> (name as supplied, value); a separate dict so
    # entries can be consumed without touching the caller's mapping
    pending = dict((name.lower(), (name, value))
                   for name, value in header_dict.items())

    # walk backwards so the last occurrence of a repeated header is the
    # one that gets replaced
    for index in range(len(self.headers) - 1, -1, -1):
        curr_name = self.headers[index][0]
        match = pending.pop(curr_name.lower(), None)
        if match is not None:
            self.headers[index] = (curr_name, match[1])

    # whatever was not matched above is new
    for name, value in pending.values():
        self.headers.append((name, value))
|
||||
|
||||
def remove_header(self, name):
|
||||
"""
|
||||
remove header (case-insensitive)
|
||||
Remove header (case-insensitive)
|
||||
return True if header removed, False otherwise
|
||||
"""
|
||||
name_lower = name.lower()
|
||||
|
@ -34,6 +34,9 @@ DEFAULTS = {
|
||||
'home_html': 'ui/index.html',
|
||||
'error_html': 'ui/error.html',
|
||||
|
||||
'proxy_select_html': 'ui/proxy_select.html',
|
||||
'proxy_cert_download_html': 'ui/proxy_cert_download.html',
|
||||
|
||||
'template_globals': {'static_path': 'static/default'},
|
||||
|
||||
'static_routes': {'static/default': 'pywb/static/'},
|
||||
@ -80,7 +83,7 @@ def create_live_handler(config):
|
||||
|
||||
#=================================================================
|
||||
def init_route_config(value, config):
|
||||
if isinstance(value, str):
|
||||
if isinstance(value, str) or isinstance(value, list):
|
||||
value = dict(index_paths=value)
|
||||
|
||||
route_config = DictChain(value, config)
|
||||
@ -226,10 +229,27 @@ def create_wb_router(passed_config={}):
|
||||
if hasattr(route.handler, 'resolve_refs'):
|
||||
route.handler.resolve_refs(handler_dict)
|
||||
|
||||
|
||||
# Check for new proxy mode!
|
||||
if config.get('enable_http_proxy', False):
|
||||
router = ProxyArchivalRouter
|
||||
|
||||
view = J2TemplateView.create_template(
|
||||
config.get('proxy_select_html'),
|
||||
'Proxy Coll Selector')
|
||||
|
||||
if not 'proxy_options' in passed_config:
|
||||
passed_config['proxy_options'] = {}
|
||||
|
||||
if view:
|
||||
passed_config['proxy_options']['proxy_select_view'] = view
|
||||
|
||||
view = J2TemplateView.create_template(
|
||||
config.get('proxy_cert_download_html'),
|
||||
'Proxy Cert Download')
|
||||
|
||||
if view:
|
||||
passed_config['proxy_options']['proxy_cert_download_view'] = view
|
||||
|
||||
else:
|
||||
router = ArchivalRouter
|
||||
|
||||
@ -250,6 +270,5 @@ def create_wb_router(passed_config={}):
|
||||
|
||||
error_view=J2TemplateView.create_template(config.get('error_html'),
|
||||
'Error Page'),
|
||||
|
||||
config=config
|
||||
)
|
||||
|
4
setup.py
4
setup.py
@ -34,7 +34,7 @@ class PyTest(TestCommand):
|
||||
|
||||
setup(
|
||||
name='pywb',
|
||||
version='0.5.3',
|
||||
version='0.6.0',
|
||||
url='https://github.com/ikreymer/pywb',
|
||||
author='Ilya Kreymer',
|
||||
author_email='ikreymer@gmail.com',
|
||||
@ -70,6 +70,7 @@ setup(
|
||||
'jinja2',
|
||||
'surt',
|
||||
'pyyaml',
|
||||
'pyopenssl',
|
||||
],
|
||||
tests_require=[
|
||||
'pytest',
|
||||
@ -86,6 +87,7 @@ setup(
|
||||
cdx-server = pywb.apps.cdx_server:main
|
||||
cdx-indexer = pywb.warc.cdxindexer:main
|
||||
live-rewrite-server = pywb.apps.live_rewrite_server:main
|
||||
proxy-cert-auth = pywb.framework.certauth:main
|
||||
""",
|
||||
zip_safe=False,
|
||||
classifiers=[
|
||||
|
@ -389,7 +389,7 @@ class TestWb:
|
||||
assert resp.status_int == 407
|
||||
|
||||
def test_proxy_pac(self):
|
||||
resp = self.testapp.get('/proxy.pac', extra_environ = dict(SERVER_NAME='pywb-proxy', SERVER_PORT='8080'))
|
||||
resp = self.testapp.get('/proxy.pac', headers = [('Host', 'pywb-proxy:8080')])
|
||||
assert resp.content_type == 'application/x-ns-proxy-autoconfig'
|
||||
assert '"PROXY pywb-proxy:8080"' in resp.body
|
||||
assert '"localhost"' in resp.body
|
||||
|
Loading…
x
Reference in New Issue
Block a user