From 61bf5e09ca3f84ab07ac76434695c6ecea16871e Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Tue, 27 Feb 2018 15:52:19 -0800 Subject: [PATCH] proxy-mode tweaks: (fixes #302): (#304) - don't include wombat.js in banner only mode, including in proxy mode (instead, do set devicePixelRatio to fix certain fidelity issues) - default_banner: set title to document.title on load when frameless, including in proxy mode - improve docs for configuring proxy mode cert - tests: update tests to ensure no wombat.js injected in proxy or banner-only mode --- docs/manual/configuring.rst | 20 ++++++++++++-------- docs/manual/usage.rst | 2 ++ pywb/static/default_banner.js | 3 ++- pywb/templates/head_insert.html | 13 +++++++++---- tests/test_integration.py | 4 ++-- tests/test_proxy.py | 6 ++++++ 6 files changed, 33 insertions(+), 15 deletions(-) diff --git a/docs/manual/configuring.rst b/docs/manual/configuring.rst index 643095b9..f3972d9b 100644 --- a/docs/manual/configuring.rst +++ b/docs/manual/configuring.rst @@ -403,7 +403,7 @@ Configuring HTTP Proxy At this time, pywb requires the collection to be configured at setup time (though collection switching will be added soon). -The collection can be specified by running: ``wayback --proxy my-coll`` or by adding to the config:: +To enable proxy mode, the collection can be specified by running: ``wayback --proxy my-coll`` or by adding to the config:: proxy: coll: my-coll @@ -432,24 +432,28 @@ HTTPS Proxy and pywb Certificate Authority For HTTPS proxy access, pywb provides its own Certificate Authority and dynamically generates certificates for each host and signs the responses with these certificates. By design, this allows pywb to act as "man-in-the-middle" serving archived copies of a given site. -However, the pywb certificate authority (CA) will need to be accepted by the browser. The CA cert can be downloaded from pywb directly +However, the pywb Certificate Authority (CA) certificate will need to be accepted by the browser. 
The CA cert can be downloaded from pywb directly using the special download paths. Recommended set up for using the proxy is as follows: -1. Configure the browser proxy settings host port, for example ``localhost`` and ``8080`` (if running locally) +1. Start pywb with proxy mode enabled (with ``--proxy`` option or with a ``proxy:`` option block present in the config). -2. Download the CA: + (The CA root certificate will be auto-created when first starting pywb with proxy mode if it doesn't exist.) + +2. Configure the browser proxy settings host and port, for example ``localhost`` and ``8080`` (if running locally) + +3. Download the CA: * For most browsers, use the PEM format: ``http://wsgiprox/download/pem`` * For windows, use the PKCS12 format: ``http://wsgiprox/download/p12`` -3. You may need to agree to "Trust this CA" to identify websites. +4. You may need to agree to "Trust this CA" to identify websites. -The pywb CA file is automatically generated if it does not exist, and may be added to the key store directly. +The auto-generated pywb CA, created at ``./proxy-certs/pywb-ca.pem``, may also be added to a keystore directly. -Additional proxy options ``ca_name`` and ``ca_file_cache`` allow configuring the location and name of the CA file. +The location of the CA file and the CA name displayed can be changed by setting the ``ca_file_cache`` and ``ca_name`` proxy options, respectively. -The following are all the available proxy options (only ``coll`` is required):: +The following are all the available proxy options -- only ``coll`` is required:: proxy: coll: my-coll diff --git a/docs/manual/usage.rst b/docs/manual/usage.rst index 451d2c5f..70db6581 100644 --- a/docs/manual/usage.rst +++ b/docs/manual/usage.rst @@ -142,6 +142,8 @@ For example, ``wayback --proxy my-web-archive`` will start pywb and enable proxy You can then configure a browser to Proxy Settings host port to: ``localhost:8080`` and then loading any url, eg. 
``http://example.com/`` should load the latest copy from the ``my-web-archive`` collection. +See :ref:`https-proxy` section for additional configuration details. + Deployment ---------- diff --git a/pywb/static/default_banner.js b/pywb/static/default_banner.js index 81aefe56..04a7ac9e 100644 --- a/pywb/static/default_banner.js +++ b/pywb/static/default_banner.js @@ -94,7 +94,8 @@ This file is part of pywb, https://github.com/webrecorder/pywb set_banner(window.wbinfo.url, window.wbinfo.timestamp, - window.wbinfo.is_live); + window.wbinfo.is_live, + window.wbinfo.is_framed ? "" : document.title); } else { init("_wb_frame_top_banner"); diff --git a/pywb/templates/head_insert.html b/pywb/templates/head_insert.html index 5aa84b12..95fb2301 100644 --- a/pywb/templates/head_insert.html +++ b/pywb/templates/head_insert.html @@ -1,7 +1,6 @@ - {% if not wb_url.is_banner_only %} + + +{% else %} + +{% endif %} {% if config.enable_flash_video_rewrite %} diff --git a/tests/test_integration.py b/tests/test_integration.py index afdabcb9..643e4495 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -163,8 +163,8 @@ class TestWbIntegration(BaseConfigTest): def test_replay_banner_only(self): resp = self.testapp.get('/pywb/20140126201054bn_/http://www.iana.org/domains/reserved') - # wombat.js header insertion - assert 'wombat.js' in resp.text + # wombat.js header not inserted + assert 'wombat.js' not in resp.text # no wombat present assert '_WBWombat' not in resp.text diff --git a/tests/test_proxy.py b/tests/test_proxy.py index 14254177..dc69a443 100644 --- a/tests/test_proxy.py +++ b/tests/test_proxy.py @@ -60,6 +60,9 @@ class TestProxy(BaseTestProxy): assert 'WB Insert' in res.text assert 'Example Domain' in res.text + # no wombat.js + assert 'wombat.js' not in res.text + assert res.headers['Link'] == '; rel="memento"; datetime="Mon, 27 Jan 2014 17:12:51 GMT"; collection="pywb"' assert res.headers['Memento-Datetime'] == 'Mon, 27 Jan 2014 17:12:51 GMT' @@ -73,6 
+76,9 @@ class TestProxy(BaseTestProxy): assert 'WB Insert' in res.text assert 'Example Domain' in res.text + # no wombat.js + assert 'wombat.js' not in res.text + assert res.headers['Link'] == '; rel="memento"; datetime="Mon, 29 Jul 2013 19:51:51 GMT"; collection="pywb"' assert res.headers['Memento-Datetime'] == 'Mon, 29 Jul 2013 19:51:51 GMT'