mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Merge d6bb03409f2f408c77a5b579f2263587d0d5c276 into 7b0f8b58607fb0ed338f0cfddeb80c629582d8f6
This commit is contained in:
commit
7c3465c8e2
2
.github/workflows/ci.yaml
vendored
2
.github/workflows/ci.yaml
vendored
@ -8,7 +8,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
max-parallel: 3
|
max-parallel: 3
|
||||||
matrix:
|
matrix:
|
||||||
python-version: ['3.7', '3.8', '3.9', '3.10', '3.11']
|
python-version: ['3.9', '3.10', '3.11']
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: checkout
|
- name: checkout
|
||||||
|
3
.gitignore
vendored
3
.gitignore
vendored
@ -1,3 +1,6 @@
|
|||||||
|
# wabac sw
|
||||||
|
pywb/static/wabacSW.js
|
||||||
|
|
||||||
*.py[cod]
|
*.py[cod]
|
||||||
|
|
||||||
# C extensions
|
# C extensions
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
ARG PYTHON=python:3.8
|
ARG PYTHON=python:3.11
|
||||||
|
|
||||||
FROM $PYTHON
|
FROM $PYTHON
|
||||||
|
|
||||||
|
@ -27,6 +27,10 @@ enable_memento: true
|
|||||||
# Replay content in an iframe
|
# Replay content in an iframe
|
||||||
framed_replay: true
|
framed_replay: true
|
||||||
|
|
||||||
|
# Use wabac.js-style client-side replay system for framed replay
|
||||||
|
client_side_replay: false
|
||||||
|
|
||||||
|
# Enable classic redirect behavior
|
||||||
redirect_to_exact: true
|
redirect_to_exact: true
|
||||||
|
|
||||||
# Uncomment and change to set default locale
|
# Uncomment and change to set default locale
|
||||||
|
@ -44,6 +44,19 @@ To disable framed replay add:
|
|||||||
|
|
||||||
Note: pywb also supports HTTP/S **proxy mode** which requires additional setup. See :ref:`https-proxy` for more details.
|
Note: pywb also supports HTTP/S **proxy mode** which requires additional setup. See :ref:`https-proxy` for more details.
|
||||||
|
|
||||||
|
.. _client_side_replay:
|
||||||
|
|
||||||
|
Client-side replay
|
||||||
|
------------------
|
||||||
|
|
||||||
|
In pywb 2.9.0+, client-side replay can optionally be enabled for framed replay.
|
||||||
|
|
||||||
|
To enable client-side framed replay add:
|
||||||
|
|
||||||
|
``client_side_replay: true`` to your config.yaml
|
||||||
|
|
||||||
|
In this mode, pywb will use the `wabac.js <https://github.com/webrecorder/wabac.js>`_ service worker-based replay system that underlies `ReplayWeb.page <https://replayweb.page/>`_ in live proxy mode. This may result in better replay for certain JavaScript-heavy sites.
|
||||||
|
|
||||||
|
|
||||||
.. _dir_structure:
|
.. _dir_structure:
|
||||||
|
|
||||||
|
@ -81,6 +81,8 @@ class FrontEndApp(object):
|
|||||||
|
|
||||||
self.debug = config.get('debug', False)
|
self.debug = config.get('debug', False)
|
||||||
|
|
||||||
|
self.client_side_replay = config.get('client_side_replay', False)
|
||||||
|
|
||||||
self.warcserver_server = GeventServer(self.warcserver, port=0)
|
self.warcserver_server = GeventServer(self.warcserver, port=0)
|
||||||
|
|
||||||
self.proxy_prefix = None # the URL prefix to be used for the collection with proxy mode (e.g. /coll/id_/)
|
self.proxy_prefix = None # the URL prefix to be used for the collection with proxy mode (e.g. /coll/id_/)
|
||||||
@ -130,6 +132,9 @@ class FrontEndApp(object):
|
|||||||
coll_prefix = '/<coll>'
|
coll_prefix = '/<coll>'
|
||||||
self.url_map.add(Rule('/', endpoint=self.serve_home))
|
self.url_map.add(Rule('/', endpoint=self.serve_home))
|
||||||
|
|
||||||
|
if self.client_side_replay:
|
||||||
|
self.url_map.add(Rule('/static/sw.js', endpoint=self.serve_wabac_service_worker))
|
||||||
|
|
||||||
self._init_coll_routes(coll_prefix)
|
self._init_coll_routes(coll_prefix)
|
||||||
|
|
||||||
if self.proxy_prefix is not None:
|
if self.proxy_prefix is not None:
|
||||||
@ -818,6 +823,17 @@ class FrontEndApp(object):
|
|||||||
response.add_access_control_headers(env=env)
|
response.add_access_control_headers(env=env)
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
def serve_wabac_service_worker(self, env):
    """Return the wabac.js service worker script as a static response.

    The script is looked up through ``serve_static`` under the file name
    ``wabacSW.js`` with an empty collection, and the
    ``Service-Worker-Allowed`` header is set to ``/`` on the result before
    it is handed back.

    :param dict env: The WSGI environment dictionary
    :return: WbResponse with service worker
    :rtype: WbResponse
    """
    sw_response = self.serve_static(env, coll='', filepath='wabacSW.js')
    sw_response.status_headers['Service-Worker-Allowed'] = '/'
    return sw_response
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
class MetadataCache(object):
|
class MetadataCache(object):
|
||||||
|
@ -84,6 +84,8 @@ class RewriterApp(object):
|
|||||||
self._html_templ('head_insert_html'),
|
self._html_templ('head_insert_html'),
|
||||||
self.custom_banner_view)
|
self.custom_banner_view)
|
||||||
|
|
||||||
|
self.client_side_replay = self.config.get('client_side_replay', False)
|
||||||
|
|
||||||
self.frame_insert_view = TopFrameView(self.jinja_env,
|
self.frame_insert_view = TopFrameView(self.jinja_env,
|
||||||
self._html_templ('frame_insert_html'),
|
self._html_templ('frame_insert_html'),
|
||||||
self.banner_view)
|
self.banner_view)
|
||||||
@ -933,6 +935,7 @@ class RewriterApp(object):
|
|||||||
environ,
|
environ,
|
||||||
self.frame_mod,
|
self.frame_mod,
|
||||||
self.replay_mod,
|
self.replay_mod,
|
||||||
|
self.client_side_replay,
|
||||||
coll='',
|
coll='',
|
||||||
extra_params=extra_params)
|
extra_params=extra_params)
|
||||||
|
|
||||||
|
@ -388,6 +388,7 @@ class TopFrameView(BaseInsertView):
|
|||||||
env,
|
env,
|
||||||
frame_mod,
|
frame_mod,
|
||||||
replay_mod,
|
replay_mod,
|
||||||
|
client_side_replay,
|
||||||
coll='',
|
coll='',
|
||||||
extra_params=None):
|
extra_params=None):
|
||||||
"""
|
"""
|
||||||
@ -397,6 +398,7 @@ class TopFrameView(BaseInsertView):
|
|||||||
:param dict env: The WSGI environment dictionary for the request this template is being rendered for
|
:param dict env: The WSGI environment dictionary for the request this template is being rendered for
|
||||||
:param str frame_mod: The modifier to be used for framing (e.g. if_)
|
:param str frame_mod: The modifier to be used for framing (e.g. if_)
|
||||||
:param str replay_mod: The modifier to be used in the URL of the page being replayed (e.g. mp_)
|
:param str replay_mod: The modifier to be used in the URL of the page being replayed (e.g. mp_)
|
||||||
|
:param bool client_side_replay: Boolean indicating whether to use wabac.js-based client side replay
|
||||||
:param str coll: The name of the collection this template is being rendered for
|
:param str coll: The name of the collection this template is being rendered for
|
||||||
:param dict extra_params: Additional parameters to be supplied to the Jinja template render method
|
:param dict extra_params: Additional parameters to be supplied to the Jinja template render method
|
||||||
:return: The frame insert string
|
:return: The frame insert string
|
||||||
@ -423,6 +425,7 @@ class TopFrameView(BaseInsertView):
|
|||||||
|
|
||||||
'embed_url': embed_url,
|
'embed_url': embed_url,
|
||||||
'is_proxy': is_proxy,
|
'is_proxy': is_proxy,
|
||||||
|
'client_side_replay': client_side_replay,
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'url': wb_url.get_url()
|
'url': wb_url.get_url()
|
||||||
}
|
}
|
||||||
|
84
pywb/static/loadWabac.js
Normal file
84
pywb/static/loadWabac.js
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
/**
 * Drives the `#replay_iframe` through the wabac.js service worker.
 *
 * Usage: `new WabacReplay(prefix, url, ts).init()`.
 */
class WabacReplay
{
  /**
   * @param {string} prefix - URL prefix of the collection; the first path
   *   segment of this prefix is used as the collection name.
   * @param {string} url - URL to load into the replay iframe.
   * @param {string} ts - Timestamp to load.
   */
  constructor(prefix, url, ts) {
    this.prefix = prefix;
    this.url = url;
    this.ts = ts;
    // "http://dummy" is only a base so that a relative prefix can be parsed.
    this.collName = new URL(prefix, "http://dummy").pathname.split('/')[1];
    this.adblockUrl = undefined;

    // Appended as the query string of the service worker script URL.
    this.queryParams = {};
  }

  /**
   * Register the service worker, ask it to add this collection, hook up
   * the window message listener and then load the initial URL.
   *
   * @returns {Promise<void>} resolves once the initial URL has been set on
   *   the iframe.
   * @throws {Error} if service workers are not available.
   */
  async init() {
    // Fail with an explicit message instead of an opaque TypeError on
    // `navigator.serviceWorker.register`.
    if (!("serviceWorker" in navigator)) {
      throw new Error(
        "Service workers are not available in this context; " +
        "client-side replay cannot be started"
      );
    }

    const scope = "/";

    await navigator.serviceWorker.register(
      "/static/sw.js?" + new URLSearchParams(this.queryParams).toString(),
      { scope },
    );

    let initedResolve = null;

    const inited = new Promise((resolve) => initedResolve = resolve);

    navigator.serviceWorker.addEventListener("message", (event) => {
      // event.data may be null/undefined for unrelated messages
      if (event.data && event.data.msg_type === "collAdded") {
        // the replay is ready to be loaded when this message is received
        initedResolve();
      }
    });

    const baseUrl = new URL(window.location);
    baseUrl.hash = "";

    const proxyPrefix = "";

    const msg = {
      msg_type: "addColl",
      name: this.collName,
      type: "live",
      file: {"sourceUrl": `proxy:${proxyPrefix}`},
      skipExisting: true,
      extraConfig: {
        prefix: proxyPrefix,
        isLive: false,
        baseUrl: baseUrl.href,
        baseUrlHashReplay: true,
        noPostToGet: false,
        archivePrefix: `/${this.collName}/`,
        archiveMod: "ir_",
        adblockUrl: this.adblockUrl
      },
    };

    if (!navigator.serviceWorker.controller) {
      // No worker controls this page yet: send the message once one does.
      navigator.serviceWorker.addEventListener("controllerchange", () => {
        navigator.serviceWorker.controller.postMessage(msg);
      });
    } else {
      navigator.serviceWorker.controller.postMessage(msg);
    }

    window.addEventListener("message", event => {
      const data = event.data;
      // Ignore anything that is not a "load" notification, including
      // messages that carry no data at all.
      if (!data || data.wb_type !== "load") return;
      history.replaceState({}, data.title, this.prefix + data.ts + '/' + data.url);
      // The banner object may be absent; only forward when it is present.
      if (window.WBBanner) {
        window.WBBanner.onMessage(event);
      }
    });

    window.cframe = this;

    // Wait for the "collAdded" message before loading.
    await inited;

    this.load_url(this.url, this.ts);
  }

  /**
   * Point the replay iframe at the given URL and timestamp.
   *
   * @param {string} url - URL to replay.
   * @param {string} ts - Timestamp to replay.
   */
  // called by the Vue banner when the timeline is clicked
  load_url(url, ts) {
    const iframe = document.querySelector('#replay_iframe');
    iframe.src = `/w/${this.collName}/${ts}mp_/${url}`;
  }
}
|
@ -12,7 +12,15 @@ html, body
|
|||||||
}
|
}
|
||||||
|
|
||||||
</style>
|
</style>
|
||||||
|
|
||||||
|
{% if client_side_replay %}
|
||||||
|
<script src='{{ static_prefix }}/loadWabac.js'></script>
|
||||||
|
<script>
|
||||||
|
new WabacReplay("{{ wb_prefix }}", "{{ url }}", "{{ timestamp }}").init();
|
||||||
|
</script>
|
||||||
|
{% else %}
|
||||||
<script src='{{ static_prefix }}/wb_frame.js'> </script>
|
<script src='{{ static_prefix }}/wb_frame.js'> </script>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
{% autoescape false %}
|
{% autoescape false %}
|
||||||
|
|
||||||
@ -45,6 +53,8 @@ html, body
|
|||||||
<div id="wb_iframe_div">
|
<div id="wb_iframe_div">
|
||||||
<iframe id="replay_iframe" frameborder="0" seamless="seamless" scrolling="yes" class="wb_iframe" allow="autoplay; fullscreen"></iframe>
|
<iframe id="replay_iframe" frameborder="0" seamless="seamless" scrolling="yes" class="wb_iframe" allow="autoplay; fullscreen"></iframe>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{% if not client_side_replay %}
|
||||||
<script>
|
<script>
|
||||||
var cframe = new ContentFrame({"url": "{{ url }}" + window.location.hash,
|
var cframe = new ContentFrame({"url": "{{ url }}" + window.location.hash,
|
||||||
"prefix": "{{ wb_prefix }}",
|
"prefix": "{{ wb_prefix }}",
|
||||||
@ -52,6 +62,7 @@ html, body
|
|||||||
"iframe": "#replay_iframe"});
|
"iframe": "#replay_iframe"});
|
||||||
|
|
||||||
</script>
|
</script>
|
||||||
|
{% endif %}
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
{% endautoescape %}
|
{% endautoescape %}
|
||||||
|
18
setup.py
18
setup.py
@ -5,11 +5,29 @@ from setuptools import setup, find_packages
|
|||||||
from setuptools.command.test import test as TestCommand
|
from setuptools.command.test import test as TestCommand
|
||||||
import glob
|
import glob
|
||||||
import os
|
import os
|
||||||
|
import pathlib
|
||||||
import sys
|
import sys
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
from pywb import __version__
|
from pywb import __version__
|
||||||
|
|
||||||
|
|
||||||
|
root_dir = pathlib.Path(__file__).parent
|
||||||
|
|
||||||
|
|
||||||
|
WABAC_SW_URL = "https://cdn.jsdelivr.net/npm/@webrecorder/wabac@2.21.4/dist/sw.js"
|
||||||
|
|
||||||
|
|
||||||
|
def download_wabac_sw():
    """Download the wabac.js service worker into ``pywb/static/wabacSW.js``.

    Fetches ``WABAC_SW_URL`` and writes the response body under ``root_dir``.
    The body is read in full before the destination file is opened, so a
    failed or interrupted download does not truncate an existing copy. The
    request carries a timeout so a stalled connection cannot hang the
    install indefinitely.

    :raises urllib.error.URLError: if the URL cannot be fetched
    :raises OSError: if the connection times out or the file cannot be written
    """
    print(f"Downloading {WABAC_SW_URL}")
    with urllib.request.urlopen(WABAC_SW_URL, timeout=60) as response:  # nosec
        sw_source = response.read()

    # Open (and thereby truncate) the target only after the download succeeded.
    with open(root_dir.joinpath("pywb", "static", "wabacSW.js"), "wb") as fh:
        fh.write(sw_source)
|
||||||
|
|
||||||
|
|
||||||
|
download_wabac_sw()
|
||||||
|
|
||||||
|
|
||||||
def get_long_description():
|
def get_long_description():
|
||||||
with open('README.rst', 'r') as fh:
|
with open('README.rst', 'r') as fh:
|
||||||
long_description = fh.read()
|
long_description = fh.read()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user