1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

Merge d6bb03409f2f408c77a5b579f2263587d0d5c276 into 7b0f8b58607fb0ed338f0cfddeb80c629582d8f6

This commit is contained in:
Tessa Walsh 2025-03-13 17:33:41 +00:00 committed by GitHub
commit 7c3465c8e2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 157 additions and 2 deletions

View File

@ -8,7 +8,7 @@ jobs:
strategy:
max-parallel: 3
matrix:
python-version: ['3.7', '3.8', '3.9', '3.10', '3.11']
python-version: ['3.9', '3.10', '3.11']
steps:
- name: checkout

3
.gitignore vendored
View File

@ -1,3 +1,6 @@
# wabac sw
pywb/static/wabacSW.js
*.py[cod]
# C extensions

View File

@ -1,4 +1,4 @@
ARG PYTHON=python:3.8
ARG PYTHON=python:3.11
FROM $PYTHON

View File

@ -27,6 +27,10 @@ enable_memento: true
# Replay content in an iframe
framed_replay: true
# Use wabac.js-style client-side replay system for framed replay
client_side_replay: false
# Enable classic redirect behavior
redirect_to_exact: true
# Uncomment and change to set default locale

View File

@ -44,6 +44,19 @@ To disable framed replay add:
Note: pywb also supports HTTP/S **proxy mode** which requires additional setup. See :ref:`https-proxy` for more details.
.. _client_side_replay:
Client-side replay
------------------
In pywb 2.9.0+, client-side replay can optionally be enabled for framed replay.
To enable client-side framed replay add:
``client_side_replay: true`` to your config.yaml
In this mode, pywb will use the `wabac.js <https://github.com/webrecorder/wabac.js>`_ service worker-based replay system that underlies `ReplayWeb.page <https://replayweb.page/>`_ in live proxy mode. This may result in better replay for certain JavaScript-heavy sites.
.. _dir_structure:

View File

@ -81,6 +81,8 @@ class FrontEndApp(object):
self.debug = config.get('debug', False)
self.client_side_replay = config.get('client_side_replay', False)
self.warcserver_server = GeventServer(self.warcserver, port=0)
self.proxy_prefix = None # the URL prefix to be used for the collection with proxy mode (e.g. /coll/id_/)
@ -130,6 +132,9 @@ class FrontEndApp(object):
coll_prefix = '/<coll>'
self.url_map.add(Rule('/', endpoint=self.serve_home))
if self.client_side_replay:
self.url_map.add(Rule('/static/sw.js', endpoint=self.serve_wabac_service_worker))
self._init_coll_routes(coll_prefix)
if self.proxy_prefix is not None:
@ -818,6 +823,17 @@ class FrontEndApp(object):
response.add_access_control_headers(env=env)
return response
def serve_wabac_service_worker(self, env):
    """Return the wabac.js service worker script as a static file response.

    The script is looked up through ``serve_static`` under the file name
    ``wabacSW.js`` with an empty collection, and the response is tagged with
    a ``Service-Worker-Allowed: /`` header before being returned.

    :param dict env: The WSGI environment dictionary
    :return: WbResponse with service worker
    :rtype: WbResponse
    """
    sw_response = self.serve_static(env, coll='', filepath='wabacSW.js')
    # The worker is routed at /static/sw.js but registered with scope "/",
    # so the broader scope has to be explicitly allowed via this header.
    sw_response.status_headers['Service-Worker-Allowed'] = '/'
    return sw_response
# ============================================================================
class MetadataCache(object):

View File

@ -84,6 +84,8 @@ class RewriterApp(object):
self._html_templ('head_insert_html'),
self.custom_banner_view)
self.client_side_replay = self.config.get('client_side_replay', False)
self.frame_insert_view = TopFrameView(self.jinja_env,
self._html_templ('frame_insert_html'),
self.banner_view)
@ -933,6 +935,7 @@ class RewriterApp(object):
environ,
self.frame_mod,
self.replay_mod,
self.client_side_replay,
coll='',
extra_params=extra_params)

View File

@ -388,6 +388,7 @@ class TopFrameView(BaseInsertView):
env,
frame_mod,
replay_mod,
client_side_replay,
coll='',
extra_params=None):
"""
@ -397,6 +398,7 @@ class TopFrameView(BaseInsertView):
:param dict env: The WSGI environment dictionary for the request this template is being rendered for
:param str frame_mod: The modifier to be used for framing (e.g. if_)
:param str replay_mod: The modifier to be used in the URL of the page being replayed (e.g. mp_)
:param bool client_side_replay: Boolean indicating whether to use wabac.js-based client side replay
:param str coll: The name of the collection this template is being rendered for
:param dict extra_params: Additional parameters to be supplied to the Jinja template render method
:return: The frame insert string
@ -423,6 +425,7 @@ class TopFrameView(BaseInsertView):
'embed_url': embed_url,
'is_proxy': is_proxy,
'client_side_replay': client_side_replay,
'timestamp': timestamp,
'url': wb_url.get_url()
}

84
pywb/static/loadWabac.js Normal file
View File

@ -0,0 +1,84 @@
/**
 * Drives wabac.js service-worker based replay from the top frame.
 *
 * Registers the service worker, asks it to add a collection, waits for the
 * worker to confirm, then points the replay iframe at the requested capture.
 */
class WabacReplay
{
  /**
   * @param {string} prefix - replay prefix; its first path segment is used as the collection name
   * @param {string} url - URL of the page to replay
   * @param {string} ts - timestamp of the capture to replay
   */
  constructor(prefix, url, ts) {
    this.prefix = prefix;
    this.url = url;
    this.ts = ts;

    // The dummy base lets a path-only prefix be parsed as a URL.
    const prefixPath = new URL(prefix, "http://dummy").pathname;
    this.collName = prefixPath.split('/')[1];

    this.adblockUrl = undefined;
    this.queryParams = {};
  }

  /**
   * Register the service worker, add the collection and load the initial URL.
   * Also installs this instance as `window.cframe`.
   */
  async init() {
    const swContainer = navigator.serviceWorker;

    const swQuery = new URLSearchParams(this.queryParams).toString();
    await swContainer.register("/static/sw.js?" + swQuery, { scope: "/" });

    let markReady = null;
    const collReady = new Promise((resolve) => {
      markReady = resolve;
    });

    swContainer.addEventListener("message", (event) => {
      if (event.data.msg_type === "collAdded") {
        // the replay is ready to be loaded when this message is received
        markReady();
      }
    });

    // Current page location with the fragment removed.
    const pageUrl = new URL(window.location);
    pageUrl.hash = "";

    const proxyPrefix = "";

    const extraConfig = {
      prefix: proxyPrefix,
      isLive: false,
      baseUrl: pageUrl.href,
      baseUrlHashReplay: true,
      noPostToGet: false,
      archivePrefix: `/${this.collName}/`,
      archiveMod: "ir_",
      adblockUrl: this.adblockUrl
    };

    const msg = {
      msg_type: "addColl",
      name: this.collName,
      type: "live",
      file: {"sourceUrl": `proxy:${proxyPrefix}`},
      skipExisting: true,
      extraConfig,
    };

    if (swContainer.controller) {
      swContainer.controller.postMessage(msg);
    } else {
      // No worker controls this page yet: send the request once one takes over.
      swContainer.addEventListener("controllerchange", () => {
        swContainer.controller.postMessage(msg);
      });
    }

    window.addEventListener("message", (event) => {
      const payload = event.data;
      if (payload.wb_type === "load") {
        // Keep the address bar in sync with the replayed page, then notify the banner.
        history.replaceState({}, payload.title, this.prefix + payload.ts + '/' + payload.url);
        window.WBBanner.onMessage(event);
      }
    });

    window.cframe = this;

    await collReady;

    this.load_url(this.url, this.ts);
  }

  // called by the Vue banner when the timeline is clicked
  load_url(url, ts) {
    const replayFrame = document.querySelector('#replay_iframe');
    replayFrame.src = "/w/" + this.collName + "/" + ts + "mp_/" + url;
  }
}

View File

@ -12,7 +12,15 @@ html, body
}
</style>
{% if client_side_replay %}
<script src='{{ static_prefix }}/loadWabac.js'></script>
<script>
new WabacReplay("{{ wb_prefix }}", "{{ url }}", "{{ timestamp }}").init();
</script>
{% else %}
<script src='{{ static_prefix }}/wb_frame.js'> </script>
{% endif %}
{% autoescape false %}
@ -45,6 +53,8 @@ html, body
<div id="wb_iframe_div">
<iframe id="replay_iframe" frameborder="0" seamless="seamless" scrolling="yes" class="wb_iframe" allow="autoplay; fullscreen"></iframe>
</div>
{% if not client_side_replay %}
<script>
var cframe = new ContentFrame({"url": "{{ url }}" + window.location.hash,
"prefix": "{{ wb_prefix }}",
@ -52,6 +62,7 @@ html, body
"iframe": "#replay_iframe"});
</script>
{% endif %}
</body>
</html>
{% endautoescape %}

View File

@ -5,11 +5,29 @@ from setuptools import setup, find_packages
from setuptools.command.test import test as TestCommand
import glob
import os
import pathlib
import sys
import urllib.request
from pywb import __version__
# Directory containing this setup script; used to locate files inside the source tree.
root_dir = pathlib.Path(__file__).parent
# Pinned wabac.js service worker build (version 2.21.4) served from the jsDelivr CDN.
WABAC_SW_URL = "https://cdn.jsdelivr.net/npm/@webrecorder/wabac@2.21.4/dist/sw.js"
def download_wabac_sw():
    """Download the pinned wabac.js service worker into the static directory.

    Fetches ``WABAC_SW_URL`` and stores the bytes as
    ``pywb/static/wabacSW.js`` relative to ``root_dir``.

    :raises urllib.error.URLError: if the download fails or times out
    :raises OSError: if the destination file cannot be written
    """
    print(f"Downloading {WABAC_SW_URL}")
    # A bounded timeout makes an unresponsive CDN fail the build instead of
    # hanging it indefinitely.
    with urllib.request.urlopen(WABAC_SW_URL, timeout=60) as response:  # nosec
        payload = response.read()
    # Open the destination only after the full body has been received, so a
    # failed download cannot truncate a previously downloaded copy.
    with open(root_dir.joinpath("pywb", "static", "wabacSW.js"), "wb") as fh:
        fh.write(payload)
# Executed at module level: the service worker is fetched every time this script runs.
download_wabac_sw()
def get_long_description():
with open('README.rst', 'r') as fh:
long_description = fh.read()