1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

Merge d6bb03409f2f408c77a5b579f2263587d0d5c276 into 7b0f8b58607fb0ed338f0cfddeb80c629582d8f6

This commit is contained in:
Tessa Walsh 2025-03-13 17:33:41 +00:00 committed by GitHub
commit 7c3465c8e2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 157 additions and 2 deletions

View File

@ -8,7 +8,7 @@ jobs:
strategy: strategy:
max-parallel: 3 max-parallel: 3
matrix: matrix:
python-version: ['3.7', '3.8', '3.9', '3.10', '3.11'] python-version: ['3.9', '3.10', '3.11']
steps: steps:
- name: checkout - name: checkout

3
.gitignore vendored
View File

@ -1,3 +1,6 @@
# wabac sw
pywb/static/wabacSW.js
*.py[cod] *.py[cod]
# C extensions # C extensions

View File

@ -1,4 +1,4 @@
ARG PYTHON=python:3.8 ARG PYTHON=python:3.11
FROM $PYTHON FROM $PYTHON

View File

@ -27,6 +27,10 @@ enable_memento: true
# Replay content in an iframe # Replay content in an iframe
framed_replay: true framed_replay: true
# Use wabac.js-style client-side replay system for framed replay
client_side_replay: false
# Enable classic redirect behavior
redirect_to_exact: true redirect_to_exact: true
# Uncomment and change to set default locale # Uncomment and change to set default locale

View File

@ -44,6 +44,19 @@ To disable framed replay add:
Note: pywb also supports HTTP/S **proxy mode** which requires additional setup. See :ref:`https-proxy` for more details. Note: pywb also supports HTTP/S **proxy mode** which requires additional setup. See :ref:`https-proxy` for more details.
.. _client_side_replay:
Client-side replay
------------------
In pywb 2.9.0+, client-side replay can optionally be enabled for framed replay.
To enable client-side framed replay add:
``client_side_replay: true`` to your config.yaml
In this mode, pywb will use the `wabac.js <https://github.com/webrecorder/wabac.js>`_ service worker-based replay system that underlies `ReplayWeb.page <https://replayweb.page/>`_ in live proxy mode. This may result in better replay for certain JavaScript-heavy sites.
.. _dir_structure: .. _dir_structure:

View File

@ -81,6 +81,8 @@ class FrontEndApp(object):
self.debug = config.get('debug', False) self.debug = config.get('debug', False)
self.client_side_replay = config.get('client_side_replay', False)
self.warcserver_server = GeventServer(self.warcserver, port=0) self.warcserver_server = GeventServer(self.warcserver, port=0)
self.proxy_prefix = None # the URL prefix to be used for the collection with proxy mode (e.g. /coll/id_/) self.proxy_prefix = None # the URL prefix to be used for the collection with proxy mode (e.g. /coll/id_/)
@ -130,6 +132,9 @@ class FrontEndApp(object):
coll_prefix = '/<coll>' coll_prefix = '/<coll>'
self.url_map.add(Rule('/', endpoint=self.serve_home)) self.url_map.add(Rule('/', endpoint=self.serve_home))
if self.client_side_replay:
self.url_map.add(Rule('/static/sw.js', endpoint=self.serve_wabac_service_worker))
self._init_coll_routes(coll_prefix) self._init_coll_routes(coll_prefix)
if self.proxy_prefix is not None: if self.proxy_prefix is not None:
@ -818,6 +823,17 @@ class FrontEndApp(object):
response.add_access_control_headers(env=env) response.add_access_control_headers(env=env)
return response return response
def serve_wabac_service_worker(self, env):
    """Return the wabac.js service worker script as a static-file response.

    The script is looked up through ``serve_static`` under the name
    ``wabacSW.js`` with an empty collection, and the response is tagged
    with a ``Service-Worker-Allowed: /`` header before being returned.

    :param dict env: The WSGI environment dictionary
    :return: The static-file response for the service worker script
    :rtype: WbResponse
    """
    sw_response = self.serve_static(env, coll='', filepath='wabacSW.js')
    # Header that permits the worker to be registered with the root scope
    sw_headers = sw_response.status_headers
    sw_headers['Service-Worker-Allowed'] = '/'
    return sw_response
# ============================================================================ # ============================================================================
class MetadataCache(object): class MetadataCache(object):

View File

@ -84,6 +84,8 @@ class RewriterApp(object):
self._html_templ('head_insert_html'), self._html_templ('head_insert_html'),
self.custom_banner_view) self.custom_banner_view)
self.client_side_replay = self.config.get('client_side_replay', False)
self.frame_insert_view = TopFrameView(self.jinja_env, self.frame_insert_view = TopFrameView(self.jinja_env,
self._html_templ('frame_insert_html'), self._html_templ('frame_insert_html'),
self.banner_view) self.banner_view)
@ -933,6 +935,7 @@ class RewriterApp(object):
environ, environ,
self.frame_mod, self.frame_mod,
self.replay_mod, self.replay_mod,
self.client_side_replay,
coll='', coll='',
extra_params=extra_params) extra_params=extra_params)

View File

@ -388,6 +388,7 @@ class TopFrameView(BaseInsertView):
env, env,
frame_mod, frame_mod,
replay_mod, replay_mod,
client_side_replay,
coll='', coll='',
extra_params=None): extra_params=None):
""" """
@ -397,6 +398,7 @@ class TopFrameView(BaseInsertView):
:param dict env: The WSGI environment dictionary for the request this template is being rendered for :param dict env: The WSGI environment dictionary for the request this template is being rendered for
:param str frame_mod: The modifier to be used for framing (e.g. if_) :param str frame_mod: The modifier to be used for framing (e.g. if_)
:param str replay_mod: The modifier to be used in the URL of the page being replayed (e.g. mp_) :param str replay_mod: The modifier to be used in the URL of the page being replayed (e.g. mp_)
:param bool client_side_replay: Boolean indicating whether to use wabac.js-based client side replay
:param str coll: The name of the collection this template is being rendered for :param str coll: The name of the collection this template is being rendered for
:param dict extra_params: Additional parameters to be supplied to the Jinja template render method :param dict extra_params: Additional parameters to be supplied to the Jinja template render method
:return: The frame insert string :return: The frame insert string
@ -423,6 +425,7 @@ class TopFrameView(BaseInsertView):
'embed_url': embed_url, 'embed_url': embed_url,
'is_proxy': is_proxy, 'is_proxy': is_proxy,
'client_side_replay': client_side_replay,
'timestamp': timestamp, 'timestamp': timestamp,
'url': wb_url.get_url() 'url': wb_url.get_url()
} }

84
pywb/static/loadWabac.js Normal file
View File

@ -0,0 +1,84 @@
/**
 * Loader for wabac.js (service worker based) replay inside the framed
 * replay page. Registers the service worker, asks it to add a collection,
 * and then points the replay iframe at the requested capture.
 */
class WabacReplay
{
  /**
   * @param {string} prefix - URL prefix of the collection (e.g. "/coll/" or an absolute URL).
   * @param {string} url - URL of the page to replay.
   * @param {string} ts - Timestamp of the capture to replay.
   */
  constructor(prefix, url, ts) {
    this.prefix = prefix;
    this.url = url;
    this.ts = ts;
    // The first path segment of the prefix is used as the collection name;
    // the dummy base only exists so that relative prefixes can be parsed.
    this.collName = new URL(prefix, "http://dummy").pathname.split('/')[1];
    this.adblockUrl = undefined;
    this.queryParams = {};
  }

  /**
   * Register the service worker, send it the "addColl" message, wire up the
   * message handlers and load the initial URL once the collection is ready.
   *
   * @returns {Promise<void>} resolves after the first load_url() call.
   * @throws {Error} if service workers are not available in this context.
   */
  async init() {
    // Fail with an explicit message instead of an opaque TypeError when the
    // page is served from a context without service worker support.
    if (!("serviceWorker" in navigator)) {
      throw new Error("Service workers are not available; client-side replay requires a secure context");
    }

    const scope = "/";

    await navigator.serviceWorker.register(
      "/static/sw.js?" + new URLSearchParams(this.queryParams).toString(),
      { scope },
    );

    let initedResolve = null;

    const inited = new Promise((resolve) => initedResolve = resolve);

    navigator.serviceWorker.addEventListener("message", (event) => {
      // event.data can be null or a primitive for unrelated messages
      if (event.data && event.data.msg_type === "collAdded") {
        // the replay is ready to be loaded when this message is received
        initedResolve();
      }
    });

    const baseUrl = new URL(window.location);
    baseUrl.hash = "";

    const proxyPrefix = "";

    const msg = {
      msg_type: "addColl",
      name: this.collName,
      type: "live",
      file: {"sourceUrl": `proxy:${proxyPrefix}`},
      skipExisting: true,
      extraConfig: {
        prefix: proxyPrefix,
        isLive: false,
        baseUrl: baseUrl.href,
        baseUrlHashReplay: true,
        noPostToGet: false,
        archivePrefix: `/${this.collName}/`,
        archiveMod: "ir_",
        adblockUrl: this.adblockUrl
      },
    };

    if (!navigator.serviceWorker.controller) {
      // No worker controls this page yet: wait until one takes over.
      navigator.serviceWorker.addEventListener("controllerchange", () => {
        navigator.serviceWorker.controller.postMessage(msg);
      });
    } else {
      navigator.serviceWorker.controller.postMessage(msg);
    }

    window.addEventListener("message", event => {
      const data = event.data;
      // Ignore empty payloads and anything that is not a "load" notification.
      if (!data || data.wb_type !== "load") return;

      history.replaceState({}, data.title, this.prefix + data.ts + '/' + data.url);
      // The banner is optional; only forward the event when one is present.
      if (window.WBBanner) {
        window.WBBanner.onMessage(event);
      }
    });

    window.cframe = this;

    // Wait for the "collAdded" acknowledgement before loading the iframe.
    await inited;

    this.load_url(this.url, this.ts);
  }

  /**
   * Point the replay iframe at the given capture.
   * Called by the Vue banner when the timeline is clicked.
   *
   * @param {string} url - URL of the page to replay.
   * @param {string} ts - Timestamp of the capture to replay.
   */
  load_url(url, ts) {
    const iframe = document.querySelector('#replay_iframe');
    iframe.src = `/w/${this.collName}/${ts}mp_/${url}`;
  }
}

View File

@ -12,7 +12,15 @@ html, body
} }
</style> </style>
{% if client_side_replay %}
<script src='{{ static_prefix }}/loadWabac.js'></script>
<script>
new WabacReplay("{{ wb_prefix }}", "{{ url }}", "{{ timestamp }}").init();
</script>
{% else %}
<script src='{{ static_prefix }}/wb_frame.js'> </script> <script src='{{ static_prefix }}/wb_frame.js'> </script>
{% endif %}
{% autoescape false %} {% autoescape false %}
@ -45,6 +53,8 @@ html, body
<div id="wb_iframe_div"> <div id="wb_iframe_div">
<iframe id="replay_iframe" frameborder="0" seamless="seamless" scrolling="yes" class="wb_iframe" allow="autoplay; fullscreen"></iframe> <iframe id="replay_iframe" frameborder="0" seamless="seamless" scrolling="yes" class="wb_iframe" allow="autoplay; fullscreen"></iframe>
</div> </div>
{% if not client_side_replay %}
<script> <script>
var cframe = new ContentFrame({"url": "{{ url }}" + window.location.hash, var cframe = new ContentFrame({"url": "{{ url }}" + window.location.hash,
"prefix": "{{ wb_prefix }}", "prefix": "{{ wb_prefix }}",
@ -52,6 +62,7 @@ html, body
"iframe": "#replay_iframe"}); "iframe": "#replay_iframe"});
</script> </script>
{% endif %}
</body> </body>
</html> </html>
{% endautoescape %} {% endautoescape %}

View File

@ -5,11 +5,29 @@ from setuptools import setup, find_packages
from setuptools.command.test import test as TestCommand from setuptools.command.test import test as TestCommand
import glob import glob
import os import os
import pathlib
import sys import sys
import urllib.request
from pywb import __version__ from pywb import __version__
# Directory that contains this setup script; used to build paths into the source tree.
root_dir = pathlib.Path(__file__).parent
# Pinned build (2.21.4) of the wabac.js service worker, served from the jsDelivr CDN.
WABAC_SW_URL = "https://cdn.jsdelivr.net/npm/@webrecorder/wabac@2.21.4/dist/sw.js"
def download_wabac_sw(timeout=60):
    """Download the pinned wabac.js service worker into ``pywb/static``.

    The script at ``WABAC_SW_URL`` is fetched in full and then written to
    ``pywb/static/wabacSW.js`` below ``root_dir``.

    :param float timeout: Seconds to wait on the network before giving up
    :raises urllib.error.URLError: if the download fails
    :raises OSError: on a timeout or if the target file cannot be written
    """
    target = root_dir.joinpath("pywb", "static", "wabacSW.js")
    print(f"Downloading {WABAC_SW_URL}")
    # Read the whole payload before touching the target file, so a failed or
    # stalled download never leaves a truncated service worker on disk.
    with urllib.request.urlopen(WABAC_SW_URL, timeout=timeout) as response:  # nosec
        payload = response.read()
    with open(target, "wb") as fh:
        fh.write(payload)
# Unconditional call to fetch the service worker.
# NOTE(review): appears to sit at module level, so it would run on every
# invocation of this setup script — confirm this is intended.
download_wabac_sw()
def get_long_description(): def get_long_description():
with open('README.rst', 'r') as fh: with open('README.rst', 'r') as fh:
long_description = fh.read() long_description = fh.read()