mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
record-history experiments via custom metadata record, head_insert
work in progress; highly experimental
This commit is contained in:
parent
0c74616070
commit
dc7b8956bb
@ -14,7 +14,7 @@ from pywb.utils.loaders import extract_client_cookie
|
||||
from pywb.utils.io import BUFF_SIZE
|
||||
from pywb.utils.memento import MementoUtils
|
||||
|
||||
from warcio.timeutils import http_date_to_timestamp, timestamp_to_http_date
|
||||
from warcio.timeutils import http_date_to_timestamp, timestamp_to_http_date, iso_date_to_timestamp
|
||||
from warcio.bufferedreaders import BufferedReader
|
||||
from warcio.recordloader import ArcWarcRecordLoader
|
||||
|
||||
@ -24,7 +24,7 @@ from pywb.apps.wbrequestresponse import WbResponse
|
||||
from pywb.rewrite.rewriteinputreq import RewriteInputRequest
|
||||
from pywb.rewrite.templateview import JinjaEnv, HeadInsertView, TopFrameView, BaseInsertView
|
||||
|
||||
|
||||
import re
|
||||
from io import BytesIO
|
||||
from copy import copy
|
||||
|
||||
@ -144,6 +144,7 @@ class RewriterApp(object):
|
||||
full_prefix = host_prefix + rel_prefix
|
||||
|
||||
is_proxy = ('wsgiprox.proxy_host' in environ)
|
||||
is_ajax = self.is_ajax(environ)
|
||||
|
||||
response = self.handle_custom_response(environ, wb_url,
|
||||
full_prefix, host_prefix,
|
||||
@ -223,6 +224,13 @@ class RewriterApp(object):
|
||||
res = self.cookie_tracker.get_cookie_headers(wb_url.url, urlrewriter, cookie_key)
|
||||
inputreq.extra_cookie, setcookie_headers = res
|
||||
|
||||
# TWITTER TEST
|
||||
if is_ajax:
|
||||
print('AJAX')
|
||||
m = re.match('https://twitter[.]com/[^/]+/status/([^/]+)', wb_url.url)
|
||||
if m:
|
||||
wb_url.url += '?conversation'
|
||||
|
||||
r = self._do_req(inputreq, wb_url, kwargs, skip)
|
||||
|
||||
if r.status_code >= 400:
|
||||
@ -293,6 +301,22 @@ class RewriterApp(object):
|
||||
|
||||
return resp
|
||||
|
||||
if record.rec_type == 'metadata' and record.rec_headers.get('WARC-Profile') == 'history':
|
||||
history_state = record.content_stream().read().decode('utf-8')
|
||||
orig_wb_url = WbUrl(record.rec_headers.get('WARC-Refers-To-Target-URI'))
|
||||
orig_wb_url.timestamp = iso_date_to_timestamp(record.rec_headers.get('WARC-Refers-To-Date'))
|
||||
print(orig_wb_url.url, orig_wb_url.timestamp)
|
||||
new_r = self._do_req(inputreq, orig_wb_url, kwargs, skip)
|
||||
|
||||
stream = BufferedReader(new_r.raw, block_size=BUFF_SIZE)
|
||||
record = self.loader.parse_record_stream(stream,
|
||||
ensure_http_headers=True)
|
||||
|
||||
print(record.rec_headers)
|
||||
|
||||
else:
|
||||
history_state = 'undefined'
|
||||
|
||||
self._add_custom_params(cdx, r.headers, kwargs)
|
||||
|
||||
if readd_range and record.http_headers.get_statuscode() == '200':
|
||||
@ -318,7 +342,8 @@ class RewriterApp(object):
|
||||
top_url,
|
||||
environ,
|
||||
framed_replay,
|
||||
config=self.config))
|
||||
config=self.config,
|
||||
history_state=history_state))
|
||||
|
||||
cookie_rewriter = None
|
||||
if self.cookie_tracker:
|
||||
|
@ -60,6 +60,10 @@ rules:
|
||||
- url_prefix: 'com,twitter)/i/videos/tweet'
|
||||
|
||||
fuzzy_lookup: '()'
|
||||
|
||||
- url_prefix: 'com,twitter)/'
|
||||
|
||||
fuzzy_lookup: '.*(conversation)?.*'
|
||||
|
||||
|
||||
# facebook rules
|
||||
|
@ -796,7 +796,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
}
|
||||
|
||||
//============================================
|
||||
function send_history_update(url, title) {
|
||||
function send_history_update(state, type, url, title) {
|
||||
if ($wbwindow.__WB_top_frame && $wbwindow == $wbwindow.__WB_replay_top) {
|
||||
var message = {
|
||||
"url": url,
|
||||
@ -805,6 +805,8 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
"is_live": wb_info.is_live,
|
||||
"title": title,
|
||||
"wb_type": "replace-url",
|
||||
"state": state,
|
||||
"change_type": type,
|
||||
}
|
||||
|
||||
$wbwindow.__WB_top_frame.postMessage(message, wb_info.top_host);
|
||||
@ -817,7 +819,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
override_history_func("replaceState");
|
||||
|
||||
$wbwindow.addEventListener("popstate", function(event) {
|
||||
send_history_update($wbwindow.WB_wombat_location.href, $wbwindow.document.title);
|
||||
send_history_update(event.state, "popState", $wbwindow.WB_wombat_location.href, $wbwindow.document.title);
|
||||
});
|
||||
}
|
||||
|
||||
@ -847,7 +849,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
|
||||
if (url && (url != $wbwindow.WB_wombat_location.origin && $wbwindow.WB_wombat_location.href != "about:blank") &&
|
||||
!starts_with(url, $wbwindow.WB_wombat_location.origin + "/")) {
|
||||
throw new DOMException("Invalid history change: " + url);
|
||||
//throw new DOMException("Invalid history change: " + url);
|
||||
}
|
||||
} else {
|
||||
url = $wbwindow.WB_wombat_location.href;
|
||||
@ -855,7 +857,7 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
|
||||
orig_func.call(this, state_obj, title, rewritten_url);
|
||||
|
||||
send_history_update(url, title);
|
||||
send_history_update(state_obj, func_name, url, title);
|
||||
}
|
||||
|
||||
$wbwindow.history[func_name] = rewritten_func;
|
||||
@ -3036,6 +3038,52 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
// End Proxy Obj Override System
|
||||
|
||||
|
||||
/**
 * Replay a recorded sequence of History API changes in the replayed page.
 *
 * Reads `wbinfo.history_state`; when it is falsy, or when `$wbwindow` is not
 * its own `__WB_replay_top`, nothing happens. Otherwise the current history
 * entry is immediately replaced with the recorded base entry (built from
 * `base_timestamp`, `base_url` and `base_state`), and once the window has
 * loaded, every entry of `history_state.states` is passed as the argument
 * list of `history.pushState`, followed by a synthetic `popstate` event
 * carrying the first element of the last entry as its state.
 *
 * @param {Window} $wbwindow - window whose history is rewritten
 * @param {Object} wbinfo - replay info; uses `prefix`, `mod`, `history_state`
 */
function init_history_replay($wbwindow, wbinfo) {
    var history_state = wbinfo.history_state;

    if (!history_state) {
        return;
    }

    // only act in the window that is itself the replay top
    if ($wbwindow.__WB_replay_top != $wbwindow) {
        return;
    }

    // replace initial state
    var orig_url = wbinfo.prefix + history_state.base_timestamp + wbinfo.mod + "/" + history_state.base_url;
    $wbwindow.history.replaceState(history_state.base_state || null, "Title", orig_url);

    // guards against running the replay more than once
    var replayed = false;

    // NOTE(review): the 500ms delay presumably gives page scripts time to
    // finish their own initialisation after "load" -- confirm
    $wbwindow.addEventListener("load", function() {
        $wbwindow.setTimeout(replay, 500);
    });

    function replay() {
        if (replayed) {
            return;
        }

        // use the document of the window being replayed, not the ambient global
        if ($wbwindow.document.readyState != "complete") {
            return;
        }

        replayed = true;

        var states = history_state.states;

        // nothing recorded: avoid indexing into a missing or empty list
        if (!states || !states.length) {
            return;
        }

        var lastState = states[states.length - 1][0];

        // NOTE(review): loose equality compares object states by reference,
        // so this only short-circuits for primitive/identical states -- confirm intent
        if ($wbwindow.history.state == lastState) {
            return;
        }

        for (var i = 0; i < states.length; i++) {
            $wbwindow.history.pushState.apply($wbwindow.history, states[i]);
        }

        // notify the page as if the user had navigated to the last state
        $wbwindow.dispatchEvent(new $wbwindow.PopStateEvent('popstate', { state: lastState }));
    }
}
|
||||
|
||||
//============================================
|
||||
function wombat_init(wbinfo) {
|
||||
init_paths(wbinfo);
|
||||
@ -3044,6 +3092,8 @@ var _WBWombat = function($wbwindow, wbinfo) {
|
||||
|
||||
init_wombat_loc($wbwindow);
|
||||
|
||||
init_history_replay($wbwindow, wbinfo);
|
||||
|
||||
// archival mode: init url-rewriting intercepts
|
||||
if (!wb_is_proxy) {
|
||||
init_wombat_top($wbwindow);
|
||||
|
@ -15,6 +15,8 @@
|
||||
wbinfo.proxy_magic = "{{ env.pywb_proxy_magic }}";
|
||||
wbinfo.static_prefix = "{{ host_prefix }}/{{ static_path }}/";
|
||||
|
||||
wbinfo.history_state = {{ history_state }};
|
||||
|
||||
{% if not wb_url.is_banner_only %}
|
||||
wbinfo.wombat_ts = "{{ wombat_ts }}";
|
||||
wbinfo.wombat_sec = "{{ wombat_sec }}";
|
||||
|
Loading…
x
Reference in New Issue
Block a user