mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 08:04:49 +01:00
rangecache: always bound range, set default bound of 16384
wombat: work on date override, disable for now
head_insert: check for wombat not being inited to avoid undef error
This commit is contained in:
parent
88f553dce7
commit
c6a2c83b66
@ -138,8 +138,8 @@ rules:
|
||||
|
||||
- url_prefix: 'com,googlevideo,'
|
||||
|
||||
#fuzzy_lookup: 'com,googlevideo.*/videoplayback?.*(id=[^&]).*(mime=[^&]).*(signature=[^&])'
|
||||
fuzzy_lookup: 'com,googlevideo.*/videoplayback?.*(id=[^&]).*(mime=[^&])'
|
||||
fuzzy_lookup: 'com,googlevideo.*/videoplayback?.*(id=[^&]).*(mime=[^&]).*(signature=[^&])'
|
||||
#fuzzy_lookup: 'com,googlevideo.*/videoplayback?.*(id=[^&]).*(mime=[^&])'
|
||||
|
||||
|
||||
# testing rules -- not for valid domain
|
||||
|
@ -20,7 +20,7 @@ This file is part of pywb, https://github.com/ikreymer/pywb
|
||||
//============================================
|
||||
// Wombat JS-Rewriting Library v2.1
|
||||
//============================================
|
||||
window._WBWombat = (function() {
|
||||
_WBWombat = (function() {
|
||||
|
||||
// Globals
|
||||
var wb_replay_prefix;
|
||||
@ -511,6 +511,7 @@ window._WBWombat = (function() {
|
||||
|
||||
//============================================
|
||||
function init_image_override() {
|
||||
window.__Image = window.Image;
|
||||
window.Image = function (Image) {
|
||||
return function (width, height) {
|
||||
var image = new Image(width, height);
|
||||
@ -522,16 +523,40 @@ window._WBWombat = (function() {
|
||||
|
||||
//============================================
|
||||
function init_date_override(timestamp) {
|
||||
timestamp = parseInt(timestamp) * 1000;
|
||||
var timediff = Date.now() - timestamp;
|
||||
|
||||
window.__Date = window.Date;
|
||||
window.__Date_now = window.Date.now;
|
||||
|
||||
window.Date = function (Date) {
|
||||
return function (A, B, C, D, E, F, G) {
|
||||
if (arguments.length == 0) {
|
||||
timestamp = parseInt(timestamp) * 1000;
|
||||
return new Date(timestamp);
|
||||
// Apply doesn't work for constructors and Date doesn't
|
||||
// seem to like undefined args, so must explicitly
|
||||
// call constructor for each possible args 0..7
|
||||
if (A === undefined) {
|
||||
return new Date(window.Date.now());
|
||||
} else if (B === undefined) {
|
||||
return new Date(A);
|
||||
} else if (C === undefined) {
|
||||
return new Date(A, B);
|
||||
} else if (D === undefined) {
|
||||
return new Date(A, B, C);
|
||||
} else if (E === undefined) {
|
||||
return new Date(A, B, C, D);
|
||||
} else if (F === undefined) {
|
||||
return new Date(A, B, C, D, E);
|
||||
} else if (G === undefined) {
|
||||
return new Date(A, B, C, D, E, F);
|
||||
} else {
|
||||
return new Date(A, B, C, D, E, F, G);
|
||||
}
|
||||
}
|
||||
}(window.Date);
|
||||
|
||||
window.Date.now = function() {
|
||||
return __Date_now() - timediff;
|
||||
}
|
||||
}
|
||||
|
||||
//============================================
|
||||
@ -874,7 +899,7 @@ window._WBWombat = (function() {
|
||||
init_seeded_random(timestamp);
|
||||
|
||||
// Date
|
||||
init_date_override(timestamp);
|
||||
// init_date_override(timestamp);
|
||||
|
||||
// expose functions
|
||||
this.extract_orig = extract_orig;
|
||||
@ -882,4 +907,6 @@ window._WBWombat = (function() {
|
||||
|
||||
return wombat_init;
|
||||
|
||||
})(this);
|
||||
})();
|
||||
|
||||
window._WBWombat = _WBWombat;
|
||||
|
@ -3,12 +3,14 @@
|
||||
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wombat.js'> </script>
|
||||
<script>
|
||||
{% set urlsplit = cdx.original | urlsplit %}
|
||||
if (window && window._WBWombat) {
|
||||
var _wb_wombat = new _WBWombat("{{ wbrequest.wb_prefix}}",
|
||||
"{{ cdx['timestamp'] if include_ts else ''}}",
|
||||
"{{ urlsplit.scheme }}",
|
||||
"{{ urlsplit.netloc }}",
|
||||
"{{ cdx.timestamp | format_ts('%s') }}",
|
||||
"{{ wbrequest.wb_url.mod }}");
|
||||
}
|
||||
</script>
|
||||
{% endif %}
|
||||
<script>
|
||||
|
@ -74,18 +74,27 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
||||
proxies = None # default
|
||||
ping_url = None
|
||||
ping_cache_key = None
|
||||
ping_range_header = None
|
||||
|
||||
if self.default_proxy and range_cache:
|
||||
rangeres = range_cache.is_ranged(wbrequest)
|
||||
if rangeres:
|
||||
url, start, end, use_206 = rangeres
|
||||
proxies = False
|
||||
|
||||
# force a bound on unbounded range
|
||||
if use_206 and wbrequest.env['HTTP_RANGE'].endswith('-'):
|
||||
range_h = 'bytes={0}-{1}'.format(start, end)
|
||||
wbrequest.env['HTTP_RANGE'] = range_h
|
||||
print('BOUNDING: ' + range_h)
|
||||
|
||||
hash_ = hashlib.md5()
|
||||
hash_.update(rangeres[0])
|
||||
hash_.update(url)
|
||||
ping_cache_key = hash_.hexdigest()
|
||||
|
||||
if ping_cache_key not in range_cache.cache:
|
||||
ping_url = rangeres[0]
|
||||
ping_url = url
|
||||
|
||||
|
||||
result = self.rewriter.fetch_request(wbrequest.wb_url.url,
|
||||
wbrequest.urlrewriter,
|
||||
@ -120,21 +129,25 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
||||
'https': self.default_proxy}
|
||||
|
||||
headers = self._live_request_headers(wbrequest)
|
||||
print('PINGING PROXY: ' + url)
|
||||
resp = requests.get(url=url,
|
||||
headers=headers,
|
||||
proxies=proxies,
|
||||
verify=False,
|
||||
stream=True)
|
||||
headers['Connection'] = 'close'
|
||||
|
||||
# don't actually read whole response, proxy response for writing it
|
||||
resp.raw.close()
|
||||
resp.close()
|
||||
if key in range_cache.cache:
|
||||
return
|
||||
|
||||
# mark as pinged
|
||||
range_cache.cache[key] = '1'
|
||||
try:
|
||||
# mark as pinged
|
||||
range_cache.cache[key] = '1'
|
||||
|
||||
return None
|
||||
resp = requests.get(url=url,
|
||||
headers=headers,
|
||||
proxies=proxies,
|
||||
verify=False,
|
||||
stream=True)
|
||||
|
||||
# don't actually read whole response, proxy response for writing it
|
||||
resp.close()
|
||||
except:
|
||||
del range_cache.cache[key]
|
||||
|
||||
def check_buff_gen(gen):
|
||||
for x in gen:
|
||||
|
@ -14,6 +14,8 @@ class RangeCache(object):
|
||||
YOUTUBE_RX = re.compile('.*.googlevideo.com/videoplayback')
|
||||
YT_EXTRACT_RX = re.compile('&range=([^&]+)')
|
||||
|
||||
DEFAULT_BUFF = 16384
|
||||
|
||||
@staticmethod
|
||||
def match_yt(url):
|
||||
if not RangeCache.YOUTUBE_RX.match(url):
|
||||
@ -51,7 +53,17 @@ class RangeCache(object):
|
||||
|
||||
use_206 = True
|
||||
|
||||
return url, range_h, use_206
|
||||
# force bounded range
|
||||
range_h = range_h.split('=')[-1]
|
||||
range_h = range_h.rstrip()
|
||||
parts = range_h.split('-', 1)
|
||||
start = int(parts[0])
|
||||
if len(parts) == 2 and parts[1]:
|
||||
end = int(parts[1])
|
||||
else:
|
||||
end = start + self.DEFAULT_BUFF - 1
|
||||
|
||||
return url, start, end, use_206
|
||||
|
||||
def __call__(self, wbrequest, digest, wbresponse_func):
|
||||
result = self.is_ranged(wbrequest)
|
||||
@ -62,9 +74,8 @@ class RangeCache(object):
|
||||
*result)
|
||||
|
||||
def handle_range(self, wbrequest, digest, wbresponse_func,
|
||||
url, range_h, use_206):
|
||||
url, start, end, use_206):
|
||||
|
||||
range_h = range_h.split('=')[-1]
|
||||
key = digest
|
||||
if not key in self.cache:
|
||||
response = wbresponse_func()
|
||||
@ -90,20 +101,10 @@ class RangeCache(object):
|
||||
|
||||
filelen = os.path.getsize(spec['name'])
|
||||
|
||||
range_h = range_h.rstrip()
|
||||
|
||||
if range_h == '0-':
|
||||
range_h = '0-120000'
|
||||
|
||||
parts = range_h.rstrip().split('-')
|
||||
start = parts[0]
|
||||
#start = start.split('=')[1]
|
||||
start = int(start)
|
||||
|
||||
maxlen = filelen - start
|
||||
|
||||
if len(parts) == 2 and parts[1]:
|
||||
maxlen = min(maxlen, int(parts[1]) - start + 1)
|
||||
if end:
|
||||
maxlen = min(maxlen, end - start + 1)
|
||||
|
||||
def read_range():
|
||||
with open(spec['name']) as fh:
|
||||
|
Loading…
x
Reference in New Issue
Block a user