mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
rangecache: always bound range, set default bound of 16384
wombat: work on date override, disable for now
head_insert: check for wombat not being inited to avoid undef error
This commit is contained in:
parent
88f553dce7
commit
c6a2c83b66
@ -138,8 +138,8 @@ rules:
|
|||||||
|
|
||||||
- url_prefix: 'com,googlevideo,'
|
- url_prefix: 'com,googlevideo,'
|
||||||
|
|
||||||
#fuzzy_lookup: 'com,googlevideo.*/videoplayback?.*(id=[^&]).*(mime=[^&]).*(signature=[^&])'
|
fuzzy_lookup: 'com,googlevideo.*/videoplayback?.*(id=[^&]).*(mime=[^&]).*(signature=[^&])'
|
||||||
fuzzy_lookup: 'com,googlevideo.*/videoplayback?.*(id=[^&]).*(mime=[^&])'
|
#fuzzy_lookup: 'com,googlevideo.*/videoplayback?.*(id=[^&]).*(mime=[^&])'
|
||||||
|
|
||||||
|
|
||||||
# testing rules -- not for valid domain
|
# testing rules -- not for valid domain
|
||||||
|
@ -20,7 +20,7 @@ This file is part of pywb, https://github.com/ikreymer/pywb
|
|||||||
//============================================
|
//============================================
|
||||||
// Wombat JS-Rewriting Library v2.1
|
// Wombat JS-Rewriting Library v2.1
|
||||||
//============================================
|
//============================================
|
||||||
window._WBWombat = (function() {
|
_WBWombat = (function() {
|
||||||
|
|
||||||
// Globals
|
// Globals
|
||||||
var wb_replay_prefix;
|
var wb_replay_prefix;
|
||||||
@ -511,6 +511,7 @@ window._WBWombat = (function() {
|
|||||||
|
|
||||||
//============================================
|
//============================================
|
||||||
function init_image_override() {
|
function init_image_override() {
|
||||||
|
window.__Image = window.Image;
|
||||||
window.Image = function (Image) {
|
window.Image = function (Image) {
|
||||||
return function (width, height) {
|
return function (width, height) {
|
||||||
var image = new Image(width, height);
|
var image = new Image(width, height);
|
||||||
@ -522,16 +523,40 @@ window._WBWombat = (function() {
|
|||||||
|
|
||||||
//============================================
|
//============================================
|
||||||
function init_date_override(timestamp) {
|
function init_date_override(timestamp) {
|
||||||
|
timestamp = parseInt(timestamp) * 1000;
|
||||||
|
var timediff = Date.now() - timestamp;
|
||||||
|
|
||||||
|
window.__Date = window.Date;
|
||||||
|
window.__Date_now = window.Date.now;
|
||||||
|
|
||||||
window.Date = function (Date) {
|
window.Date = function (Date) {
|
||||||
return function (A, B, C, D, E, F, G) {
|
return function (A, B, C, D, E, F, G) {
|
||||||
if (arguments.length == 0) {
|
// Apply doesn't work for constructors and Date doesn't
|
||||||
timestamp = parseInt(timestamp) * 1000;
|
// seem to like undefined args, so must explicitly
|
||||||
return new Date(timestamp);
|
// call constructor for each possible args 0..7
|
||||||
|
if (A === undefined) {
|
||||||
|
return new Date(window.Date.now());
|
||||||
|
} else if (B === undefined) {
|
||||||
|
return new Date(A);
|
||||||
|
} else if (C === undefined) {
|
||||||
|
return new Date(A, B);
|
||||||
|
} else if (D === undefined) {
|
||||||
|
return new Date(A, B, C);
|
||||||
|
} else if (E === undefined) {
|
||||||
|
return new Date(A, B, C, D);
|
||||||
|
} else if (F === undefined) {
|
||||||
|
return new Date(A, B, C, D, E);
|
||||||
|
} else if (G === undefined) {
|
||||||
|
return new Date(A, B, C, D, E, F);
|
||||||
} else {
|
} else {
|
||||||
return new Date(A, B, C, D, E, F, G);
|
return new Date(A, B, C, D, E, F, G);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}(window.Date);
|
}(window.Date);
|
||||||
|
|
||||||
|
window.Date.now = function() {
|
||||||
|
return __Date_now() - timediff;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//============================================
|
//============================================
|
||||||
@ -874,7 +899,7 @@ window._WBWombat = (function() {
|
|||||||
init_seeded_random(timestamp);
|
init_seeded_random(timestamp);
|
||||||
|
|
||||||
// Date
|
// Date
|
||||||
init_date_override(timestamp);
|
// init_date_override(timestamp);
|
||||||
|
|
||||||
// expose functions
|
// expose functions
|
||||||
this.extract_orig = extract_orig;
|
this.extract_orig = extract_orig;
|
||||||
@ -882,4 +907,6 @@ window._WBWombat = (function() {
|
|||||||
|
|
||||||
return wombat_init;
|
return wombat_init;
|
||||||
|
|
||||||
})(this);
|
})();
|
||||||
|
|
||||||
|
window._WBWombat = _WBWombat;
|
||||||
|
@ -3,12 +3,14 @@
|
|||||||
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wombat.js'> </script>
|
<script src='{{ wbrequest.host_prefix }}/{{ static_path }}/wombat.js'> </script>
|
||||||
<script>
|
<script>
|
||||||
{% set urlsplit = cdx.original | urlsplit %}
|
{% set urlsplit = cdx.original | urlsplit %}
|
||||||
|
if (window && window._WBWombat) {
|
||||||
var _wb_wombat = new _WBWombat("{{ wbrequest.wb_prefix}}",
|
var _wb_wombat = new _WBWombat("{{ wbrequest.wb_prefix}}",
|
||||||
"{{ cdx['timestamp'] if include_ts else ''}}",
|
"{{ cdx['timestamp'] if include_ts else ''}}",
|
||||||
"{{ urlsplit.scheme }}",
|
"{{ urlsplit.scheme }}",
|
||||||
"{{ urlsplit.netloc }}",
|
"{{ urlsplit.netloc }}",
|
||||||
"{{ cdx.timestamp | format_ts('%s') }}",
|
"{{ cdx.timestamp | format_ts('%s') }}",
|
||||||
"{{ wbrequest.wb_url.mod }}");
|
"{{ wbrequest.wb_url.mod }}");
|
||||||
|
}
|
||||||
</script>
|
</script>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
<script>
|
<script>
|
||||||
|
@ -74,18 +74,27 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
|||||||
proxies = None # default
|
proxies = None # default
|
||||||
ping_url = None
|
ping_url = None
|
||||||
ping_cache_key = None
|
ping_cache_key = None
|
||||||
|
ping_range_header = None
|
||||||
|
|
||||||
if self.default_proxy and range_cache:
|
if self.default_proxy and range_cache:
|
||||||
rangeres = range_cache.is_ranged(wbrequest)
|
rangeres = range_cache.is_ranged(wbrequest)
|
||||||
if rangeres:
|
if rangeres:
|
||||||
|
url, start, end, use_206 = rangeres
|
||||||
proxies = False
|
proxies = False
|
||||||
|
|
||||||
|
# force a bound on unbounded range
|
||||||
|
if use_206 and wbrequest.env['HTTP_RANGE'].endswith('-'):
|
||||||
|
range_h = 'bytes={0}-{1}'.format(start, end)
|
||||||
|
wbrequest.env['HTTP_RANGE'] = range_h
|
||||||
|
print('BOUNDING: ' + range_h)
|
||||||
|
|
||||||
hash_ = hashlib.md5()
|
hash_ = hashlib.md5()
|
||||||
hash_.update(rangeres[0])
|
hash_.update(url)
|
||||||
ping_cache_key = hash_.hexdigest()
|
ping_cache_key = hash_.hexdigest()
|
||||||
|
|
||||||
if ping_cache_key not in range_cache.cache:
|
if ping_cache_key not in range_cache.cache:
|
||||||
ping_url = rangeres[0]
|
ping_url = url
|
||||||
|
|
||||||
|
|
||||||
result = self.rewriter.fetch_request(wbrequest.wb_url.url,
|
result = self.rewriter.fetch_request(wbrequest.wb_url.url,
|
||||||
wbrequest.urlrewriter,
|
wbrequest.urlrewriter,
|
||||||
@ -120,21 +129,25 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
|||||||
'https': self.default_proxy}
|
'https': self.default_proxy}
|
||||||
|
|
||||||
headers = self._live_request_headers(wbrequest)
|
headers = self._live_request_headers(wbrequest)
|
||||||
print('PINGING PROXY: ' + url)
|
headers['Connection'] = 'close'
|
||||||
resp = requests.get(url=url,
|
|
||||||
headers=headers,
|
|
||||||
proxies=proxies,
|
|
||||||
verify=False,
|
|
||||||
stream=True)
|
|
||||||
|
|
||||||
# don't actually read whole response, proxy response for writing it
|
if key in range_cache.cache:
|
||||||
resp.raw.close()
|
return
|
||||||
resp.close()
|
|
||||||
|
|
||||||
# mark as pinged
|
try:
|
||||||
range_cache.cache[key] = '1'
|
# mark as pinged
|
||||||
|
range_cache.cache[key] = '1'
|
||||||
|
|
||||||
return None
|
resp = requests.get(url=url,
|
||||||
|
headers=headers,
|
||||||
|
proxies=proxies,
|
||||||
|
verify=False,
|
||||||
|
stream=True)
|
||||||
|
|
||||||
|
# don't actually read whole response, proxy response for writing it
|
||||||
|
resp.close()
|
||||||
|
except:
|
||||||
|
del range_cache.cache[key]
|
||||||
|
|
||||||
def check_buff_gen(gen):
|
def check_buff_gen(gen):
|
||||||
for x in gen:
|
for x in gen:
|
||||||
|
@ -14,6 +14,8 @@ class RangeCache(object):
|
|||||||
YOUTUBE_RX = re.compile('.*.googlevideo.com/videoplayback')
|
YOUTUBE_RX = re.compile('.*.googlevideo.com/videoplayback')
|
||||||
YT_EXTRACT_RX = re.compile('&range=([^&]+)')
|
YT_EXTRACT_RX = re.compile('&range=([^&]+)')
|
||||||
|
|
||||||
|
DEFAULT_BUFF = 16384
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def match_yt(url):
|
def match_yt(url):
|
||||||
if not RangeCache.YOUTUBE_RX.match(url):
|
if not RangeCache.YOUTUBE_RX.match(url):
|
||||||
@ -51,7 +53,17 @@ class RangeCache(object):
|
|||||||
|
|
||||||
use_206 = True
|
use_206 = True
|
||||||
|
|
||||||
return url, range_h, use_206
|
# force bounded range
|
||||||
|
range_h = range_h.split('=')[-1]
|
||||||
|
range_h = range_h.rstrip()
|
||||||
|
parts = range_h.split('-', 1)
|
||||||
|
start = int(parts[0])
|
||||||
|
if len(parts) == 2 and parts[1]:
|
||||||
|
end = int(parts[1])
|
||||||
|
else:
|
||||||
|
end = start + self.DEFAULT_BUFF - 1
|
||||||
|
|
||||||
|
return url, start, end, use_206
|
||||||
|
|
||||||
def __call__(self, wbrequest, digest, wbresponse_func):
|
def __call__(self, wbrequest, digest, wbresponse_func):
|
||||||
result = self.is_ranged(wbrequest)
|
result = self.is_ranged(wbrequest)
|
||||||
@ -62,9 +74,8 @@ class RangeCache(object):
|
|||||||
*result)
|
*result)
|
||||||
|
|
||||||
def handle_range(self, wbrequest, digest, wbresponse_func,
|
def handle_range(self, wbrequest, digest, wbresponse_func,
|
||||||
url, range_h, use_206):
|
url, start, end, use_206):
|
||||||
|
|
||||||
range_h = range_h.split('=')[-1]
|
|
||||||
key = digest
|
key = digest
|
||||||
if not key in self.cache:
|
if not key in self.cache:
|
||||||
response = wbresponse_func()
|
response = wbresponse_func()
|
||||||
@ -90,20 +101,10 @@ class RangeCache(object):
|
|||||||
|
|
||||||
filelen = os.path.getsize(spec['name'])
|
filelen = os.path.getsize(spec['name'])
|
||||||
|
|
||||||
range_h = range_h.rstrip()
|
|
||||||
|
|
||||||
if range_h == '0-':
|
|
||||||
range_h = '0-120000'
|
|
||||||
|
|
||||||
parts = range_h.rstrip().split('-')
|
|
||||||
start = parts[0]
|
|
||||||
#start = start.split('=')[1]
|
|
||||||
start = int(start)
|
|
||||||
|
|
||||||
maxlen = filelen - start
|
maxlen = filelen - start
|
||||||
|
|
||||||
if len(parts) == 2 and parts[1]:
|
if end:
|
||||||
maxlen = min(maxlen, int(parts[1]) - start + 1)
|
maxlen = min(maxlen, end - start + 1)
|
||||||
|
|
||||||
def read_range():
|
def read_range():
|
||||||
with open(spec['name']) as fh:
|
with open(spec['name']) as fh:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user