mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
refactor: live rewrite handler uses new range functions in
wbrequest/response; also add 'remove range' for unbounded 'bytes=0-' range requests.
This commit is contained in:
parent
07e46f4b6c
commit
68e7a70777
@ -1,6 +1,7 @@
|
|||||||
from pywb.framework.basehandlers import WbUrlHandler
|
from pywb.framework.basehandlers import WbUrlHandler
|
||||||
from pywb.framework.wbrequestresponse import WbResponse
|
from pywb.framework.wbrequestresponse import WbResponse
|
||||||
from pywb.framework.archivalrouter import ArchivalRouter, Route
|
from pywb.framework.archivalrouter import ArchivalRouter, Route
|
||||||
|
from pywb.framework.cache import create_cache
|
||||||
|
|
||||||
from pywb.rewrite.rewrite_live import LiveRewriter
|
from pywb.rewrite.rewrite_live import LiveRewriter
|
||||||
from pywb.rewrite.wburl import WbUrl
|
from pywb.rewrite.wburl import WbUrl
|
||||||
@ -15,8 +16,6 @@ import json
|
|||||||
import requests
|
import requests
|
||||||
import hashlib
|
import hashlib
|
||||||
|
|
||||||
from rangecache import range_cache
|
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class LiveResourceException(WbException):
|
class LiveResourceException(WbException):
|
||||||
@ -42,8 +41,12 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
|||||||
|
|
||||||
self.live_cookie = config.get('live-cookie', self.LIVE_COOKIE)
|
self.live_cookie = config.get('live-cookie', self.LIVE_COOKIE)
|
||||||
|
|
||||||
|
self.no_proxy_range = config.get('no_proxy_range', True)
|
||||||
|
|
||||||
self.ydl = None
|
self.ydl = None
|
||||||
|
|
||||||
|
self._cache = None
|
||||||
|
|
||||||
def handle_request(self, wbrequest):
|
def handle_request(self, wbrequest):
|
||||||
try:
|
try:
|
||||||
return self.render_content(wbrequest)
|
return self.render_content(wbrequest)
|
||||||
@ -72,33 +75,29 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
|||||||
wbrequest.env['REL_REFERER'] = WbUrl(ref_wburl_str).url
|
wbrequest.env['REL_REFERER'] = WbUrl(ref_wburl_str).url
|
||||||
|
|
||||||
proxies = None # default
|
proxies = None # default
|
||||||
ping_url = None
|
use_206 = False
|
||||||
ping_cache_key = None
|
url = None
|
||||||
ping_range_header = None
|
|
||||||
|
readd_range = False
|
||||||
|
cache_key = None
|
||||||
|
|
||||||
|
if self.default_proxy and self.no_proxy_range:
|
||||||
|
rangeres = wbrequest.extract_range()
|
||||||
|
|
||||||
if self.default_proxy and range_cache:
|
|
||||||
rangeres = range_cache.is_ranged(wbrequest)
|
|
||||||
if rangeres:
|
if rangeres:
|
||||||
url, start, end, use_206 = rangeres
|
url, start, end, use_206 = rangeres
|
||||||
proxies = False
|
|
||||||
|
|
||||||
# force a bound on unbounded range, if specified
|
# if bytes=0- Range request, simply remove the range and still proxy
|
||||||
if use_206 and end and wbrequest.env['HTTP_RANGE'].endswith('-'):
|
if start == 0 and not end and use_206:
|
||||||
range_h = 'bytes={0}-{1}'.format(start, end)
|
wbrequest.wb_url.url = url
|
||||||
wbrequest.env['HTTP_RANGE'] = range_h
|
del wbrequest.env['HTTP_RANGE']
|
||||||
|
readd_range = True
|
||||||
|
else:
|
||||||
|
# disables proxy
|
||||||
|
proxies = False
|
||||||
|
|
||||||
hash_ = hashlib.md5()
|
# sets cache_key only if not already cached
|
||||||
hash_.update(url)
|
cache_key = self._check_url_cache(url)
|
||||||
ping_cache_key = hash_.hexdigest()
|
|
||||||
|
|
||||||
if ping_cache_key not in range_cache.cache:
|
|
||||||
ping_url = url
|
|
||||||
|
|
||||||
# if non-206, (eg. youtube) generate a videoinfo page
|
|
||||||
if not use_206 and ref_wburl_str:
|
|
||||||
resp = self.get_video_info(wbrequest,
|
|
||||||
info_url=wbrequest.env['REL_REFERER'],
|
|
||||||
video_url=url)
|
|
||||||
|
|
||||||
result = self.rewriter.fetch_request(wbrequest.wb_url.url,
|
result = self.rewriter.fetch_request(wbrequest.wb_url.url,
|
||||||
wbrequest.urlrewriter,
|
wbrequest.urlrewriter,
|
||||||
@ -109,9 +108,16 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
|||||||
|
|
||||||
wbresponse = self._make_response(wbrequest, *result)
|
wbresponse = self._make_response(wbrequest, *result)
|
||||||
|
|
||||||
if ping_url:
|
if readd_range:
|
||||||
self._proxy_ping(wbrequest, wbresponse,
|
content_length = wbresponse.status_headers.get_header('Content-Length')
|
||||||
ping_url, ping_cache_key)
|
try:
|
||||||
|
content_length = int(content_length)
|
||||||
|
wbresponse.add_range(0, content_length, content_length)
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if cache_key:
|
||||||
|
self._add_proxy_ping(cache_key, url, wbrequest, wbresponse)
|
||||||
|
|
||||||
return wbresponse
|
return wbresponse
|
||||||
|
|
||||||
@ -127,20 +133,32 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
|||||||
|
|
||||||
return WbResponse(status_headers, gen)
|
return WbResponse(status_headers, gen)
|
||||||
|
|
||||||
def _proxy_ping(self, wbrequest, wbresponse, url, key):
|
def _check_url_cache(self, url):
|
||||||
def do_proxy_ping():
|
if not self._cache:
|
||||||
|
self._cache = create_cache()
|
||||||
|
|
||||||
|
hash_ = hashlib.md5()
|
||||||
|
hash_.update(url)
|
||||||
|
key = hash_.hexdigest()
|
||||||
|
|
||||||
|
if key in self._cache:
|
||||||
|
return None
|
||||||
|
|
||||||
|
return key
|
||||||
|
|
||||||
|
def _add_proxy_ping(self, key, url, wbrequest, wbresponse):
|
||||||
|
referrer = wbrequest.env.get('REL_REFERER')
|
||||||
|
|
||||||
|
def do_ping():
|
||||||
proxies = {'http': self.default_proxy,
|
proxies = {'http': self.default_proxy,
|
||||||
'https': self.default_proxy}
|
'https': self.default_proxy}
|
||||||
|
|
||||||
headers = self._live_request_headers(wbrequest)
|
headers = self._live_request_headers(wbrequest)
|
||||||
headers['Connection'] = 'close'
|
headers['Connection'] = 'close'
|
||||||
|
|
||||||
if key in range_cache.cache:
|
|
||||||
return
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# mark as pinged
|
# mark as pinged
|
||||||
range_cache.cache[key] = '1'
|
self._cache[key] = '1'
|
||||||
|
|
||||||
resp = requests.get(url=url,
|
resp = requests.get(url=url,
|
||||||
headers=headers,
|
headers=headers,
|
||||||
@ -151,15 +169,24 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
|||||||
# don't actually read whole response, proxy response for writing it
|
# don't actually read whole response, proxy response for writing it
|
||||||
resp.close()
|
resp.close()
|
||||||
except:
|
except:
|
||||||
del range_cache.cache[key]
|
del self._cache[key]
|
||||||
|
|
||||||
def check_buff_gen(gen):
|
# also ping video info
|
||||||
|
if referrer:
|
||||||
|
resp = self.get_video_info(wbrequest,
|
||||||
|
info_url=referrer,
|
||||||
|
video_url=url)
|
||||||
|
def wrap_buff_gen(gen):
|
||||||
for x in gen:
|
for x in gen:
|
||||||
yield x
|
yield x
|
||||||
|
|
||||||
do_proxy_ping()
|
try:
|
||||||
|
do_ping()
|
||||||
|
except:
|
||||||
|
raise
|
||||||
|
pass
|
||||||
|
|
||||||
wbresponse.body = check_buff_gen(wbresponse.body)
|
wbresponse.body = wrap_buff_gen(wbresponse.body)
|
||||||
return wbresponse
|
return wbresponse
|
||||||
|
|
||||||
def get_video_info(self, wbrequest, info_url=None, video_url=None):
|
def get_video_info(self, wbrequest, info_url=None, video_url=None):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user