diff --git a/pywb/webapp/live_rewrite_handler.py b/pywb/webapp/live_rewrite_handler.py index bc4a1742..03e2268f 100644 --- a/pywb/webapp/live_rewrite_handler.py +++ b/pywb/webapp/live_rewrite_handler.py @@ -35,9 +35,11 @@ class RewriteHandler(SearchPageWbUrlHandler): def __init__(self, config): super(RewriteHandler, self).__init__(config) - self.proxy = config.get('proxyhostport') + proxyhostport = config.get('proxyhostport') self.rewriter = LiveRewriter(is_framed_replay=self.is_frame_mode, - proxies=self.proxy) + proxies=proxyhostport) + + self.proxies = self.rewriter.proxies self.head_insert_view = HeadInsertView.init_from_config(config) @@ -81,7 +83,7 @@ class RewriteHandler(SearchPageWbUrlHandler): readd_range = False cache_key = None - if self.proxy: + if self.proxies: rangeres = wbrequest.extract_range() if rangeres: @@ -113,7 +115,7 @@ class RewriteHandler(SearchPageWbUrlHandler): try: content_length = int(content_length) wbresponse.status_headers.add_range(0, content_length, content_length) - except ValueError: + except (ValueError, TypeError): pass if cache_key: @@ -150,9 +152,6 @@ class RewriteHandler(SearchPageWbUrlHandler): referrer = wbrequest.env.get('REL_REFERER') def do_ping(): - proxies = {'http': self.proxy, - 'https': self.proxy} - headers = self._live_request_headers(wbrequest) headers['Connection'] = 'close' @@ -162,7 +161,7 @@ class RewriteHandler(SearchPageWbUrlHandler): resp = requests.get(url=url, headers=headers, - proxies=proxies, + proxies=self.proxies, verify=False, stream=True) @@ -170,6 +169,7 @@ class RewriteHandler(SearchPageWbUrlHandler): resp.close() except: del self._cache[key] + raise # also ping video info if referrer: @@ -183,9 +183,9 @@ class RewriteHandler(SearchPageWbUrlHandler): try: do_ping() except: - raise pass + #do_ping() wbresponse.body = wrap_buff_gen(wbresponse.body) return wbresponse @@ -204,9 +204,7 @@ class RewriteHandler(SearchPageWbUrlHandler): content_type = self.YT_DL_TYPE metadata = json.dumps(info) - if self.proxy: - proxies = {'http': self.proxy} - + if self.proxies: headers = self._live_request_headers(wbrequest) headers['Content-Type'] = content_type @@ -216,7 +214,7 @@ class RewriteHandler(SearchPageWbUrlHandler): url=info_url, data=metadata, headers=headers, - proxies=proxies, + proxies=self.proxies, verify=False) return WbResponse.text_response(metadata, content_type=content_type) diff --git a/tests/server_thread.py b/tests/server_thread.py new file mode 100644 index 00000000..e794a0a1 --- /dev/null +++ b/tests/server_thread.py @@ -0,0 +1,32 @@ +import threading + +from pywb.webapp.pywb_init import create_wb_router +from pywb.framework.wsgi_wrappers import init_app + + +class ServerThreadRunner(object): + def __init__(self, make_httpd, config_file=None): + + if config_file: + self.app = init_app(create_wb_router, + load_yaml=True, + config_file=config_file) + else: + self.app = None + + self.httpd = make_httpd(self.app) + self.port = self.httpd.socket.getsockname()[1] + + proxy_str = 'http://localhost:' + str(self.port) + self.proxy_dict = {'http': proxy_str, + 'https': proxy_str} + + def run(): + self.httpd.serve_forever() + + self.thread = threading.Thread(target=run) + self.thread.daemon = True + self.thread.start() + + def stop_thread(self): + self.httpd.shutdown() diff --git a/tests/test_live_proxy.py b/tests/test_live_proxy.py new file mode 100644 index 00000000..0ce8829e --- /dev/null +++ b/tests/test_live_proxy.py @@ -0,0 +1,179 @@ +from SocketServer import ThreadingMixIn +from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler + +from server_thread import ServerThreadRunner + +from pywb.webapp.live_rewrite_handler import create_live_rewriter_app, RewriteHandler + +from pywb.framework.wsgi_wrappers import init_app +import webtest +import shutil + + +#================================================================= +#ThreadingMixIn.deamon_threads = True + +#class ProxyServer(ThreadingMixIn, HTTPServer): +class ProxyServer(HTTPServer): + pass + + +class ProxyRequest(BaseHTTPRequestHandler): + def do_GET(self): + if self.server.force_err: + # just close connection + self.wfile.close() + return + + buff = '' + buff += self.command + ' ' + self.path + ' ' + self.request_version + '\n' + for n in self.headers: + buff += n + ': ' + self.headers[n] + '\n' + + self.server.requestlog.append(buff) + + self.send_response(200) + + self.send_header('x-proxy', 'test') + self.send_header('content-length', str(len(buff))) + self.send_header('content-type', 'text/plain') + self.end_headers() + self.wfile.write(buff) + self.wfile.close() + + def do_PUTMETA(self): + self.do_GET() + + +#================================================================= +class TestProxyLiveRewriter: + def setup(self): + self.requestlog = [] + + def make_httpd(app): + proxyserv = ProxyServer(('', 0), ProxyRequest) + proxyserv.requestlog = self.requestlog + proxyserv.force_err = False + self.proxyserv = proxyserv + return proxyserv + + self.server = ServerThreadRunner(make_httpd) + + self.app = init_app(create_live_rewriter_app, load_yaml=False, + config=dict(framed_replay=True, + proxyhostport=self.server.proxy_dict)) + + print(self.server.proxy_dict) + self.testapp = webtest.TestApp(self.app) + + def teardown(self): + self.server.stop_thread() + + def test_echo_proxy_referrer(self): + headers = [('User-Agent', 'python'), ('Referer', 'http://localhost:80/rewrite/other.example.com')] + resp = self.testapp.get('/rewrite/http://example.com/', headers=headers) + + # ensure just one request + assert len(self.requestlog) == 1 + + # equal to returned response (echo) + assert self.requestlog[0] == resp.body + assert resp.headers['x-archive-orig-x-proxy'] == 'test' + + assert resp.body.startswith('GET http://example.com/ HTTP/1.1') + assert 'referer: http://other.example.com' in resp.body + + def test_echo_proxy_start_unbounded_remove_range(self): + headers = [('Range', 'bytes=0-')] + resp = self.testapp.get('/rewrite/http://example.com/', headers=headers) + + # actual response is with range + assert resp.status_int == 206 + assert 'Content-Range' in resp.headers + assert resp.headers['Accept-Ranges'] == 'bytes' + + assert len(self.requestlog) == 1 + + # proxied, but without range + assert self.requestlog[0] == resp.body + assert resp.headers['x-archive-orig-x-proxy'] == 'test' + + assert self.requestlog[0].startswith('GET http://example.com/ HTTP/1.1') + assert 'range: ' not in self.requestlog[0] + + def test_echo_proxy_bounded_noproxy_range(self): + headers = [('Range', 'bytes=10-1000')] + resp = self.testapp.get('/rewrite/http://example.com/foobar', headers=headers) + + # actual response is with range + assert resp.status_int == 206 + assert 'Content-Range' in resp.headers + assert resp.headers['Accept-Ranges'] == 'bytes' + + # not from proxy + assert 'x-archive-orig-x-proxy' not in resp.headers + + # proxy receives a request also, but w/o range + assert len(self.requestlog) == 1 + + # proxy receives different request than our response + assert self.requestlog[0] != resp.body + + assert self.requestlog[0].startswith('GET http://example.com/foobar HTTP/1.1') + + # no range request + assert 'range: ' not in self.requestlog[0] + + # Second Request + # clear log + self.requestlog.pop() + headers = [('Range', 'bytes=1001-1500')] + resp = self.testapp.get('/rewrite/http://example.com/foobar', headers=headers) + + # actual response is with range + assert resp.status_int == 206 + assert 'Content-Range' in resp.headers + assert resp.headers['Accept-Ranges'] == 'bytes' + + # not from proxy + assert 'x-archive-orig-x-proxy' not in resp.headers + + # already pinged proxy, no additional requests set to proxy + assert len(self.requestlog) == 0 + + def test_echo_proxy_video_info(self): + resp = self.testapp.get('/rewrite/vi_/https://www.youtube.com/watch?v=DjFZyFWSt1M') + assert resp.status_int == 200 + assert resp.content_type == RewriteHandler.YT_DL_TYPE, resp.content_type + + assert len(self.requestlog) == 1 + assert self.requestlog[0].startswith('PUTMETA http://www.youtube.com/watch?v=DjFZyFWSt1M HTTP/1.1') + + def test_echo_proxy_video_with_referrer(self): + headers = [('Range', 'bytes=1000-2000'), ('Referer', 'http://localhost:80/rewrite/https://example.com/')] + resp = self.testapp.get('/rewrite/http://www.youtube.com/watch?v=DjFZyFWSt1M', headers=headers) + + # not from proxy + assert 'x-archive-orig-x-proxy' not in resp.headers + + # proxy receives two requests + assert len(self.requestlog) == 2 + + # first, non-ranged request for page + assert self.requestlog[0].startswith('GET http://www.youtube.com/watch?v=DjFZyFWSt1M HTTP/1.1') + assert 'range' not in self.requestlog[0] + + # also a video info request recording the page + assert self.requestlog[1].startswith('PUTMETA http://example.com/ HTTP/1.1') + + def test_echo_proxy_error(self): + headers = [('Range', 'bytes=1000-2000'), ('Referer', 'http://localhost:80/rewrite/https://example.com/')] + + self.proxyserv.force_err = True + resp = self.testapp.get('/rewrite/http://www.youtube.com/watch?v=DjFZyFWSt1M', headers=headers) + + # not from proxy + assert 'x-archive-orig-x-proxy' not in resp.headers + + # no proxy requests as we're forcing exception + assert len(self.requestlog) == 0 diff --git a/tests/test_live_rewriter.py b/tests/test_live_rewriter.py index 104c55c7..73b9559b 100644 --- a/tests/test_live_rewriter.py +++ b/tests/test_live_rewriter.py @@ -1,4 +1,4 @@ -from pywb.webapp.live_rewrite_handler import create_live_rewriter_app +from pywb.webapp.live_rewrite_handler import create_live_rewriter_app, RewriteHandler from pywb.framework.wsgi_wrappers import init_app import webtest @@ -41,6 +41,4 @@ class TestLiveRewriter: def test_live_video_info(self): resp = self.testapp.get('/rewrite/vi_/https://www.youtube.com/watch?v=DjFZyFWSt1M') assert resp.status_int == 200 - assert resp.content_type == 'application/vnd.youtube-dl_formats+json', resp.content_type - - + assert resp.content_type == RewriteHandler.YT_DL_TYPE, resp.content_type diff --git a/tests/test_proxy_http_cookie.py b/tests/test_proxy_http_cookie.py index a79ef308..16138d01 100644 --- a/tests/test_proxy_http_cookie.py +++ b/tests/test_proxy_http_cookie.py @@ -1,57 +1,31 @@ -from pywb.webapp.pywb_init import create_wb_router -from pywb.framework.wsgi_wrappers import init_app - from wsgiref.simple_server import make_server -from pywb.framework.proxy_resolvers import CookieResolver - -import threading import requests -import shutil -import sys -import os +from server_thread import ServerThreadRunner +#================================================================= TEST_CONFIG = 'tests/test_config_proxy_http_cookie.yaml' server = None sesh_key = None + +#================================================================= +# Inited once per module def setup_module(): + def make_httpd(app): + return make_server('', 0, app) + global server - server = ServeThread() - server.daemon = True - server.start() - - global session - session = requests.Session() - + server = ServerThreadRunner(make_httpd, TEST_CONFIG) def teardown_module(): - try: - server.httpd.shutdown() - threading.current_thread().join(server) - except Exception: - pass - -class ServeThread(threading.Thread): - def __init__(self, *args, **kwargs): - super(ServeThread, self).__init__(*args, **kwargs) - self.app = init_app(create_wb_router, - load_yaml=True, - config_file=TEST_CONFIG) - - # init with port 0 to allow os to pick a port - self.httpd = make_server('', 0, self.app) - port = self.httpd.socket.getsockname()[1] - - proxy_str = 'http://localhost:' + str(port) - self.proxy_dict = {'http': proxy_str} - - def run(self, *args, **kwargs): - self.httpd.serve_forever() + global server + server.stop_thread() +#================================================================= class TestProxyHttpCookie: def setup(self): self.session = requests.Session() diff --git a/tests/test_proxy_https_cookie.py b/tests/test_proxy_https_cookie.py index bfd1d398..97a23a3f 100644 --- a/tests/test_proxy_https_cookie.py +++ b/tests/test_proxy_https_cookie.py @@ -1,69 +1,45 @@ import pytest -from pywb.webapp.pywb_init import create_wb_router -from pywb.framework.wsgi_wrappers import init_app - +from server_thread import ServerThreadRunner from wsgiref.simple_server import make_server -from pywb.framework.proxy_resolvers import CookieResolver - -import threading import requests import shutil import sys import os -TEST_CONFIG = 'tests/test_config_proxy_https_cookie.yaml' - +#================================================================= TEST_CA_DIR = './tests/pywb_test_certs' TEST_CA_ROOT = './tests/pywb_test_ca.pem' +TEST_CONFIG = 'tests/test_config_proxy_https_cookie.yaml' + server = None sesh_key = None + +#================================================================= +# Inited once per module def setup_module(): openssl_support = pytest.importorskip("OpenSSL") + def make_httpd(app): + return make_server('', 0, app) + global server - server = ServeThread() - server.daemon = True - server.start() - - global session - session = requests.Session() - + server = ServerThreadRunner(make_httpd, TEST_CONFIG) def teardown_module(): - try: - server.httpd.shutdown() - threading.current_thread().join(server) - except Exception: - pass + global server + server.stop_thread() # delete test root and certs shutil.rmtree(TEST_CA_DIR) os.remove(TEST_CA_ROOT) -class ServeThread(threading.Thread): - def __init__(self, *args, **kwargs): - super(ServeThread, self).__init__(*args, **kwargs) - self.app = init_app(create_wb_router, - load_yaml=True, - config_file=TEST_CONFIG) - - # init with port 0 to allow os to pick a port - self.httpd = make_server('', 0, self.app) - port = self.httpd.socket.getsockname()[1] - - proxy_str = 'http://localhost:' + str(port) - self.proxy_dict = {'http': proxy_str, 'https': proxy_str} - - def run(self, *args, **kwargs): - self.httpd.serve_forever() - - +#================================================================= class TestProxyHttpsCookie: def setup(self): self.session = requests.Session()