2016-02-23 13:26:53 -08:00
|
|
|
from six.moves.socketserver import ThreadingMixIn
|
|
|
|
from six.moves.BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
|
2014-12-23 11:07:47 -08:00
|
|
|
|
2016-02-23 13:26:53 -08:00
|
|
|
from .server_thread import ServerThreadRunner
|
2014-12-23 11:07:47 -08:00
|
|
|
|
2015-04-03 10:13:27 -07:00
|
|
|
from pywb.webapp.live_rewrite_handler import RewriteHandler
|
|
|
|
from pywb.webapp.pywb_init import create_wb_router
|
2014-12-23 11:07:47 -08:00
|
|
|
|
|
|
|
from pywb.framework.wsgi_wrappers import init_app
|
|
|
|
import webtest
|
|
|
|
import shutil
|
|
|
|
|
2015-10-23 12:19:15 -07:00
|
|
|
import pywb.rewrite.rewrite_live
|
2014-12-26 13:02:47 -08:00
|
|
|
|
2014-12-23 11:07:47 -08:00
|
|
|
|
|
|
|
#=================================================================
|
|
|
|
|
|
|
|
#class ProxyServer(ThreadingMixIn, HTTPServer):
|
|
|
|
class ProxyServer(HTTPServer):
    """HTTP server that plays the role of the upstream proxy in these tests.

    Adds no behavior of its own to ``HTTPServer``. ``setup_module`` attaches
    two attributes to the instance, ``requestlog`` and ``force_err``, which
    ``ProxyRequest`` reads through ``self.server``.
    """
|
|
|
|
|
|
|
|
|
|
|
|
class ProxyRequest(BaseHTTPRequestHandler):
    """Request handler that echoes each request back as its response body.

    Every handled request is also appended, as text, to
    ``self.server.requestlog`` so tests can inspect what reached the proxy.
    When ``self.server.force_err`` is truthy the handler closes the
    connection without logging or responding.
    """

    def _format_request(self):
        """Return the request line and headers as newline-terminated text."""
        lines = ['%s %s %s\n' % (self.command, self.path, self.request_version)]
        lines.extend('%s: %s\n' % (name, self.headers[name])
                     for name in self.headers)
        return ''.join(lines)

    def do_GET(self):
        """Log the request and send it back as a 200 text/plain response."""
        if self.server.force_err:
            # just close connection
            self.wfile.close()
            return

        buff = self._format_request()
        self.server.requestlog.append(buff)

        # Encode once: content-length must count the bytes actually written,
        # not the characters, or non-ASCII header values give a wrong length.
        body = buff.encode('utf-8')

        self.send_response(200)

        self.send_header('x-proxy', 'test')
        self.send_header('content-length', str(len(body)))
        self.send_header('content-type', 'text/plain; charset=utf-8')
        self.end_headers()
        self.wfile.write(body)
        self.wfile.close()

    def do_PUTMETA(self):
        """Handle the custom PUTMETA method exactly like GET."""
        self.do_GET()
|
|
|
|
|
|
|
|
|
2015-06-27 20:43:15 -07:00
|
|
|
#=================================================================
|
|
|
|
class MockYTDWrapper(object):
    """Test double exposing ``extract_info`` and returning canned data."""

    def extract_info(self, url):
        """Return the same fixed info dict for any *url* (which is ignored)."""
        canned_info = dict(mock='youtube_dl_data')
        return canned_info
|
|
|
|
|
|
|
|
|
2015-10-23 12:19:15 -07:00
|
|
|
# Replace the module-level ``youtubedl`` object in pywb.rewrite.rewrite_live
# with the mock at import time (presumably so video-info requests never
# invoke the real youtube-dl -- confirm against rewrite_live).
pywb.rewrite.rewrite_live.youtubedl = MockYTDWrapper()
|
2015-06-27 20:43:15 -07:00
|
|
|
|
2015-10-14 20:34:46 -07:00
|
|
|
|
2014-12-23 11:07:47 -08:00
|
|
|
#=================================================================
|
2015-10-14 20:34:46 -07:00
|
|
|
def setup_module():
    """Create the module-wide fixtures shared by every test in this file.

    Publishes these module globals: ``requestlog`` (list the proxy handler
    appends to), ``proxyserv`` (the ProxyServer instance), ``server`` (the
    ServerThreadRunner wrapping it), ``cache`` (dict handed to the live
    rewrite handler), ``app`` (the pywb WSGI app) and ``testapp`` (its
    webtest wrapper).
    """
    global requestlog, server, cache, app, testapp

    requestlog = []

    def make_httpd(app):
        # Factory handed to ServerThreadRunner; its ``app`` argument is unused.
        global proxyserv
        httpd = ProxyServer(('', 0), ProxyRequest)  # port 0: OS picks a free port
        httpd.requestlog = requestlog
        httpd.force_err = False
        proxyserv = httpd
        return httpd

    server = ServerThreadRunner(make_httpd)

    # Route the 'rewrite' collection to the live web via the local proxy.
    config = {
        'collections': {'rewrite': '$liveweb'},
        'framed_replay': True,
        'proxyhostport': server.proxy_str,
    }

    cache = {}

    def shared_cache():
        return cache

    # Make the handler module hand out the dict above so tests can inspect it.
    pywb.webapp.live_rewrite_handler.create_cache = shared_cache

    app = init_app(create_wb_router, load_yaml=False, config=config)

    testapp = webtest.TestApp(app)
|
2014-12-26 13:02:47 -08:00
|
|
|
|
|
|
|
|
2015-10-14 20:34:46 -07:00
|
|
|
def teardown_module(self):
    """Stop the proxy server runner created by ``setup_module``.

    The single positional argument is supplied by the test runner; it is
    named ``self`` here and is not used.
    """
    server.stop_thread()
|
|
|
|
|
|
|
|
#=================================================================
|
|
|
|
class TestProxyLiveRewriter:
    """Live-rewrite tests whose upstream requests go through the echo proxy.

    Relies on the module globals created by ``setup_module``: ``requestlog``,
    ``cache``, ``app``, ``testapp`` and ``proxyserv``.
    """

    def setup(self):
        """Empty the shared request log and cache before each test."""
        self.requestlog = requestlog
        del self.requestlog[:]

        self.cache = cache
        self.cache.clear()

        self.app = app
        self.testapp = testapp

    # pytest 8 no longer calls the nose-style ``setup`` hook; expose the same
    # function under the xunit-style name so the reset still runs there.
    setup_method = setup

    def _has_header(self, logged_request, name):
        """Return True if *logged_request* contains a header called *name*.

        The match is case-insensitive and only looks at the start of header
        lines (the first line, the request line, is skipped), because header
        names keep their original case on Python 3.
        """
        prefix = name.lower() + ':'
        header_lines = logged_request.split('\n')[1:]
        return any(line.lower().startswith(prefix) for line in header_lines)

    def test_echo_proxy_referrer(self):
        headers = [('User-Agent', 'python'), ('Referer', 'http://localhost:80/rewrite/other.example.com')]
        resp = self.testapp.get('/rewrite/http://example.com/', headers=headers)

        # ensure just one request
        assert len(self.requestlog) == 1

        # equal to returned response (echo)
        assert self.requestlog[0] == resp.text
        assert resp.headers['x-archive-orig-x-proxy'] == 'test'

        assert resp.text.startswith('GET http://example.com/ HTTP/1.1')
        assert 'referer: http://other.example.com' in resp.text.lower()

        assert len(self.cache) == 0

    def test_echo_proxy_start_unbounded_remove_range(self):
        headers = [('Range', 'bytes=0-')]
        resp = self.testapp.get('/rewrite/http://example.com/', headers=headers)

        # actual response is with range
        assert resp.status_int == 206
        assert 'Content-Range' in resp.headers
        assert resp.headers['Accept-Ranges'] == 'bytes'

        assert len(self.requestlog) == 1

        # proxied, but without range
        assert self.requestlog[0] == resp.text
        assert resp.headers['x-archive-orig-x-proxy'] == 'test'

        assert self.requestlog[0].startswith('GET http://example.com/ HTTP/1.1')
        assert not self._has_header(self.requestlog[0], 'range')

        assert len(self.cache) == 0

    def test_echo_proxy_bounded_noproxy_range(self):
        headers = [('Range', 'bytes=10-1000')]
        resp = self.testapp.get('/rewrite/http://example.com/foobar', headers=headers)

        # actual response is with range
        assert resp.status_int == 206
        assert 'Content-Range' in resp.headers
        assert resp.headers['Accept-Ranges'] == 'bytes'

        # not from proxy
        assert 'x-archive-orig-x-proxy' not in resp.headers

        # proxy receives a request also, but w/o range
        assert len(self.requestlog) == 1

        # proxy receives different request than our response
        assert self.requestlog[0] != resp.text

        assert self.requestlog[0].startswith('GET http://example.com/foobar HTTP/1.1')

        # no range request
        assert not self._has_header(self.requestlog[0], 'range')

        # r: key cached
        assert len(self.cache) == 1
        assert RewriteHandler.create_cache_key('r:', 'http://example.com/foobar') in self.cache

        # Second Request
        # clear log
        self.requestlog.pop()
        headers = [('Range', 'bytes=101-150')]
        resp = self.testapp.get('/rewrite/http://example.com/foobar', headers=headers)

        # actual response is with range
        assert resp.status_int == 206
        assert 'Content-Range' in resp.headers
        assert resp.headers['Accept-Ranges'] == 'bytes'

        # not from proxy
        assert 'x-archive-orig-x-proxy' not in resp.headers

        # already pinged proxy, no additional requests sent to proxy
        assert len(self.requestlog) == 0
        assert len(self.cache) == 1

    def test_echo_proxy_video_info(self):
        resp = self.testapp.get('/rewrite/vi_/https://www.youtube.com/watch?v=DjFZyFWSt1M')
        assert resp.status_int == 200
        assert resp.content_type == RewriteHandler.YT_DL_TYPE, resp.content_type

        assert len(self.requestlog) == 1
        assert self.requestlog[0].startswith('PUTMETA http://www.youtube.com/watch?v=DjFZyFWSt1M HTTP/1.1')

        # second request, not sent to proxy
        self.testapp.get('/rewrite/vi_/https://www.youtube.com/watch?v=DjFZyFWSt1M')
        assert len(self.requestlog) == 1

        # v: video info cache
        assert len(self.cache) == 1
        assert RewriteHandler.create_cache_key('v:', 'https://www.youtube.com/watch?v=DjFZyFWSt1M') in self.cache

    def test_echo_proxy_video_with_referrer(self):
        headers = [('Range', 'bytes=1000-2000'), ('Referer', 'http://localhost:80/rewrite/https://example.com/')]
        resp = self.testapp.get('/rewrite/http://www.youtube.com/watch?v=DjFZyFWSt1M', headers=headers)

        # not from proxy
        assert 'x-archive-orig-x-proxy' not in resp.headers

        # proxy receives two requests
        assert len(self.requestlog) == 2

        # first, a video info request recording the page
        assert self.requestlog[0].startswith('PUTMETA http://example.com/ HTTP/1.1')

        # second, non-ranged request for page
        assert self.requestlog[1].startswith('GET http://www.youtube.com/watch?v=DjFZyFWSt1M HTTP/1.1')
        assert not self._has_header(self.requestlog[1], 'range')

        # both video info and range cached
        assert len(self.cache) == 2
        assert RewriteHandler.create_cache_key('v:', 'http://www.youtube.com/watch?v=DjFZyFWSt1M') in self.cache
        assert RewriteHandler.create_cache_key('r:', 'http://www.youtube.com/watch?v=DjFZyFWSt1M') in self.cache

    def test_echo_proxy_error(self):
        headers = [('Range', 'bytes=1000-2000'), ('Referer', 'http://localhost:80/rewrite/https://example.com/')]

        proxyserv.force_err = True
        try:
            resp = self.testapp.get('/rewrite/http://www.youtube.com/watch?v=DjFZyFWSt1M', headers=headers)
        finally:
            # always restore, so a failure here cannot break tests that run later
            proxyserv.force_err = False

        # not from proxy
        assert 'x-archive-orig-x-proxy' not in resp.headers

        # no proxy requests as we're forcing exception
        assert len(self.requestlog) == 0

        assert len(self.cache) == 0
|
|
|
|
|