1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 06:59:52 +01:00

request-uri handling: use REQUEST_URI if available to maintain %-encoding when constructing WbUrl (#315)

geventserver: use a custom handler to set the raw 'REQUEST_URI' when running the default gevent WSGI server (uWSGI already sets REQUEST_URI).
testing: add a REQUEST_URI check to the proxy tests, since a real server is used there (webtest-based tests decode %-encoding, so they cannot verify this)
bump version to 2.0.4
This commit is contained in:
Ilya Kreymer 2018-04-10 17:17:38 -07:00 committed by GitHub
parent 33cca0bc02
commit b7bf693885
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 44 additions and 13 deletions

View File

@ -1,4 +1,4 @@
__version__ = '2.0.3' __version__ = '2.0.4'
DEFAULT_CONFIG = 'pywb/default_config.yaml' DEFAULT_CONFIG = 'pywb/default_config.yaml'

View File

@ -81,9 +81,13 @@ class BaseCli(object):
return self return self
def run_gevent(self): def run_gevent(self):
from gevent.pywsgi import WSGIServer from pywb.utils.geventserver import GeventServer, RequestURIWSGIHandler
logging.info('Starting Gevent Server on ' + str(self.r.port)) logging.info('Starting Gevent Server on ' + str(self.r.port))
WSGIServer((self.r.bind, self.r.port), self.application).serve_forever() ge = GeventServer(self.application,
port=self.r.port,
hostname=self.r.bind,
handler_class=RequestURIWSGIHandler,
direct=True)
#============================================================================= #=============================================================================

View File

@ -257,10 +257,16 @@ class FrontEndApp(object):
self.setup_paths(environ, coll, record) self.setup_paths(environ, coll, record)
wb_url_str = to_native_str(url) request_uri = environ.get('REQUEST_URI')
script_name = environ.get('SCRIPT_NAME', '') + '/'
if request_uri and request_uri.startswith(script_name):
wb_url_str = request_uri[len(script_name):]
if environ.get('QUERY_STRING'): else:
wb_url_str += '?' + environ.get('QUERY_STRING') wb_url_str = to_native_str(url)
if environ.get('QUERY_STRING'):
wb_url_str += '?' + environ.get('QUERY_STRING')
metadata = self.get_metadata(coll) metadata = self.get_metadata(coll)
if record: if record:

View File

@ -1,13 +1,14 @@
from gevent.wsgi import WSGIServer from gevent.wsgi import WSGIServer, WSGIHandler
from gevent import spawn from gevent import spawn
import logging import logging
# ============================================================================ # ============================================================================
class GeventServer(object): class GeventServer(object):
def __init__(self, app, port=0, hostname='localhost', handler_class=None): def __init__(self, app, port=0, hostname='localhost', handler_class=None,
direct=False):
self.port = port self.port = port
self.make_server(app, port, hostname, handler_class) self.make_server(app, port, hostname, handler_class, direct=direct)
def stop(self): def stop(self):
if self.server: if self.server:
@ -22,15 +23,25 @@ class GeventServer(object):
logging.debug('server failed to start on ' + str(port)) logging.debug('server failed to start on ' + str(port))
traceback.print_exc() traceback.print_exc()
def make_server(self, app, port, hostname, handler_class): def make_server(self, app, port, hostname, handler_class, direct=False):
server = WSGIServer((hostname, port), app, handler_class=handler_class) server = WSGIServer((hostname, port), app, handler_class=handler_class)
server.init_socket() server.init_socket()
self.port = server.address[1] self.port = server.address[1]
self.server = server self.server = server
self.ge = spawn(self._run, server, self.port) if direct:
self.ge = None
self._run(server, self.port)
else:
self.ge = spawn(self._run, server, self.port)
def join(self): def join(self):
self.ge.join() self.ge.join()
# ============================================================================
class RequestURIWSGIHandler(WSGIHandler):
    """WSGI handler that also exposes the request path as ``REQUEST_URI``.

    The environ built by the parent handler is extended with a
    ``REQUEST_URI`` entry copied from this handler's ``path`` attribute.
    """

    def get_environ(self):
        """Return the parent's WSGI environ with ``REQUEST_URI`` added."""
        env = super(RequestURIWSGIHandler, self).get_environ()
        # NOTE(review): presumably ``self.path`` is the request target as
        # received (still %-encoded) -- confirm against gevent's WSGIHandler.
        env['REQUEST_URI'] = self.path
        return env

View File

@ -1,7 +1,7 @@
from pywb.warcserver.test.testutils import BaseTestClass, TempDirTests from pywb.warcserver.test.testutils import BaseTestClass, TempDirTests
from .base_config_test import CollsDirMixin from .base_config_test import CollsDirMixin
from pywb.utils.geventserver import GeventServer from pywb.utils.geventserver import GeventServer, RequestURIWSGIHandler
from pywb.apps.frontendapp import FrontEndApp from pywb.apps.frontendapp import FrontEndApp
from pywb.manager.manager import main as manager from pywb.manager.manager import main as manager
@ -34,7 +34,7 @@ class BaseTestProxy(TempDirTests, BaseTestClass):
cls.app = FrontEndApp(config_file=config_file, cls.app = FrontEndApp(config_file=config_file,
custom_config={'proxy': opts}) custom_config={'proxy': opts})
cls.server = GeventServer(cls.app) cls.server = GeventServer(cls.app, handler_class=RequestURIWSGIHandler)
cls.proxies = cls.proxy_dict(cls.server.port) cls.proxies = cls.proxy_dict(cls.server.port)
@classmethod @classmethod
@ -127,3 +127,13 @@ class TestRecordingProxy(CollsDirMixin, BaseTestProxy):
assert 'is_live = false' in res.text assert 'is_live = false' in res.text
assert 'httpbin(1)' in res.text assert 'httpbin(1)' in res.text
def test_proxy_record_keep_percent(self, scheme):
self.app.handler.prefix_resolver.fixed_prefix = '/test/record/bn_/'
res = requests.get('{0}://example.com/%2A%2Ffoobar'.format(scheme),
proxies=self.proxies,
verify=self.root_ca_file)
# ensure %-encoded url stays as is
assert '"{0}://example.com/%2A%2Ffoobar"'.format(scheme) in res.text