1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

request-uri handling: use REQUEST_URI if available to maintain %-encoding when constructing WbUrl (#315)

geventserver: use custom handler to set raw 'REQUEST_URI' when running default gevent wsgi server. (uwsgi already sets REQUEST_URI)
testing: add REQUEST_URI check to proxy tests, as a real server is being used there (webtest-based tests decode the %-encoding)
bump version to 2.0.4
This commit is contained in:
Ilya Kreymer 2018-04-10 17:17:38 -07:00 committed by GitHub
parent 33cca0bc02
commit b7bf693885
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 44 additions and 13 deletions

View File

@ -1,4 +1,4 @@
__version__ = '2.0.3'
__version__ = '2.0.4'
DEFAULT_CONFIG = 'pywb/default_config.yaml'

View File

@ -81,9 +81,13 @@ class BaseCli(object):
return self
def run_gevent(self):
from gevent.pywsgi import WSGIServer
from pywb.utils.geventserver import GeventServer, RequestURIWSGIHandler
logging.info('Starting Gevent Server on ' + str(self.r.port))
WSGIServer((self.r.bind, self.r.port), self.application).serve_forever()
ge = GeventServer(self.application,
port=self.r.port,
hostname=self.r.bind,
handler_class=RequestURIWSGIHandler,
direct=True)
#=============================================================================

View File

@ -257,10 +257,16 @@ class FrontEndApp(object):
self.setup_paths(environ, coll, record)
wb_url_str = to_native_str(url)
request_uri = environ.get('REQUEST_URI')
script_name = environ.get('SCRIPT_NAME', '') + '/'
if request_uri and request_uri.startswith(script_name):
wb_url_str = request_uri[len(script_name):]
if environ.get('QUERY_STRING'):
wb_url_str += '?' + environ.get('QUERY_STRING')
else:
wb_url_str = to_native_str(url)
if environ.get('QUERY_STRING'):
wb_url_str += '?' + environ.get('QUERY_STRING')
metadata = self.get_metadata(coll)
if record:

View File

@ -1,13 +1,14 @@
from gevent.wsgi import WSGIServer
from gevent.wsgi import WSGIServer, WSGIHandler
from gevent import spawn
import logging
# ============================================================================
class GeventServer(object):
def __init__(self, app, port=0, hostname='localhost', handler_class=None):
def __init__(self, app, port=0, hostname='localhost', handler_class=None,
direct=False):
self.port = port
self.make_server(app, port, hostname, handler_class)
self.make_server(app, port, hostname, handler_class, direct=direct)
def stop(self):
if self.server:
@ -22,15 +23,25 @@ class GeventServer(object):
logging.debug('server failed to start on ' + str(port))
traceback.print_exc()
def make_server(self, app, port, hostname, handler_class):
def make_server(self, app, port, hostname, handler_class, direct=False):
server = WSGIServer((hostname, port), app, handler_class=handler_class)
server.init_socket()
self.port = server.address[1]
self.server = server
self.ge = spawn(self._run, server, self.port)
if direct:
self.ge = None
self._run(server, self.port)
else:
self.ge = spawn(self._run, server, self.port)
def join(self):
    """Wait for the spawned server task to finish, if there is one.

    ``self.ge`` is ``None`` when the server was started with
    ``direct=True`` (it is run inline instead of being spawned), so in
    that case there is nothing to join and this is a no-op.
    """
    # Guard against the direct-run case, where no task was spawned.
    if self.ge is not None:
        self.ge.join()
# ============================================================================
class RequestURIWSGIHandler(WSGIHandler):
    """WSGI handler that additionally publishes ``REQUEST_URI`` in the environ."""

    def get_environ(self):
        """Return the parent handler's environ with ``REQUEST_URI`` added.

        ``REQUEST_URI`` is set to ``self.path`` -- presumably the request
        path exactly as received (still %-encoded); confirm against the
        gevent handler implementation.
        """
        env = super(RequestURIWSGIHandler, self).get_environ()
        env['REQUEST_URI'] = self.path
        return env

View File

@ -1,7 +1,7 @@
from pywb.warcserver.test.testutils import BaseTestClass, TempDirTests
from .base_config_test import CollsDirMixin
from pywb.utils.geventserver import GeventServer
from pywb.utils.geventserver import GeventServer, RequestURIWSGIHandler
from pywb.apps.frontendapp import FrontEndApp
from pywb.manager.manager import main as manager
@ -34,7 +34,7 @@ class BaseTestProxy(TempDirTests, BaseTestClass):
cls.app = FrontEndApp(config_file=config_file,
custom_config={'proxy': opts})
cls.server = GeventServer(cls.app)
cls.server = GeventServer(cls.app, handler_class=RequestURIWSGIHandler)
cls.proxies = cls.proxy_dict(cls.server.port)
@classmethod
@ -127,3 +127,13 @@ class TestRecordingProxy(CollsDirMixin, BaseTestProxy):
assert 'is_live = false' in res.text
assert 'httpbin(1)' in res.text
def test_proxy_record_keep_percent(self, scheme):
    """Check that a %-encoded url appears unchanged in the proxied response."""
    # Point the proxy's fixed prefix at the '/test/record/bn_/' path.
    self.app.handler.prefix_resolver.fixed_prefix = '/test/record/bn_/'

    target_url = '{0}://example.com/%2A%2Ffoobar'.format(scheme)
    response = requests.get(target_url,
                            proxies=self.proxies,
                            verify=self.root_ca_file)

    # ensure %-encoded url stays as is
    quoted_url = '"{0}://example.com/%2A%2Ffoobar"'.format(scheme)
    assert quoted_url in response.text