1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

request_uri: only generate REQUEST_URI manually if not provided by wsgi framework

only encode chars that are not allowed in path segment, per
http://tools.ietf.org/html/rfc3986#section-3.3
This commit is contained in:
Ilya Kreymer 2014-01-19 16:51:17 -08:00
parent 628c130261
commit 2e4d78d079
2 changed files with 20 additions and 4 deletions

View File

@ -107,13 +107,27 @@ def iso_date_to_timestamp(string):
return datetime_to_timestamp(iso_date_to_datetime(string))
# adapted from wsgiref.request_uri, but doesn't include domain name and allows ':' in url
# adapted from wsgiref.request_uri, but doesn't include domain name and allows all characters
# allowed in the path segment according to: http://tools.ietf.org/html/rfc3986#section-3.3
# explained here: http://stackoverflow.com/questions/4669692/valid-characters-for-directory-part-of-a-url-for-short-links
def request_uri(environ, include_query=1):
    """
    Return the requested path, optionally including the query string.

    The path is ``SCRIPT_NAME`` + ``PATH_INFO`` from the WSGI environ,
    percent-encoded except for the characters listed in ``safe`` below
    (those permitted in a path segment per RFC 3986 section 3.3, plus '/').
    Missing keys are treated as empty strings.

    :param environ: WSGI-style environ mapping; only ``SCRIPT_NAME``,
        ``PATH_INFO`` and ``QUERY_STRING`` are read.
    :param include_query: when true and ``QUERY_STRING`` is non-empty,
        append ``'?' + QUERY_STRING`` (the query string is NOT re-encoded).
    :return: the encoded path (and optional query) as a string, without
        scheme or host.

    # Simple test:
    >>> request_uri({'PATH_INFO': '/web/example.com'})
    '/web/example.com'

    # Test all unencoded special chars and double-quote
    # (double-quote must be encoded but not single quote)
    >>> request_uri({'PATH_INFO': "/web/example.com/0~!+$&'()*+,;=:\\\""})
    "/web/example.com/0~!+$&'()*+,;=:%22"
    """
    # Local import keeps the function self-contained; fall back so the
    # same code runs where ``quote`` lives in ``urllib.parse``.
    try:
        from urllib import quote
    except ImportError:
        from urllib.parse import quote

    path = environ.get('SCRIPT_NAME', '') + environ.get('PATH_INFO', '')

    # '~' is listed explicitly because older ``quote`` implementations
    # do not treat it as always-safe.
    url = quote(path, safe='/~!$&\'()*+,;=:@')

    if include_query and environ.get('QUERY_STRING'):
        url += '?' + environ['QUERY_STRING']

    return url

View File

@ -82,7 +82,9 @@ except:
def application(env, start_response):
env['REQUEST_URI'] = request_uri(env)
if not env.get('REQUEST_URI'):
env['REQUEST_URI'] = request_uri(env)
response = None
try: