mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
remove rfc3987 validation, was rejecting valid urls
add extract_referrer_wburl_str() to extract the WbUrl string, if any, from the referrer; the live rewrite handler uses it to override the default Referer header
This commit is contained in:
parent
611b9093bd
commit
85593696fa
@ -25,6 +25,16 @@
|
||||
>>> print_req_from_uri('/2010/example.com', {'HTTP_HOST': 'localhost:8080'}, use_abs_prefix = True)
|
||||
{'wb_url': ('latest_replay', '', '', 'http://example.com', 'http://example.com'), 'coll': '2010', 'wb_prefix': '/2010/', 'request_uri': '/2010/example.com'}
|
||||
|
||||
# Referrer extraction
|
||||
>>> WbUrl(req_from_uri('/web/2010/example.com', {'wsgi.url_scheme': 'http', 'HTTP_HOST': 'localhost:8080', 'HTTP_REFERER': 'http://localhost:8080/web/2011/blah.example.com/'}).extract_referrer_wburl_str()).url
|
||||
'http://blah.example.com/'
|
||||
|
||||
# incorrect referer
|
||||
>>> req_from_uri('/web/2010/example.com', {'wsgi.url_scheme': 'http', 'HTTP_HOST': 'localhost:8080', 'HTTP_REFERER': 'http://other.example.com/web/2011/blah.example.com/'}).extract_referrer_wburl_str()
|
||||
|
||||
|
||||
# no referer
|
||||
>>> req_from_uri('/web/2010/example.com', {'wsgi.url_scheme': 'http', 'HTTP_HOST': 'localhost:8080'}).extract_referrer_wburl_str()
|
||||
|
||||
|
||||
# WbResponse Tests
|
||||
|
@ -105,6 +105,16 @@ class WbRequest(object):
|
||||
def _parse_extra(self):
|
||||
pass
|
||||
|
||||
def extract_referrer_wburl_str(self):
    """Return the part of the referrer that follows this request's prefix.

    The referrer is only used when it starts with
    ``self.host_prefix + self.rel_prefix``; a referrer that starts with
    anything else is ignored.

    Returns:
        The referrer text after the prefix (an empty string when the
        referrer is exactly the prefix), or ``None`` when there is no
        referrer or it does not start with the prefix.
    """
    if not self.referrer:
        return None

    # Build the prefix once: it is needed both for the match and to know
    # how many leading characters to strip.
    prefix = self.host_prefix + self.rel_prefix

    if not self.referrer.startswith(prefix):
        return None

    return self.referrer[len(prefix):]
|
||||
|
||||
|
||||
#=================================================================
|
||||
class WbResponse(object):
|
||||
|
@ -17,7 +17,12 @@ from pywb.rewrite.rewrite_content import RewriteContent
|
||||
|
||||
#=================================================================
|
||||
class LiveRewriter(object):
|
||||
PROXY_HEADER_LIST = [('HTTP_USER_AGENT', 'User-Agent')]
|
||||
PROXY_HEADER_LIST = [('HTTP_USER_AGENT', 'User-Agent'),
|
||||
('HTTP_ACCEPT', 'Accept'),
|
||||
('HTTP_ACCEPT_LANGUAGE', 'Accept-Language'),
|
||||
('HTTP_ACCEPT_CHARSET', 'Accept-Charset'),
|
||||
('HTTP_REFERER', 'Referer'),
|
||||
]
|
||||
|
||||
def __init__(self, defmod=''):
|
||||
self.rewriter = RewriteContent(defmod=defmod)
|
||||
|
@ -60,13 +60,14 @@
|
||||
|
||||
# Error Urls
|
||||
# ======================
|
||||
>>> x = WbUrl('/#$%#/')
|
||||
# no longer rejecting this here
|
||||
#>>> x = WbUrl('/#$%#/')
|
||||
Traceback (most recent call last):
|
||||
Exception: Bad Request Url: http://#$%#/
|
||||
|
||||
>>> x = WbUrl('/http://example.com:abc/')
|
||||
Traceback (most recent call last):
|
||||
Exception: Bad Request Url: http://example.com:abc/
|
||||
#>>> x = WbUrl('/http://example.com:abc/')
|
||||
#Traceback (most recent call last):
|
||||
#Exception: Bad Request Url: http://example.com:abc/
|
||||
|
||||
>>> x = WbUrl('')
|
||||
Traceback (most recent call last):
|
||||
|
@ -39,7 +39,6 @@ wayback url format.
|
||||
"""
|
||||
|
||||
import re
|
||||
import rfc3987
|
||||
|
||||
|
||||
#=================================================================
|
||||
@ -104,14 +103,6 @@ class WbUrl(BaseWbUrl):
|
||||
if inx < len(self.url) and self.url[inx] != '/':
|
||||
self.url = self.url[:inx] + '/' + self.url[inx:]
|
||||
|
||||
# BUG?: adding upper() because rfc3987 lib
|
||||
# rejects lower case %-encoding
|
||||
# %2F is fine, but %2f -- standard supports either
|
||||
matcher = rfc3987.match(self.url.upper(), 'IRI')
|
||||
|
||||
if not matcher:
|
||||
raise Exception('Bad Request Url: ' + self.url)
|
||||
|
||||
# Match query regex
|
||||
# ======================
|
||||
def _init_query(self, url):
|
||||
|
@ -3,6 +3,7 @@ from pywb.framework.wbrequestresponse import WbResponse
|
||||
from pywb.framework.archivalrouter import ArchivalRouter, Route
|
||||
|
||||
from pywb.rewrite.rewrite_live import LiveRewriter
|
||||
from pywb.rewrite.wburl import WbUrl
|
||||
|
||||
from handlers import StaticHandler
|
||||
|
||||
@ -22,16 +23,22 @@ class RewriteHandler(WbUrlHandler):
|
||||
#use_lxml_parser()
|
||||
self.rewriter = LiveRewriter(defmod='mp_')
|
||||
|
||||
head_insert = config.get('head_insert_html',
|
||||
'ui/head_insert.html')
|
||||
view = config.get('head_insert_view')
|
||||
if not view:
|
||||
head_insert = config.get('head_insert_html',
|
||||
'ui/head_insert.html')
|
||||
view = HeadInsertView.create_template(head_insert, 'Head Insert')
|
||||
|
||||
frame_insert = config.get('frame_insert_html',
|
||||
'ui/frame_insert.html')
|
||||
|
||||
view = HeadInsertView.create_template(head_insert, 'Head Insert')
|
||||
self.head_insert_view = view
|
||||
|
||||
view = J2TemplateView.create_template(frame_insert, 'Frame Insert')
|
||||
|
||||
view = config.get('frame_insert_view')
|
||||
if not view:
|
||||
frame_insert = config.get('frame_insert_html',
|
||||
'ui/frame_insert.html')
|
||||
|
||||
view = J2TemplateView.create_template(frame_insert, 'Frame Insert')
|
||||
|
||||
self.frame_insert_view = view
|
||||
|
||||
def __call__(self, wbrequest):
|
||||
@ -49,6 +56,11 @@ class RewriteHandler(WbUrlHandler):
|
||||
|
||||
head_insert_func = self.head_insert_view.create_insert_func(wbrequest)
|
||||
|
||||
ref_wburl_str = wbrequest.extract_referrer_wburl_str()
|
||||
if ref_wburl_str:
|
||||
wbrequest.env['HTTP_REFERER'] = WbUrl(ref_wburl_str).url
|
||||
|
||||
|
||||
result = self.rewriter.fetch_request(url, wbrequest.urlrewriter,
|
||||
head_insert_func=head_insert_func,
|
||||
env=wbrequest.env)
|
||||
|
1
setup.py
1
setup.py
@ -64,7 +64,6 @@ setup(
|
||||
glob.glob('sample_archive/text_content/*')),
|
||||
],
|
||||
install_requires=[
|
||||
'rfc3987',
|
||||
'chardet',
|
||||
'requests',
|
||||
'redis',
|
||||
|
@ -8,7 +8,7 @@ class TestLiveRewriter:
|
||||
self.testapp = webtest.TestApp(self.app)
|
||||
|
||||
def test_live_rewrite_1(self):
|
||||
headers = [('User-Agent', 'python')]
|
||||
headers = [('User-Agent', 'python'), ('Referer', 'http://localhost:80/rewrite/other.example.com')]
|
||||
resp = self.testapp.get('/rewrite/mp_/http://example.com/', headers=headers)
|
||||
assert resp.status_int == 200
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user