mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
merge: additional fixes after merge of ukwa/pywb and 2.2
rewrite: remove custom modifiers for now, use oe_ for non-import css embeds; bump version to 2.3.dev0
This commit is contained in:
parent
e92b1969e8
commit
42b8c3a22b
@ -24,7 +24,7 @@ from pywb.warcserver.warcserver import WarcServer
|
|||||||
from pywb.rewrite.templateview import BaseInsertView
|
from pywb.rewrite.templateview import BaseInsertView
|
||||||
|
|
||||||
from pywb.apps.static_handler import StaticHandler
|
from pywb.apps.static_handler import StaticHandler
|
||||||
from pywb.apps.rewriterapp import RewriterApp
|
from pywb.apps.rewriterapp import RewriterApp, UpstreamException
|
||||||
from pywb.apps.wbrequestresponse import WbResponse
|
from pywb.apps.wbrequestresponse import WbResponse
|
||||||
|
|
||||||
import os
|
import os
|
||||||
@ -441,6 +441,13 @@ class FrontEndApp(object):
|
|||||||
coll in self.warcserver.list_dynamic_routes())
|
coll in self.warcserver.list_dynamic_routes())
|
||||||
|
|
||||||
def raise_not_found(self, environ, err_type, url):
|
def raise_not_found(self, environ, err_type, url):
|
||||||
|
"""Utility function for raising a werkzeug.exceptions.NotFound exception with the supplied WSGI environment
|
||||||
|
and message.
|
||||||
|
|
||||||
|
:param dict environ: The WSGI environment dictionary for the request
|
||||||
|
:param str err_type: The identifier for type of error that occurred
|
||||||
|
:param str url: The url of the archived page that was requested
|
||||||
|
"""
|
||||||
raise AppPageNotFound(err_type, url)
|
raise AppPageNotFound(err_type, url)
|
||||||
|
|
||||||
def _check_refer_redirect(self, environ):
|
def _check_refer_redirect(self, environ):
|
||||||
|
@ -56,7 +56,7 @@ class HTMLRewriterMixin(StreamingRewriter):
|
|||||||
'archive': 'oe_'},
|
'archive': 'oe_'},
|
||||||
'area': {'href': defmod},
|
'area': {'href': defmod},
|
||||||
'audio': {'src': 'oe_'},
|
'audio': {'src': 'oe_'},
|
||||||
'base': {'href': 'ba_'},
|
'base': {'href': defmod},
|
||||||
'blockquote': {'cite': defmod},
|
'blockquote': {'cite': defmod},
|
||||||
'body': {'background': 'im_'},
|
'body': {'background': 'im_'},
|
||||||
'button': {'formaction': defmod},
|
'button': {'formaction': defmod},
|
||||||
|
@ -25,8 +25,8 @@ class RxRules(object):
|
|||||||
return lambda _, _2: string
|
return lambda _, _2: string
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def archival_rewrite():
|
def archival_rewrite(mod=None):
|
||||||
return lambda string, rewriter: rewriter.rewrite(string)
|
return lambda string, rewriter: rewriter.rewrite(string, mod)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def add_prefix(prefix):
|
def add_prefix(prefix):
|
||||||
@ -327,13 +327,12 @@ class JSReplaceFuzzy(object):
|
|||||||
class CSSRules(RxRules):
|
class CSSRules(RxRules):
|
||||||
CSS_URL_REGEX = "url\\s*\\(\\s*(?:[\\\\\"']|(?:&.{1,4};))*\\s*([^)'\"]+)\\s*(?:[\\\\\"']|(?:&.{1,4};))*\\s*\\)"
|
CSS_URL_REGEX = "url\\s*\\(\\s*(?:[\\\\\"']|(?:&.{1,4};))*\\s*([^)'\"]+)\\s*(?:[\\\\\"']|(?:&.{1,4};))*\\s*\\)"
|
||||||
|
|
||||||
CSS_IMPORT_NO_URL_REGEX = ("@import\\s+(?!url)\\(?\\s*['\"]?" +
|
CSS_IMPORT_REGEX = ("@import\\s+(?:url\\s*)?\\(?\\s*['\"]?([\w.:/\\\\-]+)")
|
||||||
"(?!url[\\s\\(])([\w.:/\\\\-]+)")
|
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
rules = [
|
rules = [
|
||||||
(self.CSS_URL_REGEX, self.archival_rewrite(), 1),
|
(self.CSS_URL_REGEX, self.archival_rewrite('oe_'), 1),
|
||||||
(self.CSS_IMPORT_NO_URL_REGEX, self.archival_rewrite(), 1),
|
(self.CSS_IMPORT_REGEX, self.archival_rewrite('cs_'), 1),
|
||||||
]
|
]
|
||||||
|
|
||||||
super(CSSRules, self).__init__(rules)
|
super(CSSRules, self).__init__(rules)
|
||||||
|
@ -25,23 +25,23 @@ r"""
|
|||||||
|
|
||||||
# Base Tests -- w/ rewrite (default)
|
# Base Tests -- w/ rewrite (default)
|
||||||
>>> parse('<html><head><base href="http://example.com/diff/path/file.html"/>')
|
>>> parse('<html><head><base href="http://example.com/diff/path/file.html"/>')
|
||||||
<html><head><base href="/web/20131226101010ba_/http://example.com/diff/path/file.html"/>
|
<html><head><base href="/web/20131226101010/http://example.com/diff/path/file.html"/>
|
||||||
|
|
||||||
# Full Path
|
# Full Path
|
||||||
>>> parse('<html><head><base href="http://example.com/diff/path/file.html"/>', urlrewriter=full_path_urlrewriter)
|
>>> parse('<html><head><base href="http://example.com/diff/path/file.html"/>', urlrewriter=full_path_urlrewriter)
|
||||||
<html><head><base href="http://localhost:80/web/20131226101010ba_/http://example.com/diff/path/file.html"/>
|
<html><head><base href="http://localhost:80/web/20131226101010/http://example.com/diff/path/file.html"/>
|
||||||
|
|
||||||
# Full Path Scheme Rel Base
|
# Full Path Scheme Rel Base
|
||||||
>>> parse('<base href="//example.com"/><img src="/foo.gif"/>', urlrewriter=full_path_urlrewriter)
|
>>> parse('<base href="//example.com"/><img src="/foo.gif"/>', urlrewriter=full_path_urlrewriter)
|
||||||
<base href="//localhost:80/web/20131226101010ba_///example.com/"/><img src="/web/20131226101010im_/http://example.com/foo.gif"/>
|
<base href="//localhost:80/web/20131226101010///example.com/"/><img src="/web/20131226101010im_/http://example.com/foo.gif"/>
|
||||||
|
|
||||||
# Rel Base
|
# Rel Base
|
||||||
>>> parse('<html><head><base href="/other/file.html"/>', urlrewriter=full_path_urlrewriter)
|
>>> parse('<html><head><base href="/other/file.html"/>', urlrewriter=full_path_urlrewriter)
|
||||||
<html><head><base href="/web/20131226101010ba_/http://example.com/other/file.html"/>
|
<html><head><base href="/web/20131226101010/http://example.com/other/file.html"/>
|
||||||
|
|
||||||
# Rel Base + example
|
# Rel Base + example
|
||||||
>>> parse('<html><head><base href="/other/file.html"/><a href="/path.html">', urlrewriter=full_path_urlrewriter)
|
>>> parse('<html><head><base href="/other/file.html"/><a href="/path.html">', urlrewriter=full_path_urlrewriter)
|
||||||
<html><head><base href="/web/20131226101010ba_/http://example.com/other/file.html"/><a href="/web/20131226101010/http://example.com/path.html">
|
<html><head><base href="/web/20131226101010/http://example.com/other/file.html"/><a href="/web/20131226101010/http://example.com/path.html">
|
||||||
|
|
||||||
# Rel Base
|
# Rel Base
|
||||||
>>> parse('<base href="./static/"/><img src="image.gif"/>', urlrewriter=full_path_urlrewriter)
|
>>> parse('<base href="./static/"/><img src="image.gif"/>', urlrewriter=full_path_urlrewriter)
|
||||||
@ -53,7 +53,7 @@ r"""
|
|||||||
|
|
||||||
# ensure trailing slash added
|
# ensure trailing slash added
|
||||||
>>> parse('<base href="http://example.com"/>')
|
>>> parse('<base href="http://example.com"/>')
|
||||||
<base href="/web/20131226101010ba_/http://example.com/"/>
|
<base href="/web/20131226101010/http://example.com/"/>
|
||||||
|
|
||||||
# Base Tests -- no rewrite
|
# Base Tests -- no rewrite
|
||||||
>>> parse('<html><head><base href="http://example.com/diff/path/file.html"/>', urlrewriter=no_base_canon_rewriter)
|
>>> parse('<html><head><base href="http://example.com/diff/path/file.html"/>', urlrewriter=no_base_canon_rewriter)
|
||||||
@ -244,29 +244,29 @@ r"""
|
|||||||
<div style="background: url('abc.html')" onblah on-click="location = 'redirect.html'"></div>
|
<div style="background: url('abc.html')" onblah on-click="location = 'redirect.html'"></div>
|
||||||
|
|
||||||
>>> parse('<div style="background: url(\'/other_path/abc.html\')" onblah onclick="window.location = \'redirect.html\'"></div>')
|
>>> parse('<div style="background: url(\'/other_path/abc.html\')" onblah onclick="window.location = \'redirect.html\'"></div>')
|
||||||
<div style="background: url('/web/20131226101010ce_/http://example.com/other_path/abc.html')" onblah onclick="window.WB_wombat_location = 'redirect.html'"></div>
|
<div style="background: url('/web/20131226101010oe_/http://example.com/other_path/abc.html')" onblah onclick="window.WB_wombat_location = 'redirect.html'"></div>
|
||||||
|
|
||||||
>>> parse('<i style="background-image: url(http://foo-.bar_.example.com/)"></i>')
|
>>> parse('<i style="background-image: url(http://foo-.bar_.example.com/)"></i>')
|
||||||
<i style="background-image: url(/web/20131226101010ce_/http://foo-.bar_.example.com/)"></i>
|
<i style="background-image: url(/web/20131226101010oe_/http://foo-.bar_.example.com/)"></i>
|
||||||
|
|
||||||
>>> parse('<i style=\'background-image: url("http://foo.example.com/")\'></i>')
|
>>> parse('<i style=\'background-image: url("http://foo.example.com/")\'></i>')
|
||||||
<i style="background-image: url("/web/20131226101010ce_/http://foo.example.com/")"></i>
|
<i style="background-image: url("/web/20131226101010oe_/http://foo.example.com/")"></i>
|
||||||
|
|
||||||
>>> parse('<i style=\'background-image: url("http://foo.example.com/")\'></i>')
|
>>> parse('<i style=\'background-image: url("http://foo.example.com/")\'></i>')
|
||||||
<i style="background-image: url("/web/20131226101010ce_/http://foo.example.com/")"></i>
|
<i style="background-image: url("/web/20131226101010oe_/http://foo.example.com/")"></i>
|
||||||
|
|
||||||
>>> parse('<i style=\'background-image: url('http://foo.example.com/')\'></i>')
|
>>> parse('<i style=\'background-image: url('http://foo.example.com/')\'></i>')
|
||||||
<i style="background-image: url('/web/20131226101010/http://foo.example.com/')"></i>
|
<i style="background-image: url('/web/20131226101010/http://foo.example.com/')"></i>
|
||||||
|
|
||||||
>>> parse("<i style='background-image: url('http://foo.example.com/')'></i>")
|
>>> parse("<i style='background-image: url('http://foo.example.com/')'></i>")
|
||||||
<i style="background-image: url('/web/20131226101010ce_/http://foo.example.com/')"></i>
|
<i style="background-image: url('/web/20131226101010oe_/http://foo.example.com/')"></i>
|
||||||
|
|
||||||
#>>> parse('<i style=\'background-image: url("http://исп/")\'></i>')
|
#>>> parse('<i style=\'background-image: url("http://исп/")\'></i>')
|
||||||
<i style="background-image: url("/web/20131226101010/http://%D0%B8%D1%81%D0%BF/")"></i>
|
<i style="background-image: url("/web/20131226101010/http://%D0%B8%D1%81%D0%BF/")"></i>
|
||||||
|
|
||||||
# Style
|
# Style
|
||||||
>>> parse('<style>@import "/styles.css" .a { font-face: url(\'../myfont.ttf\') }</style>')
|
>>> parse('<style>@import "/styles.css" .a { font-face: url(\'../myfont.ttf\') }</style>')
|
||||||
<style>@import "/web/20131226101010cs_/http://example.com/styles.css" .a { font-face: url('/web/20131226101010ce_/http://example.com/some/myfont.ttf') }</style>
|
<style>@import "/web/20131226101010cs_/http://example.com/styles.css" .a { font-face: url('/web/20131226101010oe_/http://example.com/some/myfont.ttf') }</style>
|
||||||
|
|
||||||
# Unterminated style tag, handle and auto-terminate
|
# Unterminated style tag, handle and auto-terminate
|
||||||
>>> parse('<style>@import url(styles.css)')
|
>>> parse('<style>@import url(styles.css)')
|
||||||
|
@ -251,28 +251,28 @@ r"""
|
|||||||
#=================================================================
|
#=================================================================
|
||||||
|
|
||||||
>>> _test_css("background: url('/some/path.html')")
|
>>> _test_css("background: url('/some/path.html')")
|
||||||
"background: url('/web/20131010ce_/http://example.com/some/path.html')"
|
"background: url('/web/20131010oe_/http://example.com/some/path.html')"
|
||||||
|
|
||||||
>>> _test_css("background: url('../path.html')")
|
>>> _test_css("background: url('../path.html')")
|
||||||
"background: url('/web/20131010ce_/http://example.com/path.html')"
|
"background: url('/web/20131010oe_/http://example.com/path.html')"
|
||||||
|
|
||||||
>>> _test_css("background: url(\"http://domain.com/path.html\")")
|
>>> _test_css("background: url(\"http://domain.com/path.html\")")
|
||||||
'background: url("/web/20131010ce_/http://domain.com/path.html")'
|
'background: url("/web/20131010oe_/http://domain.com/path.html")'
|
||||||
|
|
||||||
>>> _test_css('background: url(" http://domain.com/path.html ")')
|
>>> _test_css('background: url(" http://domain.com/path.html ")')
|
||||||
'background: url(" /web/20131010ce_/http://domain.com/path.html ")'
|
'background: url(" /web/20131010oe_/http://domain.com/path.html ")'
|
||||||
|
|
||||||
>>> _test_css('background: url(" http://domain.com/path.html x ")')
|
>>> _test_css('background: url(" http://domain.com/path.html x ")')
|
||||||
'background: url(" /web/20131010ce_/http://domain.com/path.html x ")'
|
'background: url(" /web/20131010oe_/http://domain.com/path.html x ")'
|
||||||
|
|
||||||
>>> _test_css("background: url(file.jpeg)")
|
>>> _test_css("background: url(file.jpeg)")
|
||||||
'background: url(file.jpeg)'
|
'background: url(file.jpeg)'
|
||||||
|
|
||||||
>>> _test_css("background:#abc url('/static/styles/../images/layout/logo.png')")
|
>>> _test_css("background:#abc url('/static/styles/../images/layout/logo.png')")
|
||||||
"background:#abc url('/web/20131010ce_/http://example.com/static/images/layout/logo.png')"
|
"background:#abc url('/web/20131010oe_/http://example.com/static/images/layout/logo.png')"
|
||||||
|
|
||||||
>>> _test_css("background:#000 url('/static/styles/../../images/layout/logo.png')")
|
>>> _test_css("background:#000 url('/static/styles/../../images/layout/logo.png')")
|
||||||
"background:#000 url('/web/20131010ce_/http://example.com/images/layout/logo.png')"
|
"background:#000 url('/web/20131010oe_/http://example.com/images/layout/logo.png')"
|
||||||
|
|
||||||
>>> _test_css("background: url('')")
|
>>> _test_css("background: url('')")
|
||||||
"background: url('')"
|
"background: url('')"
|
||||||
|
@ -409,7 +409,7 @@ class TestWbIntegration(BaseConfigTest):
|
|||||||
assert resp.content_type == 'text/html'
|
assert resp.content_type == 'text/html'
|
||||||
assert resp.status_int == 404
|
assert resp.status_int == 404
|
||||||
|
|
||||||
assert 'Url Not Found' in resp.text
|
assert 'URL Not Found' in resp.text, resp.text
|
||||||
assert 'The url <b>http://not-exist.example.com/path?A=B</b> could not be found in this collection.' in resp.text
|
assert 'The url <b>http://not-exist.example.com/path?A=B</b> could not be found in this collection.' in resp.text
|
||||||
|
|
||||||
def test_static_content(self):
|
def test_static_content(self):
|
||||||
|
@ -46,7 +46,7 @@ class BasePreferTests(BaseConfigTest):
|
|||||||
assert 'WB Insert' in resp.text
|
assert 'WB Insert' in resp.text
|
||||||
|
|
||||||
assert 'wombat.js' not in resp.text
|
assert 'wombat.js' not in resp.text
|
||||||
assert 'new _WBWombat' not in resp.text, resp.text
|
assert 'WBWombatInit' not in resp.text, resp.text
|
||||||
|
|
||||||
def _assert_rewritten(self, resp):
|
def _assert_rewritten(self, resp):
|
||||||
self._assert_pref_headers(resp, 'rewritten')
|
self._assert_pref_headers(resp, 'rewritten')
|
||||||
@ -55,7 +55,7 @@ class BasePreferTests(BaseConfigTest):
|
|||||||
assert 'WB Insert' in resp.text
|
assert 'WB Insert' in resp.text
|
||||||
|
|
||||||
assert 'wombat.js' in resp.text
|
assert 'wombat.js' in resp.text
|
||||||
assert 'new _WBWombat' in resp.text, resp.text
|
assert 'WBWombatInit' in resp.text, resp.text
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
Loading…
x
Reference in New Issue
Block a user