mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
rewriting: try more granular modifiers to distinguish embeds (in part for ukwa/ukwa-pywb#6):
- 'ba_' - for <base> rewriting
- 'je_' - 'javascript-embed', default for client-side rewriting in wombat
Better modifiers for CSS rewriting (server and client):
- 'ce_' - 'css-embed', for any url() embeds in CSS
- 'cs_' - for CSS stylesheet @import rewriting / other .css
This commit is contained in:
parent
b38cfb8d67
commit
5b7ca18e0f
@ -56,7 +56,7 @@ class HTMLRewriterMixin(StreamingRewriter):
|
||||
'archive': 'oe_'},
|
||||
'area': {'href': defmod},
|
||||
'audio': {'src': 'oe_'},
|
||||
'base': {'href': defmod},
|
||||
'base': {'href': 'ba_'},
|
||||
'blockquote': {'cite': defmod},
|
||||
'body': {'background': 'im_'},
|
||||
'button': {'formaction': defmod},
|
||||
|
@ -25,23 +25,23 @@ r"""
|
||||
|
||||
# Base Tests -- w/ rewrite (default)
|
||||
>>> parse('<html><head><base href="http://example.com/diff/path/file.html"/>')
|
||||
<html><head><base href="/web/20131226101010/http://example.com/diff/path/file.html"/>
|
||||
<html><head><base href="/web/20131226101010ba_/http://example.com/diff/path/file.html"/>
|
||||
|
||||
# Full Path
|
||||
>>> parse('<html><head><base href="http://example.com/diff/path/file.html"/>', urlrewriter=full_path_urlrewriter)
|
||||
<html><head><base href="http://localhost:80/web/20131226101010/http://example.com/diff/path/file.html"/>
|
||||
<html><head><base href="http://localhost:80/web/20131226101010ba_/http://example.com/diff/path/file.html"/>
|
||||
|
||||
# Full Path Scheme Rel Base
|
||||
>>> parse('<base href="//example.com"/><img src="/foo.gif"/>', urlrewriter=full_path_urlrewriter)
|
||||
<base href="//localhost:80/web/20131226101010///example.com/"/><img src="/web/20131226101010im_/http://example.com/foo.gif"/>
|
||||
<base href="//localhost:80/web/20131226101010ba_///example.com/"/><img src="/web/20131226101010im_/http://example.com/foo.gif"/>
|
||||
|
||||
# Rel Base
|
||||
>>> parse('<html><head><base href="/other/file.html"/>', urlrewriter=full_path_urlrewriter)
|
||||
<html><head><base href="/web/20131226101010/http://example.com/other/file.html"/>
|
||||
<html><head><base href="/web/20131226101010ba_/http://example.com/other/file.html"/>
|
||||
|
||||
# Rel Base + example
|
||||
>>> parse('<html><head><base href="/other/file.html"/><a href="/path.html">', urlrewriter=full_path_urlrewriter)
|
||||
<html><head><base href="/web/20131226101010/http://example.com/other/file.html"/><a href="/web/20131226101010/http://example.com/path.html">
|
||||
<html><head><base href="/web/20131226101010ba_/http://example.com/other/file.html"/><a href="/web/20131226101010/http://example.com/path.html">
|
||||
|
||||
# Rel Base
|
||||
>>> parse('<base href="./static/"/><img src="image.gif"/>', urlrewriter=full_path_urlrewriter)
|
||||
@ -53,7 +53,7 @@ r"""
|
||||
|
||||
# ensure trailing slash added
|
||||
>>> parse('<base href="http://example.com"/>')
|
||||
<base href="/web/20131226101010/http://example.com/"/>
|
||||
<base href="/web/20131226101010ba_/http://example.com/"/>
|
||||
|
||||
# Base Tests -- no rewrite
|
||||
>>> parse('<html><head><base href="http://example.com/diff/path/file.html"/>', urlrewriter=no_base_canon_rewriter)
|
||||
@ -244,29 +244,29 @@ r"""
|
||||
<div style="background: url('abc.html')" onblah on-click="location = 'redirect.html'"></div>
|
||||
|
||||
>>> parse('<div style="background: url(\'/other_path/abc.html\')" onblah onclick="window.location = \'redirect.html\'"></div>')
|
||||
<div style="background: url('/web/20131226101010/http://example.com/other_path/abc.html')" onblah onclick="window.WB_wombat_location = 'redirect.html'"></div>
|
||||
<div style="background: url('/web/20131226101010ce_/http://example.com/other_path/abc.html')" onblah onclick="window.WB_wombat_location = 'redirect.html'"></div>
|
||||
|
||||
>>> parse('<i style="background-image: url(http://foo-.bar_.example.com/)"></i>')
|
||||
<i style="background-image: url(/web/20131226101010/http://foo-.bar_.example.com/)"></i>
|
||||
<i style="background-image: url(/web/20131226101010ce_/http://foo-.bar_.example.com/)"></i>
|
||||
|
||||
>>> parse('<i style=\'background-image: url("http://foo.example.com/")\'></i>')
|
||||
<i style="background-image: url("/web/20131226101010/http://foo.example.com/")"></i>
|
||||
<i style="background-image: url("/web/20131226101010ce_/http://foo.example.com/")"></i>
|
||||
|
||||
>>> parse('<i style=\'background-image: url("http://foo.example.com/")\'></i>')
|
||||
<i style="background-image: url("/web/20131226101010/http://foo.example.com/")"></i>
|
||||
<i style="background-image: url("/web/20131226101010ce_/http://foo.example.com/")"></i>
|
||||
|
||||
>>> parse('<i style=\'background-image: url('http://foo.example.com/')\'></i>')
|
||||
<i style="background-image: url('/web/20131226101010/http://foo.example.com/')"></i>
|
||||
|
||||
>>> parse("<i style='background-image: url('http://foo.example.com/')'></i>")
|
||||
<i style="background-image: url('/web/20131226101010/http://foo.example.com/')"></i>
|
||||
<i style="background-image: url('/web/20131226101010ce_/http://foo.example.com/')"></i>
|
||||
|
||||
#>>> parse('<i style=\'background-image: url("http://исп/")\'></i>')
|
||||
<i style="background-image: url("/web/20131226101010/http://%D0%B8%D1%81%D0%BF/")"></i>
|
||||
|
||||
# Style
|
||||
>>> parse('<style>@import "/styles.css" .a { font-face: url(\'../myfont.ttf\') }</style>')
|
||||
<style>@import "/web/20131226101010/http://example.com/styles.css" .a { font-face: url('/web/20131226101010/http://example.com/some/myfont.ttf') }</style>
|
||||
<style>@import "/web/20131226101010cs_/http://example.com/styles.css" .a { font-face: url('/web/20131226101010ce_/http://example.com/some/myfont.ttf') }</style>
|
||||
|
||||
# Unterminated style tag, handle and auto-terminate
|
||||
>>> parse('<style>@import url(styles.css)')
|
||||
|
@ -251,28 +251,28 @@ r"""
|
||||
#=================================================================
|
||||
|
||||
>>> _test_css("background: url('/some/path.html')")
|
||||
"background: url('/web/20131010/http://example.com/some/path.html')"
|
||||
"background: url('/web/20131010ce_/http://example.com/some/path.html')"
|
||||
|
||||
>>> _test_css("background: url('../path.html')")
|
||||
"background: url('/web/20131010/http://example.com/path.html')"
|
||||
"background: url('/web/20131010ce_/http://example.com/path.html')"
|
||||
|
||||
>>> _test_css("background: url(\"http://domain.com/path.html\")")
|
||||
'background: url("/web/20131010/http://domain.com/path.html")'
|
||||
'background: url("/web/20131010ce_/http://domain.com/path.html")'
|
||||
|
||||
>>> _test_css('background: url(" http://domain.com/path.html ")')
|
||||
'background: url(" /web/20131010/http://domain.com/path.html ")'
|
||||
'background: url(" /web/20131010ce_/http://domain.com/path.html ")'
|
||||
|
||||
>>> _test_css('background: url(" http://domain.com/path.html x ")')
|
||||
'background: url(" /web/20131010/http://domain.com/path.html x ")'
|
||||
'background: url(" /web/20131010ce_/http://domain.com/path.html x ")'
|
||||
|
||||
>>> _test_css("background: url(file.jpeg)")
|
||||
'background: url(file.jpeg)'
|
||||
|
||||
>>> _test_css("background:#abc url('/static/styles/../images/layout/logo.png')")
|
||||
"background:#abc url('/web/20131010/http://example.com/static/images/layout/logo.png')"
|
||||
"background:#abc url('/web/20131010ce_/http://example.com/static/images/layout/logo.png')"
|
||||
|
||||
>>> _test_css("background:#000 url('/static/styles/../../images/layout/logo.png')")
|
||||
"background:#000 url('/web/20131010/http://example.com/images/layout/logo.png')"
|
||||
"background:#000 url('/web/20131010ce_/http://example.com/images/layout/logo.png')"
|
||||
|
||||
>>> _test_css("background: url('')")
|
||||
"background: url('')"
|
||||
@ -281,7 +281,7 @@ r"""
|
||||
'background: url ("weirdpath\')'
|
||||
|
||||
>>> _test_css("@import url ('/path.css')")
|
||||
"@import url ('/web/20131010/http://example.com/path.css')"
|
||||
"@import url ('/web/20131010cs_/http://example.com/path.css')"
|
||||
|
||||
>>> _test_css("@import url('path.css')")
|
||||
"@import url('path.css')"
|
||||
@ -290,19 +290,19 @@ r"""
|
||||
"@import ( 'path.css')"
|
||||
|
||||
>>> _test_css("@import \"/path.css\"")
|
||||
'@import "/web/20131010/http://example.com/path.css"'
|
||||
'@import "/web/20131010cs_/http://example.com/path.css"'
|
||||
|
||||
>>> _test_css("@import ('../path.css\"")
|
||||
'@import (\'/web/20131010/http://example.com/path.css"'
|
||||
'@import (\'/web/20131010cs_/http://example.com/path.css"'
|
||||
|
||||
>>> _test_css("@import ('../url.css\"")
|
||||
'@import (\'/web/20131010/http://example.com/url.css"'
|
||||
'@import (\'/web/20131010cs_/http://example.com/url.css"'
|
||||
|
||||
>>> _test_css("@import (\"url.css\")")
|
||||
'@import ("url.css")'
|
||||
|
||||
>>> _test_css("@import url(/url.css)\n@import url(/anotherurl.css)\n @import url(/and_a_third.css)")
|
||||
'@import url(/web/20131010/http://example.com/url.css)\n@import url(/web/20131010/http://example.com/anotherurl.css)\n @import url(/web/20131010/http://example.com/and_a_third.css)'
|
||||
'@import url(/web/20131010cs_/http://example.com/url.css)\n@import url(/web/20131010cs_/http://example.com/anotherurl.css)\n @import url(/web/20131010cs_/http://example.com/and_a_third.css)'
|
||||
|
||||
"""
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user