2014-03-17 15:30:45 -07:00
r """
2014-02-17 02:34:39 -08:00
#=================================================================
# Custom Regex
2014-03-17 15:30:45 -07:00
#=================================================================
2014-02-17 02:34:39 -08:00
# Test https->http converter (other tests below in subclasses)
>> > RegexRewriter ( [ ( RegexRewriter . HTTPX_MATCH_STR , RegexRewriter . remove_https , 0 ) ] ) . rewrite ( ' a = https://example.com; b = http://example.com; c = https://some-url/path/https://embedded.example.com ' )
' a = http://example.com; b = http://example.com; c = http://some-url/path/http://embedded.example.com '
#=================================================================
# JS Rewriting
#=================================================================
>> > _test_js ( ' location = " http://example.com/abc.html " ' )
2014-10-18 11:21:07 -07:00
' WB_wombat_location = " /web/20131010/http://example.com/abc.html " '
2014-02-17 02:34:39 -08:00
>> > _test_js ( r ' location = " http: \ / \ /example.com/abc.html " ' )
2014-10-18 11:21:07 -07:00
' WB_wombat_location = " /web/20131010/http: \\ / \\ /example.com/abc.html " '
2014-02-17 02:34:39 -08:00
>> > _test_js ( r ' location = " http: \\ / \\ /example.com/abc.html " ' )
2014-10-18 11:21:07 -07:00
' WB_wombat_location = " /web/20131010/http: \\ \\ / \\ \\ /example.com/abc.html " '
2014-02-17 02:34:39 -08:00
>> > _test_js ( r " location = ' http://example.com/abc.html/ ' " )
2014-10-18 11:21:07 -07:00
" WB_wombat_location = ' /web/20131010/http://example.com/abc.html/ ' "
2014-02-17 02:34:39 -08:00
>> > _test_js ( r ' location = http://example.com/abc.html/ ' )
' WB_wombat_location = http://example.com/abc.html/ '
# not rewritten -- to be handled on client side
>> > _test_js ( r ' location = " /abc.html " ' )
' WB_wombat_location = " /abc.html " '
>> > _test_js ( r ' location = /http: \ / \ /example.com/abc.html/ ' )
' WB_wombat_location = /http: \\ / \\ /example.com/abc.html/ '
>> > _test_js ( ' " /location " == some_location_val; locations = location; ' )
' " /location " == some_location_val; locations = WB_wombat_location; '
>> > _test_js ( ' cool_Location = " http://example.com/abc.html " ' )
2014-10-18 11:21:07 -07:00
' cool_Location = " /web/20131010/http://example.com/abc.html " '
2014-02-17 02:34:39 -08:00
>> > _test_js ( ' window.location = " http://example.com/abc.html " document.domain = " anotherdomain.com " ' )
2014-10-18 11:21:07 -07:00
' window.WB_wombat_location = " /web/20131010/http://example.com/abc.html " document.WB_wombat_domain = " anotherdomain.com " '
2014-02-17 02:34:39 -08:00
>> > _test_js ( ' document_domain = " anotherdomain.com " ; window.document.domain = " example.com " ' )
' document_domain = " anotherdomain.com " ; window.document.WB_wombat_domain = " example.com " '
2014-11-23 18:56:49 -08:00
# protocol-rel escapes
>> > _test_js ( ' " //example.com/ " ' )
' " /web/20131010/http://example.com/ " '
>> > _test_js ( r ' " \ / \ /example.com/ " ' )
' " /web/20131010/http: \\ / \\ /example.com/ " '
>> > _test_js ( r ' " \\ / \\ /example.com/ " ' )
' " /web/20131010/http: \\ \\ / \\ \\ /example.com/ " '
2014-02-17 02:34:39 -08:00
# custom rules added
2014-02-26 18:02:01 -08:00
>> > _test_js ( ' window.location = " http://example.com/abc.html " ; some_func(); ' , [ ( ' some_func \ ( \ ).* ' , RegexRewriter . format ( ' /* {0} */ ' ) , 0 ) ] )
2014-10-18 11:21:07 -07:00
' window.WB_wombat_location = " /web/20131010/http://example.com/abc.html " ; /*some_func(); */ '
2014-02-17 02:34:39 -08:00
# scheme-agnostic
>> > _test_js ( ' cool_Location = " //example.com/abc.html " //comment ' )
2014-10-18 11:21:07 -07:00
' cool_Location = " /web/20131010/http://example.com/abc.html " //comment '
2014-02-17 02:34:39 -08:00
2014-07-15 12:57:02 -07:00
# document.cookie test
>> > _test_js ( ' document.cookie = " a=b; Path=/ " ' )
' document.WB_wombat_cookie = " a=b; Path=/ " '
2014-08-05 01:47:52 -07:00
# js-escaped
>> > _test_js ( ' "http: \\ / \\ /www.example.com \\ /some \\ /path \\ /?query=1" ' )
2014-10-18 11:21:07 -07:00
' "/web/20131010/http: \\ / \\ /www.example.com \\ /some \\ /path \\ /?query=1" '
2014-08-05 01:47:52 -07:00
2014-11-01 15:39:51 -07:00
>> > _test_js ( ' " http: \ / \ /sub-site.example.com \ /path-dashes \ /path_other \ /foo_bar.txt " ' )
' " /web/20131010/http: \\ / \\ /sub-site.example.com \\ /path-dashes \\ /path_other \\ /foo_bar.txt " '
2014-02-17 02:34:39 -08:00
#=================================================================
# XML Rewriting
#=================================================================
>> > _test_xml ( ' <tag xmlns= " http://www.example.com/ns " attr= " http://example.com " ></tag> ' )
2014-10-18 11:21:07 -07:00
' <tag xmlns= " http://www.example.com/ns " attr= " /web/20131010/http://example.com " ></tag> '
2014-02-17 02:34:39 -08:00
>> > _test_xml ( ' <tag xmlns:xsi= " http://www.example.com/ns " attr= " http://example.com " ></tag> ' )
2014-10-18 11:21:07 -07:00
' <tag xmlns:xsi= " http://www.example.com/ns " attr= " /web/20131010/http://example.com " ></tag> '
2014-02-17 02:34:39 -08:00
>> > _test_xml ( ' <tag> http://example.com<other>abchttp://example.com</other></tag> ' )
2014-10-18 11:21:07 -07:00
' <tag> /web/20131010/http://example.com<other>abchttp://example.com</other></tag> '
2014-02-17 02:34:39 -08:00
>> > _test_xml ( ' <main> http://www.example.com/blah</tag> <other xmlns:abcdef= " http://example.com " /> http://example.com </main> ' )
2014-10-18 11:21:07 -07:00
' <main> /web/20131010/http://www.example.com/blah</tag> <other xmlns:abcdef= " http://example.com " /> /web/20131010/http://example.com </main> '
2014-02-17 02:34:39 -08:00
#=================================================================
# CSS Rewriting
#=================================================================
>> > _test_css ( " background: url( ' /some/path.html ' ) " )
2014-10-18 11:21:07 -07:00
" background: url( ' /web/20131010/http://example.com/some/path.html ' ) "
2014-02-17 02:34:39 -08:00
>> > _test_css ( " background: url( ' ../path.html ' ) " )
2014-10-18 11:21:07 -07:00
" background: url( ' /web/20131010/http://example.com/path.html ' ) "
2014-02-17 02:34:39 -08:00
>> > _test_css ( " background: url( \" http://domain.com/path.html \" ) " )
2014-10-18 11:21:07 -07:00
' background: url( " /web/20131010/http://domain.com/path.html " ) '
2014-02-17 02:34:39 -08:00
>> > _test_css ( " background: url(file.jpeg) " )
2014-10-18 11:21:07 -07:00
' background: url(/web/20131010/http://example.com/file.jpeg) '
2014-02-17 02:34:39 -08:00
2014-07-14 19:13:19 -07:00
>> > _test_css ( " background:#abc url( ' /static/styles/../images/layout/logo.png ' ) " )
2014-10-18 11:21:07 -07:00
" background:#abc url( ' /web/20131010/http://example.com/static/images/layout/logo.png ' ) "
2014-07-14 19:13:19 -07:00
2014-07-14 20:50:45 -07:00
>> > _test_css ( " background:#000 url( ' /static/styles/../../images/layout/logo.png ' ) " )
2014-10-18 11:21:07 -07:00
" background:#000 url( ' /web/20131010/http://example.com/images/layout/logo.png ' ) "
2014-07-14 20:50:45 -07:00
2014-02-17 02:34:39 -08:00
>> > _test_css ( " background: url( ' ' ) " )
" background: url( ' ' ) "
>> > _test_css ( " background: url ( \" weirdpath \' ) " )
2014-10-18 11:21:07 -07:00
' background: url ( " /web/20131010/http://example.com/weirdpath \' ) '
2014-02-17 02:34:39 -08:00
>> > _test_css ( " @import url ( ' path.css ' ) " )
2014-10-18 11:21:07 -07:00
" @import url ( ' /web/20131010/http://example.com/path.css ' ) "
2014-02-17 02:34:39 -08:00
>> > _test_css ( " @import url( ' path.css ' ) " )
2014-10-18 11:21:07 -07:00
" @import url( ' /web/20131010/http://example.com/path.css ' ) "
2014-02-17 02:34:39 -08:00
>> > _test_css ( " @import ( ' path.css ' ) " )
2014-10-18 11:21:07 -07:00
" @import ( ' /web/20131010/http://example.com/path.css ' ) "
2014-02-17 02:34:39 -08:00
>> > _test_css ( " @import \" path.css \" " )
2014-10-18 11:21:07 -07:00
' @import " /web/20131010/http://example.com/path.css " '
2014-02-17 02:34:39 -08:00
>> > _test_css ( " @import ( ' ../path.css \" " )
2014-10-18 11:21:07 -07:00
' @import ( \' /web/20131010/http://example.com/path.css " '
2014-02-17 02:34:39 -08:00
>> > _test_css ( " @import ( ' ../url.css \" " )
2014-10-18 11:21:07 -07:00
' @import ( \' /web/20131010/http://example.com/url.css " '
2014-02-17 02:34:39 -08:00
>> > _test_css ( " @import ( \" url.css \" ) " )
2014-10-18 11:21:07 -07:00
' @import ( " /web/20131010/http://example.com/url.css " ) '
2014-02-17 02:34:39 -08:00
>> > _test_css ( " @import url(/url.css) \n @import url(/anotherurl.css) \n @import url(/and_a_third.css) " )
2014-10-18 11:21:07 -07:00
' @import url(/web/20131010/http://example.com/url.css) \n @import url(/web/20131010/http://example.com/anotherurl.css) \n @import url(/web/20131010/http://example.com/and_a_third.css) '
2014-02-17 02:34:39 -08:00
"""
2014-05-13 17:07:41 -07:00
2014-02-17 02:34:39 -08:00
#=================================================================
from pywb . rewrite . url_rewriter import UrlRewriter
from pywb . rewrite . regex_rewriters import RegexRewriter , JSRewriter , CSSRewriter , XMLRewriter
2014-03-17 19:36:25 -07:00
# Shared rewriter fixture handed to every JS/XML/CSS rewriter built by the
# helper functions in this module.
# The two literals must not carry surrounding whitespace: the expected
# outputs in the module docstring (for example
# ``url(/web/20131010/http://example.com/url.css)``) show both pieces
# appearing back to back with no spaces in between.
urlrewriter = UrlRewriter('20131010/http://example.com/', '/web/')
2014-02-17 02:34:39 -08:00
def _test_js(string, extra=None):
    """Rewrite a JavaScript snippet with ``JSRewriter`` and return the result.

    :param string: JavaScript source text to rewrite.
    :param extra: optional list of additional rules, passed through as the
        second argument of ``JSRewriter``. ``None`` (the default) means no
        additional rules.
    :return: the value returned by ``JSRewriter.rewrite`` for ``string``.
    """
    # Build a fresh list on every call rather than sharing one mutable
    # default object across all calls of this helper.
    if extra is None:
        extra = []
    return JSRewriter(urlrewriter, extra).rewrite(string)
2014-02-17 02:34:39 -08:00
def _test_xml(string):
    """Run ``string`` through an ``XMLRewriter`` built on the module-level
    ``urlrewriter`` and return what ``rewrite`` produces.
    """
    xml_rewriter = XMLRewriter(urlrewriter)
    rewritten = xml_rewriter.rewrite(string)
    return rewritten
2014-02-17 02:34:39 -08:00
def _test_css(string):
    """Run ``string`` through a ``CSSRewriter`` built on the module-level
    ``urlrewriter`` and return what ``rewrite`` produces.
    """
    css_rewriter = CSSRewriter(urlrewriter)
    rewritten = css_rewriter.rewrite(string)
    return rewritten
2014-02-17 02:34:39 -08:00
# Script entry point: run the doctests found in this module.
# The guard must compare against the exact string "__main__"; a padded
# literal never matches, so the doctests would silently never run.
if __name__ == "__main__":
    import doctest

    doctest.testmod()