diff --git a/pywb/framework/wbrequestresponse.py b/pywb/framework/wbrequestresponse.py
index ba1f6a02..11fd99db 100644
--- a/pywb/framework/wbrequestresponse.py
+++ b/pywb/framework/wbrequestresponse.py
@@ -87,17 +87,6 @@ class WbRequest(object):
self._parse_extra()
- @property
- def is_embed(self):
- return (self.wb_url and
- self.wb_url.mod and
- self.wb_url.mod != 'id_')
-
- @property
- def is_identity(self):
- return (self.wb_url and
- self.wb_url.mod == 'id_')
-
def _is_ajax(self):
value = self.env.get('HTTP_X_REQUESTED_WITH')
if value and value.lower() == 'xmlhttprequest':
diff --git a/pywb/rewrite/html_rewriter.py b/pywb/rewrite/html_rewriter.py
index 5a10d651..99cab8d0 100644
--- a/pywb/rewrite/html_rewriter.py
+++ b/pywb/rewrite/html_rewriter.py
@@ -19,35 +19,40 @@ class HTMLRewriterMixin(object):
to rewriters for script and css
"""
- REWRITE_TAGS = {
- 'a': {'href': ''},
- 'applet': {'codebase': 'oe_',
- 'archive': 'oe_'},
- 'area': {'href': ''},
- 'base': {'href': ''},
- 'blockquote': {'cite': ''},
- 'body': {'background': 'im_'},
- 'del': {'cite': ''},
- 'embed': {'src': 'oe_'},
- 'head': {'': ''}, # for head rewriting
- 'iframe': {'src': 'if_'},
- 'img': {'src': 'im_'},
- 'ins': {'cite': ''},
- 'input': {'src': 'im_'},
- 'form': {'action': ''},
- 'frame': {'src': 'fr_'},
- 'link': {'href': 'oe_'},
- 'meta': {'content': ''},
- 'object': {'codebase': 'oe_',
- 'data': 'oe_'},
- 'q': {'cite': ''},
- 'ref': {'href': 'oe_'},
- 'script': {'src': 'js_'},
- 'div': {'data-src': '',
- 'data-uri': ''},
- 'li': {'data-src': '',
- 'data-uri': ''},
- }
+ @staticmethod
+ def _init_rewrite_tags(defmod):
+     """
+     Build the table mapping tag name -> {attribute name: url modifier}.
+
+     Attributes without a type-specific modifier use ``defmod``; the
+     typed modifiers (``oe_``, ``im_``, ``if_``, ``fr_``, ``js_``) are
+     fixed. A new table, with a separate dict per tag, is built on
+     every call.
+     """
+     tag_specs = (
+         ('a', (('href', defmod),)),
+         ('applet', (('codebase', 'oe_'), ('archive', 'oe_'))),
+         ('area', (('href', defmod),)),
+         ('base', (('href', defmod),)),
+         ('blockquote', (('cite', defmod),)),
+         ('body', (('background', 'im_'),)),
+         ('del', (('cite', defmod),)),
+         ('embed', (('src', 'oe_'),)),
+         ('head', (('', defmod),)),  # for head rewriting
+         ('iframe', (('src', 'if_'),)),
+         ('img', (('src', 'im_'),)),
+         ('ins', (('cite', defmod),)),
+         ('input', (('src', 'im_'),)),
+         ('form', (('action', defmod),)),
+         ('frame', (('src', 'fr_'),)),
+         ('link', (('href', 'oe_'),)),
+         ('meta', (('content', defmod),)),
+         ('object', (('codebase', 'oe_'), ('data', 'oe_'))),
+         ('q', (('cite', defmod),)),
+         ('ref', (('href', 'oe_'),)),
+         ('script', (('src', 'js_'),)),
+         ('div', (('data-src', defmod), ('data-uri', defmod))),
+         ('li', (('data-src', defmod), ('data-uri', defmod))),
+     )
+
+     # dict(...) per entry so no two tags share the same attribute dict
+     return dict((tag, dict(attrs)) for tag, attrs in tag_specs)
+
STATE_TAGS = ['script', 'style']
@@ -70,7 +75,8 @@ class HTMLRewriterMixin(object):
def __init__(self, url_rewriter,
head_insert=None,
js_rewriter_class=JSRewriter,
- css_rewriter_class=CSSRewriter):
+ css_rewriter_class=CSSRewriter,
+ defmod=''):
self.url_rewriter = url_rewriter
self._wb_parse_context = None
@@ -79,6 +85,7 @@ class HTMLRewriterMixin(object):
self.css_rewriter = css_rewriter_class(url_rewriter)
self.head_insert = head_insert
+ self.rewrite_tags = self._init_rewrite_tags(defmod)
# ===========================
META_REFRESH_REGEX = re.compile('^[\\d.]+\\s*;\\s*url\\s*=\\s*(.+?)\\s*$',
@@ -140,9 +147,9 @@ class HTMLRewriterMixin(object):
self.head_insert = None
# attr rewriting
- handler = self.REWRITE_TAGS.get(tag)
+ handler = self.rewrite_tags.get(tag)
if not handler:
- handler = self.REWRITE_TAGS.get('')
+ handler = self.rewrite_tags.get('')
if not handler:
return False
@@ -245,16 +252,9 @@ class HTMLRewriterMixin(object):
#=================================================================
class HTMLRewriter(HTMLRewriterMixin, HTMLParser):
- def __init__(self, url_rewriter,
- head_insert=None,
- js_rewriter_class=JSRewriter,
- css_rewriter_class=CSSRewriter):
-
+ def __init__(self, *args, **kwargs):
HTMLParser.__init__(self)
- super(HTMLRewriter, self).__init__(url_rewriter,
- head_insert,
- js_rewriter_class,
- css_rewriter_class)
+ super(HTMLRewriter, self).__init__(*args, **kwargs)
def feed(self, string):
try:
diff --git a/pywb/rewrite/lxml_html_rewriter.py b/pywb/rewrite/lxml_html_rewriter.py
index abf28fc4..29355be4 100644
--- a/pywb/rewrite/lxml_html_rewriter.py
+++ b/pywb/rewrite/lxml_html_rewriter.py
@@ -17,15 +17,8 @@ from html_rewriter import HTMLRewriterMixin
class LXMLHTMLRewriter(HTMLRewriterMixin):
END_HTML = re.compile(r'\s*html\s*>', re.IGNORECASE)
- def __init__(self, url_rewriter,
- head_insert=None,
- js_rewriter_class=JSRewriter,
- css_rewriter_class=CSSRewriter):
-
- super(LXMLHTMLRewriter, self).__init__(url_rewriter,
- head_insert,
- js_rewriter_class,
- css_rewriter_class)
+ def __init__(self, *args, **kwargs):
+ super(LXMLHTMLRewriter, self).__init__(*args, **kwargs)
self.target = RewriterTarget(self)
self.parser = lxml.etree.HTMLParser(remove_pis=False,
diff --git a/pywb/rewrite/rewrite_content.py b/pywb/rewrite/rewrite_content.py
index c2d17047..ae0ef70d 100644
--- a/pywb/rewrite/rewrite_content.py
+++ b/pywb/rewrite/rewrite_content.py
@@ -16,10 +16,11 @@ from pywb.utils.bufferedreaders import ChunkedDataReader
#=================================================================
class RewriteContent:
- def __init__(self, ds_rules_file=None):
+ def __init__(self, ds_rules_file=None, defmod=''):
self.ruleset = RuleSet(RewriteRules, 'rewrite',
default_rule_config={},
ds_rules_file=ds_rules_file)
+ self.defmod = defmod
def sanitize_content(self, status_headers, stream):
# remove transfer encoding chunked and wrap in a dechunking stream
@@ -111,7 +112,8 @@ class RewriteContent:
rewriter = rewriter_class(urlrewriter,
js_rewriter_class=rule.rewriters['js'],
css_rewriter_class=rule.rewriters['css'],
- head_insert=head_insert_str)
+ head_insert=head_insert_str,
+ defmod=self.defmod)
else:
# apply one of (js, css, xml) rewriters
diff --git a/pywb/rewrite/wburl.py b/pywb/rewrite/wburl.py
index 67bab4fb..982743ae 100644
--- a/pywb/rewrite/wburl.py
+++ b/pywb/rewrite/wburl.py
@@ -194,6 +194,21 @@ class WbUrl(BaseWbUrl):
else:
return url
+ @property
+ def is_mainpage(self):
+     """True when no modifier is set or the modifier is 'mp_'."""
+     if not self.mod:
+         return True
+
+     return self.mod == 'mp_'
+
+ @property
+ def is_embed(self):
+     """
+     True when a modifier is set and it is neither 'id_' nor 'mp_'.
+
+     Always returns a bool: an empty or missing modifier yields False
+     rather than the raw falsy ``mod`` value.
+     """
+     return bool(self.mod) and self.mod not in ('id_', 'mp_')
+
+ @property
+ def is_identity(self):
+     """True when the modifier is exactly 'id_'."""
+     modifier = self.mod
+     return modifier == 'id_'
+
def __str__(self):
return self.to_str()
diff --git a/pywb/ui/frame_insert.html b/pywb/ui/frame_insert.html
index fd772251..71ddbd31 100644
--- a/pywb/ui/frame_insert.html
+++ b/pywb/ui/frame_insert.html
@@ -15,9 +15,9 @@ window.addEventListener("message", update_url, false);
function push_state(url) {
state = {}
- state.inner_url = wbinfo.prefix + url;
- state.outer_url = wbinfo.prefix + "fr_/" + url;
-
+ state.outer_url = wbinfo.prefix + url;
+ state.inner_url = wbinfo.prefix + "mp_/" + url;
+
if (url == wbinfo.capture_url) {
return;
}
@@ -30,6 +30,7 @@ function pop_state(url) {
}
function update_url(event) {
+ console.log(event);
if (event.source == window.frames[0]) {
push_state(event.data);
}
diff --git a/pywb/ui/head_insert.html b/pywb/ui/head_insert.html
index 19c14fa0..9b6f3de7 100644
--- a/pywb/ui/head_insert.html
+++ b/pywb/ui/head_insert.html
@@ -11,7 +11,7 @@
diff --git a/pywb/webapp/pywb_init.py b/pywb/webapp/pywb_init.py
index 337c48c6..cb0edee4 100644
--- a/pywb/webapp/pywb_init.py
+++ b/pywb/webapp/pywb_init.py
@@ -11,7 +11,9 @@ from pywb.warc.resolvingloader import ResolvingLoader
from pywb.rewrite.rewrite_content import RewriteContent
from pywb.rewrite.rewriterules import use_lxml_parser
-from views import load_template_file, load_query_template, add_env_globals
+from views import J2TemplateView, add_env_globals
+from views import J2HtmlCapturesView, HeadInsertView
+
from replay_views import ReplayView
from query_handler import QueryHandler
@@ -78,8 +80,9 @@ def create_wb_handler(query_handler, config,
if template_globals:
add_env_globals(template_globals)
- head_insert_view = load_template_file(config.get('head_insert_html'),
- 'Head Insert')
+ head_insert_view = (HeadInsertView.
+ create_template(config.get('head_insert_html'),
+ 'Head Insert'))
replayer = ReplayView(
content_loader=resolving_loader,
@@ -97,8 +100,9 @@ def create_wb_handler(query_handler, config,
reporter=config.get('reporter')
)
- search_view = load_template_file(config.get('search_html'),
- 'Search Page')
+ search_view = (J2TemplateView.
+ create_template(config.get('search_html'),
+ 'Search Page'))
wb_handler_class = config.get('wb_handler_class', WBHandler)
@@ -120,8 +124,9 @@ def init_collection(value, config):
ds_rules_file = route_config.get('domain_specific_rules', None)
- html_view = load_query_template(config.get('query_html'),
- 'Captures Page')
+ html_view = (J2HtmlCapturesView.
+ create_template(config.get('query_html'),
+ 'Captures Page'))
query_handler = QueryHandler.init_from_config(route_config,
ds_rules_file,
@@ -247,9 +252,9 @@ def create_wb_router(passed_config={}):
abs_path=config.get('absolute_paths', True),
- home_view=load_template_file(config.get('home_html'),
- 'Home Page'),
+ home_view=J2TemplateView.create_template(config.get('home_html'),
+ 'Home Page'),
- error_view=load_template_file(config.get('error_html'),
- 'Error Page')
+ error_view=J2TemplateView.create_template(config.get('error_html'),
+ 'Error Page')
)
diff --git a/pywb/webapp/replay_views.py b/pywb/webapp/replay_views.py
index 31fe4b57..c45b5983 100644
--- a/pywb/webapp/replay_views.py
+++ b/pywb/webapp/replay_views.py
@@ -113,7 +113,10 @@ class ReplayView(object):
urlrewriter = wbrequest.urlrewriter
- head_insert_func = self.get_head_insert_func(wbrequest, cdx)
+ head_insert_func = None
+ if self.head_insert_view:
+ head_insert_func = self.head_insert_view.create_insert_func(wbrequest,
+ cdx)
result = (self.content_rewriter.
rewrite_content(urlrewriter,
@@ -121,7 +124,7 @@ class ReplayView(object):
stream=stream,
head_insert_func=head_insert_func,
urlkey=cdx['urlkey'],
- sanitize_only=wbrequest.is_identity))
+ sanitize_only=wbrequest.wb_url.is_identity))
(status_headers, response_iter, is_rewritten) = result
@@ -141,18 +144,6 @@ class ReplayView(object):
return response
- def get_head_insert_func(self, wbrequest, cdx):
- # no head insert specified
- if not self.head_insert_view:
- return None
-
- def make_head_insert(rule):
- return (self.head_insert_view.
- render_to_string(wbrequest=wbrequest,
- cdx=cdx,
- rule=rule))
- return make_head_insert
-
# Buffer rewrite iterator and return a response from a string
def buffered_response(self, status_headers, iterator):
out = BytesIO()
@@ -207,7 +198,7 @@ class ReplayView(object):
# skip all 304s
if (status_headers.statusline.startswith('304') and
- not wbrequest.is_identity):
+ not wbrequest.wb_url.is_identity):
raise CaptureException('Skipping 304 Modified: ' + str(cdx))
diff --git a/pywb/webapp/rewrite_handler.py b/pywb/webapp/rewrite_handler.py
index 07f6644c..894aae39 100644
--- a/pywb/webapp/rewrite_handler.py
+++ b/pywb/webapp/rewrite_handler.py
@@ -13,22 +13,24 @@ from pywb.utils.statusandheaders import StatusAndHeaders
from pywb.rewrite.rewriterules import use_lxml_parser
import datetime
-#import urllib2
-import urlparse
-import httplib
import requests
from io import BytesIO, BufferedReader
-from views import load_template_file
+from views import J2TemplateView, HeadInsertView
class RewriteHandler(WbUrlHandler): # pragma: no cover
def __init__(self, head_insert_view=None):
#use_lxml_parser()
- self.rewriter = RewriteContent()
- self.head_insert_view = load_template_file('ui/head_insert.html', 'Head Insert')
- self.frame_insert_view = load_template_file('ui/frame_insert.html', 'Frame Insert')
+ self.rewriter = RewriteContent(defmod='mp_')
+ self.head_insert_view = (HeadInsertView.
+ create_template('ui/head_insert.html',
+ 'Head Insert'))
+
+ self.frame_insert_view = (J2TemplateView.
+ create_template('ui/frame_insert.html',
+ 'Frame Insert'))
def proxy_request(self, url, env):
@@ -76,36 +78,12 @@ class RewriteHandler(WbUrlHandler): # pragma: no cover
stream=True)
return req
- def do_request(self, method, url, data, req_headers):
- splits = urlparse.urlsplit(url)
-
- hostport = splits.netloc.split(':', 1)
- host = hostport[0]
-
- if len(hostport) == 2:
- port = hostport[1]
- else:
- port = None
-
- path = splits.path
-
- if splits.query:
- path += '?' + splits.query
-
- if splits.scheme == 'https':
- conn = httplib.HTTPSConnection(host, port)
- else:
- conn = httplib.HTTPConnection(host, port)
-
- conn.request(method.upper(), path, data, req_headers)
- return conn.getresponse()
-
def __call__(self, wbrequest):
url = wbrequest.wb_url.url
- if wbrequest.wb_url.mod == 'fr_':
- embed_url = wbrequest.wb_url.to_str(mod='')
+ if not wbrequest.wb_url.mod:
+ embed_url = wbrequest.wb_url.to_str(mod='mp_')
timestamp = datetime_to_timestamp(datetime.datetime.utcnow())
return self.frame_insert_view.render_response(embed_url=embed_url,
@@ -133,7 +111,9 @@ class RewriteHandler(WbUrlHandler): # pragma: no cover
}
- head_insert_func = self.get_head_insert_func(wbrequest, cdx)
+ #head_insert_func = self.get_head_insert_func(wbrequest, cdx)
+ head_insert_func = self.head_insert_view.create_insert_func(wbrequest,
+ cdx)
result = self.rewriter.rewrite_content(wbrequest.urlrewriter,
status_headers,
diff --git a/pywb/webapp/views.py b/pywb/webapp/views.py
index c452d0e0..9aedc230 100644
--- a/pywb/webapp/views.py
+++ b/pywb/webapp/views.py
@@ -101,6 +101,14 @@ class J2TemplateView:
status=status,
content_type=content_type)
+ @staticmethod
+ def create_template(filename, desc='', view_class=None):
+     """
+     Create a template view for ``filename``.
+
+     :param filename: template path handed to the view constructor
+     :param desc: label used only in the debug log message
+     :param view_class: class to instantiate; defaults to J2TemplateView
+     :return: ``view_class(filename)``, or None when no filename is given
+     """
+     # No template configured: return None instead of wrapping a missing
+     # filename, so callers can test the returned view for truthiness
+     # before using it.
+     if not filename:
+         return None
+
+     if not view_class:
+         view_class = J2TemplateView
+
+     logging.debug('Adding {0}: {1}'.format(desc, filename))
+     return view_class(filename)
+
#=================================================================
def add_env_globals(glb):
@@ -108,17 +116,18 @@ def add_env_globals(glb):
#=================================================================
-def load_template_file(file, desc=None, view_class=J2TemplateView):
- if file:
- logging.debug('Adding {0}: {1}'.format(desc if desc else name, file))
- file = view_class(file)
+class HeadInsertView(J2TemplateView):
+ def create_insert_func(self, wbrequest, cdx):
+     """
+     Return a one-argument function that renders this template.
+
+     The returned function takes ``rule`` and calls ``render_to_string``
+     with the captured ``wbrequest`` and ``cdx`` plus that ``rule``.
+     """
+     def make_head_insert(rule):
+         context = dict(wbrequest=wbrequest, cdx=cdx, rule=rule)
+         return self.render_to_string(**context)
+
+     return make_head_insert
- return file
-
-
-#=================================================================
-def load_query_template(file, desc=None):
- return load_template_file(file, desc, J2HtmlCapturesView)
+ @staticmethod
+ def create_template(filename, desc=''):
+     """Create a HeadInsertView via J2TemplateView.create_template."""
+     view = J2TemplateView.create_template(filename, desc, HeadInsertView)
+     return view
#=================================================================
@@ -132,6 +141,11 @@ class J2HtmlCapturesView(J2TemplateView):
type=wbrequest.wb_url.type,
prefix=wbrequest.wb_prefix)
+ @staticmethod
+ def create_template(filename, desc=''):
+     """Create a J2HtmlCapturesView via J2TemplateView.create_template."""
+     view = J2TemplateView.create_template(filename, desc,
+                                           J2HtmlCapturesView)
+     return view
+
#=================================================================
class MementoTimemapView(object):