mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
Merge branch 'develop' into https-proxy
This commit is contained in:
commit
a2d86fa495
@ -179,13 +179,6 @@ class WbResponse(object):
|
|||||||
return WbResponse(StatusAndHeaders(status, redir_headers))
|
return WbResponse(StatusAndHeaders(status, redir_headers))
|
||||||
|
|
||||||
def __call__(self, env, start_response):
|
def __call__(self, env, start_response):
|
||||||
|
|
||||||
# PERF
|
|
||||||
perfstats = env.get('X_PERF')
|
|
||||||
if perfstats:
|
|
||||||
self.status_headers.headers.append(('X-Archive-Perf-Stats',
|
|
||||||
str(perfstats)))
|
|
||||||
|
|
||||||
start_response(self.status_headers.statusline,
|
start_response(self.status_headers.statusline,
|
||||||
self.status_headers.headers)
|
self.status_headers.headers)
|
||||||
|
|
||||||
|
@ -101,12 +101,9 @@ class HTMLRewriterMixin(object):
|
|||||||
if not m:
|
if not m:
|
||||||
return meta_refresh
|
return meta_refresh
|
||||||
|
|
||||||
try:
|
meta_refresh = (meta_refresh[:m.start(1)] +
|
||||||
meta_refresh = (meta_refresh[:m.start(1)] +
|
self._rewrite_url(m.group(1)) +
|
||||||
self._rewrite_url(m.group(1)) +
|
meta_refresh[m.end(1):])
|
||||||
meta_refresh[m.end(1):])
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return meta_refresh
|
return meta_refresh
|
||||||
# ===========================
|
# ===========================
|
||||||
@ -136,7 +133,7 @@ class HTMLRewriterMixin(object):
|
|||||||
return value.lower() == attr_value.lower()
|
return value.lower() == attr_value.lower()
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def _rewrite_tag_attrs(self, tag, tag_attrs, escape=False):
|
def _rewrite_tag_attrs(self, tag, tag_attrs):
|
||||||
# special case: script or style parse context
|
# special case: script or style parse context
|
||||||
if ((tag in self.STATE_TAGS) and not self._wb_parse_context):
|
if ((tag in self.STATE_TAGS) and not self._wb_parse_context):
|
||||||
self._wb_parse_context = tag
|
self._wb_parse_context = tag
|
||||||
@ -197,7 +194,7 @@ class HTMLRewriterMixin(object):
|
|||||||
rebase_rewriter(attr_value))
|
rebase_rewriter(attr_value))
|
||||||
|
|
||||||
# write the attr!
|
# write the attr!
|
||||||
self._write_attr(attr_name, attr_value, escape=escape)
|
self._write_attr(attr_name, attr_value)
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@ -217,12 +214,10 @@ class HTMLRewriterMixin(object):
|
|||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def _write_attr(self, name, value, escape=False):
|
def _write_attr(self, name, value):
|
||||||
# parser doesn't differentiate between 'attr=""' and just 'attr'
|
# parser doesn't differentiate between 'attr=""' and just 'attr'
|
||||||
# 'attr=""' is more common, so use that form
|
# 'attr=""' is more common, so use that form
|
||||||
if value:
|
if value:
|
||||||
if escape:
|
|
||||||
value = cgi.escape(value, quote=True)
|
|
||||||
self.out.write(' ' + name + '="' + value + '"')
|
self.out.write(' ' + name + '="' + value + '"')
|
||||||
else:
|
else:
|
||||||
self.out.write(' ' + name + '=""')
|
self.out.write(' ' + name + '=""')
|
||||||
@ -259,8 +254,8 @@ class HTMLRewriterMixin(object):
|
|||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def _internal_close(self):
|
def _internal_close(self): # pragma: no cover
|
||||||
pass
|
raise NotImplementedError('Base method')
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
@ -272,7 +267,8 @@ class HTMLRewriter(HTMLRewriterMixin, HTMLParser):
|
|||||||
def feed(self, string):
|
def feed(self, string):
|
||||||
try:
|
try:
|
||||||
HTMLParser.feed(self, string)
|
HTMLParser.feed(self, string)
|
||||||
except HTMLParseError:
|
except HTMLParseError: # pragma: no cover
|
||||||
|
# only raised in 2.6
|
||||||
self.out.write(string)
|
self.out.write(string)
|
||||||
|
|
||||||
def _internal_close(self):
|
def _internal_close(self):
|
||||||
@ -283,7 +279,8 @@ class HTMLRewriter(HTMLRewriterMixin, HTMLParser):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
HTMLParser.close(self)
|
HTMLParser.close(self)
|
||||||
except HTMLParseError:
|
except HTMLParseError: # pragma: no cover
|
||||||
|
# only raised in 2.6
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# called to unescape attrs -- do not unescape!
|
# called to unescape attrs -- do not unescape!
|
||||||
|
@ -58,10 +58,12 @@ class RewriteContent:
|
|||||||
|
|
||||||
return (rewritten_headers, stream)
|
return (rewritten_headers, stream)
|
||||||
|
|
||||||
def rewrite_content(self, wb_url, urlrewriter, headers, stream,
|
def rewrite_content(self, urlrewriter, headers, stream,
|
||||||
head_insert_func=None, urlkey='',
|
head_insert_func=None, urlkey='',
|
||||||
cdx=None):
|
cdx=None):
|
||||||
|
|
||||||
|
wb_url = urlrewriter.wburl
|
||||||
|
|
||||||
if (wb_url.is_identity or
|
if (wb_url.is_identity or
|
||||||
(not head_insert_func and wb_url.is_banner_only)):
|
(not head_insert_func and wb_url.is_banner_only)):
|
||||||
status_headers, stream = self.sanitize_content(headers, stream)
|
status_headers, stream = self.sanitize_content(headers, stream)
|
||||||
@ -109,16 +111,6 @@ class RewriteContent:
|
|||||||
else:
|
else:
|
||||||
stream = DecompressingBufferedReader(stream)
|
stream = DecompressingBufferedReader(stream)
|
||||||
|
|
||||||
#if self.decode_stream:
|
|
||||||
# if rewritten_headers.charset:
|
|
||||||
# encoding = rewritten_headers.charset
|
|
||||||
# else:
|
|
||||||
# (encoding, first_buff) = self._detect_charset(stream)
|
|
||||||
|
|
||||||
# if encoding not set or chardet thinks its ascii, use utf-8
|
|
||||||
# if not encoding or encoding == 'ascii':
|
|
||||||
# encoding = 'utf-8'
|
|
||||||
|
|
||||||
rule = self.ruleset.get_first_match(urlkey)
|
rule = self.ruleset.get_first_match(urlkey)
|
||||||
|
|
||||||
rewriter_class = rule.rewriters[text_type]
|
rewriter_class = rule.rewriters[text_type]
|
||||||
@ -149,8 +141,11 @@ class RewriteContent:
|
|||||||
rewriter = rewriter_class(urlrewriter)
|
rewriter = rewriter_class(urlrewriter)
|
||||||
|
|
||||||
# Create rewriting generator
|
# Create rewriting generator
|
||||||
gen = self._rewriting_stream_gen(rewriter, encoding,
|
gen = self.stream_to_gen(stream,
|
||||||
stream, first_buff)
|
rewrite_func=rewriter.rewrite,
|
||||||
|
final_read_func=rewriter.close,
|
||||||
|
first_buff=first_buff)
|
||||||
|
|
||||||
|
|
||||||
return (status_headers, gen, True)
|
return (status_headers, gen, True)
|
||||||
|
|
||||||
@ -179,32 +174,6 @@ class RewriteContent:
|
|||||||
for buff in self.stream_to_gen(stream):
|
for buff in self.stream_to_gen(stream):
|
||||||
yield buff
|
yield buff
|
||||||
|
|
||||||
|
|
||||||
# Create rewrite stream, may even be chunked by front-end
|
|
||||||
def _rewriting_stream_gen(self, rewriter, encoding,
|
|
||||||
stream, first_buff=None):
|
|
||||||
|
|
||||||
def do_rewrite(buff):
|
|
||||||
if encoding:
|
|
||||||
buff = self._decode_buff(buff, stream, encoding)
|
|
||||||
buff = rewriter.rewrite(buff)
|
|
||||||
if encoding:
|
|
||||||
buff = buff.encode(encoding)
|
|
||||||
|
|
||||||
return buff
|
|
||||||
|
|
||||||
def do_finish():
|
|
||||||
result = rewriter.close()
|
|
||||||
if encoding:
|
|
||||||
result = result.encode(encoding)
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
return self.stream_to_gen(stream,
|
|
||||||
rewrite_func=do_rewrite,
|
|
||||||
final_read_func=do_finish,
|
|
||||||
first_buff=first_buff)
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _decode_buff(buff, stream, encoding): # pragma: no coverage
|
def _decode_buff(buff, stream, encoding): # pragma: no coverage
|
||||||
try:
|
try:
|
||||||
@ -223,26 +192,6 @@ class RewriteContent:
|
|||||||
|
|
||||||
return buff
|
return buff
|
||||||
|
|
||||||
def _detect_charset(self, stream): # pragma: no coverage
|
|
||||||
full_buff = stream.read(8192)
|
|
||||||
io_buff = BytesIO(full_buff)
|
|
||||||
|
|
||||||
detector = UniversalDetector()
|
|
||||||
|
|
||||||
try:
|
|
||||||
buff = io_buff.read(256)
|
|
||||||
while buff:
|
|
||||||
detector.feed(buff)
|
|
||||||
if detector.done:
|
|
||||||
break
|
|
||||||
|
|
||||||
buff = io_buff.read(256)
|
|
||||||
finally:
|
|
||||||
detector.close()
|
|
||||||
|
|
||||||
print "chardet result: ", str(detector.result)
|
|
||||||
return (detector.result['encoding'], full_buff)
|
|
||||||
|
|
||||||
# Create a generator reading from a stream,
|
# Create a generator reading from a stream,
|
||||||
# with optional rewriting and final read call
|
# with optional rewriting and final read call
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -9,7 +9,7 @@ import logging
|
|||||||
|
|
||||||
from urlparse import urlsplit
|
from urlparse import urlsplit
|
||||||
|
|
||||||
from pywb.utils.loaders import is_http, LimitReader
|
from pywb.utils.loaders import is_http, LimitReader, BlockLoader
|
||||||
from pywb.utils.timeutils import datetime_to_timestamp
|
from pywb.utils.timeutils import datetime_to_timestamp
|
||||||
from pywb.utils.statusandheaders import StatusAndHeaders
|
from pywb.utils.statusandheaders import StatusAndHeaders
|
||||||
from pywb.utils.canonicalize import canonicalize
|
from pywb.utils.canonicalize import canonicalize
|
||||||
@ -30,7 +30,8 @@ class LiveRewriter(object):
|
|||||||
logging.debug('Live Rewrite Direct (no proxy)')
|
logging.debug('Live Rewrite Direct (no proxy)')
|
||||||
|
|
||||||
def fetch_local_file(self, uri):
|
def fetch_local_file(self, uri):
|
||||||
fh = open(uri)
|
#fh = open(uri)
|
||||||
|
fh = BlockLoader().load_file_or_resource(uri)
|
||||||
|
|
||||||
content_type, _ = mimetypes.guess_type(uri)
|
content_type, _ = mimetypes.guess_type(uri)
|
||||||
|
|
||||||
@ -118,7 +119,7 @@ class LiveRewriter(object):
|
|||||||
|
|
||||||
return (status_headers, stream)
|
return (status_headers, stream)
|
||||||
|
|
||||||
def fetch_request(self, wb_url, urlrewriter,
|
def fetch_request(self, url, urlrewriter,
|
||||||
head_insert_func=None,
|
head_insert_func=None,
|
||||||
urlkey=None,
|
urlkey=None,
|
||||||
env=None,
|
env=None,
|
||||||
@ -127,15 +128,11 @@ class LiveRewriter(object):
|
|||||||
follow_redirects=False,
|
follow_redirects=False,
|
||||||
proxies=None):
|
proxies=None):
|
||||||
|
|
||||||
if isinstance(wb_url, str):
|
|
||||||
url = wb_url
|
|
||||||
wb_url = WbUrl(url)
|
|
||||||
else:
|
|
||||||
url = wb_url.url
|
|
||||||
|
|
||||||
ts_err = url.split('///')
|
ts_err = url.split('///')
|
||||||
|
|
||||||
if len(ts_err) > 1:
|
# fixup for accidental erroneous rewrite which has ///
|
||||||
|
# (unless file:///)
|
||||||
|
if len(ts_err) > 1 and ts_err[0] != 'file:':
|
||||||
url = 'http://' + ts_err[1]
|
url = 'http://' + ts_err[1]
|
||||||
|
|
||||||
if url.startswith('//'):
|
if url.startswith('//'):
|
||||||
@ -164,8 +161,7 @@ class LiveRewriter(object):
|
|||||||
}
|
}
|
||||||
|
|
||||||
result = (self.rewriter.
|
result = (self.rewriter.
|
||||||
rewrite_content(wb_url,
|
rewrite_content(urlrewriter,
|
||||||
urlrewriter,
|
|
||||||
status_headers,
|
status_headers,
|
||||||
stream,
|
stream,
|
||||||
head_insert_func=head_insert_func,
|
head_insert_func=head_insert_func,
|
||||||
|
@ -99,6 +99,7 @@ ur"""
|
|||||||
>>> parse('<link href="abc.txt"><div>SomeTest</div>', head_insert = '<script>load_stuff();</script>')
|
>>> parse('<link href="abc.txt"><div>SomeTest</div>', head_insert = '<script>load_stuff();</script>')
|
||||||
<link href="/web/20131226101010oe_/http://example.com/some/path/abc.txt"><script>load_stuff();</script><div>SomeTest</div>
|
<link href="/web/20131226101010oe_/http://example.com/some/path/abc.txt"><script>load_stuff();</script><div>SomeTest</div>
|
||||||
|
|
||||||
|
# doctype
|
||||||
>>> parse('<!doctype html PUBLIC "public">')
|
>>> parse('<!doctype html PUBLIC "public">')
|
||||||
<!doctype html PUBLIC "public">
|
<!doctype html PUBLIC "public">
|
||||||
|
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
from pywb.rewrite.rewrite_live import LiveRewriter
|
from pywb.rewrite.rewrite_live import LiveRewriter
|
||||||
from pywb.rewrite.url_rewriter import UrlRewriter
|
from pywb.rewrite.url_rewriter import UrlRewriter
|
||||||
|
from pywb.rewrite.wburl import WbUrl
|
||||||
|
|
||||||
from pywb import get_test_dir
|
from pywb import get_test_dir
|
||||||
|
|
||||||
@ -9,6 +10,7 @@ from io import BytesIO
|
|||||||
# As such, the content may change and the test may break
|
# As such, the content may change and the test may break
|
||||||
|
|
||||||
urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html', '/pywb/')
|
urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html', '/pywb/')
|
||||||
|
bn_urlrewriter = UrlRewriter('20131226101010bn_/http://example.com/some/path/index.html', '/pywb/')
|
||||||
|
|
||||||
def head_insert_func(rule, cdx):
|
def head_insert_func(rule, cdx):
|
||||||
if rule.js_rewrite_location == True:
|
if rule.js_rewrite_location == True:
|
||||||
@ -33,6 +35,51 @@ def test_local_1():
|
|||||||
assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff
|
assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff
|
||||||
|
|
||||||
|
|
||||||
|
def test_local_no_head():
|
||||||
|
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample_no_head.html',
|
||||||
|
urlrewriter,
|
||||||
|
head_insert_func,
|
||||||
|
'com,example,test)/')
|
||||||
|
|
||||||
|
# wombat insert added
|
||||||
|
assert '<script src="/static/default/wombat.js"> </script>' in buff
|
||||||
|
|
||||||
|
# location rewritten
|
||||||
|
assert 'window.WB_wombat_location = "/other.html"' in buff
|
||||||
|
|
||||||
|
# link rewritten
|
||||||
|
assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff
|
||||||
|
|
||||||
|
def test_local_no_head_banner_only():
|
||||||
|
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample_no_head.html',
|
||||||
|
bn_urlrewriter,
|
||||||
|
head_insert_func,
|
||||||
|
'com,example,test)/')
|
||||||
|
|
||||||
|
# wombat insert added
|
||||||
|
assert '<script src="/static/default/wombat.js"> </script>' in buff
|
||||||
|
|
||||||
|
# location NOT rewritten
|
||||||
|
assert 'window.location = "/other.html"' in buff
|
||||||
|
|
||||||
|
# link NOT rewritten
|
||||||
|
assert '"another.html"' in buff
|
||||||
|
|
||||||
|
def test_local_banner_only():
|
||||||
|
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
|
||||||
|
bn_urlrewriter,
|
||||||
|
head_insert_func,
|
||||||
|
'com,example,test)/')
|
||||||
|
|
||||||
|
# wombat insert added
|
||||||
|
assert '<head><script src="/static/default/wombat.js"> </script>' in buff
|
||||||
|
|
||||||
|
# location NOT rewritten
|
||||||
|
assert 'window.location = "/other.html"' in buff
|
||||||
|
|
||||||
|
# link NOT rewritten
|
||||||
|
assert '"another.html"' in buff
|
||||||
|
|
||||||
def test_local_2_no_js_location_rewrite():
|
def test_local_2_no_js_location_rewrite():
|
||||||
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
|
status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html',
|
||||||
urlrewriter,
|
urlrewriter,
|
||||||
@ -76,8 +123,7 @@ def test_example_4_rewrite_err():
|
|||||||
assert status_headers.get_statuscode() == '200'
|
assert status_headers.get_statuscode() == '200'
|
||||||
|
|
||||||
def test_example_domain_specific_3():
|
def test_example_domain_specific_3():
|
||||||
urlrewriter2 = UrlRewriter('20131226101010/http://example.com/some/path/index.html', '/pywb/')
|
status_headers, buff = get_rewritten('http://facebook.com/digitalpreservation', urlrewriter, follow_redirects=True)
|
||||||
status_headers, buff = get_rewritten('http://facebook.com/digitalpreservation', urlrewriter2, follow_redirects=True)
|
|
||||||
|
|
||||||
# comment out bootloader
|
# comment out bootloader
|
||||||
assert '/* Bootloader.configurePage' in buff
|
assert '/* Bootloader.configurePage' in buff
|
||||||
|
@ -65,6 +65,9 @@
|
|||||||
>>> do_rewrite('mailto:example@example.com', '20131010/http://example.com/path/page.html', 'https://web.archive.org/web/')
|
>>> do_rewrite('mailto:example@example.com', '20131010/http://example.com/path/page.html', 'https://web.archive.org/web/')
|
||||||
'mailto:example@example.com'
|
'mailto:example@example.com'
|
||||||
|
|
||||||
|
>>> do_rewrite('file:///some/path/', '20131010/http://example.com/path/page.html', 'https://web.archive.org/web/')
|
||||||
|
'file:///some/path/'
|
||||||
|
|
||||||
>>> UrlRewriter('19960708im_/http://domain.example.com/path.txt', '/abc/').get_abs_url()
|
>>> UrlRewriter('19960708im_/http://domain.example.com/path.txt', '/abc/').get_abs_url()
|
||||||
'/abc/19960708im_/'
|
'/abc/19960708im_/'
|
||||||
|
|
||||||
@ -73,10 +76,10 @@
|
|||||||
|
|
||||||
|
|
||||||
# HttpsUrlRewriter tests
|
# HttpsUrlRewriter tests
|
||||||
>>> HttpsUrlRewriter(None, None).rewrite('https://example.com/abc')
|
>>> HttpsUrlRewriter('http://example.com/', None).rewrite('https://example.com/abc')
|
||||||
'http://example.com/abc'
|
'http://example.com/abc'
|
||||||
|
|
||||||
>>> HttpsUrlRewriter(None, None).rewrite('http://example.com/abc')
|
>>> HttpsUrlRewriter('http://example.com/', None).rewrite('http://example.com/abc')
|
||||||
'http://example.com/abc'
|
'http://example.com/abc'
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
@ -13,7 +13,8 @@ class UrlRewriter(object):
|
|||||||
instance and an optional full path prefix
|
instance and an optional full path prefix
|
||||||
"""
|
"""
|
||||||
|
|
||||||
NO_REWRITE_URI_PREFIX = ['#', 'javascript:', 'data:', 'mailto:', 'about:']
|
NO_REWRITE_URI_PREFIX = ['#', 'javascript:', 'data:',
|
||||||
|
'mailto:', 'about:', 'file:']
|
||||||
|
|
||||||
PROTOCOLS = ['http:', 'https:', 'ftp:', 'mms:', 'rtsp:', 'wais:']
|
PROTOCOLS = ['http:', 'https:', 'ftp:', 'mms:', 'rtsp:', 'wais:']
|
||||||
|
|
||||||
@ -125,7 +126,7 @@ class UrlRewriter(object):
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class HttpsUrlRewriter(object):
|
class HttpsUrlRewriter(UrlRewriter):
|
||||||
"""
|
"""
|
||||||
A url rewriter which urls that start with https:// to http://
|
A url rewriter which urls that start with https:// to http://
|
||||||
Other urls/input is unchanged.
|
Other urls/input is unchanged.
|
||||||
@ -134,9 +135,6 @@ class HttpsUrlRewriter(object):
|
|||||||
HTTP = 'http://'
|
HTTP = 'http://'
|
||||||
HTTPS = 'https://'
|
HTTPS = 'https://'
|
||||||
|
|
||||||
def __init__(self, wburl, prefix, full_prefix=None):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def rewrite(self, url, mod=None):
|
def rewrite(self, url, mod=None):
|
||||||
if url.startswith(self.HTTPS):
|
if url.startswith(self.HTTPS):
|
||||||
result = self.HTTP + url[len(self.HTTPS):]
|
result = self.HTTP + url[len(self.HTTPS):]
|
||||||
|
@ -1,3 +1,28 @@
|
|||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<script>
|
||||||
|
function ts_to_date(ts, is_gmt)
|
||||||
|
{
|
||||||
|
if (ts.length < 14) {
|
||||||
|
return ts;
|
||||||
|
}
|
||||||
|
|
||||||
|
var datestr = (ts.substring(0, 4) + "-" +
|
||||||
|
ts.substring(4, 6) + "-" +
|
||||||
|
ts.substring(6, 8) + "T" +
|
||||||
|
ts.substring(8, 10) + ":" +
|
||||||
|
ts.substring(10, 12) + ":" +
|
||||||
|
ts.substring(12, 14) + "-00:00");
|
||||||
|
|
||||||
|
var date = new Date(datestr);
|
||||||
|
if (is_gmt) {
|
||||||
|
return date.toGMTString();
|
||||||
|
} else {
|
||||||
|
return date.toLocaleString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
</script>
|
||||||
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<h2>pywb Sample Calendar Results</h2>
|
<h2>pywb Sample Calendar Results</h2>
|
||||||
<b>{{ cdx_lines | length }}</b> captures of <b>{{ url }}</b>
|
<b>{{ cdx_lines | length }}</b> captures of <b>{{ url }}</b>
|
||||||
@ -10,7 +35,9 @@
|
|||||||
</tr>
|
</tr>
|
||||||
{% for cdx in cdx_lines %}
|
{% for cdx in cdx_lines %}
|
||||||
<tr style="{{ 'font-weight: bold' if cdx['mimetype'] != 'warc/revisit' else '' }}">
|
<tr style="{{ 'font-weight: bold' if cdx['mimetype'] != 'warc/revisit' else '' }}">
|
||||||
<td><a href="{{ prefix }}{{ cdx.timestamp }}/{{ cdx.original }}">{{ cdx['timestamp'] | format_ts}}</a></td>
|
<td><a href="{{ prefix }}{{ cdx.timestamp }}/{{ cdx.original }}">
|
||||||
|
<script>document.write(ts_to_date("{{ cdx['timestamp']}}", true))</script>
|
||||||
|
</a></td>
|
||||||
<td>{{ cdx['statuscode'] }}</td>
|
<td>{{ cdx['statuscode'] }}</td>
|
||||||
<td>{{ cdx['original'] }}</td>
|
<td>{{ cdx['original'] }}</td>
|
||||||
<td>{{ cdx['filename'] }}</td>
|
<td>{{ cdx['filename'] }}</td>
|
||||||
@ -21,3 +48,4 @@
|
|||||||
<i><b>* Unique captures are bold.</b> Other captures are duplicates of a previous capture.</i>
|
<i><b>* Unique captures are bold.</b> Other captures are duplicates of a previous capture.</i>
|
||||||
</p>
|
</p>
|
||||||
</body>
|
</body>
|
||||||
|
</html>
|
||||||
|
@ -96,7 +96,7 @@ class BlockLoader(object):
|
|||||||
else:
|
else:
|
||||||
return self.load_file_or_resource(url, offset, length)
|
return self.load_file_or_resource(url, offset, length)
|
||||||
|
|
||||||
def load_file_or_resource(self, url, offset, length):
|
def load_file_or_resource(self, url, offset=0, length=-1):
|
||||||
"""
|
"""
|
||||||
Load a file-like reader from the local file system
|
Load a file-like reader from the local file system
|
||||||
"""
|
"""
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
#=================================================================
|
#=================================================================
|
||||||
"""
|
r"""
|
||||||
# LimitReader Tests
|
# LimitReader Tests
|
||||||
>>> LimitReader(BytesIO('abcdefghjiklmnopqrstuvwxyz'), 10).read(26)
|
>>> LimitReader(BytesIO('abcdefghjiklmnopqrstuvwxyz'), 10).read(26)
|
||||||
'abcdefghji'
|
'abcdefghji'
|
||||||
@ -32,10 +32,14 @@ True
|
|||||||
>>> BlockLoader(HMACCookieMaker('test', 'test', 5)).load('http://example.com', 41, 14).read()
|
>>> BlockLoader(HMACCookieMaker('test', 'test', 5)).load('http://example.com', 41, 14).read()
|
||||||
'Example Domain'
|
'Example Domain'
|
||||||
|
|
||||||
# fixed cookie
|
# fixed cookie, range request
|
||||||
>>> BlockLoader('some=value').load('http://example.com', 41, 14).read()
|
>>> BlockLoader('some=value').load('http://example.com', 41, 14).read()
|
||||||
'Example Domain'
|
'Example Domain'
|
||||||
|
|
||||||
|
# range request
|
||||||
|
>>> BlockLoader().load('http://example.com', 1262).read()
|
||||||
|
'</html>\n'
|
||||||
|
|
||||||
# test with extra id, ensure 4 parts of the A-B=C-D form are present
|
# test with extra id, ensure 4 parts of the A-B=C-D form are present
|
||||||
>>> len(re.split('[-=]', HMACCookieMaker('test', 'test', 5).make('extra')))
|
>>> len(re.split('[-=]', HMACCookieMaker('test', 'test', 5).make('extra')))
|
||||||
4
|
4
|
||||||
|
@ -38,6 +38,10 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
|||||||
return self.render_content(wbrequest)
|
return self.render_content(wbrequest)
|
||||||
|
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
|
import traceback
|
||||||
|
err_details = traceback.format_exc(exc)
|
||||||
|
print err_details
|
||||||
|
|
||||||
url = wbrequest.wb_url.url
|
url = wbrequest.wb_url.url
|
||||||
msg = 'Could not load the url from the live web: ' + url
|
msg = 'Could not load the url from the live web: ' + url
|
||||||
raise LiveResourceException(msg=msg, url=url)
|
raise LiveResourceException(msg=msg, url=url)
|
||||||
@ -53,8 +57,8 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
|||||||
if ref_wburl_str:
|
if ref_wburl_str:
|
||||||
wbrequest.env['REL_REFERER'] = WbUrl(ref_wburl_str).url
|
wbrequest.env['REL_REFERER'] = WbUrl(ref_wburl_str).url
|
||||||
|
|
||||||
wb_url = wbrequest.wb_url
|
result = self.rewriter.fetch_request(wbrequest.wb_url.url,
|
||||||
result = self.rewriter.fetch_request(wb_url, wbrequest.urlrewriter,
|
wbrequest.urlrewriter,
|
||||||
head_insert_func=head_insert_func,
|
head_insert_func=head_insert_func,
|
||||||
req_headers=req_headers,
|
req_headers=req_headers,
|
||||||
env=wbrequest.env)
|
env=wbrequest.env)
|
||||||
|
@ -130,8 +130,7 @@ class ReplayView(object):
|
|||||||
create_insert_func(wbrequest))
|
create_insert_func(wbrequest))
|
||||||
|
|
||||||
result = (self.content_rewriter.
|
result = (self.content_rewriter.
|
||||||
rewrite_content(wbrequest.wb_url,
|
rewrite_content(urlrewriter,
|
||||||
urlrewriter,
|
|
||||||
headers=status_headers,
|
headers=status_headers,
|
||||||
stream=stream,
|
stream=stream,
|
||||||
head_insert_func=head_insert_func,
|
head_insert_func=head_insert_func,
|
||||||
|
8
sample_archive/text_content/sample_no_head.html
Normal file
8
sample_archive/text_content/sample_no_head.html
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<script>
|
||||||
|
var some_val = false;
|
||||||
|
if (some_val) {
|
||||||
|
window.location = "/other.html";
|
||||||
|
}
|
||||||
|
</script>
|
||||||
|
Test Content
|
||||||
|
<a href="another.html">Some Link</a>
|
@ -301,6 +301,11 @@ class TestWb:
|
|||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
assert '"data": "^"' in resp.body
|
assert '"data": "^"' in resp.body
|
||||||
|
|
||||||
|
def test_post_invalid(self):
|
||||||
|
# not json
|
||||||
|
resp = self.testapp.post_json('/pywb/20140610001255mp_/http://httpbin.org/post?foo=bar', {'data': '^'}, status=404)
|
||||||
|
assert resp.status_int == 404
|
||||||
|
|
||||||
def test_post_redirect(self):
|
def test_post_redirect(self):
|
||||||
# post handled without redirect (since 307 not allowed)
|
# post handled without redirect (since 307 not allowed)
|
||||||
resp = self.testapp.post('/post', {'foo': 'bar', 'test': 'abc'}, headers=[('Referer', 'http://localhost:8080/pywb/2014mp_/http://httpbin.org/post')])
|
resp = self.testapp.post('/post', {'foo': 'bar', 'test': 'abc'}, headers=[('Referer', 'http://localhost:8080/pywb/2014mp_/http://httpbin.org/post')])
|
||||||
@ -308,7 +313,6 @@ class TestWb:
|
|||||||
assert '"foo": "bar"' in resp.body
|
assert '"foo": "bar"' in resp.body
|
||||||
assert '"test": "abc"' in resp.body
|
assert '"test": "abc"' in resp.body
|
||||||
|
|
||||||
|
|
||||||
def test_excluded_content(self):
|
def test_excluded_content(self):
|
||||||
resp = self.testapp.get('/pywb/mp_/http://www.iana.org/_img/bookmark_icon.ico', status = 403)
|
resp = self.testapp.get('/pywb/mp_/http://www.iana.org/_img/bookmark_icon.ico', status = 403)
|
||||||
assert resp.status_int == 403
|
assert resp.status_int == 403
|
||||||
|
@ -17,6 +17,13 @@ class TestLiveRewriter:
|
|||||||
resp = self.testapp.get('/rewrite/mp_/http://facebook.com/')
|
resp = self.testapp.get('/rewrite/mp_/http://facebook.com/')
|
||||||
assert resp.status_int == 301
|
assert resp.status_int == 301
|
||||||
|
|
||||||
|
def test_live_rewrite_post(self):
|
||||||
|
resp = self.testapp.post('/rewrite/mp_/httpbin.org/post', {'foo': 'bar', 'test': 'abc'})
|
||||||
|
assert resp.status_int == 200
|
||||||
|
assert '"foo": "bar"' in resp.body
|
||||||
|
assert '"test": "abc"' in resp.body
|
||||||
|
assert resp.status_int == 200
|
||||||
|
|
||||||
def test_live_rewrite_frame(self):
|
def test_live_rewrite_frame(self):
|
||||||
resp = self.testapp.get('/rewrite/http://example.com/')
|
resp = self.testapp.get('/rewrite/http://example.com/')
|
||||||
assert resp.status_int == 200
|
assert resp.status_int == 200
|
||||||
|
Loading…
x
Reference in New Issue
Block a user