mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Merge branch 'develop' for 0.8.3
This commit is contained in:
commit
b4b92482ad
12
CHANGES.rst
12
CHANGES.rst
@ -1,3 +1,15 @@
|
|||||||
|
pywb 0.8.3 changelist
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
* cookie rewrite: all cookie rewriters remove ``secure`` flag to allow equivalent replay of sites with cookies via HTTP and HTTPS.
|
||||||
|
|
||||||
|
* html rewrite: fix ``<base>`` tag rewriting to add a trailing slash to the url if it is a hostname with no path, ex:
|
||||||
|
|
||||||
|
``<base href="http://example.com" />`` -> ``<base href="http://localhost:8080/rewrite/http://example.com/" />``
|
||||||
|
|
||||||
|
* framed replay: fix double slash that remained when rewriting top frame url.
|
||||||
|
|
||||||
|
|
||||||
pywb 0.8.2 changelist
|
pywb 0.8.2 changelist
|
||||||
~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
PyWb 0.8.2
|
PyWb 0.8.3
|
||||||
==========
|
==========
|
||||||
|
|
||||||
.. image:: https://travis-ci.org/ikreymer/pywb.png?branch=master
|
.. image:: https://travis-ci.org/ikreymer/pywb.png?branch=master
|
||||||
|
@ -32,6 +32,11 @@ class WbUrlBaseCookieRewriter(object):
|
|||||||
if morsel.get('max-age'):
|
if morsel.get('max-age'):
|
||||||
del morsel['max-age']
|
del morsel['max-age']
|
||||||
|
|
||||||
|
# for now, also remove secure to avoid issues when
|
||||||
|
# proxying over plain http (TODO: detect https?)
|
||||||
|
if morsel.get('secure'):
|
||||||
|
del morsel['secure']
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class MinimalScopeCookieRewriter(WbUrlBaseCookieRewriter):
|
class MinimalScopeCookieRewriter(WbUrlBaseCookieRewriter):
|
||||||
|
@ -5,6 +5,7 @@ import sys
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from HTMLParser import HTMLParser, HTMLParseError
|
from HTMLParser import HTMLParser, HTMLParseError
|
||||||
|
from urlparse import urlsplit
|
||||||
|
|
||||||
from url_rewriter import UrlRewriter
|
from url_rewriter import UrlRewriter
|
||||||
from regex_rewriters import JSRewriter, CSSRewriter
|
from regex_rewriters import JSRewriter, CSSRewriter
|
||||||
@ -121,7 +122,22 @@ class HTMLRewriterMixin(object):
|
|||||||
meta_refresh[m.end(1):])
|
meta_refresh[m.end(1):])
|
||||||
|
|
||||||
return meta_refresh
|
return meta_refresh
|
||||||
# ===========================
|
|
||||||
|
def _rewrite_base(self, value, mod=''):
|
||||||
|
if not value.endswith('/'):
|
||||||
|
# check if hostname with no path,
|
||||||
|
# eg http://example.com
|
||||||
|
if not urlsplit(value).path:
|
||||||
|
value += '/'
|
||||||
|
|
||||||
|
base_value = self._rewrite_url(value, mod)
|
||||||
|
|
||||||
|
if self.opts.get('rewrite_base', True):
|
||||||
|
value = base_value
|
||||||
|
|
||||||
|
self.url_rewriter = (self.url_rewriter.
|
||||||
|
rebase_rewriter(base_value))
|
||||||
|
return value
|
||||||
|
|
||||||
def _rewrite_url(self, value, mod=None):
|
def _rewrite_url(self, value, mod=None):
|
||||||
if value:
|
if value:
|
||||||
@ -221,12 +237,7 @@ class HTMLRewriterMixin(object):
|
|||||||
# special case: base tag
|
# special case: base tag
|
||||||
elif (tag == 'base') and (attr_name == 'href') and attr_value:
|
elif (tag == 'base') and (attr_name == 'href') and attr_value:
|
||||||
rw_mod = handler.get(attr_name)
|
rw_mod = handler.get(attr_name)
|
||||||
base_value = self._rewrite_url(attr_value, rw_mod)
|
attr_value = self._rewrite_base(attr_value, rw_mod)
|
||||||
if self.opts.get('rewrite_base', True):
|
|
||||||
attr_value = base_value
|
|
||||||
self.url_rewriter = (self.url_rewriter.
|
|
||||||
rebase_rewriter(base_value))
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# rewrite url using tag handler
|
# rewrite url using tag handler
|
||||||
rw_mod = handler.get(attr_name)
|
rw_mod = handler.get(attr_name)
|
||||||
@ -338,15 +349,15 @@ class HTMLRewriter(HTMLRewriterMixin, HTMLParser):
|
|||||||
return s
|
return s
|
||||||
|
|
||||||
def handle_starttag(self, tag, attrs):
|
def handle_starttag(self, tag, attrs):
|
||||||
if not self._rewrite_tag_attrs(tag, attrs):
|
self._rewrite_tag_attrs(tag, attrs)
|
||||||
self.out.write(self.get_starttag_text())
|
|
||||||
elif tag != 'head' or not self._rewrite_head(False):
|
if tag != 'head' or not self._rewrite_head(False):
|
||||||
self.out.write('>')
|
self.out.write('>')
|
||||||
|
|
||||||
def handle_startendtag(self, tag, attrs):
|
def handle_startendtag(self, tag, attrs):
|
||||||
if not self._rewrite_tag_attrs(tag, attrs):
|
self._rewrite_tag_attrs(tag, attrs)
|
||||||
self.out.write(self.get_starttag_text())
|
|
||||||
elif tag != 'head' or not self._rewrite_head(True):
|
if tag != 'head' or not self._rewrite_head(True):
|
||||||
self.out.write('/>')
|
self.out.write('/>')
|
||||||
|
|
||||||
def handle_endtag(self, tag):
|
def handle_endtag(self, tag):
|
||||||
|
@ -27,6 +27,10 @@ ur"""
|
|||||||
>>> parse('<base href="static/"/><img src="image.gif"/>')
|
>>> parse('<base href="static/"/><img src="image.gif"/>')
|
||||||
<base href="/web/20131226101010/http://example.com/some/path/static/"/><img src="/web/20131226101010im_/http://example.com/some/path/static/image.gif"/>
|
<base href="/web/20131226101010/http://example.com/some/path/static/"/><img src="/web/20131226101010im_/http://example.com/some/path/static/image.gif"/>
|
||||||
|
|
||||||
|
# ensure trailing slash added
|
||||||
|
>>> parse('<base href="http://example.com"/>')
|
||||||
|
<base href="/web/20131226101010/http://example.com/"/>
|
||||||
|
|
||||||
# Base Tests -- no rewrite
|
# Base Tests -- no rewrite
|
||||||
>>> parse('<html><head><base href="http://example.com/diff/path/file.html"/>', urlrewriter=no_base_canon_rewriter)
|
>>> parse('<html><head><base href="http://example.com/diff/path/file.html"/>', urlrewriter=no_base_canon_rewriter)
|
||||||
<html><head><base href="http://example.com/diff/path/file.html"/>
|
<html><head><base href="http://example.com/diff/path/file.html"/>
|
||||||
|
@ -203,8 +203,9 @@ def test_example_4_rewrite_err():
|
|||||||
def test_example_domain_specific_3():
|
def test_example_domain_specific_3():
|
||||||
status_headers, buff = get_rewritten('http://facebook.com/digitalpreservation', urlrewriter, follow_redirects=True)
|
status_headers, buff = get_rewritten('http://facebook.com/digitalpreservation', urlrewriter, follow_redirects=True)
|
||||||
|
|
||||||
# comment out bootloader
|
# comment out Bootloader.configurePage, if it is still there
|
||||||
assert '/* Bootloader.configurePage' in buff
|
if 'Bootloader.configurePage' in buff:
|
||||||
|
assert '/* Bootloader.configurePage' in buff
|
||||||
|
|
||||||
def test_wombat_top():
|
def test_wombat_top():
|
||||||
#status_headers, buff = get_rewritten('https://assets-cdn.github.com/assets/github-0f06d0f46fe7bcfbf31f2380f23aec15ba21b8ec.js', urlrewriter)
|
#status_headers, buff = get_rewritten('https://assets-cdn.github.com/assets/github-0f06d0f46fe7bcfbf31f2380f23aec15ba21b8ec.js', urlrewriter)
|
||||||
|
@ -38,7 +38,7 @@ function make_inner_url(url, ts)
|
|||||||
if (ts) {
|
if (ts) {
|
||||||
return wbinfo.prefix + ts + "/" + url;
|
return wbinfo.prefix + ts + "/" + url;
|
||||||
} else {
|
} else {
|
||||||
return wbinfo.prefix + "/" + url;
|
return wbinfo.prefix + url;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
2
setup.py
2
setup.py
@ -34,7 +34,7 @@ class PyTest(TestCommand):
|
|||||||
|
|
||||||
setup(
|
setup(
|
||||||
name='pywb',
|
name='pywb',
|
||||||
version='0.8.2',
|
version='0.8.3',
|
||||||
url='https://github.com/ikreymer/pywb',
|
url='https://github.com/ikreymer/pywb',
|
||||||
author='Ilya Kreymer',
|
author='Ilya Kreymer',
|
||||||
author_email='ikreymer@gmail.com',
|
author_email='ikreymer@gmail.com',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user