1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

Merge branch 'develop' for 0.8.3

This commit is contained in:
Ilya Kreymer 2015-03-13 11:06:52 -07:00
commit b4b92482ad
8 changed files with 51 additions and 18 deletions

View File

@ -1,3 +1,15 @@
pywb 0.8.3 changelist
~~~~~~~~~~~~~~~~~~~~~
* cookie rewrite: all cookie rewriters remove ``secure`` flag to allow equivalent replay of sites with cookies via HTTP and HTTPS.
* html rewrite: fix ``<base>`` tag rewriting to add a trailing slash to the url if it is a hostname with no path, ex:
``<base href="http://example.com" />`` -> ``<base href="http://localhost:8080/rewrite/http://example.com/" />``
* framed replay: fix double slash that remained when rewriting top frame url.
pywb 0.8.2 changelist pywb 0.8.2 changelist
~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~

View File

@ -1,4 +1,4 @@
PyWb 0.8.2 PyWb 0.8.3
========== ==========
.. image:: https://travis-ci.org/ikreymer/pywb.png?branch=master .. image:: https://travis-ci.org/ikreymer/pywb.png?branch=master

View File

@ -32,6 +32,11 @@ class WbUrlBaseCookieRewriter(object):
if morsel.get('max-age'): if morsel.get('max-age'):
del morsel['max-age'] del morsel['max-age']
# for now, also remove secure to avoid issues when
# proxying over plain http (TODO: detect https?)
if morsel.get('secure'):
del morsel['secure']
#================================================================= #=================================================================
class MinimalScopeCookieRewriter(WbUrlBaseCookieRewriter): class MinimalScopeCookieRewriter(WbUrlBaseCookieRewriter):

View File

@ -5,6 +5,7 @@ import sys
import re import re
from HTMLParser import HTMLParser, HTMLParseError from HTMLParser import HTMLParser, HTMLParseError
from urlparse import urlsplit
from url_rewriter import UrlRewriter from url_rewriter import UrlRewriter
from regex_rewriters import JSRewriter, CSSRewriter from regex_rewriters import JSRewriter, CSSRewriter
@ -121,7 +122,22 @@ class HTMLRewriterMixin(object):
meta_refresh[m.end(1):]) meta_refresh[m.end(1):])
return meta_refresh return meta_refresh
# ===========================
def _rewrite_base(self, value, mod=''):
    """Rewrite the ``href`` value of a ``<base>`` tag.

    If the url names a host but has no path (eg. ``http://example.com``),
    a ``/`` path is added first. The slash is inserted directly after the
    host, before any query or fragment, so ``http://example.com?a=b``
    becomes ``http://example.com/?a=b`` instead of having the slash
    appended to the end of the query string.

    The url rewriter on this instance is then replaced with one rebased
    on the rewritten base url.

    :param value: original ``href`` value of the ``<base>`` tag
    :param mod: rewrite modifier passed through to ``_rewrite_url``
    :return: the rewritten url if the ``rewrite_base`` option is enabled
             (the default), otherwise the url with only the path fix
             applied
    """
    parts = urlsplit(value)

    # only a hostname with no path needs the slash,
    # eg http://example.com
    if parts.netloc and not parts.path:
        # an empty path sits right before the first '?' or '#',
        # or at the very end if there is neither
        path_pos = len(value)
        for delim in ('?', '#'):
            found = value.find(delim)
            if 0 <= found < path_pos:
                path_pos = found

        value = value[:path_pos] + '/' + value[path_pos:]

    base_value = self._rewrite_url(value, mod)

    if self.opts.get('rewrite_base', True):
        value = base_value

    # switch to a rewriter rebased on the rewritten base url
    self.url_rewriter = (self.url_rewriter.
                         rebase_rewriter(base_value))

    return value
def _rewrite_url(self, value, mod=None): def _rewrite_url(self, value, mod=None):
if value: if value:
@ -221,12 +237,7 @@ class HTMLRewriterMixin(object):
# special case: base tag # special case: base tag
elif (tag == 'base') and (attr_name == 'href') and attr_value: elif (tag == 'base') and (attr_name == 'href') and attr_value:
rw_mod = handler.get(attr_name) rw_mod = handler.get(attr_name)
base_value = self._rewrite_url(attr_value, rw_mod) attr_value = self._rewrite_base(attr_value, rw_mod)
if self.opts.get('rewrite_base', True):
attr_value = base_value
self.url_rewriter = (self.url_rewriter.
rebase_rewriter(base_value))
else: else:
# rewrite url using tag handler # rewrite url using tag handler
rw_mod = handler.get(attr_name) rw_mod = handler.get(attr_name)
@ -338,15 +349,15 @@ class HTMLRewriter(HTMLRewriterMixin, HTMLParser):
return s return s
def handle_starttag(self, tag, attrs): def handle_starttag(self, tag, attrs):
if not self._rewrite_tag_attrs(tag, attrs): self._rewrite_tag_attrs(tag, attrs)
self.out.write(self.get_starttag_text())
elif tag != 'head' or not self._rewrite_head(False): if tag != 'head' or not self._rewrite_head(False):
self.out.write('>') self.out.write('>')
def handle_startendtag(self, tag, attrs): def handle_startendtag(self, tag, attrs):
if not self._rewrite_tag_attrs(tag, attrs): self._rewrite_tag_attrs(tag, attrs)
self.out.write(self.get_starttag_text())
elif tag != 'head' or not self._rewrite_head(True): if tag != 'head' or not self._rewrite_head(True):
self.out.write('/>') self.out.write('/>')
def handle_endtag(self, tag): def handle_endtag(self, tag):

View File

@ -27,6 +27,10 @@ ur"""
>>> parse('<base href="static/"/><img src="image.gif"/>') >>> parse('<base href="static/"/><img src="image.gif"/>')
<base href="/web/20131226101010/http://example.com/some/path/static/"/><img src="/web/20131226101010im_/http://example.com/some/path/static/image.gif"/> <base href="/web/20131226101010/http://example.com/some/path/static/"/><img src="/web/20131226101010im_/http://example.com/some/path/static/image.gif"/>
# ensure trailing slash added
>>> parse('<base href="http://example.com"/>')
<base href="/web/20131226101010/http://example.com/"/>
# Base Tests -- no rewrite # Base Tests -- no rewrite
>>> parse('<html><head><base href="http://example.com/diff/path/file.html"/>', urlrewriter=no_base_canon_rewriter) >>> parse('<html><head><base href="http://example.com/diff/path/file.html"/>', urlrewriter=no_base_canon_rewriter)
<html><head><base href="http://example.com/diff/path/file.html"/> <html><head><base href="http://example.com/diff/path/file.html"/>

View File

@ -203,8 +203,9 @@ def test_example_4_rewrite_err():
def test_example_domain_specific_3(): def test_example_domain_specific_3():
status_headers, buff = get_rewritten('http://facebook.com/digitalpreservation', urlrewriter, follow_redirects=True) status_headers, buff = get_rewritten('http://facebook.com/digitalpreservation', urlrewriter, follow_redirects=True)
# comment out bootloader # comment out Bootloader.configurePage, if it is still there
assert '/* Bootloader.configurePage' in buff if 'Bootloader.configurePage' in buff:
assert '/* Bootloader.configurePage' in buff
def test_wombat_top(): def test_wombat_top():
#status_headers, buff = get_rewritten('https://assets-cdn.github.com/assets/github-0f06d0f46fe7bcfbf31f2380f23aec15ba21b8ec.js', urlrewriter) #status_headers, buff = get_rewritten('https://assets-cdn.github.com/assets/github-0f06d0f46fe7bcfbf31f2380f23aec15ba21b8ec.js', urlrewriter)

View File

@ -38,7 +38,7 @@ function make_inner_url(url, ts)
if (ts) { if (ts) {
return wbinfo.prefix + ts + "/" + url; return wbinfo.prefix + ts + "/" + url;
} else { } else {
return wbinfo.prefix + "/" + url; return wbinfo.prefix + url;
} }
} }

View File

@ -34,7 +34,7 @@ class PyTest(TestCommand):
setup( setup(
name='pywb', name='pywb',
version='0.8.2', version='0.8.3',
url='https://github.com/ikreymer/pywb', url='https://github.com/ikreymer/pywb',
author='Ilya Kreymer', author='Ilya Kreymer',
author_email='ikreymer@gmail.com', author_email='ikreymer@gmail.com',