From cc22448cc5dddcfe7ebaf1c809a1c14c8ca3688b Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Tue, 4 Mar 2014 18:49:36 -0800 Subject: [PATCH] fixes for 2.6 and pypy --- .travis.yml | 4 +- pywb/cdx/cdxobject.py | 6 ++- pywb/framework/test/test_archivalrouter.py | 3 +- pywb/framework/test/test_wbrequestresponse.py | 3 +- pywb/framework/wsgi_wrappers.py | 1 - pywb/rewrite/html_rewriter.py | 17 ++++++--- pywb/rewrite/test/test_rewrite.py | 37 +++++++++++++------ 7 files changed, 48 insertions(+), 23 deletions(-) diff --git a/.travis.yml b/.travis.yml index de435de6..a5f79f64 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,12 +1,12 @@ language: python python: - - "2.7" - "2.6" + - "2.7" - "pypy" # command to install dependencies install: - python setup.py -q install - - pip install tox coverage pytest-cov coveralls --use-mirrors + - pip install coverage pytest-cov coveralls --use-mirrors --allow-external # command to run tests #script: nosetests --with-doctest #script: py.test run-tests.py ./pywb/ --doctest-modules --ignore=setup.py diff --git a/pywb/cdx/cdxobject.py b/pywb/cdx/cdxobject.py index cf7a5d79..6b7dfdfe 100644 --- a/pywb/cdx/cdxobject.py +++ b/pywb/cdx/cdxobject.py @@ -1,4 +1,8 @@ -from collections import OrderedDict +try: + from collections import OrderedDict +except ImportError: + from ordereddict import OrderedDict + import itertools from urllib import urlencode diff --git a/pywb/framework/test/test_archivalrouter.py b/pywb/framework/test/test_archivalrouter.py index 706027ba..b27f5f45 100644 --- a/pywb/framework/test/test_archivalrouter.py +++ b/pywb/framework/test/test_archivalrouter.py @@ -90,7 +90,8 @@ import pprint def print_req(req): varlist = vars(req) - pprint.pprint({k: varlist[k] for k in ('request_uri', 'wb_prefix', 'wb_url', 'coll')}) + the_dict = dict((k, varlist[k]) for k in ('request_uri', 'wb_prefix', 'wb_url', 'coll')) + pprint.pprint(the_dict) def _test_redir(match_host, request_uri, referrer, script_name = '', coll = 'coll', http_host = None): diff --git a/pywb/framework/test/test_wbrequestresponse.py b/pywb/framework/test/test_wbrequestresponse.py index 977a8863..e9a4ca9e 100644 --- a/pywb/framework/test/test_wbrequestresponse.py +++ b/pywb/framework/test/test_wbrequestresponse.py @@ -47,7 +47,8 @@ from pywb.framework.wbrequestresponse import WbRequest, WbResponse def print_req_from_uri(request_uri, env={}, use_abs_prefix=False): response = req_from_uri(request_uri, env, use_abs_prefix) varlist = vars(response) - print str({k: varlist[k] for k in ('request_uri', 'wb_prefix', 'wb_url', 'coll')}) + the_dict = dict((k, varlist[k]) for k in ('request_uri', 'wb_prefix', 'wb_url', 'coll')) + print the_dict def req_from_uri(request_uri, env={}, use_abs_prefix=False): diff --git a/pywb/framework/wsgi_wrappers.py b/pywb/framework/wsgi_wrappers.py index 1c7532ce..f7b97e4f 100644 --- a/pywb/framework/wsgi_wrappers.py +++ b/pywb/framework/wsgi_wrappers.py @@ -6,7 +6,6 @@ from wbrequestresponse import WbResponse, StatusAndHeaders import os -import importlib import logging diff --git a/pywb/rewrite/html_rewriter.py b/pywb/rewrite/html_rewriter.py index c6eeab23..a6d9718d 100644 --- a/pywb/rewrite/html_rewriter.py +++ b/pywb/rewrite/html_rewriter.py @@ -4,7 +4,8 @@ import sys import re -from HTMLParser import HTMLParser +from HTMLParser import HTMLParser, HTMLParseError + from url_rewriter import UrlRewriter from regex_rewriters import JSRewriter, CSSRewriter @@ -181,7 +182,10 @@ class HTMLRewriter(HTMLParser): if not self.out: self.out = self.AccumBuff() - self.feed(string) + try: + self.feed(string) + except HTMLParseError: + self.out.write(string) result = self.out.buff # Clear buffer to create new one for next rewrite() @@ -197,7 +201,11 @@ class HTMLRewriter(HTMLParser): else: result = '' - HTMLParser.close(self) + try: + HTMLParser.close(self) + except HTMLParseError: + pass + return result def handle_starttag(self, tag, attrs): @@ -238,6 +246,3 @@ class HTMLRewriter(HTMLParser): self.out.write('') - - - diff --git a/pywb/rewrite/test/test_rewrite.py b/pywb/rewrite/test/test_rewrite.py index 7498e601..6915e26f 100644 --- a/pywb/rewrite/test/test_rewrite.py +++ b/pywb/rewrite/test/test_rewrite.py @@ -16,8 +16,9 @@ r""" >>> parse('
')
->>> parse('') - +# malformed html -- (2.6 parser raises exception) +#>>> parse('') +# >>> parse('') @@ -197,26 +198,39 @@ HTTP Headers Rewriting # Text with charset >>> _test_headers([('Date', 'Fri, 03 Jan 2014 03:03:21 GMT'), ('Content-Length', '5'), ('Content-Type', 'text/html;charset=UTF-8')]) -{'text_type': 'html', 'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [ ('X-Archive-Orig-Date', 'Fri, 03 Jan 2014 03:03:21 GMT'), +{'charset': 'utf-8', + 'removed_header_dict': {}, + 'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [ ('X-Archive-Orig-Date', 'Fri, 03 Jan 2014 03:03:21 GMT'), ('X-Archive-Orig-Content-Length', '5'), - ('Content-Type', 'text/html;charset=UTF-8')]), 'removed_header_dict': {}, 'charset': 'utf-8'} + ('Content-Type', 'text/html;charset=UTF-8')]), + 'text_type': 'html'} # Redirect >>> _test_headers([('Connection', 'close'), ('Location', '/other.html')], '302 Redirect') -{'text_type': None, 'status_headers': StatusAndHeaders(protocol = '', statusline = '302 Redirect', headers = [ ('X-Archive-Orig-Connection', 'close'), - ('Location', '/web/20131226101010/http://example.com/other.html')]), 'removed_header_dict': {}, 'charset': None} +{'charset': None, + 'removed_header_dict': {}, + 'status_headers': StatusAndHeaders(protocol = '', statusline = '302 Redirect', headers = [ ('X-Archive-Orig-Connection', 'close'), + ('Location', '/web/20131226101010/http://example.com/other.html')]), + 'text_type': None} # gzip >>> _test_headers([('Content-Length', '199999'), ('Content-Type', 'text/javascript'), ('Content-Encoding', 'gzip'), ('Transfer-Encoding', 'chunked')]) -{'text_type': 'js', 'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [ ('X-Archive-Orig-Content-Length', '199999'), - ('Content-Type', 'text/javascript')]), 'removed_header_dict': {'transfer-encoding': 'chunked', 'content-encoding': 'gzip'}, 'charset': None} +{'charset': None, + 'removed_header_dict': {'content-encoding': 'gzip', + 'transfer-encoding': 'chunked'}, + 'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [ ('X-Archive-Orig-Content-Length', '199999'), + ('Content-Type', 'text/javascript')]), + 'text_type': 'js'} # Binary >>> _test_headers([('Content-Length', '200000'), ('Content-Type', 'image/png'), ('Cookie', 'blah'), ('Content-Encoding', 'gzip'), ('Transfer-Encoding', 'chunked')]) -{'text_type': None, 'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [ ('Content-Length', '200000'), +{'charset': None, + 'removed_header_dict': {'transfer-encoding': 'chunked'}, + 'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [ ('Content-Length', '200000'), ('Content-Type', 'image/png'), ('X-Archive-Orig-Cookie', 'blah'), - ('Content-Encoding', 'gzip')]), 'removed_header_dict': {'transfer-encoding': 'chunked'}, 'charset': None} + ('Content-Encoding', 'gzip')]), + 'text_type': None} Removing Transfer-Encoding always, Was: ('Content-Encoding', 'gzip'), @@ -233,6 +247,7 @@ from pywb.rewrite.header_rewriter import HeaderRewriter from pywb.utils.statusandheaders import StatusAndHeaders +import pprint urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html', '/web/') @@ -256,7 +271,7 @@ headerrewriter = HeaderRewriter() def _test_headers(headers, status = '200 OK'): rewritten = headerrewriter.rewrite(StatusAndHeaders(status, headers), urlrewriter) - return vars(rewritten) + return pprint.pprint(vars(rewritten)) if __name__ == "__main__":