mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
fixes for 2.6 and pypy
This commit is contained in:
parent
2d48f2d733
commit
cc22448cc5
@ -1,12 +1,12 @@
|
|||||||
language: python
|
language: python
|
||||||
python:
|
python:
|
||||||
- "2.7"
|
|
||||||
- "2.6"
|
- "2.6"
|
||||||
|
- "2.7"
|
||||||
- "pypy"
|
- "pypy"
|
||||||
# command to install dependencies
|
# command to install dependencies
|
||||||
install:
|
install:
|
||||||
- python setup.py -q install
|
- python setup.py -q install
|
||||||
- pip install tox coverage pytest-cov coveralls --use-mirrors
|
- pip install coverage pytest-cov coveralls --use-mirrors --allow-external
|
||||||
# command to run tests
|
# command to run tests
|
||||||
#script: nosetests --with-doctest
|
#script: nosetests --with-doctest
|
||||||
#script: py.test run-tests.py ./pywb/ --doctest-modules --ignore=setup.py
|
#script: py.test run-tests.py ./pywb/ --doctest-modules --ignore=setup.py
|
||||||
|
@ -1,4 +1,8 @@
|
|||||||
from collections import OrderedDict
|
try:
|
||||||
|
from collections import OrderedDict
|
||||||
|
except ImportError:
|
||||||
|
from ordereddict import OrderedDict
|
||||||
|
|
||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
from urllib import urlencode
|
from urllib import urlencode
|
||||||
|
@ -90,7 +90,8 @@ import pprint
|
|||||||
|
|
||||||
def print_req(req):
|
def print_req(req):
|
||||||
varlist = vars(req)
|
varlist = vars(req)
|
||||||
pprint.pprint({k: varlist[k] for k in ('request_uri', 'wb_prefix', 'wb_url', 'coll')})
|
the_dict = dict((k, varlist[k]) for k in ('request_uri', 'wb_prefix', 'wb_url', 'coll'))
|
||||||
|
pprint.pprint(the_dict)
|
||||||
|
|
||||||
|
|
||||||
def _test_redir(match_host, request_uri, referrer, script_name = '', coll = 'coll', http_host = None):
|
def _test_redir(match_host, request_uri, referrer, script_name = '', coll = 'coll', http_host = None):
|
||||||
|
@ -47,7 +47,8 @@ from pywb.framework.wbrequestresponse import WbRequest, WbResponse
|
|||||||
def print_req_from_uri(request_uri, env={}, use_abs_prefix=False):
|
def print_req_from_uri(request_uri, env={}, use_abs_prefix=False):
|
||||||
response = req_from_uri(request_uri, env, use_abs_prefix)
|
response = req_from_uri(request_uri, env, use_abs_prefix)
|
||||||
varlist = vars(response)
|
varlist = vars(response)
|
||||||
print str({k: varlist[k] for k in ('request_uri', 'wb_prefix', 'wb_url', 'coll')})
|
the_dict = dict((k, varlist[k]) for k in ('request_uri', 'wb_prefix', 'wb_url', 'coll'))
|
||||||
|
print the_dict
|
||||||
|
|
||||||
|
|
||||||
def req_from_uri(request_uri, env={}, use_abs_prefix=False):
|
def req_from_uri(request_uri, env={}, use_abs_prefix=False):
|
||||||
|
@ -6,7 +6,6 @@ from wbrequestresponse import WbResponse, StatusAndHeaders
|
|||||||
|
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import importlib
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
|
||||||
|
@ -4,7 +4,8 @@
|
|||||||
import sys
|
import sys
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from HTMLParser import HTMLParser
|
from HTMLParser import HTMLParser, HTMLParseError
|
||||||
|
|
||||||
from url_rewriter import UrlRewriter
|
from url_rewriter import UrlRewriter
|
||||||
from regex_rewriters import JSRewriter, CSSRewriter
|
from regex_rewriters import JSRewriter, CSSRewriter
|
||||||
|
|
||||||
@ -181,7 +182,10 @@ class HTMLRewriter(HTMLParser):
|
|||||||
if not self.out:
|
if not self.out:
|
||||||
self.out = self.AccumBuff()
|
self.out = self.AccumBuff()
|
||||||
|
|
||||||
self.feed(string)
|
try:
|
||||||
|
self.feed(string)
|
||||||
|
except HTMLParseError:
|
||||||
|
self.out.write(string)
|
||||||
|
|
||||||
result = self.out.buff
|
result = self.out.buff
|
||||||
# Clear buffer to create new one for next rewrite()
|
# Clear buffer to create new one for next rewrite()
|
||||||
@ -197,7 +201,11 @@ class HTMLRewriter(HTMLParser):
|
|||||||
else:
|
else:
|
||||||
result = ''
|
result = ''
|
||||||
|
|
||||||
HTMLParser.close(self)
|
try:
|
||||||
|
HTMLParser.close(self)
|
||||||
|
except HTMLParseError:
|
||||||
|
pass
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def handle_starttag(self, tag, attrs):
|
def handle_starttag(self, tag, attrs):
|
||||||
@ -238,6 +246,3 @@ class HTMLRewriter(HTMLParser):
|
|||||||
self.out.write('<![')
|
self.out.write('<![')
|
||||||
self.parse_data(data)
|
self.parse_data(data)
|
||||||
self.out.write(']>')
|
self.out.write(']>')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -16,8 +16,9 @@ r"""
|
|||||||
>>> parse('<body x="y"><img src="/img.gif"/><br/></body>')
|
>>> parse('<body x="y"><img src="/img.gif"/><br/></body>')
|
||||||
<body x="y"><img src="/web/20131226101010im_/http://example.com/img.gif"/><br/></body>
|
<body x="y"><img src="/web/20131226101010im_/http://example.com/img.gif"/><br/></body>
|
||||||
|
|
||||||
>>> parse('<input "selected"><img src></div>')
|
# malformed html -- (2.6 parser raises exception)
|
||||||
<input "selected"=""><img src=""></div>
|
#>>> parse('<input "selected"><img src></div>')
|
||||||
|
#<input "selected"=""><img src=""></div>
|
||||||
|
|
||||||
>>> parse('<html><head><base href="http://example.com/some/path/index.html"/>')
|
>>> parse('<html><head><base href="http://example.com/some/path/index.html"/>')
|
||||||
<html><head><base href="/web/20131226101010/http://example.com/some/path/index.html"/>
|
<html><head><base href="/web/20131226101010/http://example.com/some/path/index.html"/>
|
||||||
@ -197,26 +198,39 @@ HTTP Headers Rewriting
|
|||||||
|
|
||||||
# Text with charset
|
# Text with charset
|
||||||
>>> _test_headers([('Date', 'Fri, 03 Jan 2014 03:03:21 GMT'), ('Content-Length', '5'), ('Content-Type', 'text/html;charset=UTF-8')])
|
>>> _test_headers([('Date', 'Fri, 03 Jan 2014 03:03:21 GMT'), ('Content-Length', '5'), ('Content-Type', 'text/html;charset=UTF-8')])
|
||||||
{'text_type': 'html', 'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [ ('X-Archive-Orig-Date', 'Fri, 03 Jan 2014 03:03:21 GMT'),
|
{'charset': 'utf-8',
|
||||||
|
'removed_header_dict': {},
|
||||||
|
'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [ ('X-Archive-Orig-Date', 'Fri, 03 Jan 2014 03:03:21 GMT'),
|
||||||
('X-Archive-Orig-Content-Length', '5'),
|
('X-Archive-Orig-Content-Length', '5'),
|
||||||
('Content-Type', 'text/html;charset=UTF-8')]), 'removed_header_dict': {}, 'charset': 'utf-8'}
|
('Content-Type', 'text/html;charset=UTF-8')]),
|
||||||
|
'text_type': 'html'}
|
||||||
|
|
||||||
# Redirect
|
# Redirect
|
||||||
>>> _test_headers([('Connection', 'close'), ('Location', '/other.html')], '302 Redirect')
|
>>> _test_headers([('Connection', 'close'), ('Location', '/other.html')], '302 Redirect')
|
||||||
{'text_type': None, 'status_headers': StatusAndHeaders(protocol = '', statusline = '302 Redirect', headers = [ ('X-Archive-Orig-Connection', 'close'),
|
{'charset': None,
|
||||||
('Location', '/web/20131226101010/http://example.com/other.html')]), 'removed_header_dict': {}, 'charset': None}
|
'removed_header_dict': {},
|
||||||
|
'status_headers': StatusAndHeaders(protocol = '', statusline = '302 Redirect', headers = [ ('X-Archive-Orig-Connection', 'close'),
|
||||||
|
('Location', '/web/20131226101010/http://example.com/other.html')]),
|
||||||
|
'text_type': None}
|
||||||
|
|
||||||
# gzip
|
# gzip
|
||||||
>>> _test_headers([('Content-Length', '199999'), ('Content-Type', 'text/javascript'), ('Content-Encoding', 'gzip'), ('Transfer-Encoding', 'chunked')])
|
>>> _test_headers([('Content-Length', '199999'), ('Content-Type', 'text/javascript'), ('Content-Encoding', 'gzip'), ('Transfer-Encoding', 'chunked')])
|
||||||
{'text_type': 'js', 'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [ ('X-Archive-Orig-Content-Length', '199999'),
|
{'charset': None,
|
||||||
('Content-Type', 'text/javascript')]), 'removed_header_dict': {'transfer-encoding': 'chunked', 'content-encoding': 'gzip'}, 'charset': None}
|
'removed_header_dict': {'content-encoding': 'gzip',
|
||||||
|
'transfer-encoding': 'chunked'},
|
||||||
|
'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [ ('X-Archive-Orig-Content-Length', '199999'),
|
||||||
|
('Content-Type', 'text/javascript')]),
|
||||||
|
'text_type': 'js'}
|
||||||
|
|
||||||
# Binary
|
# Binary
|
||||||
>>> _test_headers([('Content-Length', '200000'), ('Content-Type', 'image/png'), ('Cookie', 'blah'), ('Content-Encoding', 'gzip'), ('Transfer-Encoding', 'chunked')])
|
>>> _test_headers([('Content-Length', '200000'), ('Content-Type', 'image/png'), ('Cookie', 'blah'), ('Content-Encoding', 'gzip'), ('Transfer-Encoding', 'chunked')])
|
||||||
{'text_type': None, 'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [ ('Content-Length', '200000'),
|
{'charset': None,
|
||||||
|
'removed_header_dict': {'transfer-encoding': 'chunked'},
|
||||||
|
'status_headers': StatusAndHeaders(protocol = '', statusline = '200 OK', headers = [ ('Content-Length', '200000'),
|
||||||
('Content-Type', 'image/png'),
|
('Content-Type', 'image/png'),
|
||||||
('X-Archive-Orig-Cookie', 'blah'),
|
('X-Archive-Orig-Cookie', 'blah'),
|
||||||
('Content-Encoding', 'gzip')]), 'removed_header_dict': {'transfer-encoding': 'chunked'}, 'charset': None}
|
('Content-Encoding', 'gzip')]),
|
||||||
|
'text_type': None}
|
||||||
|
|
||||||
Removing Transfer-Encoding always, Was:
|
Removing Transfer-Encoding always, Was:
|
||||||
('Content-Encoding', 'gzip'),
|
('Content-Encoding', 'gzip'),
|
||||||
@ -233,6 +247,7 @@ from pywb.rewrite.header_rewriter import HeaderRewriter
|
|||||||
|
|
||||||
from pywb.utils.statusandheaders import StatusAndHeaders
|
from pywb.utils.statusandheaders import StatusAndHeaders
|
||||||
|
|
||||||
|
import pprint
|
||||||
|
|
||||||
urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html', '/web/')
|
urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html', '/web/')
|
||||||
|
|
||||||
@ -256,7 +271,7 @@ headerrewriter = HeaderRewriter()
|
|||||||
|
|
||||||
def _test_headers(headers, status = '200 OK'):
|
def _test_headers(headers, status = '200 OK'):
|
||||||
rewritten = headerrewriter.rewrite(StatusAndHeaders(status, headers), urlrewriter)
|
rewritten = headerrewriter.rewrite(StatusAndHeaders(status, headers), urlrewriter)
|
||||||
return vars(rewritten)
|
return pprint.pprint(vars(rewritten))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
Loading…
x
Reference in New Issue
Block a user