1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 06:59:52 +01:00

Merge branch 'develop' for 0.7.5

This commit is contained in:
Ilya Kreymer 2015-01-12 00:50:16 -08:00
commit c935aa5ec9
28 changed files with 158 additions and 75 deletions

4
.gitattributes vendored Normal file
View File

@ -0,0 +1,4 @@
*.arc -text
*.warc -text
*.cdx -text
*.gz -text

View File

@ -1,3 +1,16 @@
pywb 0.7.5 changelist
~~~~~~~~~~~~~~~~~~~~~
* Cross-platform fixes to support Windows -- all tests pass on Linux, OS X and Windows now. Improved cross-platform support includes:
- read all files as binary to avoid line ending issues
- properly convert url <-> file
- avoid platform-dependent APIs
* Change any unhandled exceptions to result in a 500 error, instead of 400.
* More comprehensive client-side ``src`` attribute rewriting (via wombat.js), additional server-side HTML tag rewriting.
pywb 0.7.2 changelist pywb 0.7.2 changelist
~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~

View File

@ -1,4 +1,4 @@
PyWb 0.7.2 PyWb 0.7.5
========== ==========
.. image:: https://travis-ci.org/ikreymer/pywb.png?branch=master .. image:: https://travis-ci.org/ikreymer/pywb.png?branch=master
@ -13,7 +13,7 @@ pywb is a python implementation of web archival replay tools, sometimes also kno
pywb allows high-quality replay (browsing) of archived web data stored in standardized `ARC <http://en.wikipedia.org/wiki/ARC_(file_format)>`_ and `WARC <http://en.wikipedia.org/wiki/Web_ARChive>`_. pywb allows high-quality replay (browsing) of archived web data stored in standardized `ARC <http://en.wikipedia.org/wiki/ARC_(file_format)>`_ and `WARC <http://en.wikipedia.org/wiki/Web_ARChive>`_.
The replay system is designed to accurately replay complex dynamic sites, including video and audio content. The replay system is designed to accurately replay complex dynamic sites, including video and audio content.
pywb can be used as a traditional web application or an HTTP or HTTPS proxy server. pywb can be used as a traditional web application or an HTTP or HTTPS proxy server, and has been tested on Linux, OS X and Windows platforms.
pywb is also fully compliant with the `Memento <http://mementoweb.org/>`_ protocol (`RFC-7089 <http://tools.ietf.org/html/rfc7089>`_). pywb is also fully compliant with the `Memento <http://mementoweb.org/>`_ protocol (`RFC-7089 <http://tools.ietf.org/html/rfc7089>`_).

View File

@ -30,7 +30,7 @@ class CDXFile(CDXSource):
def load_cdx(self, query): def load_cdx(self, query):
def do_open(): def do_open():
try: try:
source = open(self.filename) source = open(self.filename, 'rb')
gen = iter_range(source, query.key, query.end_key) gen = iter_range(source, query.key, query.end_key)
for line in gen: for line in gen:
yield line yield line

View File

@ -26,7 +26,7 @@ test_cdx_dir = get_test_dir() + 'cdx/'
def load_cdx_into_redis(source, filename, key=None): def load_cdx_into_redis(source, filename, key=None):
# load a cdx into mock redis # load a cdx into mock redis
with open(test_cdx_dir + filename) as fh: with open(test_cdx_dir + filename, 'rb') as fh:
for line in fh: for line in fh:
zadd_cdx(source, line, key) zadd_cdx(source, line, key)

View File

@ -84,7 +84,7 @@ class ZipNumCluster(CDXSource):
self.loc_mtime = new_mtime self.loc_mtime = new_mtime
logging.debug('Loading loc from: ' + self.loc_filename) logging.debug('Loading loc from: ' + self.loc_filename)
with open(self.loc_filename) as fh: with open(self.loc_filename, 'rb') as fh:
for line in fh: for line in fh:
parts = line.rstrip().split('\t') parts = line.rstrip().split('\t')
self.loc_map[parts[0]] = parts[1:] self.loc_map[parts[0]] = parts[1:]
@ -112,7 +112,7 @@ class ZipNumCluster(CDXSource):
def load_cdx(self, query): def load_cdx(self, query):
self.load_loc() self.load_loc()
reader = open(self.summary) reader = open(self.summary, 'rb')
idx_iter = iter_range(reader, idx_iter = iter_range(reader,
query.key, query.key,

View File

@ -13,8 +13,8 @@ from argparse import ArgumentParser
#================================================================= #=================================================================
# Duration of 100 years # Duration of 10 years
CERT_DURATION = 100 * 365 * 24 * 60 * 60 CERT_DURATION = 10 * 365 * 24 * 60 * 60
CERTS_DIR = './ca/certs/' CERTS_DIR = './ca/certs/'

View File

@ -334,7 +334,7 @@ class ProxyRouter(object):
return None return None
buff = '' buff = ''
with open(self.ca.ca_file) as fh: with open(self.ca.ca_file, 'rb') as fh:
buff = fh.read() buff = fh.read()
content_type = 'application/x-x509-ca-cert' content_type = 'application/x-x509-ca-cert'

View File

@ -5,8 +5,8 @@ import shutil
from pywb.framework.certauth import main, CertificateAuthority from pywb.framework.certauth import main, CertificateAuthority
TEST_CA_DIR = './pywb/framework/test/pywb_test_ca_certs' TEST_CA_DIR = os.path.join('.', 'pywb', 'framework', 'test', 'pywb_test_ca_certs')
TEST_CA_ROOT = './pywb/framework/test/pywb_test_ca.pem' TEST_CA_ROOT = os.path.join('.', 'pywb', 'framework', 'test', 'pywb_test_ca.pem')
def setup_module(): def setup_module():
openssl_support = pytest.importorskip("OpenSSL") openssl_support = pytest.importorskip("OpenSSL")

View File

@ -14,7 +14,7 @@ class TestOkApp:
class TestErrApp: class TestErrApp:
def __call__(self, env): def __call__(self, env):
raise Exception('Test Error') raise Exception('Test Unexpected Error')
class TestCustomErrApp: class TestCustomErrApp:
def __call__(self, env): def __call__(self, env):
@ -41,8 +41,8 @@ def test_err_app():
testapp = webtest.TestApp(the_app) testapp = webtest.TestApp(the_app)
resp = testapp.get('/abc', expect_errors=True) resp = testapp.get('/abc', expect_errors=True)
assert resp.status_int == 400 assert resp.status_int == 500
assert '400 Bad Request Error: Test Error' in resp.body assert '500 Internal Server Error Error: Test Unexpected Error' in resp.body
def test_custom_err_app(): def test_custom_err_app():
the_app = init_app(initer(TestCustomErrApp), load_yaml=False) the_app = init_app(initer(TestCustomErrApp), load_yaml=False)

View File

@ -118,7 +118,7 @@ class WSGIApp(object):
if hasattr(exc, 'status'): if hasattr(exc, 'status'):
status = exc.status() status = exc.status()
else: else:
status = '400 Bad Request' status = '500 Internal Server Error'
if hasattr(exc, 'url'): if hasattr(exc, 'url'):
err_url = exc.url err_url = exc.url

View File

@ -30,6 +30,8 @@ class HTMLRewriterMixin(object):
'base': {'href': defmod}, 'base': {'href': defmod},
'blockquote': {'cite': defmod}, 'blockquote': {'cite': defmod},
'body': {'background': 'im_'}, 'body': {'background': 'im_'},
'button': {'formaction': defmod},
'command': {'icon': 'im_'},
'del': {'cite': defmod}, 'del': {'cite': defmod},
'embed': {'src': 'oe_'}, 'embed': {'src': 'oe_'},
'head': {'': defmod}, # for head rewriting 'head': {'': defmod}, # for head rewriting
@ -37,7 +39,8 @@ class HTMLRewriterMixin(object):
'img': {'src': 'im_', 'img': {'src': 'im_',
'srcset': 'im_'}, 'srcset': 'im_'},
'ins': {'cite': defmod}, 'ins': {'cite': defmod},
'input': {'src': 'im_'}, 'input': {'src': 'im_',
'formaction': defmod},
'form': {'action': defmod}, 'form': {'action': defmod},
'frame': {'src': 'fr_'}, 'frame': {'src': 'fr_'},
'link': {'href': 'oe_'}, 'link': {'href': 'oe_'},
@ -49,7 +52,8 @@ class HTMLRewriterMixin(object):
'ref': {'href': 'oe_'}, 'ref': {'href': 'oe_'},
'script': {'src': 'js_'}, 'script': {'src': 'js_'},
'source': {'src': 'oe_'}, 'source': {'src': 'oe_'},
'video': {'src': 'oe_'}, 'video': {'src': 'oe_',
'poster': 'im_'},
'div': {'data-src': defmod, 'div': {'data-src': defmod,
'data-uri': defmod}, 'data-uri': defmod},

View File

@ -6,10 +6,11 @@ import requests
import datetime import datetime
import mimetypes import mimetypes
import logging import logging
import os
from urlparse import urlsplit from urlparse import urlsplit
from pywb.utils.loaders import is_http, LimitReader, BlockLoader from pywb.utils.loaders import is_http, LimitReader, BlockLoader, to_file_url
from pywb.utils.loaders import extract_client_cookie from pywb.utils.loaders import extract_client_cookie
from pywb.utils.timeutils import datetime_to_timestamp from pywb.utils.timeutils import datetime_to_timestamp
from pywb.utils.statusandheaders import StatusAndHeaders from pywb.utils.statusandheaders import StatusAndHeaders
@ -180,11 +181,18 @@ class LiveRewriter(object):
if url.startswith('//'): if url.startswith('//'):
url = 'http:' + url url = 'http:' + url
if is_http(url):
is_remote = True
else:
is_remote = False
if not url.startswith('file:'):
url = to_file_url(url)
# explicit urlkey may be passed in (say for testing) # explicit urlkey may be passed in (say for testing)
if not urlkey: if not urlkey:
urlkey = canonicalize(url) urlkey = canonicalize(url)
if is_http(url): if is_remote:
(status_headers, stream) = self.fetch_http(url, urlkey, env, (status_headers, stream) = self.fetch_http(url, urlkey, env,
req_headers, req_headers,
follow_redirects, follow_redirects,

View File

@ -103,12 +103,17 @@
'http://example.com/file.html?param=https://example.com/filename.html&other=value&a=b&param2=http://test.example.com' 'http://example.com/file.html?param=https://example.com/filename.html&other=value&a=b&param2=http://test.example.com'
# HttpsUrlRewriter tests # HttpsUrlRewriter tests
>>> HttpsUrlRewriter('http://example.com/', None).rewrite('https://example.com/abc') >>> httpsrewriter = HttpsUrlRewriter('http://example.com/', None)
>>> httpsrewriter.rewrite('https://example.com/abc')
'http://example.com/abc' 'http://example.com/abc'
>>> HttpsUrlRewriter('http://example.com/', None).rewrite('http://example.com/abc') >>> httpsrewriter.rewrite('http://example.com/abc')
'http://example.com/abc' 'http://example.com/abc'
# rebase is identity
>>> httpsrewriter.rebase_rewriter('https://example.com/') == httpsrewriter
True
""" """

View File

@ -105,6 +105,8 @@ _WBWombat = (function() {
"http:/" + prefix, "https:/" + prefix]; "http:/" + prefix, "https:/" + prefix];
} }
var SRC_TAGS = ["IMG", "SCRIPT", "VIDEO", "AUDIO", "SOURCE", "EMBED", "INPUT"];
//============================================ //============================================
function rewrite_url_(url) { function rewrite_url_(url) {
// If undefined, just return it // If undefined, just return it
@ -692,12 +694,9 @@ _WBWombat = (function() {
} }
override_attr(created, "src"); override_attr(created, "src");
} else if (created.tagName == "IMG" || created.tagName == "VIDEO" || created.tagName == "AUDIO") { } else if (created.tagName && starts_with(created.tagName, SRC_TAGS)) {
override_attr(created, "src"); override_attr(created, "src");
} }
// } else if (created.tagName == "A") {
// override_attr(created, "href");
// }
return created; return created;
} }

View File

@ -46,9 +46,6 @@ class BufferedReader(object):
self.buff_size = 0 self.buff_size = 0
def set_decomp(self, decomp_type): def set_decomp(self, decomp_type):
if self.num_read > 0:
raise Exception('Attempting to change decompression mid-stream')
self._init_decomp(decomp_type) self._init_decomp(decomp_type)
def _init_decomp(self, decomp_type): def _init_decomp(self, decomp_type):

View File

@ -7,6 +7,7 @@ import os
import hmac import hmac
import urllib import urllib
import urllib2 import urllib2
import urlparse
import time import time
import pkg_resources import pkg_resources
from io import open from io import open
@ -17,6 +18,15 @@ def is_http(filename):
return filename.startswith(('http://', 'https://')) return filename.startswith(('http://', 'https://'))
#=================================================================
def to_file_url(filename):
""" Convert a filename to a file:// url
"""
url = os.path.abspath(filename)
url = urlparse.urljoin('file:', urllib.pathname2url(url))
return url
#================================================================= #=================================================================
def load_yaml_config(config_file): def load_yaml_config(config_file):
import yaml import yaml
@ -39,12 +49,12 @@ def extract_post_query(method, mime, length, stream):
not mime.lower().startswith('application/x-www-form-urlencoded'))): not mime.lower().startswith('application/x-www-form-urlencoded'))):
return None return None
if not length or length == '0':
return None
try: try:
length = int(length) length = int(length)
except ValueError: except (ValueError, TypeError):
return None
if length <= 0:
return None return None
#todo: encoding issues? #todo: encoding issues?
@ -129,9 +139,10 @@ class BlockLoader(object):
# if starting with . or /, can only be a file path.. # if starting with . or /, can only be a file path..
file_only = url.startswith(('/', '.')) file_only = url.startswith(('/', '.'))
# convert to filename
if url.startswith('file://'): if url.startswith('file://'):
url = url[len('file://'):]
file_only = True file_only = True
url = urllib.url2pathname(url[len('file://'):])
try: try:
# first, try as file # first, try as file

View File

@ -66,12 +66,12 @@ from pywb import get_test_dir
test_cdx_dir = get_test_dir() + 'cdx/' test_cdx_dir = get_test_dir() + 'cdx/'
def print_binsearch_results(key, iter_func): def print_binsearch_results(key, iter_func):
with open(test_cdx_dir + 'iana.cdx') as cdx: with open(test_cdx_dir + 'iana.cdx', 'rb') as cdx:
for line in iter_func(cdx, key): for line in iter_func(cdx, key):
print line print line
def print_binsearch_results_range(key, end_key, iter_func, prev_size=0): def print_binsearch_results_range(key, end_key, iter_func, prev_size=0):
with open(test_cdx_dir + 'iana.cdx') as cdx: with open(test_cdx_dir + 'iana.cdx', 'rb') as cdx:
for line in iter_func(cdx, key, end_key, prev_size=prev_size): for line in iter_func(cdx, key, end_key, prev_size=prev_size):
print line print line

View File

@ -25,7 +25,7 @@ True
100 100
# no length specified, read full amount requested # no length specified, read full amount requested
>>> len(BlockLoader().load('file://' + test_cdx_dir + 'example.cdx', 0, -1).read(400)) >>> len(BlockLoader().load(to_file_url(test_cdx_dir + 'example.cdx'), 0, -1).read(400))
400 400
# HMAC Cookie Maker # HMAC Cookie Maker
@ -56,14 +56,41 @@ True
>>> extract_client_cookie(dict(HTTP_COOKIE='x'), 'x') >>> extract_client_cookie(dict(HTTP_COOKIE='x'), 'x')
>>> extract_client_cookie({}, 'y') >>> extract_client_cookie({}, 'y')
# extract_post_query tests
# correct POST data
>>> post_data = 'foo=bar&dir=%2Fbaz'
>>> extract_post_query('POST', 'application/x-www-form-urlencoded', len(post_data), BytesIO(post_data))
'foo=bar&dir=/baz'
# unsupported method
>>> extract_post_query('PUT', 'application/x-www-form-urlencoded', len(post_data), BytesIO(post_data))
# unsupported type
>>> extract_post_query('POST', 'text/plain', len(post_data), BytesIO(post_data))
# invalid length
>>> extract_post_query('POST', 'application/x-www-form-urlencoded', 'abc', BytesIO(post_data))
>>> extract_post_query('POST', 'application/x-www-form-urlencoded', 0, BytesIO(post_data))
# length too short
>>> extract_post_query('POST', 'application/x-www-form-urlencoded', len(post_data) - 4, BytesIO(post_data))
'foo=bar&dir=%2'
# length too long
>>> extract_post_query('POST', 'application/x-www-form-urlencoded', len(post_data) + 4, BytesIO(post_data))
'foo=bar&dir=/baz'
""" """
#================================================================= #=================================================================
import re import re
import os
from io import BytesIO from io import BytesIO
from pywb.utils.loaders import BlockLoader, HMACCookieMaker from pywb.utils.loaders import BlockLoader, HMACCookieMaker, to_file_url
from pywb.utils.loaders import LimitReader, extract_client_cookie from pywb.utils.loaders import LimitReader, extract_client_cookie, extract_post_query
from pywb import get_test_dir from pywb import get_test_dir
@ -82,7 +109,6 @@ def seek_read_full(seekable_reader, offset):
return seekable_reader.readline() return seekable_reader.readline()
if __name__ == "__main__": if __name__ == "__main__":
import doctest import doctest
doctest.testmod() doctest.testmod()

View File

@ -6,8 +6,9 @@ class WbException(Exception):
Exception.__init__(self, msg) Exception.__init__(self, msg)
self.url = url self.url = url
def status(self): # Default Error Code
return '500 Internal Server Error' # def status(self):
# return '500 Internal Server Error'
#================================================================= #=================================================================

View File

@ -115,8 +115,8 @@ def write_multi_cdx_index(output, inputs, **options):
outpath = cdx_filename(filename) outpath = cdx_filename(filename)
outpath = os.path.join(output, outpath) outpath = os.path.join(output, outpath)
with open(outpath, 'w') as outfile: with open(outpath, 'wb') as outfile:
with open(fullpath, 'r') as infile: with open(fullpath, 'rb') as infile:
write_cdx_index(outfile, infile, filename, **options) write_cdx_index(outfile, infile, filename, **options)
# write to one cdx file # write to one cdx file
@ -124,7 +124,7 @@ def write_multi_cdx_index(output, inputs, **options):
if output == '-': if output == '-':
outfile = sys.stdout outfile = sys.stdout
else: else:
outfile = open(output, 'w') outfile = open(output, 'wb')
if options.get('sort'): if options.get('sort'):
writer_cls = SortedCDXWriter writer_cls = SortedCDXWriter
@ -133,7 +133,7 @@ def write_multi_cdx_index(output, inputs, **options):
with writer_cls(outfile, options.get('cdx09')) as writer: with writer_cls(outfile, options.get('cdx09')) as writer:
for fullpath, filename in iter_file_or_dir(inputs): for fullpath, filename in iter_file_or_dir(inputs):
with open(fullpath, 'r') as infile: with open(fullpath, 'rb') as infile:
entry_iter = create_index_iter(infile, **options) entry_iter = create_index_iter(infile, **options)
for entry in entry_iter: for entry in entry_iter:

View File

@ -3,6 +3,7 @@ import redis
from pywb.utils.binsearch import iter_exact from pywb.utils.binsearch import iter_exact
import urlparse import urlparse
import urllib
import os import os
import logging import logging
@ -56,7 +57,7 @@ class RedisResolver:
class PathIndexResolver: class PathIndexResolver:
def __init__(self, pathindex_file): def __init__(self, pathindex_file):
self.pathindex_file = pathindex_file self.pathindex_file = pathindex_file
self.reader = open(pathindex_file) self.reader = open(pathindex_file, 'rb')
def __call__(self, filename): def __call__(self, filename):
result = iter_exact(self.reader, filename, '\t') result = iter_exact(self.reader, filename, '\t')
@ -92,6 +93,7 @@ def make_best_resolver(param):
if url_parts.scheme == 'file': if url_parts.scheme == 'file':
path = url_parts.path path = url_parts.path
path = urllib.url2pathname(path)
if os.path.isfile(path): if os.path.isfile(path):
logging.debug('Adding Path Index: ' + path) logging.debug('Adding Path Index: ' + path)

View File

@ -160,7 +160,7 @@ TEST_CDX_DIR = get_test_dir() + 'cdx/'
TEST_WARC_DIR = get_test_dir() + 'warcs/' TEST_WARC_DIR = get_test_dir() + 'warcs/'
def read_fully(cdx): def read_fully(cdx):
with open(TEST_CDX_DIR + cdx) as fh: with open(TEST_CDX_DIR + cdx, 'rb') as fh:
curr = BytesIO() curr = BytesIO()
while True: while True:
b = fh.read() b = fh.read()
@ -172,7 +172,7 @@ def read_fully(cdx):
def cdx_index(warc, **options): def cdx_index(warc, **options):
buff = BytesIO() buff = BytesIO()
with open(TEST_WARC_DIR + warc) as fh: with open(TEST_WARC_DIR + warc, 'rb') as fh:
write_cdx_index(buff, fh, warc, **options) write_cdx_index(buff, fh, warc, **options)
return buff.getvalue() return buff.getvalue()
@ -213,7 +213,7 @@ def cli_lines_with_dir(input_):
print filename print filename
with open(os.path.join(tmp_dir, filename), 'r') as fh: with open(os.path.join(tmp_dir, filename), 'rb') as fh:
lines = fh.read(8192).rstrip().split('\n') lines = fh.read(8192).rstrip().split('\n')
finally: finally:

View File

@ -33,13 +33,13 @@ PrefixResolver('http://myhost.example.com/warcs/', contains = '/')
RedisResolver('redis://myhost.example.com:1234/1') RedisResolver('redis://myhost.example.com:1234/1')
# a file # a file
>>> r = make_best_resolver('file://' + os.path.realpath(__file__)) >>> r = make_best_resolver(to_file_url(os.path.realpath(__file__)))
>>> r.__class__.__name__ >>> r.__class__.__name__
'PathIndexResolver' 'PathIndexResolver'
# a dir # a dir
>>> path = os.path.realpath(__file__) >>> path = os.path.realpath(__file__)
>>> r = make_best_resolver('file://' + os.path.dirname(path)) >>> r = make_best_resolver(to_file_url(os.path.dirname(path)))
>>> r.__class__.__name__ >>> r.__class__.__name__
'PrefixResolver' 'PrefixResolver'
@ -54,8 +54,9 @@ RedisResolver('redis://myhost.example.com:1234/1')
from pywb import get_test_dir from pywb import get_test_dir
from pywb.warc.pathresolvers import PrefixResolver, PathIndexResolver, RedisResolver from pywb.warc.pathresolvers import PrefixResolver, PathIndexResolver, RedisResolver
from pywb.warc.pathresolvers import make_best_resolver, make_best_resolvers from pywb.warc.pathresolvers import make_best_resolver, make_best_resolvers
import os from pywb.utils.loaders import to_file_url
import os
from fakeredis import FakeStrictRedis from fakeredis import FakeStrictRedis
from mock import patch from mock import patch
@ -68,7 +69,6 @@ def init_redis_resolver():
def hset_path(filename, path): def hset_path(filename, path):
redis_resolver.redis.hset(redis_resolver.key_prefix + filename, 'path', path) redis_resolver.redis.hset(redis_resolver.key_prefix + filename, 'path', path)
redis_resolver = init_redis_resolver() redis_resolver = init_redis_resolver()
#================================================================= #=================================================================

View File

@ -65,7 +65,7 @@ class RangeCache(object):
maxlen = min(maxlen, end - start + 1) maxlen = min(maxlen, end - start + 1)
def read_range(): def read_range():
with open(spec['name']) as fh: with open(spec['name'], 'rb') as fh:
fh.seek(start) fh.seek(start)
fh = LimitReader.wrap_stream(fh, maxlen) fh = LimitReader.wrap_stream(fh, maxlen)
while True: while True:

View File

@ -0,0 +1,20 @@
"""
>>> format_ts('20141226101000')
'Fri, Dec 26 2014 10:10:00'
>>> format_ts('20141226101000', '%s')
1419588600
>>> is_wb_handler(DebugEchoHandler())
False
"""
from pywb.webapp.views import format_ts, is_wb_handler
from pywb.webapp.handlers import DebugEchoHandler
if __name__ == "__main__":
import doctest
doctest.testmod()

View File

@ -1,4 +1,4 @@
from pywb.utils.timeutils import timestamp_to_datetime from pywb.utils.timeutils import timestamp_to_datetime, timestamp_to_sec
from pywb.framework.wbrequestresponse import WbResponse from pywb.framework.wbrequestresponse import WbResponse
from pywb.framework.memento import make_timemap, LINK_FORMAT from pywb.framework.memento import make_timemap, LINK_FORMAT
@ -22,11 +22,7 @@ class template_filter(object):
Otherwise, the func name is the filter name Otherwise, the func name is the filter name
""" """
def __init__(self, param=None): def __init__(self, param=None):
if hasattr(param, '__call__'): self.name = param
self.name = None
self.__call__(param)
else:
self.name = param
def __call__(self, func): def __call__(self, func):
name = self.name name = self.name
@ -39,10 +35,13 @@ class template_filter(object):
#================================================================= #=================================================================
# Filters # Filters
@template_filter @template_filter()
def format_ts(value, format_='%a, %b %d %Y %H:%M:%S'): def format_ts(value, format_='%a, %b %d %Y %H:%M:%S'):
value = timestamp_to_datetime(value) if format_ == '%s':
return value.strftime(format_) return timestamp_to_sec(value)
else:
value = timestamp_to_datetime(value)
return value.strftime(format_)
@template_filter('urlsplit') @template_filter('urlsplit')
@ -51,17 +50,11 @@ def get_urlsplit(url):
return split return split
@template_filter()
def request_hostname(env):
return env.get('HTTP_HOST', 'localhost')
@template_filter() @template_filter()
def is_wb_handler(obj): def is_wb_handler(obj):
if not hasattr(obj, 'handler'): if not hasattr(obj, 'handler'):
return False return False
#return isinstance(obj.handler, WBHandler)
return obj.handler.__class__.__name__ == "WBHandler" return obj.handler.__class__.__name__ == "WBHandler"

View File

@ -34,7 +34,7 @@ class PyTest(TestCommand):
setup( setup(
name='pywb', name='pywb',
version='0.7.2', version='0.7.5',
url='https://github.com/ikreymer/pywb', url='https://github.com/ikreymer/pywb',
author='Ilya Kreymer', author='Ilya Kreymer',
author_email='ikreymer@gmail.com', author_email='ikreymer@gmail.com',
@ -58,10 +58,10 @@ setup(
'pywb': ['static/flowplayer/*', 'static/*.*', 'ui/*', '*.yaml'], 'pywb': ['static/flowplayer/*', 'static/*.*', 'ui/*', '*.yaml'],
}, },
data_files=[ data_files=[
('sample_archive/cdx/', glob.glob('sample_archive/cdx/*')), ('sample_archive/cdx', glob.glob('sample_archive/cdx/*')),
('sample_archive/zipcdx/', glob.glob('sample_archive/zipcdx/*')), ('sample_archive/zipcdx', glob.glob('sample_archive/zipcdx/*')),
('sample_archive/warcs/', glob.glob('sample_archive/warcs/*')), ('sample_archive/warcs', glob.glob('sample_archive/warcs/*')),
('sample_archive/text_content/', ('sample_archive/text_content',
glob.glob('sample_archive/text_content/*')), glob.glob('sample_archive/text_content/*')),
], ],
install_requires=[ install_requires=[
@ -90,7 +90,7 @@ setup(
live-rewrite-server = pywb.apps.live_rewrite_server:main live-rewrite-server = pywb.apps.live_rewrite_server:main
proxy-cert-auth = pywb.framework.certauth:main proxy-cert-auth = pywb.framework.certauth:main
""", """,
zip_safe=False, zip_safe=True,
classifiers=[ classifiers=[
'Development Status :: 4 - Beta', 'Development Status :: 4 - Beta',
'Environment :: Web Environment', 'Environment :: Web Environment',