From 205aeca4a12ad3f89e8e866bc684a4b0475d5191 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sun, 4 Jan 2015 17:32:58 -0800 Subject: [PATCH 01/17] bump version to 0.7.3 rewrite: add additional tags for client side src rewrite, add missing tags to server-side html rewrite --- CHANGES.rst | 6 ++++++ README.rst | 2 +- pywb/rewrite/html_rewriter.py | 8 ++++++-- pywb/static/wombat.js | 4 +++- setup.py | 2 +- 5 files changed, 17 insertions(+), 5 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 947b9afb..55e328b2 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,9 @@ +pywb 0.7.3 changelist +~~~~~~~~~~~~~~~~~~~~~ + +* More comprehensive client side ``src`` attribute rewriting (via wombat.js), additional server-side HTML tag rewriting. + + pywb 0.7.2 changelist ~~~~~~~~~~~~~~~~~~~~~ diff --git a/README.rst b/README.rst index 1d9c8b57..cf81f3f8 100644 --- a/README.rst +++ b/README.rst @@ -1,4 +1,4 @@ -PyWb 0.7.2 +PyWb 0.7.3 ========== .. image:: https://travis-ci.org/ikreymer/pywb.png?branch=develop diff --git a/pywb/rewrite/html_rewriter.py b/pywb/rewrite/html_rewriter.py index 3e92bf6a..246c037b 100644 --- a/pywb/rewrite/html_rewriter.py +++ b/pywb/rewrite/html_rewriter.py @@ -30,6 +30,8 @@ class HTMLRewriterMixin(object): 'base': {'href': defmod}, 'blockquote': {'cite': defmod}, 'body': {'background': 'im_'}, + 'button': {'formaction': defmod}, + 'command': {'icon': 'im_'}, 'del': {'cite': defmod}, 'embed': {'src': 'oe_'}, 'head': {'': defmod}, # for head rewriting @@ -37,7 +39,8 @@ class HTMLRewriterMixin(object): 'img': {'src': 'im_', 'srcset': 'im_'}, 'ins': {'cite': defmod}, - 'input': {'src': 'im_'}, + 'input': {'src': 'im_', + 'formaction': defmod}, 'form': {'action': defmod}, 'frame': {'src': 'fr_'}, 'link': {'href': 'oe_'}, @@ -49,7 +52,8 @@ class HTMLRewriterMixin(object): 'ref': {'href': 'oe_'}, 'script': {'src': 'js_'}, 'source': {'src': 'oe_'}, - 'video': {'src': 'oe_'}, + 'video': {'src': 'oe_', + 'poster': 'im_'}, 'div': {'data-src': defmod, 
'data-uri': defmod}, diff --git a/pywb/static/wombat.js b/pywb/static/wombat.js index 18ffafb7..ef715195 100644 --- a/pywb/static/wombat.js +++ b/pywb/static/wombat.js @@ -105,6 +105,8 @@ _WBWombat = (function() { "http:/" + prefix, "https:/" + prefix]; } + var SRC_TAGS = ["IMG", "SCRIPT", "VIDEO", "AUDIO", "SOURCE", "EMBED", "INPUT"]; + //============================================ function rewrite_url_(url) { // If undefined, just return it @@ -692,7 +694,7 @@ _WBWombat = (function() { } override_attr(created, "src"); - } else if (created.tagName == "IMG" || created.tagName == "VIDEO" || created.tagName == "AUDIO") { + } else if (created.tagName && starts_with(created.tagName, SRC_TAGS)) { override_attr(created, "src"); } // } else if (created.tagName == "A") { diff --git a/setup.py b/setup.py index 7370802d..aaeefa6e 100755 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ class PyTest(TestCommand): setup( name='pywb', - version='0.7.2', + version='0.7.3', url='https://github.com/ikreymer/pywb', author='Ilya Kreymer', author_email='ikreymer@gmail.com', From 06b2ea498e8b969629837458437113d638463dfb Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Fri, 9 Jan 2015 22:57:21 -0800 Subject: [PATCH 02/17] setup: set zipSafe to true to allow bundling egg in installer --- pywb/static/wombat.js | 3 --- setup.py | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/pywb/static/wombat.js b/pywb/static/wombat.js index ef715195..dc9b2205 100644 --- a/pywb/static/wombat.js +++ b/pywb/static/wombat.js @@ -697,9 +697,6 @@ _WBWombat = (function() { } else if (created.tagName && starts_with(created.tagName, SRC_TAGS)) { override_attr(created, "src"); } -// } else if (created.tagName == "A") { -// override_attr(created, "href"); -// } return created; } diff --git a/setup.py b/setup.py index aaeefa6e..1ecd998c 100755 --- a/setup.py +++ b/setup.py @@ -90,7 +90,7 @@ setup( live-rewrite-server = pywb.apps.live_rewrite_server:main proxy-cert-auth = 
pywb.framework.certauth:main """, - zip_safe=False, + zip_safe=True, classifiers=[ 'Development Status :: 4 - Beta', 'Environment :: Web Environment', From 1eb0f96f92a5430d598931c876a61c68877a22d8 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sat, 10 Jan 2015 14:06:15 -0800 Subject: [PATCH 03/17] windows support work: fix loaders to use pathname2url to convert to file:/// url, use urlopen to open file paths fix some tests to use universal line breaks --- pywb/rewrite/rewrite_live.py | 17 ++++++++++++++--- pywb/utils/loaders.py | 10 +++++----- pywb/utils/test/test_bufferedreaders.py | 4 ++-- pywb/utils/test/test_loaders.py | 4 +++- pywb/warc/test/test_indexing.py | 6 +++--- setup.py | 8 ++++---- 6 files changed, 31 insertions(+), 18 deletions(-) diff --git a/pywb/rewrite/rewrite_live.py b/pywb/rewrite/rewrite_live.py index 3ebbe68d..8378c5d0 100644 --- a/pywb/rewrite/rewrite_live.py +++ b/pywb/rewrite/rewrite_live.py @@ -6,8 +6,10 @@ import requests import datetime import mimetypes import logging +import os -from urlparse import urlsplit +from urlparse import urlsplit, urljoin +from urllib import pathname2url from pywb.utils.loaders import is_http, LimitReader, BlockLoader from pywb.utils.loaders import extract_client_cookie @@ -180,16 +182,25 @@ class LiveRewriter(object): if url.startswith('//'): url = 'http:' + url + if is_http(url): + is_remote = True + else: + is_remote = False + if not url.startswith('file:'): + url = os.path.abspath(url) + url = urljoin('file:', pathname2url(url)) + print(url) + # explicit urlkey may be passed in (say for testing) if not urlkey: urlkey = canonicalize(url) - if is_http(url): + if is_remote: (status_headers, stream) = self.fetch_http(url, urlkey, env, req_headers, follow_redirects, ignore_proxies) - else: + else: (status_headers, stream) = self.fetch_local_file(url) if timestamp is None: diff --git a/pywb/utils/loaders.py b/pywb/utils/loaders.py index d54f4908..6ef1355e 100644 --- a/pywb/utils/loaders.py +++ 
b/pywb/utils/loaders.py @@ -129,13 +129,13 @@ class BlockLoader(object): # if starting with . or /, can only be a file path.. file_only = url.startswith(('/', '.')) - if url.startswith('file://'): - url = url[len('file://'):] - file_only = True - try: # first, try as file - afile = open(url, 'rb') + if url.startswith('file://'): + file_only = True + afile = urllib.urlopen(url) + else: + afile = open(url, 'rb') except IOError: if file_only: diff --git a/pywb/utils/test/test_bufferedreaders.py b/pywb/utils/test/test_bufferedreaders.py index cd5f3787..0a249981 100644 --- a/pywb/utils/test/test_bufferedreaders.py +++ b/pywb/utils/test/test_bufferedreaders.py @@ -3,11 +3,11 @@ r""" #================================================================= # DecompressingBufferedReader readline() ->>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb')).readline() +>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU')).readline() ' CDX N b a m s k r M S V g\n' # detect not compressed ->>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb'), decomp_type = 'gzip').readline() +>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU'), decomp_type = 'gzip').readline() ' CDX N b a m s k r M S V g\n' # decompress with on the fly compression, default gzip compression diff --git a/pywb/utils/test/test_loaders.py b/pywb/utils/test/test_loaders.py index 1da5d71e..020becca 100644 --- a/pywb/utils/test/test_loaders.py +++ b/pywb/utils/test/test_loaders.py @@ -25,7 +25,7 @@ True 100 # no length specified, read full amount requested ->>> len(BlockLoader().load('file://' + test_cdx_dir + 'example.cdx', 0, -1).read(400)) +>>> len(BlockLoader().load('file:' + pathname2url(test_cdx_dir + 'example.cdx'), 0, -1).read(400)) 400 # HMAC Cookie Maker @@ -65,6 +65,8 @@ from io import BytesIO from pywb.utils.loaders import BlockLoader, HMACCookieMaker from pywb.utils.loaders import LimitReader, extract_client_cookie +from urllib import pathname2url 
+ from pywb import get_test_dir test_cdx_dir = get_test_dir() + 'cdx/' diff --git a/pywb/warc/test/test_indexing.py b/pywb/warc/test/test_indexing.py index c8584c8d..51e82d96 100644 --- a/pywb/warc/test/test_indexing.py +++ b/pywb/warc/test/test_indexing.py @@ -160,7 +160,7 @@ TEST_CDX_DIR = get_test_dir() + 'cdx/' TEST_WARC_DIR = get_test_dir() + 'warcs/' def read_fully(cdx): - with open(TEST_CDX_DIR + cdx) as fh: + with open(TEST_CDX_DIR + cdx, 'rU') as fh: curr = BytesIO() while True: b = fh.read() @@ -172,7 +172,7 @@ def read_fully(cdx): def cdx_index(warc, **options): buff = BytesIO() - with open(TEST_WARC_DIR + warc) as fh: + with open(TEST_WARC_DIR + warc, 'rU') as fh: write_cdx_index(buff, fh, warc, **options) return buff.getvalue() @@ -213,7 +213,7 @@ def cli_lines_with_dir(input_): print filename - with open(os.path.join(tmp_dir, filename), 'r') as fh: + with open(os.path.join(tmp_dir, filename), 'rU') as fh: lines = fh.read(8192).rstrip().split('\n') finally: diff --git a/setup.py b/setup.py index 1ecd998c..8d8b0ab8 100755 --- a/setup.py +++ b/setup.py @@ -58,10 +58,10 @@ setup( 'pywb': ['static/flowplayer/*', 'static/*.*', 'ui/*', '*.yaml'], }, data_files=[ - ('sample_archive/cdx/', glob.glob('sample_archive/cdx/*')), - ('sample_archive/zipcdx/', glob.glob('sample_archive/zipcdx/*')), - ('sample_archive/warcs/', glob.glob('sample_archive/warcs/*')), - ('sample_archive/text_content/', + ('sample_archive/cdx', glob.glob('sample_archive/cdx/*')), + ('sample_archive/zipcdx', glob.glob('sample_archive/zipcdx/*')), + ('sample_archive/warcs', glob.glob('sample_archive/warcs/*')), + ('sample_archive/text_content', glob.glob('sample_archive/text_content/*')), ], install_requires=[ From d5c22e364934df079063a9799666ef83185799c4 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sat, 10 Jan 2015 15:27:45 -0800 Subject: [PATCH 04/17] test loaders: fix file:// prefix --- pywb/utils/test/test_loaders.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff 
--git a/pywb/utils/test/test_loaders.py b/pywb/utils/test/test_loaders.py index 020becca..a765808e 100644 --- a/pywb/utils/test/test_loaders.py +++ b/pywb/utils/test/test_loaders.py @@ -25,7 +25,7 @@ True 100 # no length specified, read full amount requested ->>> len(BlockLoader().load('file:' + pathname2url(test_cdx_dir + 'example.cdx'), 0, -1).read(400)) +>>> len(BlockLoader().load(to_local_url(test_cdx_dir + 'example.cdx'), 0, -1).read(400)) 400 # HMAC Cookie Maker @@ -61,6 +61,7 @@ True #================================================================= import re +import os from io import BytesIO from pywb.utils.loaders import BlockLoader, HMACCookieMaker from pywb.utils.loaders import LimitReader, extract_client_cookie @@ -83,7 +84,9 @@ def seek_read_full(seekable_reader, offset): seekable_reader.readline() #skip return seekable_reader.readline() - +def to_local_url(filename): + filename = os.path.abspath(filename) + return 'file://' + pathname2url(filename) if __name__ == "__main__": import doctest From 7f52ecdca918e917410f1b49a5b5a5051dcf4460 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sat, 10 Jan 2015 15:36:53 -0800 Subject: [PATCH 05/17] tests: fix indexing test, remove extra space/print --- pywb/rewrite/rewrite_live.py | 3 +-- pywb/warc/test/test_indexing.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pywb/rewrite/rewrite_live.py b/pywb/rewrite/rewrite_live.py index 8378c5d0..0c9ff62c 100644 --- a/pywb/rewrite/rewrite_live.py +++ b/pywb/rewrite/rewrite_live.py @@ -189,7 +189,6 @@ class LiveRewriter(object): if not url.startswith('file:'): url = os.path.abspath(url) url = urljoin('file:', pathname2url(url)) - print(url) # explicit urlkey may be passed in (say for testing) if not urlkey: @@ -200,7 +199,7 @@ class LiveRewriter(object): req_headers, follow_redirects, ignore_proxies) - else: + else: (status_headers, stream) = self.fetch_local_file(url) if timestamp is None: diff --git a/pywb/warc/test/test_indexing.py 
b/pywb/warc/test/test_indexing.py index 51e82d96..2e704530 100644 --- a/pywb/warc/test/test_indexing.py +++ b/pywb/warc/test/test_indexing.py @@ -172,7 +172,7 @@ def read_fully(cdx): def cdx_index(warc, **options): buff = BytesIO() - with open(TEST_WARC_DIR + warc, 'rU') as fh: + with open(TEST_WARC_DIR + warc, 'rb') as fh: write_cdx_index(buff, fh, warc, **options) return buff.getvalue() From ba853a4eae5d96525ce3fd0f2185e7d210e3640f Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sat, 10 Jan 2015 20:59:23 -0800 Subject: [PATCH 06/17] fixes for windows: convert url to file with pathname2url, use 'b' for reading warcs, don't use %s for timestamp conversion (not portable) (#56) --- pywb/warc/cdxindexer.py | 4 ++-- pywb/warc/pathresolvers.py | 2 ++ pywb/warc/test/test_pathresolvers.py | 10 ++++++++-- pywb/webapp/views.py | 6 +++++- 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/pywb/warc/cdxindexer.py b/pywb/warc/cdxindexer.py index 60dd5ad5..aa432f38 100644 --- a/pywb/warc/cdxindexer.py +++ b/pywb/warc/cdxindexer.py @@ -116,7 +116,7 @@ def write_multi_cdx_index(output, inputs, **options): outpath = os.path.join(output, outpath) with open(outpath, 'w') as outfile: - with open(fullpath, 'r') as infile: + with open(fullpath, 'rb') as infile: write_cdx_index(outfile, infile, filename, **options) # write to one cdx file @@ -133,7 +133,7 @@ def write_multi_cdx_index(output, inputs, **options): with writer_cls(outfile, options.get('cdx09')) as writer: for fullpath, filename in iter_file_or_dir(inputs): - with open(fullpath, 'r') as infile: + with open(fullpath, 'rb') as infile: entry_iter = create_index_iter(infile, **options) for entry in entry_iter: diff --git a/pywb/warc/pathresolvers.py b/pywb/warc/pathresolvers.py index 469fbfb1..2d1f7439 100644 --- a/pywb/warc/pathresolvers.py +++ b/pywb/warc/pathresolvers.py @@ -3,6 +3,7 @@ import redis from pywb.utils.binsearch import iter_exact import urlparse +import urllib import os import logging @@ -92,6 +93,7 
@@ def make_best_resolver(param): if url_parts.scheme == 'file': path = url_parts.path + path = urllib.url2pathname(path) if os.path.isfile(path): logging.debug('Adding Path Index: ' + path) diff --git a/pywb/warc/test/test_pathresolvers.py b/pywb/warc/test/test_pathresolvers.py index 923c0ce1..f60eafaa 100644 --- a/pywb/warc/test/test_pathresolvers.py +++ b/pywb/warc/test/test_pathresolvers.py @@ -33,13 +33,13 @@ PrefixResolver('http://myhost.example.com/warcs/', contains = '/') RedisResolver('redis://myhost.example.com:1234/1') # a file ->>> r = make_best_resolver('file://' + os.path.realpath(__file__)) +>>> r = make_best_resolver(to_local_url(os.path.realpath(__file__))) >>> r.__class__.__name__ 'PathIndexResolver' # a dir >>> path = os.path.realpath(__file__) ->>> r = make_best_resolver('file://' + os.path.dirname(path)) +>>> r = make_best_resolver(to_local_url(os.path.dirname(path))) >>> r.__class__.__name__ 'PrefixResolver' @@ -56,6 +56,7 @@ from pywb.warc.pathresolvers import PrefixResolver, PathIndexResolver, RedisReso from pywb.warc.pathresolvers import make_best_resolver, make_best_resolvers import os +from urllib import pathname2url from fakeredis import FakeStrictRedis from mock import patch @@ -68,6 +69,11 @@ def init_redis_resolver(): def hset_path(filename, path): redis_resolver.redis.hset(redis_resolver.key_prefix + filename, 'path', path) +def to_local_url(filename): + filename = os.path.abspath(filename) + res = 'file:' + pathname2url(filename) + #print(res) + return res redis_resolver = init_redis_resolver() diff --git a/pywb/webapp/views.py b/pywb/webapp/views.py index 11e78618..4d9cdd6d 100644 --- a/pywb/webapp/views.py +++ b/pywb/webapp/views.py @@ -4,6 +4,7 @@ from pywb.framework.memento import make_timemap, LINK_FORMAT import urlparse import logging +import time from os import path from itertools import imap @@ -42,7 +43,10 @@ class template_filter(object): @template_filter def format_ts(value, format_='%a, %b %d %Y %H:%M:%S'): value = 
timestamp_to_datetime(value) - return value.strftime(format_) + if format_ == '%s': + return int(time.mktime(value.timetuple()) * 1000) + else: + return value.strftime(format_) @template_filter('urlsplit') From cf0a21509beb9002cda5548a0eb60c89fd69df58 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sun, 11 Jan 2015 13:05:48 -0800 Subject: [PATCH 07/17] loaders: add to_file_url() for converting between filename and file://, used in live rewrite and tests --- pywb/rewrite/rewrite_live.py | 8 +++----- pywb/utils/loaders.py | 21 ++++++++++++++++----- pywb/utils/test/test_loaders.py | 9 ++------- pywb/warc/test/test_pathresolvers.py | 14 ++++---------- 4 files changed, 25 insertions(+), 27 deletions(-) diff --git a/pywb/rewrite/rewrite_live.py b/pywb/rewrite/rewrite_live.py index 0c9ff62c..e6860d22 100644 --- a/pywb/rewrite/rewrite_live.py +++ b/pywb/rewrite/rewrite_live.py @@ -8,10 +8,9 @@ import mimetypes import logging import os -from urlparse import urlsplit, urljoin -from urllib import pathname2url +from urlparse import urlsplit -from pywb.utils.loaders import is_http, LimitReader, BlockLoader +from pywb.utils.loaders import is_http, LimitReader, BlockLoader, to_file_url from pywb.utils.loaders import extract_client_cookie from pywb.utils.timeutils import datetime_to_timestamp from pywb.utils.statusandheaders import StatusAndHeaders @@ -187,8 +186,7 @@ class LiveRewriter(object): else: is_remote = False if not url.startswith('file:'): - url = os.path.abspath(url) - url = urljoin('file:', pathname2url(url)) + url = to_file_url(url) # explicit urlkey may be passed in (say for testing) if not urlkey: diff --git a/pywb/utils/loaders.py b/pywb/utils/loaders.py index 6ef1355e..34eca14b 100644 --- a/pywb/utils/loaders.py +++ b/pywb/utils/loaders.py @@ -7,6 +7,7 @@ import os import hmac import urllib import urllib2 +import urlparse import time import pkg_resources from io import open @@ -17,6 +18,15 @@ def is_http(filename): return filename.startswith(('http://', 
'https://')) +#================================================================= +def to_file_url(filename): + """ Convert a filename to a file:// url + """ + url = os.path.abspath(filename) + url = urlparse.urljoin('file:', urllib.pathname2url(url)) + return url + + #================================================================= def load_yaml_config(config_file): import yaml @@ -129,13 +139,14 @@ class BlockLoader(object): # if starting with . or /, can only be a file path.. file_only = url.startswith(('/', '.')) + # convert to filename + if url.startswith('file://'): + file_only = True + url = urllib.url2pathname(url[len('file://'):]) + try: # first, try as file - if url.startswith('file://'): - file_only = True - afile = urllib.urlopen(url) - else: - afile = open(url, 'rb') + afile = open(url, 'rb') except IOError: if file_only: diff --git a/pywb/utils/test/test_loaders.py b/pywb/utils/test/test_loaders.py index a765808e..312af81b 100644 --- a/pywb/utils/test/test_loaders.py +++ b/pywb/utils/test/test_loaders.py @@ -25,7 +25,7 @@ True 100 # no length specified, read full amount requested ->>> len(BlockLoader().load(to_local_url(test_cdx_dir + 'example.cdx'), 0, -1).read(400)) +>>> len(BlockLoader().load(to_file_url(test_cdx_dir + 'example.cdx'), 0, -1).read(400)) 400 # HMAC Cookie Maker @@ -63,11 +63,9 @@ True import re import os from io import BytesIO -from pywb.utils.loaders import BlockLoader, HMACCookieMaker +from pywb.utils.loaders import BlockLoader, HMACCookieMaker, to_file_url from pywb.utils.loaders import LimitReader, extract_client_cookie -from urllib import pathname2url - from pywb import get_test_dir test_cdx_dir = get_test_dir() + 'cdx/' @@ -84,9 +82,6 @@ def seek_read_full(seekable_reader, offset): seekable_reader.readline() #skip return seekable_reader.readline() -def to_local_url(filename): - filename = os.path.abspath(filename) - return 'file://' + pathname2url(filename) if __name__ == "__main__": import doctest diff --git 
a/pywb/warc/test/test_pathresolvers.py b/pywb/warc/test/test_pathresolvers.py index f60eafaa..d00f3348 100644 --- a/pywb/warc/test/test_pathresolvers.py +++ b/pywb/warc/test/test_pathresolvers.py @@ -33,13 +33,13 @@ PrefixResolver('http://myhost.example.com/warcs/', contains = '/') RedisResolver('redis://myhost.example.com:1234/1') # a file ->>> r = make_best_resolver(to_local_url(os.path.realpath(__file__))) +>>> r = make_best_resolver(to_file_url(os.path.realpath(__file__))) >>> r.__class__.__name__ 'PathIndexResolver' # a dir >>> path = os.path.realpath(__file__) ->>> r = make_best_resolver(to_local_url(os.path.dirname(path))) +>>> r = make_best_resolver(to_file_url(os.path.dirname(path))) >>> r.__class__.__name__ 'PrefixResolver' @@ -54,9 +54,9 @@ RedisResolver('redis://myhost.example.com:1234/1') from pywb import get_test_dir from pywb.warc.pathresolvers import PrefixResolver, PathIndexResolver, RedisResolver from pywb.warc.pathresolvers import make_best_resolver, make_best_resolvers -import os +from pywb.utils.loaders import to_file_url -from urllib import pathname2url +import os from fakeredis import FakeStrictRedis from mock import patch @@ -69,12 +69,6 @@ def init_redis_resolver(): def hset_path(filename, path): redis_resolver.redis.hset(redis_resolver.key_prefix + filename, 'path', path) -def to_local_url(filename): - filename = os.path.abspath(filename) - res = 'file:' + pathname2url(filename) - #print(res) - return res - redis_resolver = init_redis_resolver() #================================================================= From 7ae0ff86d2185228ffbef432d30d01510d97acd3 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sun, 11 Jan 2015 13:10:14 -0800 Subject: [PATCH 08/17] test certauth: fix paths --- pywb/framework/test/test_certauth.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pywb/framework/test/test_certauth.py b/pywb/framework/test/test_certauth.py index 6f7ce583..472dc37e 100644 --- 
a/pywb/framework/test/test_certauth.py +++ b/pywb/framework/test/test_certauth.py @@ -5,8 +5,8 @@ import shutil from pywb.framework.certauth import main, CertificateAuthority -TEST_CA_DIR = './pywb/framework/test/pywb_test_ca_certs' -TEST_CA_ROOT = './pywb/framework/test/pywb_test_ca.pem' +TEST_CA_DIR = os.path.join('.', 'pywb', 'framework', 'test', 'pywb_test_ca_certs') +TEST_CA_ROOT = os.path.join('.', 'pywb', 'framework', 'test', 'pywb_test_ca.pem') def setup_module(): openssl_support = pytest.importorskip("OpenSSL") From 14657fbe152a5cc8c694f6a12c72cf9dbdc4eae7 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sun, 11 Jan 2015 15:04:19 -0800 Subject: [PATCH 09/17] certauth: fix max cert duration to avoid int overflow --- pywb/framework/certauth.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pywb/framework/certauth.py b/pywb/framework/certauth.py index b9f70b06..cadf6bd1 100644 --- a/pywb/framework/certauth.py +++ b/pywb/framework/certauth.py @@ -13,8 +13,8 @@ from argparse import ArgumentParser #================================================================= -# Duration of 100 years -CERT_DURATION = 100 * 365 * 24 * 60 * 60 +# Duration of 10 years +CERT_DURATION = 10 * 365 * 24 * 60 * 60 CERTS_DIR = './ca/certs/' From fb4bf817f7756b796005be98cf0c38dfa0631743 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sun, 11 Jan 2015 18:34:32 -0800 Subject: [PATCH 10/17] rangecache: use 'b' for file open --- pywb/webapp/rangecache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pywb/webapp/rangecache.py b/pywb/webapp/rangecache.py index 84b585b1..4b0954cb 100644 --- a/pywb/webapp/rangecache.py +++ b/pywb/webapp/rangecache.py @@ -65,7 +65,7 @@ class RangeCache(object): maxlen = min(maxlen, end - start + 1) def read_range(): - with open(spec['name']) as fh: + with open(spec['name'], 'rb') as fh: fh.seek(start) fh = LimitReader.wrap_stream(fh, maxlen) while True: From db75bda736f8b19c8f33ea9cdec578cd9d536b4a Mon Sep 17 
00:00:00 2001 From: Ilya Kreymer Date: Sun, 11 Jan 2015 18:53:47 -0800 Subject: [PATCH 11/17] file open() pass: convert all read and write to ensure binary 'b' flag is set (#56) --- pywb/cdx/cdxsource.py | 2 +- pywb/cdx/test/test_redis_source.py | 2 +- pywb/cdx/zipnum.py | 4 ++-- pywb/framework/proxy.py | 2 +- pywb/utils/test/test_binsearch.py | 4 ++-- pywb/utils/test/test_bufferedreaders.py | 4 ++-- pywb/warc/cdxindexer.py | 4 ++-- pywb/warc/pathresolvers.py | 2 +- pywb/warc/test/test_indexing.py | 4 ++-- 9 files changed, 14 insertions(+), 14 deletions(-) diff --git a/pywb/cdx/cdxsource.py b/pywb/cdx/cdxsource.py index ac0eaf74..e3174ab1 100644 --- a/pywb/cdx/cdxsource.py +++ b/pywb/cdx/cdxsource.py @@ -30,7 +30,7 @@ class CDXFile(CDXSource): def load_cdx(self, query): def do_open(): try: - source = open(self.filename) + source = open(self.filename, 'rb') gen = iter_range(source, query.key, query.end_key) for line in gen: yield line diff --git a/pywb/cdx/test/test_redis_source.py b/pywb/cdx/test/test_redis_source.py index 9f5daa8d..a52411dd 100644 --- a/pywb/cdx/test/test_redis_source.py +++ b/pywb/cdx/test/test_redis_source.py @@ -26,7 +26,7 @@ test_cdx_dir = get_test_dir() + 'cdx/' def load_cdx_into_redis(source, filename, key=None): # load a cdx into mock redis - with open(test_cdx_dir + filename) as fh: + with open(test_cdx_dir + filename, 'rb') as fh: for line in fh: zadd_cdx(source, line, key) diff --git a/pywb/cdx/zipnum.py b/pywb/cdx/zipnum.py index 071319a5..87ec1340 100644 --- a/pywb/cdx/zipnum.py +++ b/pywb/cdx/zipnum.py @@ -84,7 +84,7 @@ class ZipNumCluster(CDXSource): self.loc_mtime = new_mtime logging.debug('Loading loc from: ' + self.loc_filename) - with open(self.loc_filename) as fh: + with open(self.loc_filename, 'rb') as fh: for line in fh: parts = line.rstrip().split('\t') self.loc_map[parts[0]] = parts[1:] @@ -112,7 +112,7 @@ class ZipNumCluster(CDXSource): def load_cdx(self, query): self.load_loc() - reader = open(self.summary) + reader = 
open(self.summary, 'rb') idx_iter = iter_range(reader, query.key, diff --git a/pywb/framework/proxy.py b/pywb/framework/proxy.py index 57a081e8..f8bab933 100644 --- a/pywb/framework/proxy.py +++ b/pywb/framework/proxy.py @@ -334,7 +334,7 @@ class ProxyRouter(object): return None buff = '' - with open(self.ca.ca_file) as fh: + with open(self.ca.ca_file, 'rb') as fh: buff = fh.read() content_type = 'application/x-x509-ca-cert' diff --git a/pywb/utils/test/test_binsearch.py b/pywb/utils/test/test_binsearch.py index c599377e..7fae02ff 100644 --- a/pywb/utils/test/test_binsearch.py +++ b/pywb/utils/test/test_binsearch.py @@ -66,12 +66,12 @@ from pywb import get_test_dir test_cdx_dir = get_test_dir() + 'cdx/' def print_binsearch_results(key, iter_func): - with open(test_cdx_dir + 'iana.cdx') as cdx: + with open(test_cdx_dir + 'iana.cdx', 'rb') as cdx: for line in iter_func(cdx, key): print line def print_binsearch_results_range(key, end_key, iter_func, prev_size=0): - with open(test_cdx_dir + 'iana.cdx') as cdx: + with open(test_cdx_dir + 'iana.cdx', 'rb') as cdx: for line in iter_func(cdx, key, end_key, prev_size=prev_size): print line diff --git a/pywb/utils/test/test_bufferedreaders.py b/pywb/utils/test/test_bufferedreaders.py index 0a249981..cd5f3787 100644 --- a/pywb/utils/test/test_bufferedreaders.py +++ b/pywb/utils/test/test_bufferedreaders.py @@ -3,11 +3,11 @@ r""" #================================================================= # DecompressingBufferedReader readline() ->>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU')).readline() +>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb')).readline() ' CDX N b a m s k r M S V g\n' # detect not compressed ->>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU'), decomp_type = 'gzip').readline() +>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb'), decomp_type = 'gzip').readline() ' CDX N b a m s k r M S V g\n' # decompress with on the fly 
compression, default gzip compression diff --git a/pywb/warc/cdxindexer.py b/pywb/warc/cdxindexer.py index aa432f38..acd492f9 100644 --- a/pywb/warc/cdxindexer.py +++ b/pywb/warc/cdxindexer.py @@ -115,7 +115,7 @@ def write_multi_cdx_index(output, inputs, **options): outpath = cdx_filename(filename) outpath = os.path.join(output, outpath) - with open(outpath, 'w') as outfile: + with open(outpath, 'wb') as outfile: with open(fullpath, 'rb') as infile: write_cdx_index(outfile, infile, filename, **options) @@ -124,7 +124,7 @@ def write_multi_cdx_index(output, inputs, **options): if output == '-': outfile = sys.stdout else: - outfile = open(output, 'w') + outfile = open(output, 'wb') if options.get('sort'): writer_cls = SortedCDXWriter diff --git a/pywb/warc/pathresolvers.py b/pywb/warc/pathresolvers.py index 2d1f7439..6e710533 100644 --- a/pywb/warc/pathresolvers.py +++ b/pywb/warc/pathresolvers.py @@ -57,7 +57,7 @@ class RedisResolver: class PathIndexResolver: def __init__(self, pathindex_file): self.pathindex_file = pathindex_file - self.reader = open(pathindex_file) + self.reader = open(pathindex_file, 'rb') def __call__(self, filename): result = iter_exact(self.reader, filename, '\t') diff --git a/pywb/warc/test/test_indexing.py b/pywb/warc/test/test_indexing.py index 2e704530..7e185a8c 100644 --- a/pywb/warc/test/test_indexing.py +++ b/pywb/warc/test/test_indexing.py @@ -160,7 +160,7 @@ TEST_CDX_DIR = get_test_dir() + 'cdx/' TEST_WARC_DIR = get_test_dir() + 'warcs/' def read_fully(cdx): - with open(TEST_CDX_DIR + cdx, 'rU') as fh: + with open(TEST_CDX_DIR + cdx, 'rb') as fh: curr = BytesIO() while True: b = fh.read() @@ -213,7 +213,7 @@ def cli_lines_with_dir(input_): print filename - with open(os.path.join(tmp_dir, filename), 'rU') as fh: + with open(os.path.join(tmp_dir, filename), 'rb') as fh: lines = fh.read(8192).rstrip().split('\n') finally: From 438f9c3e5c3116e13933eed44642e1046e256d34 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sun, 11 Jan 2015 
19:09:01 -0800 Subject: [PATCH 12/17] git: add gitattributes to ensure consistent line endings for warc, arc and cdx --- .gitattributes | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..322dea0e --- /dev/null +++ b/.gitattributes @@ -0,0 +1,4 @@ +*.arc binary +*.warc binary +*.cdx binary +*.gz binary From 7610d9deb790844193f81e75b62ff9ce5848dff9 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sun, 11 Jan 2015 23:02:48 -0800 Subject: [PATCH 13/17] views: cleanup view filters, remove obsolete, add tests for format_ts and is_wb_handler --- pywb/webapp/test/test_view_filters.py | 20 ++++++++++++++++++++ pywb/webapp/views.py | 14 ++------------ 2 files changed, 22 insertions(+), 12 deletions(-) create mode 100644 pywb/webapp/test/test_view_filters.py diff --git a/pywb/webapp/test/test_view_filters.py b/pywb/webapp/test/test_view_filters.py new file mode 100644 index 00000000..1df32d5f --- /dev/null +++ b/pywb/webapp/test/test_view_filters.py @@ -0,0 +1,20 @@ +""" +>>> format_ts('201412261010') +'Fri, Dec 26 2014 10:10:59' + +>>> format_ts('201412261010', '%s') +1419617459000 + +>>> is_wb_handler(DebugEchoHandler()) +False + + +""" + +from pywb.webapp.views import format_ts, is_wb_handler +from pywb.webapp.handlers import DebugEchoHandler + + +if __name__ == "__main__": + import doctest + doctest.testmod() diff --git a/pywb/webapp/views.py b/pywb/webapp/views.py index 4d9cdd6d..1f3a7e4b 100644 --- a/pywb/webapp/views.py +++ b/pywb/webapp/views.py @@ -23,11 +23,7 @@ class template_filter(object): Otherwise, the func name is the filter name """ def __init__(self, param=None): - if hasattr(param, '__call__'): - self.name = None - self.__call__(param) - else: - self.name = param + self.name = param def __call__(self, func): name = self.name @@ -40,7 +36,7 @@ class template_filter(object): #================================================================= # Filters 
-@template_filter +@template_filter() def format_ts(value, format_='%a, %b %d %Y %H:%M:%S'): value = timestamp_to_datetime(value) if format_ == '%s': @@ -55,17 +51,11 @@ def get_urlsplit(url): return split -@template_filter() -def request_hostname(env): - return env.get('HTTP_HOST', 'localhost') - - @template_filter() def is_wb_handler(obj): if not hasattr(obj, 'handler'): return False - #return isinstance(obj.handler, WBHandler) return obj.handler.__class__.__name__ == "WBHandler" From 8449647c5f1bb61476a9101b9c195ca2fb08ddce Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sun, 11 Jan 2015 23:53:34 -0800 Subject: [PATCH 14/17] wbexception: remove unused status in WbException, set default error for any uncaught exception to 500, instead of 400 --- pywb/framework/test/test_wsgi_wrapper.py | 6 +++--- pywb/framework/wsgi_wrappers.py | 2 +- pywb/utils/wbexception.py | 5 +++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/pywb/framework/test/test_wsgi_wrapper.py b/pywb/framework/test/test_wsgi_wrapper.py index e46cded5..39da76c2 100644 --- a/pywb/framework/test/test_wsgi_wrapper.py +++ b/pywb/framework/test/test_wsgi_wrapper.py @@ -14,7 +14,7 @@ class TestOkApp: class TestErrApp: def __call__(self, env): - raise Exception('Test Error') + raise Exception('Test Unexpected Error') class TestCustomErrApp: def __call__(self, env): @@ -41,8 +41,8 @@ def test_err_app(): testapp = webtest.TestApp(the_app) resp = testapp.get('/abc', expect_errors=True) - assert resp.status_int == 400 - assert '400 Bad Request Error: Test Error' in resp.body + assert resp.status_int == 500 + assert '500 Internal Server Error Error: Test Unexpected Error' in resp.body def test_custom_err_app(): the_app = init_app(initer(TestCustomErrApp), load_yaml=False) diff --git a/pywb/framework/wsgi_wrappers.py b/pywb/framework/wsgi_wrappers.py index cbd3825d..fdc96fb7 100644 --- a/pywb/framework/wsgi_wrappers.py +++ b/pywb/framework/wsgi_wrappers.py @@ -118,7 +118,7 @@ class 
WSGIApp(object): if hasattr(exc, 'status'): status = exc.status() else: - status = '400 Bad Request' + status = '500 Internal Server Error' if hasattr(exc, 'url'): err_url = exc.url diff --git a/pywb/utils/wbexception.py b/pywb/utils/wbexception.py index b94a6313..24e34f73 100644 --- a/pywb/utils/wbexception.py +++ b/pywb/utils/wbexception.py @@ -6,8 +6,9 @@ class WbException(Exception): Exception.__init__(self, msg) self.url = url - def status(self): - return '500 Internal Server Error' +# Default Error Code +# def status(self): +# return '500 Internal Server Error' #================================================================= From ac525b09378329b52ab8064d5dbd0ad8dd156863 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sun, 11 Jan 2015 23:54:29 -0800 Subject: [PATCH 15/17] tests: add tests for extract_post_query() add test for HttpsUrlRewriter, remove unnecessary check in bufferedreader --- pywb/rewrite/test/test_url_rewriter.py | 9 +++++++-- pywb/utils/bufferedreaders.py | 3 --- pywb/utils/loaders.py | 8 ++++---- pywb/utils/test/test_loaders.py | 28 +++++++++++++++++++++++++- 4 files changed, 38 insertions(+), 10 deletions(-) diff --git a/pywb/rewrite/test/test_url_rewriter.py b/pywb/rewrite/test/test_url_rewriter.py index 3d324069..a78a5529 100644 --- a/pywb/rewrite/test/test_url_rewriter.py +++ b/pywb/rewrite/test/test_url_rewriter.py @@ -103,12 +103,17 @@ 'http://example.com/file.html?param=https://example.com/filename.html&other=value&a=b¶m2=http://test.example.com' # HttpsUrlRewriter tests ->>> HttpsUrlRewriter('http://example.com/', None).rewrite('https://example.com/abc') +>>> httpsrewriter = HttpsUrlRewriter('http://example.com/', None) +>>> httpsrewriter.rewrite('https://example.com/abc') 'http://example.com/abc' ->>> HttpsUrlRewriter('http://example.com/', None).rewrite('http://example.com/abc') +>>> httpsrewriter.rewrite('http://example.com/abc') 'http://example.com/abc' +# rebase is identity +>>> 
httpsrewriter.rebase_rewriter('https://example.com/') == httpsrewriter +True + """ diff --git a/pywb/utils/bufferedreaders.py b/pywb/utils/bufferedreaders.py index 7e461dee..7ca89780 100644 --- a/pywb/utils/bufferedreaders.py +++ b/pywb/utils/bufferedreaders.py @@ -46,9 +46,6 @@ class BufferedReader(object): self.buff_size = 0 def set_decomp(self, decomp_type): - if self.num_read > 0: - raise Exception('Attempting to change decompression mid-stream') - self._init_decomp(decomp_type) def _init_decomp(self, decomp_type): diff --git a/pywb/utils/loaders.py b/pywb/utils/loaders.py index 34eca14b..affae74f 100644 --- a/pywb/utils/loaders.py +++ b/pywb/utils/loaders.py @@ -49,12 +49,12 @@ def extract_post_query(method, mime, length, stream): not mime.lower().startswith('application/x-www-form-urlencoded'))): return None - if not length or length == '0': - return None - try: length = int(length) - except ValueError: + except (ValueError, TypeError): + return None + + if length <= 0: return None #todo: encoding issues? 
diff --git a/pywb/utils/test/test_loaders.py b/pywb/utils/test/test_loaders.py index 312af81b..e43cdc41 100644 --- a/pywb/utils/test/test_loaders.py +++ b/pywb/utils/test/test_loaders.py @@ -56,6 +56,32 @@ True >>> extract_client_cookie(dict(HTTP_COOKIE='x'), 'x') >>> extract_client_cookie({}, 'y') + + +# extract_post_query tests + +# correct POST data +>>> post_data = 'foo=bar&dir=%2Fbaz' +>>> extract_post_query('POST', 'application/x-www-form-urlencoded', len(post_data), BytesIO(post_data)) +'foo=bar&dir=/baz' + +# unsupported method +>>> extract_post_query('PUT', 'application/x-www-form-urlencoded', len(post_data), BytesIO(post_data)) + +# unsupported type +>>> extract_post_query('POST', 'text/plain', len(post_data), BytesIO(post_data)) + +# invalid length +>>> extract_post_query('POST', 'application/x-www-form-urlencoded', 'abc', BytesIO(post_data)) +>>> extract_post_query('POST', 'application/x-www-form-urlencoded', 0, BytesIO(post_data)) + +# length too short +>>> extract_post_query('POST', 'application/x-www-form-urlencoded', len(post_data) - 4, BytesIO(post_data)) +'foo=bar&dir=%2' + +# length too long +>>> extract_post_query('POST', 'application/x-www-form-urlencoded', len(post_data) + 4, BytesIO(post_data)) +'foo=bar&dir=/baz' """ @@ -64,7 +90,7 @@ import re import os from io import BytesIO from pywb.utils.loaders import BlockLoader, HMACCookieMaker, to_file_url -from pywb.utils.loaders import LimitReader, extract_client_cookie +from pywb.utils.loaders import LimitReader, extract_client_cookie, extract_post_query from pywb import get_test_dir From 7ece05d02295066f5f2dc23f13a67ec01991d851 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Mon, 12 Jan 2015 00:09:02 -0800 Subject: [PATCH 16/17] bump version to 0.7.5 update CHANGES fix .gitattributes to use standard flags --- .gitattributes | 8 ++++---- CHANGES.rst | 9 ++++++++- README.rst | 4 ++-- setup.py | 2 +- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/.gitattributes b/.gitattributes 
index 322dea0e..8e74ef52 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,4 +1,4 @@ -*.arc binary -*.warc binary -*.cdx binary -*.gz binary +*.arc -text +*.warc -text +*.cdx -text +*.gz -text diff --git a/CHANGES.rst b/CHANGES.rst index 55e328b2..7d7e6d7c 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,13 @@ -pywb 0.7.3 changelist +pywb 0.7.5 changelist ~~~~~~~~~~~~~~~~~~~~~ +* Cross platform fixes to support Windows -- all tests pass on Linux, OS X and Windows now. Improved cross-platform support includes: + - read all files as binary to avoid line ending issues + - properly convert url <-> file + - avoid platform dependent apis + +* Change any unhandled exceptions to result in a 500 error, instead of 400. + * More compresensive client side ``src`` attribute rewriting (via wombat.js), additional server-side HTML tag rewriting. diff --git a/README.rst b/README.rst index cf81f3f8..17162d5a 100644 --- a/README.rst +++ b/README.rst @@ -1,4 +1,4 @@ -PyWb 0.7.3 +PyWb 0.7.5 ========== .. image:: https://travis-ci.org/ikreymer/pywb.png?branch=develop @@ -13,7 +13,7 @@ pywb is a python implementation of web archival replay tools, sometimes also kno pywb allows high-quality replay (browsing) of archived web data stored in standardized `ARC `_ and `WARC `_. The replay system is designed to accurately replay complex dynamic sites, including video and audio content. -pywb can be used as a traditional web application or an HTTP or HTTPS proxy server. +pywb can be used as a traditional web application or an HTTP or HTTPS proxy server, and has been tested on Linux, OS X and Windows platforms. pywb is also fully compliant with the `Memento `_ protocol (`RFC-7089 `_). 
diff --git a/setup.py b/setup.py index 8d8b0ab8..e53c340e 100755 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ class PyTest(TestCommand): setup( name='pywb', - version='0.7.3', + version='0.7.5', url='https://github.com/ikreymer/pywb', author='Ilya Kreymer', author_email='ikreymer@gmail.com', From 43805c67efabed117bb97be8ddf045be1d287a48 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Mon, 12 Jan 2015 00:28:06 -0800 Subject: [PATCH 17/17] view: fix format_ts, use existing utc timestamp_to_sec conversion for %s --- pywb/webapp/test/test_view_filters.py | 8 ++++---- pywb/webapp/views.py | 7 +++---- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/pywb/webapp/test/test_view_filters.py b/pywb/webapp/test/test_view_filters.py index 1df32d5f..f82ea6e7 100644 --- a/pywb/webapp/test/test_view_filters.py +++ b/pywb/webapp/test/test_view_filters.py @@ -1,9 +1,9 @@ """ ->>> format_ts('201412261010') -'Fri, Dec 26 2014 10:10:59' +>>> format_ts('20141226101000') +'Fri, Dec 26 2014 10:10:00' ->>> format_ts('201412261010', '%s') -1419617459000 +>>> format_ts('20141226101000', '%s') +1419588600 >>> is_wb_handler(DebugEchoHandler()) False diff --git a/pywb/webapp/views.py b/pywb/webapp/views.py index 1f3a7e4b..23c528e2 100644 --- a/pywb/webapp/views.py +++ b/pywb/webapp/views.py @@ -1,10 +1,9 @@ -from pywb.utils.timeutils import timestamp_to_datetime +from pywb.utils.timeutils import timestamp_to_datetime, timestamp_to_sec from pywb.framework.wbrequestresponse import WbResponse from pywb.framework.memento import make_timemap, LINK_FORMAT import urlparse import logging -import time from os import path from itertools import imap @@ -38,10 +37,10 @@ class template_filter(object): # Filters @template_filter() def format_ts(value, format_='%a, %b %d %Y %H:%M:%S'): - value = timestamp_to_datetime(value) if format_ == '%s': - return int(time.mktime(value.timetuple()) * 1000) + return timestamp_to_sec(value) else: + value = timestamp_to_datetime(value) return 
value.strftime(format_)