diff --git a/pywb/warc/cdxindexer.py b/pywb/warc/cdxindexer.py index 60dd5ad5..aa432f38 100644 --- a/pywb/warc/cdxindexer.py +++ b/pywb/warc/cdxindexer.py @@ -116,7 +116,7 @@ def write_multi_cdx_index(output, inputs, **options): outpath = os.path.join(output, outpath) with open(outpath, 'w') as outfile: - with open(fullpath, 'r') as infile: + with open(fullpath, 'rb') as infile: write_cdx_index(outfile, infile, filename, **options) # write to one cdx file @@ -133,7 +133,7 @@ def write_multi_cdx_index(output, inputs, **options): with writer_cls(outfile, options.get('cdx09')) as writer: for fullpath, filename in iter_file_or_dir(inputs): - with open(fullpath, 'r') as infile: + with open(fullpath, 'rb') as infile: entry_iter = create_index_iter(infile, **options) for entry in entry_iter: diff --git a/pywb/warc/pathresolvers.py b/pywb/warc/pathresolvers.py index 469fbfb1..2d1f7439 100644 --- a/pywb/warc/pathresolvers.py +++ b/pywb/warc/pathresolvers.py @@ -3,6 +3,7 @@ import redis from pywb.utils.binsearch import iter_exact import urlparse +import urllib import os import logging @@ -92,6 +93,7 @@ def make_best_resolver(param): if url_parts.scheme == 'file': path = url_parts.path + path = urllib.url2pathname(path) if os.path.isfile(path): logging.debug('Adding Path Index: ' + path) diff --git a/pywb/warc/test/test_pathresolvers.py b/pywb/warc/test/test_pathresolvers.py index 923c0ce1..f60eafaa 100644 --- a/pywb/warc/test/test_pathresolvers.py +++ b/pywb/warc/test/test_pathresolvers.py @@ -33,13 +33,13 @@ PrefixResolver('http://myhost.example.com/warcs/', contains = '/') RedisResolver('redis://myhost.example.com:1234/1') # a file ->>> r = make_best_resolver('file://' + os.path.realpath(__file__)) +>>> r = make_best_resolver(to_local_url(os.path.realpath(__file__))) >>> r.__class__.__name__ 'PathIndexResolver' # a dir >>> path = os.path.realpath(__file__) ->>> r = make_best_resolver('file://' + os.path.dirname(path)) +>>> r = make_best_resolver(to_local_url(os.path.dirname(path))) >>> r.__class__.__name__ 'PrefixResolver' @@ -56,6 +56,7 @@ from pywb.warc.pathresolvers import PrefixResolver, PathIndexResolver, RedisReso from pywb.warc.pathresolvers import make_best_resolver, make_best_resolvers import os +from urllib import pathname2url from fakeredis import FakeStrictRedis from mock import patch @@ -68,6 +69,11 @@ def init_redis_resolver(): def hset_path(filename, path): redis_resolver.redis.hset(redis_resolver.key_prefix + filename, 'path', path) +def to_local_url(filename): + filename = os.path.abspath(filename) + res = 'file:' + pathname2url(filename) + #print(res) + return res redis_resolver = init_redis_resolver() diff --git a/pywb/webapp/views.py b/pywb/webapp/views.py index 11e78618..4d9cdd6d 100644 --- a/pywb/webapp/views.py +++ b/pywb/webapp/views.py @@ -4,6 +4,7 @@ from pywb.framework.memento import make_timemap, LINK_FORMAT import urlparse import logging +import time from os import path from itertools import imap @@ -42,7 +43,10 @@ class template_filter(object): @template_filter def format_ts(value, format_='%a, %b %d %Y %H:%M:%S'): value = timestamp_to_datetime(value) - return value.strftime(format_) + if format_ == '%s': + return int(time.mktime(value.timetuple()) * 1000) + else: + return value.strftime(format_) @template_filter('urlsplit')