1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 06:59:52 +01:00

Fixes for Windows: convert between file URLs and local paths with url2pathname / pathname2url, use 'b' (binary mode) for reading WARCs, and don't use '%s' with strftime for timestamp conversion (not portable) (#56)
This commit is contained in:
Ilya Kreymer 2015-01-10 20:59:23 -08:00
parent 7f52ecdca9
commit ba853a4eae
4 changed files with 17 additions and 5 deletions

View File

@ -116,7 +116,7 @@ def write_multi_cdx_index(output, inputs, **options):
outpath = os.path.join(output, outpath) outpath = os.path.join(output, outpath)
with open(outpath, 'w') as outfile: with open(outpath, 'w') as outfile:
with open(fullpath, 'r') as infile: with open(fullpath, 'rb') as infile:
write_cdx_index(outfile, infile, filename, **options) write_cdx_index(outfile, infile, filename, **options)
# write to one cdx file # write to one cdx file
@ -133,7 +133,7 @@ def write_multi_cdx_index(output, inputs, **options):
with writer_cls(outfile, options.get('cdx09')) as writer: with writer_cls(outfile, options.get('cdx09')) as writer:
for fullpath, filename in iter_file_or_dir(inputs): for fullpath, filename in iter_file_or_dir(inputs):
with open(fullpath, 'r') as infile: with open(fullpath, 'rb') as infile:
entry_iter = create_index_iter(infile, **options) entry_iter = create_index_iter(infile, **options)
for entry in entry_iter: for entry in entry_iter:

View File

@ -3,6 +3,7 @@ import redis
from pywb.utils.binsearch import iter_exact from pywb.utils.binsearch import iter_exact
import urlparse import urlparse
import urllib
import os import os
import logging import logging
@ -92,6 +93,7 @@ def make_best_resolver(param):
if url_parts.scheme == 'file': if url_parts.scheme == 'file':
path = url_parts.path path = url_parts.path
path = urllib.url2pathname(path)
if os.path.isfile(path): if os.path.isfile(path):
logging.debug('Adding Path Index: ' + path) logging.debug('Adding Path Index: ' + path)

View File

@ -33,13 +33,13 @@ PrefixResolver('http://myhost.example.com/warcs/', contains = '/')
RedisResolver('redis://myhost.example.com:1234/1') RedisResolver('redis://myhost.example.com:1234/1')
# a file # a file
>>> r = make_best_resolver('file://' + os.path.realpath(__file__)) >>> r = make_best_resolver(to_local_url(os.path.realpath(__file__)))
>>> r.__class__.__name__ >>> r.__class__.__name__
'PathIndexResolver' 'PathIndexResolver'
# a dir # a dir
>>> path = os.path.realpath(__file__) >>> path = os.path.realpath(__file__)
>>> r = make_best_resolver('file://' + os.path.dirname(path)) >>> r = make_best_resolver(to_local_url(os.path.dirname(path)))
>>> r.__class__.__name__ >>> r.__class__.__name__
'PrefixResolver' 'PrefixResolver'
@ -56,6 +56,7 @@ from pywb.warc.pathresolvers import PrefixResolver, PathIndexResolver, RedisReso
from pywb.warc.pathresolvers import make_best_resolver, make_best_resolvers from pywb.warc.pathresolvers import make_best_resolver, make_best_resolvers
import os import os
from urllib import pathname2url
from fakeredis import FakeStrictRedis from fakeredis import FakeStrictRedis
from mock import patch from mock import patch
@ -68,6 +69,11 @@ def init_redis_resolver():
def hset_path(filename, path): def hset_path(filename, path):
redis_resolver.redis.hset(redis_resolver.key_prefix + filename, 'path', path) redis_resolver.redis.hset(redis_resolver.key_prefix + filename, 'path', path)
def to_local_url(filename):
    """Return a ``file:`` URL for the local path *filename*.

    The path is first made absolute with ``os.path.abspath`` and then
    converted with ``pathname2url``, so that platform-specific path syntax
    (e.g. Windows drive letters and backslashes) is turned into URL form
    rather than being concatenated verbatim after ``file://``.

    :param filename: a local filesystem path, absolute or relative
    :return: the string ``'file:'`` followed by the URL form of the
             absolute path
    """
    abs_path = os.path.abspath(filename)
    return 'file:' + pathname2url(abs_path)
redis_resolver = init_redis_resolver() redis_resolver = init_redis_resolver()

View File

@ -4,6 +4,7 @@ from pywb.framework.memento import make_timemap, LINK_FORMAT
import urlparse import urlparse
import logging import logging
import time
from os import path from os import path
from itertools import imap from itertools import imap
@ -42,7 +43,10 @@ class template_filter(object):
@template_filter @template_filter
def format_ts(value, format_='%a, %b %d %Y %H:%M:%S'): def format_ts(value, format_='%a, %b %d %Y %H:%M:%S'):
value = timestamp_to_datetime(value) value = timestamp_to_datetime(value)
return value.strftime(format_) if format_ == '%s':
return int(time.mktime(value.timetuple()) * 1000)
else:
return value.strftime(format_)
@template_filter('urlsplit') @template_filter('urlsplit')