diff --git a/pywb/warc/cdxindexer.py b/pywb/warc/cdxindexer.py index 585b5711..60dd5ad5 100644 --- a/pywb/warc/cdxindexer.py +++ b/pywb/warc/cdxindexer.py @@ -109,7 +109,6 @@ def cdx_filename(filename): #================================================================= def write_multi_cdx_index(output, inputs, **options): - # write one cdx per dir if output != '-' and os.path.isdir(output): for fullpath, filename in iter_file_or_dir(inputs): @@ -145,6 +144,9 @@ def write_multi_cdx_index(output, inputs, **options): def write_cdx_index(outfile, infile, filename, **options): writer_cls = options.get('writer_cls') + if type(filename) is unicode: + filename = filename.encode(sys.getfilesystemencoding()) + if writer_cls: pass elif options.get('sort'): diff --git a/pywb/warc/test/test_indexing.py b/pywb/warc/test/test_indexing.py index 88a3d3ff..b90e9d65 100644 --- a/pywb/warc/test/test_indexing.py +++ b/pywb/warc/test/test_indexing.py @@ -130,8 +130,8 @@ com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 20 org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 example.warc.gz 4 -# test writing to temp dir ->>> cli_lines_with_dir(TEST_WARC_DIR + 'example.warc.gz') +# test writing to temp dir, also use unicode filename +>>> cli_lines_with_dir(unicode(TEST_WARC_DIR + 'example.warc.gz')) example.cdx com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1043 333 example.warc.gz org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 example.warc.gz