1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

cdxindexing: encode unicode filenames using the filesystem encoding (sys.getfilesystemencoding());

add test for unicode filenames
This commit is contained in:
Ilya Kreymer 2014-07-23 15:27:01 -07:00
parent 4d31c17d4c
commit e513b3755c
2 changed files with 5 additions and 3 deletions

View File

@ -109,7 +109,6 @@ def cdx_filename(filename):
#=================================================================
def write_multi_cdx_index(output, inputs, **options):
# write one cdx per dir
if output != '-' and os.path.isdir(output):
for fullpath, filename in iter_file_or_dir(inputs):
@ -145,6 +144,9 @@ def write_multi_cdx_index(output, inputs, **options):
def write_cdx_index(outfile, infile, filename, **options):
writer_cls = options.get('writer_cls')
if type(filename) is unicode:
filename = filename.encode(sys.getfilesystemencoding())
if writer_cls:
pass
elif options.get('sort'):

View File

@ -130,8 +130,8 @@ com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 20
org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 example.warc.gz
4
# test writing to temp dir
>>> cli_lines_with_dir(TEST_WARC_DIR + 'example.warc.gz')
# test writing to temp dir, also use unicode filename
>>> cli_lines_with_dir(unicode(TEST_WARC_DIR + 'example.warc.gz'))
example.cdx
com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1043 333 example.warc.gz
org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 example.warc.gz