mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
cdxindexing: encode unicode filenames using the filesystem encoding (sys.getfilesystemencoding()),
add test for unicode filenames
This commit is contained in:
parent
4d31c17d4c
commit
e513b3755c
@ -109,7 +109,6 @@ def cdx_filename(filename):
|
||||
|
||||
#=================================================================
|
||||
def write_multi_cdx_index(output, inputs, **options):
|
||||
|
||||
# write one cdx per dir
|
||||
if output != '-' and os.path.isdir(output):
|
||||
for fullpath, filename in iter_file_or_dir(inputs):
|
||||
@ -145,6 +144,9 @@ def write_multi_cdx_index(output, inputs, **options):
|
||||
def write_cdx_index(outfile, infile, filename, **options):
|
||||
writer_cls = options.get('writer_cls')
|
||||
|
||||
if type(filename) is unicode:
|
||||
filename = filename.encode(sys.getfilesystemencoding())
|
||||
|
||||
if writer_cls:
|
||||
pass
|
||||
elif options.get('sort'):
|
||||
|
@ -130,8 +130,8 @@ com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 20
|
||||
org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 example.warc.gz
|
||||
4
|
||||
|
||||
# test writing to temp dir
|
||||
>>> cli_lines_with_dir(TEST_WARC_DIR + 'example.warc.gz')
|
||||
# test writing to temp dir, also use unicode filename
|
||||
>>> cli_lines_with_dir(unicode(TEST_WARC_DIR + 'example.warc.gz'))
|
||||
example.cdx
|
||||
com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1043 333 example.warc.gz
|
||||
org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 example.warc.gz
|
||||
|
Loading…
x
Reference in New Issue
Block a user