mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
cdx-indexer: minor cleanup, add custom writer override to
write_multi_cdx_index
This commit is contained in:
parent
ef98716bd8
commit
40fba3c27b
@ -5,6 +5,8 @@ pywb 0.7.7 changelist
|
|||||||
|
|
||||||
* rules: fix YT rewrite rule, add rule for wikimedia
|
* rules: fix YT rewrite rule, add rule for wikimedia
|
||||||
|
|
||||||
|
* cdx-indexer: minor cleanup, add support for custom writer for batched cdx (write_multi_cdx_index)
|
||||||
|
|
||||||
|
|
||||||
pywb 0.7.6 changelist
|
pywb 0.7.6 changelist
|
||||||
~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
@ -107,6 +107,19 @@ def cdx_filename(filename):
|
|||||||
return remove_ext(filename) + '.cdx'
|
return remove_ext(filename) + '.cdx'
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
def get_cdx_writer_cls(options):
|
||||||
|
writer_cls = options.get('writer_cls')
|
||||||
|
|
||||||
|
if not writer_cls:
|
||||||
|
if options.get('sort'):
|
||||||
|
writer_cls = SortedCDXWriter
|
||||||
|
else:
|
||||||
|
writer_cls = CDXWriter
|
||||||
|
|
||||||
|
return writer_cls
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
def write_multi_cdx_index(output, inputs, **options):
|
def write_multi_cdx_index(output, inputs, **options):
|
||||||
# write one cdx per dir
|
# write one cdx per dir
|
||||||
@ -117,7 +130,7 @@ def write_multi_cdx_index(output, inputs, **options):
|
|||||||
|
|
||||||
with open(outpath, 'wb') as outfile:
|
with open(outpath, 'wb') as outfile:
|
||||||
with open(fullpath, 'rb') as infile:
|
with open(fullpath, 'rb') as infile:
|
||||||
write_cdx_index(outfile, infile, filename, **options)
|
return write_cdx_index(outfile, infile, filename, **options)
|
||||||
|
|
||||||
# write to one cdx file
|
# write to one cdx file
|
||||||
else:
|
else:
|
||||||
@ -126,10 +139,7 @@ def write_multi_cdx_index(output, inputs, **options):
|
|||||||
else:
|
else:
|
||||||
outfile = open(output, 'wb')
|
outfile = open(output, 'wb')
|
||||||
|
|
||||||
if options.get('sort'):
|
writer_cls = get_cdx_writer_cls(options)
|
||||||
writer_cls = SortedCDXWriter
|
|
||||||
else:
|
|
||||||
writer_cls = CDXWriter
|
|
||||||
|
|
||||||
with writer_cls(outfile, options.get('cdx09')) as writer:
|
with writer_cls(outfile, options.get('cdx09')) as writer:
|
||||||
for fullpath, filename in iter_file_or_dir(inputs):
|
for fullpath, filename in iter_file_or_dir(inputs):
|
||||||
@ -139,20 +149,15 @@ def write_multi_cdx_index(output, inputs, **options):
|
|||||||
for entry in entry_iter:
|
for entry in entry_iter:
|
||||||
writer.write(entry, filename)
|
writer.write(entry, filename)
|
||||||
|
|
||||||
|
return writer
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
def write_cdx_index(outfile, infile, filename, **options):
|
def write_cdx_index(outfile, infile, filename, **options):
|
||||||
writer_cls = options.get('writer_cls')
|
|
||||||
|
|
||||||
if type(filename) is unicode:
|
if type(filename) is unicode:
|
||||||
filename = filename.encode(sys.getfilesystemencoding())
|
filename = filename.encode(sys.getfilesystemencoding())
|
||||||
|
|
||||||
if writer_cls:
|
writer_cls = get_cdx_writer_cls(options)
|
||||||
pass
|
|
||||||
elif options.get('sort'):
|
|
||||||
writer_cls = SortedCDXWriter
|
|
||||||
else:
|
|
||||||
writer_cls = CDXWriter
|
|
||||||
|
|
||||||
with writer_cls(outfile, options.get('cdx09')) as writer:
|
with writer_cls(outfile, options.get('cdx09')) as writer:
|
||||||
entry_iter = create_index_iter(infile, **options)
|
entry_iter = create_index_iter(infile, **options)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user