diff --git a/pywb/warc/cdxindexer.py b/pywb/warc/cdxindexer.py index 83ffef8d..58bc43a7 100644 --- a/pywb/warc/cdxindexer.py +++ b/pywb/warc/cdxindexer.py @@ -98,6 +98,7 @@ def iter_file_or_dir(inputs, recursive=True): if filename.endswith(ALLOWED_EXT): full_path = os.path.join(root, filename) rel_path = os.path.relpath(full_path, input_) + rel_path = rel_path.replace(os.path.sep, '/') yield full_path, rel_path diff --git a/pywb/warc/pathresolvers.py b/pywb/warc/pathresolvers.py index 6e710533..1d7bf71b 100644 --- a/pywb/warc/pathresolvers.py +++ b/pywb/warc/pathresolvers.py @@ -28,6 +28,8 @@ class PrefixResolver: self.contains = contains if contains else '' def __call__(self, filename): + # use os path separator + filename = filename.replace('/', os.path.sep) return [self.prefix + filename] if (self.contains in filename) else [] def __repr__(self): diff --git a/pywb/warc/test/test_indexing.py b/pywb/warc/test/test_indexing.py index 7e185a8c..e64595a3 100644 --- a/pywb/warc/test/test_indexing.py +++ b/pywb/warc/test/test_indexing.py @@ -116,6 +116,12 @@ com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYA org,iana,example)/ 20130702195402 http://example.iana.org/ text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1001 353 example-url-agnostic-orig.warc.gz Total: 206 +# test sort, multiple inputs, recursive, from base test dir +>>> cli_lines(['--sort', '-r', '-', get_test_dir()]) +com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 warcs/example-url-agnostic-revisit.warc.gz +org,iana,example)/ 20130702195402 http://example.iana.org/ text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1001 353 warcs/example-url-agnostic-orig.warc.gz +Total: 206 + # test sort, multiple inputs, all records + post query >>> cli_lines(['--sort', '-a', '-p', '-9', TEST_WARC_DIR]) com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - 355 
example-url-agnostic-revisit.warc.gz