1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

tests: add tests for recursive cdx indexing, #64

cross-platform: store relative filename paths using '/' as the separator, but convert
to os.path.sep when resolving to a full path with the prefix
This commit is contained in:
Ilya Kreymer 2015-02-20 13:56:35 -08:00
parent 8d52be4c44
commit c0ff596c68
3 changed files with 9 additions and 0 deletions

View File

@ -98,6 +98,7 @@ def iter_file_or_dir(inputs, recursive=True):
if filename.endswith(ALLOWED_EXT):
full_path = os.path.join(root, filename)
rel_path = os.path.relpath(full_path, input_)
rel_path = rel_path.replace(os.path.sep, '/')
yield full_path, rel_path

View File

@ -28,6 +28,8 @@ class PrefixResolver:
self.contains = contains if contains else ''
def __call__(self, filename):
# use os path separator
filename = filename.replace('/', os.path.sep)
return [self.prefix + filename] if (self.contains in filename) else []
def __repr__(self):

View File

@ -116,6 +116,12 @@ com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYA
org,iana,example)/ 20130702195402 http://example.iana.org/ text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1001 353 example-url-agnostic-orig.warc.gz
Total: 206
# test sort, multiple inputs, recursive, from base test dir
>>> cli_lines(['--sort', '-r', '-', get_test_dir()])
com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 warcs/example-url-agnostic-revisit.warc.gz
org,iana,example)/ 20130702195402 http://example.iana.org/ text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1001 353 warcs/example-url-agnostic-orig.warc.gz
Total: 206
# test sort, multiple inputs, all records + post query
>>> cli_lines(['--sort', '-a', '-p', '-9', TEST_WARC_DIR])
com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - 355 example-url-agnostic-revisit.warc.gz