From 90eee03cdb0fb3938130c2edefc32ae85aa2fef6 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 25 Mar 2015 10:56:53 -0700 Subject: [PATCH] fixes for windows: indexing: ensure '/' always written to cdx autoindex: improved test case, ensure threads exit with join style: fix long lines --- pywb/manager/autoindex.py | 6 ++++-- pywb/manager/manager.py | 10 +++++++--- pywb/warc/cdxindexer.py | 15 +++++++++++---- tests/test_auto_colls.py | 6 +++++- 4 files changed, 27 insertions(+), 10 deletions(-) diff --git a/pywb/manager/autoindex.py b/pywb/manager/autoindex.py index 65003048..901c069e 100644 --- a/pywb/manager/autoindex.py +++ b/pywb/manager/autoindex.py @@ -33,8 +33,10 @@ class CDXAutoIndexer(RegexMatchingEventHandler): while keep_running: time.sleep(sleep_time) except KeyboardInterrupt: # pragma: no cover - observer.stop() - observer.join() + pass + finally: + self.observer.stop() + self.observer.join() #============================================================================= diff --git a/pywb/manager/manager.py b/pywb/manager/manager.py index 04114250..a2b9d9de 100644 --- a/pywb/manager/manager.py +++ b/pywb/manager/manager.py @@ -249,7 +249,8 @@ directory structure expected by pywb 'a collection name: template --{1} {0}') raise IOError(msg.format(template_name, verb)) - full_path = os.path.join(self.templates_dir, os.path.basename(filename)) + full_path = os.path.join(self.templates_dir, + os.path.basename(filename)) except KeyError: try: @@ -278,7 +279,8 @@ directory structure expected by pywb fh.write(data) full_path = os.path.abspath(full_path) - print('Copied default template "{0}" to "{1}"'.format(filename, full_path)) + msg = 'Copied default template "{0}" to "{1}"' + print(msg.format(filename, full_path)) def remove_template(self, template_name, force=False): full_path, filename = self._get_template_path(template_name, 'remove') @@ -330,7 +332,9 @@ directory structure expected by pywb def do_index(warc): if any_coll: - coll_name = 
warc.split(self.colls_dir + os.path.sep)[-1].split('/')[0] + coll_name = warc.split(self.colls_dir + os.path.sep) + coll_name = coll_name[-1].split(os.path.sep)[0] + if coll_name != self.coll_name: self._set_coll_dirs(coll_name) diff --git a/pywb/warc/cdxindexer.py b/pywb/warc/cdxindexer.py index 4c8aa377..06ef5176 100644 --- a/pywb/warc/cdxindexer.py +++ b/pywb/warc/cdxindexer.py @@ -143,6 +143,14 @@ class SortedCDXWriter(BaseCDXWriter): ALLOWED_EXT = ('.arc', '.arc.gz', '.warc', '.warc.gz') +#================================================================= +def _resolve_rel_path(path, rel_root): + path = os.path.relpath(path, rel_root) + if os.path.sep != '/': #pragma: no cover + path = path.replace(os.path.sep, '/') + return path + + #================================================================= def iter_file_or_dir(inputs, recursive=True, rel_root=None): for input_ in inputs: @@ -150,7 +158,7 @@ def iter_file_or_dir(inputs, recursive=True, rel_root=None): if not rel_root: filename = os.path.basename(input_) else: - filename = os.path.relpath(input_, rel_root) + filename = _resolve_rel_path(input_, rel_root) yield input_, filename @@ -159,7 +167,7 @@ def iter_file_or_dir(inputs, recursive=True, rel_root=None): if filename.endswith(ALLOWED_EXT): full_path = os.path.join(input_, filename) if rel_root: - filename = os.path.relpath(full_path, rel_root) + filename = _resolve_rel_path(full_path, rel_root) yield full_path, filename else: @@ -169,8 +177,7 @@ def iter_file_or_dir(inputs, recursive=True, rel_root=None): full_path = os.path.join(root, filename) if not rel_root: rel_root = input_ - rel_path = os.path.relpath(full_path, rel_root) - rel_path = rel_path.replace(os.path.sep, '/') + rel_path = _resolve_rel_path(full_path, rel_root) yield full_path, rel_path diff --git a/tests/test_auto_colls.py b/tests/test_auto_colls.py index d261c3aa..4b70a315 100644 --- a/tests/test_auto_colls.py +++ b/tests/test_auto_colls.py @@ -482,6 +482,8 @@ class 
TestManagedColls(object): main(['autoindex']) + thread.join() + index_file = os.path.join(auto_dir, INDEX_DIR, AUTOINDEX_FILE) assert os.path.isfile(index_file) @@ -504,7 +506,9 @@ class TestManagedColls(object): main(['autoindex', 'auto']) - # assert file was update + thread.join() + + # assert file was updated assert os.path.getmtime(index_file) > mtime def test_err_template_remove(self):