From ae363ad368672614de32b0d55e16ba39b61fe9e3 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sun, 22 Mar 2015 23:03:39 -0700 Subject: [PATCH] autoindex and cli: add autoindex to cli with 'wayback -a' option, #81 --- pywb/apps/cli.py | 22 +++++++++++++++++++--- pywb/manager/autoindex.py | 9 +++++---- pywb/manager/manager.py | 8 +++++--- tests/test_auto_colls.py | 17 +++++++++++++++-- 4 files changed, 44 insertions(+), 12 deletions(-) diff --git a/pywb/apps/cli.py b/pywb/apps/cli.py index 2972f8af..d3905b83 100644 --- a/pywb/apps/cli.py +++ b/pywb/apps/cli.py @@ -1,22 +1,38 @@ +import os +import logging +from argparse import ArgumentParser, RawTextHelpFormatter + + #================================================================= def wayback(args=None): - from argparse import ArgumentParser, RawTextHelpFormatter - parser = ArgumentParser('pywb Wayback Web Archive Replay') parser.add_argument('-p', '--port', type=int, default=8080) parser.add_argument('-t', '--threads', type=int, default=4) + parser.add_argument('-a', '--autoindex', action='store_true') help_dir='Specify root archive dir (default is current working directory)' parser.add_argument('-d', '--directory', help=help_dir) r = parser.parse_args(args) if r.directory: #pragma: no cover - import os os.chdir(r.directory) # Load App from pywb.apps.wayback import application + if r.autoindex: + from pywb.manager.manager import CollectionsManager + m = CollectionsManager('', must_exist=False) + if not os.path.isdir(m.colls_dir): + msg = 'No managed directory "{0}" for auto-indexing' + logging.error(msg.format(m.colls_dir)) + import sys + sys.exit(2) + else: + msg = 'Auto-Indexing Enabled on "{0}"' + logging.info(msg.format(m.colls_dir)) + m.autoindex(do_loop=False) + try: from waitress import serve serve(application, port=r.port, threads=r.threads) diff --git a/pywb/manager/autoindex.py b/pywb/manager/autoindex.py index 0c182e5d..65003048 100644 --- a/pywb/manager/autoindex.py +++ b/pywb/manager/autoindex.py @@ -23,11 +23,12 @@ class CDXAutoIndexer(RegexMatchingEventHandler): def on_modified(self, event): self.updater(event.src_path) - def do_watch(self, sleep_time=1): - observer = Observer() - observer.schedule(self, self.cdx_path, recursive=True) - observer.start() + def start_watch(self): + self.observer = Observer() + self.observer.schedule(self, self.cdx_path, recursive=True) + self.observer.start() + def do_loop(self, sleep_time=1): try: while keep_running: time.sleep(sleep_time) diff --git a/pywb/manager/manager.py b/pywb/manager/manager.py index 4a360c4d..6a1e83fb 100644 --- a/pywb/manager/manager.py +++ b/pywb/manager/manager.py @@ -318,7 +318,7 @@ directory structure expected by pywb migrate.convert_to_cdxj() - def autoindex(self): + def autoindex(self, do_loop=True): from autoindex import CDXAutoIndexer if self.coll_name: @@ -340,7 +340,9 @@ directory structure expected by pywb indexer = CDXAutoIndexer(do_index, path) - indexer.do_watch() + indexer.start_watch() + if do_loop: + indexer.do_loop() #============================================================================= @@ -455,7 +457,7 @@ Create manage file based web archive collections # Auto Index def do_autoindex(r): m = CollectionsManager(r.coll_name, must_exist=False) - m.autoindex() + m.autoindex(True) autoindex_help = 'Automatically index any change archive files' autoindex = subparsers.add_parser('autoindex', help=autoindex_help) diff --git a/tests/test_auto_colls.py b/tests/test_auto_colls.py index 029fe12d..ff81b3f1 100644 --- a/tests/test_auto_colls.py +++ b/tests/test_auto_colls.py @@ -13,6 +13,8 @@ from io import BytesIO from pywb.webapp.pywb_init import create_wb_router from pywb.manager.manager import main +import pywb.manager.autoindex + from pywb.warc.cdxindexer import main as cdxindexer_main from pywb import get_test_dir @@ -73,10 +75,21 @@ class TestManagedColls(object): @patch('waitress.serve', lambda *args, **kwargs: None) def test_run_cli(self): """ test new wayback cli interface + test autoindex error before collections inited """ from pywb.apps.cli import wayback wayback([]) + # Nothing to auto-index.. yet + with raises(SystemExit): + wayback(['-a']) + + colls = os.path.join(self.root_dir, 'collections') + os.mkdir(colls) + + pywb.manager.autoindex.keep_running = False + wayback(['-a']) + def test_create_first_coll(self): """ Test first collection creation, with all required dirs """ @@ -452,6 +465,8 @@ class TestManagedColls(object): archive_sub_dir = os.path.join(archive_dir, 'sub') os.makedirs(archive_sub_dir) + pywb.manager.autoindex.keep_running = True + def do_copy(): try: time.sleep(1) @@ -459,7 +474,6 @@ class TestManagedColls(object): shutil.copy(self._get_sample_warc('example-extra.warc'), archive_sub_dir) time.sleep(1) finally: - import pywb.manager.autoindex pywb.manager.autoindex.keep_running = False thread = threading.Thread(target=do_copy) @@ -480,7 +494,6 @@ class TestManagedColls(object): mtime = os.path.getmtime(index_file) # Update - import pywb.manager.autoindex pywb.manager.autoindex.keep_running = True os.remove(index_file)