1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-28 00:25:21 +01:00

autoindex and cli: add autoindex to cli with 'wayback -a' option, #81

This commit is contained in:
Ilya Kreymer 2015-03-22 23:03:39 -07:00
parent e8db31d066
commit ae363ad368
4 changed files with 44 additions and 12 deletions

View File

@ -1,22 +1,38 @@
import os
import logging
from argparse import ArgumentParser, RawTextHelpFormatter
#================================================================= #=================================================================
def wayback(args=None): def wayback(args=None):
from argparse import ArgumentParser, RawTextHelpFormatter
parser = ArgumentParser('pywb Wayback Web Archive Replay') parser = ArgumentParser('pywb Wayback Web Archive Replay')
parser.add_argument('-p', '--port', type=int, default=8080) parser.add_argument('-p', '--port', type=int, default=8080)
parser.add_argument('-t', '--threads', type=int, default=4) parser.add_argument('-t', '--threads', type=int, default=4)
parser.add_argument('-a', '--autoindex', action='store_true')
help_dir='Specify root archive dir (default is current working directory)' help_dir='Specify root archive dir (default is current working directory)'
parser.add_argument('-d', '--directory', help=help_dir) parser.add_argument('-d', '--directory', help=help_dir)
r = parser.parse_args(args) r = parser.parse_args(args)
if r.directory: #pragma: no cover if r.directory: #pragma: no cover
import os
os.chdir(r.directory) os.chdir(r.directory)
# Load App # Load App
from pywb.apps.wayback import application from pywb.apps.wayback import application
if r.autoindex:
from pywb.manager.manager import CollectionsManager
m = CollectionsManager('', must_exist=False)
if not os.path.isdir(m.colls_dir):
msg = 'No managed directory "{0}" for auto-indexing'
logging.error(msg.format(m.colls_dir))
import sys
sys.exit(2)
else:
msg = 'Auto-Indexing Enabled on "{0}"'
logging.info(msg.format(m.colls_dir))
m.autoindex(do_loop=False)
try: try:
from waitress import serve from waitress import serve
serve(application, port=r.port, threads=r.threads) serve(application, port=r.port, threads=r.threads)

View File

@ -23,11 +23,12 @@ class CDXAutoIndexer(RegexMatchingEventHandler):
def on_modified(self, event):
    """Forward the path of a modified entry to the updater callback.

    :param event: event object exposing a ``src_path`` attribute
        (presumably a watchdog filesystem event, since the enclosing
        class derives from ``RegexMatchingEventHandler`` -- confirm).
    """
    self.updater(event.src_path)
def start_watch(self):
    """Create and start an observer watching ``self.cdx_path``.

    The observer is stored on ``self.observer`` (rather than in a local
    variable) so that it remains reachable after this method returns.
    This handler instance is scheduled for ``self.cdx_path`` with
    ``recursive=True``.

    This method only starts the observer; the keep-alive loop lives in
    ``do_loop`` so callers that have their own main loop (e.g. a web
    server) can skip it.
    """
    self.observer = Observer()
    self.observer.schedule(self, self.cdx_path, recursive=True)
    self.observer.start()
def do_loop(self, sleep_time=1):
try: try:
while keep_running: while keep_running:
time.sleep(sleep_time) time.sleep(sleep_time)

View File

@ -318,7 +318,7 @@ directory structure expected by pywb
migrate.convert_to_cdxj() migrate.convert_to_cdxj()
def autoindex(self): def autoindex(self, do_loop=True):
from autoindex import CDXAutoIndexer from autoindex import CDXAutoIndexer
if self.coll_name: if self.coll_name:
@ -340,7 +340,9 @@ directory structure expected by pywb
indexer = CDXAutoIndexer(do_index, path) indexer = CDXAutoIndexer(do_index, path)
indexer.do_watch() indexer.start_watch()
if do_loop:
indexer.do_loop()
#============================================================================= #=============================================================================
@ -455,7 +457,7 @@ Create manage file based web archive collections
# Auto Index # Auto Index
def do_autoindex(r):
    """Run auto-indexing for the collection named in the parsed args.

    :param r: parsed arguments object; only ``r.coll_name`` is read.

    The manager is created with ``must_exist=False`` and asked to
    auto-index with the blocking loop enabled.
    """
    m = CollectionsManager(r.coll_name, must_exist=False)
    # Pass the flag by keyword so the intent is readable at the call
    # site, matching the ``autoindex(do_loop=False)`` call in the cli.
    m.autoindex(do_loop=True)
autoindex_help = 'Automatically index any change archive files' autoindex_help = 'Automatically index any change archive files'
autoindex = subparsers.add_parser('autoindex', help=autoindex_help) autoindex = subparsers.add_parser('autoindex', help=autoindex_help)

View File

@ -13,6 +13,8 @@ from io import BytesIO
from pywb.webapp.pywb_init import create_wb_router from pywb.webapp.pywb_init import create_wb_router
from pywb.manager.manager import main from pywb.manager.manager import main
import pywb.manager.autoindex
from pywb.warc.cdxindexer import main as cdxindexer_main from pywb.warc.cdxindexer import main as cdxindexer_main
from pywb import get_test_dir from pywb import get_test_dir
@ -73,10 +75,21 @@ class TestManagedColls(object):
@patch('waitress.serve', lambda *args, **kwargs: None)
def test_run_cli(self):
    """ test new wayback cli interface
    test autoindex error before collections inited
    """
    from pywb.apps.cli import wayback

    # waitress.serve is patched to a no-op, so this returns right away
    wayback([])

    # Nothing to auto-index.. yet
    with raises(SystemExit) as exc_info:
        wayback(['-a'])

    # the cli calls sys.exit(2) when the managed directory is missing
    assert exc_info.value.code == 2

    colls = os.path.join(self.root_dir, 'collections')
    os.mkdir(colls)

    # keep_running is module-level state shared with other tests:
    # remember the previous value and put it back afterwards so this
    # test does not depend on (or alter) test ordering.
    previous = pywb.manager.autoindex.keep_running
    pywb.manager.autoindex.keep_running = False
    try:
        wayback(['-a'])
    finally:
        pywb.manager.autoindex.keep_running = previous
def test_create_first_coll(self): def test_create_first_coll(self):
""" Test first collection creation, with all required dirs """ Test first collection creation, with all required dirs
""" """
@ -452,6 +465,8 @@ class TestManagedColls(object):
archive_sub_dir = os.path.join(archive_dir, 'sub') archive_sub_dir = os.path.join(archive_dir, 'sub')
os.makedirs(archive_sub_dir) os.makedirs(archive_sub_dir)
pywb.manager.autoindex.keep_running = True
def do_copy(): def do_copy():
try: try:
time.sleep(1) time.sleep(1)
@ -459,7 +474,6 @@ class TestManagedColls(object):
shutil.copy(self._get_sample_warc('example-extra.warc'), archive_sub_dir) shutil.copy(self._get_sample_warc('example-extra.warc'), archive_sub_dir)
time.sleep(1) time.sleep(1)
finally: finally:
import pywb.manager.autoindex
pywb.manager.autoindex.keep_running = False pywb.manager.autoindex.keep_running = False
thread = threading.Thread(target=do_copy) thread = threading.Thread(target=do_copy)
@ -480,7 +494,6 @@ class TestManagedColls(object):
mtime = os.path.getmtime(index_file) mtime = os.path.getmtime(index_file)
# Update # Update
import pywb.manager.autoindex
pywb.manager.autoindex.keep_running = True pywb.manager.autoindex.keep_running = True
os.remove(index_file) os.remove(index_file)