1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

autoindex and cli: add autoindex to cli with 'wayback -a' option, #81

This commit is contained in:
Ilya Kreymer 2015-03-22 23:03:39 -07:00
parent e8db31d066
commit ae363ad368
4 changed files with 44 additions and 12 deletions

View File

@ -1,22 +1,38 @@
import os
import logging
from argparse import ArgumentParser, RawTextHelpFormatter
#=================================================================
def wayback(args=None):
from argparse import ArgumentParser, RawTextHelpFormatter
parser = ArgumentParser('pywb Wayback Web Archive Replay')
parser.add_argument('-p', '--port', type=int, default=8080)
parser.add_argument('-t', '--threads', type=int, default=4)
parser.add_argument('-a', '--autoindex', action='store_true')
help_dir='Specify root archive dir (default is current working directory)'
parser.add_argument('-d', '--directory', help=help_dir)
r = parser.parse_args(args)
if r.directory: #pragma: no cover
import os
os.chdir(r.directory)
# Load App
from pywb.apps.wayback import application
if r.autoindex:
from pywb.manager.manager import CollectionsManager
m = CollectionsManager('', must_exist=False)
if not os.path.isdir(m.colls_dir):
msg = 'No managed directory "{0}" for auto-indexing'
logging.error(msg.format(m.colls_dir))
import sys
sys.exit(2)
else:
msg = 'Auto-Indexing Enabled on "{0}"'
logging.info(msg.format(m.colls_dir))
m.autoindex(do_loop=False)
try:
from waitress import serve
serve(application, port=r.port, threads=r.threads)

View File

@ -23,11 +23,12 @@ class CDXAutoIndexer(RegexMatchingEventHandler):
def on_modified(self, event):
self.updater(event.src_path)
def do_watch(self, sleep_time=1):
observer = Observer()
observer.schedule(self, self.cdx_path, recursive=True)
observer.start()
def start_watch(self):
    """ Create an Observer, register this handler on self.cdx_path
    (recursively) and start it; the observer is kept on self.observer
    """
    watcher = self.observer = Observer()
    watcher.schedule(self, self.cdx_path, recursive=True)
    watcher.start()
def do_loop(self, sleep_time=1):
try:
while keep_running:
time.sleep(sleep_time)

View File

@ -318,7 +318,7 @@ directory structure expected by pywb
migrate.convert_to_cdxj()
def autoindex(self):
def autoindex(self, do_loop=True):
from autoindex import CDXAutoIndexer
if self.coll_name:
@ -340,7 +340,9 @@ directory structure expected by pywb
indexer = CDXAutoIndexer(do_index, path)
indexer.do_watch()
indexer.start_watch()
if do_loop:
indexer.do_loop()
#=============================================================================
@ -455,7 +457,7 @@ Create manage file based web archive collections
# Auto Index
def do_autoindex(r):
m = CollectionsManager(r.coll_name, must_exist=False)
m.autoindex()
m.autoindex(True)
autoindex_help = 'Automatically index any change archive files'
autoindex = subparsers.add_parser('autoindex', help=autoindex_help)

View File

@ -13,6 +13,8 @@ from io import BytesIO
from pywb.webapp.pywb_init import create_wb_router
from pywb.manager.manager import main
import pywb.manager.autoindex
from pywb.warc.cdxindexer import main as cdxindexer_main
from pywb import get_test_dir
@ -73,10 +75,21 @@ class TestManagedColls(object):
@patch('waitress.serve', lambda *args, **kwargs: None)
def test_run_cli(self):
    """ Run the wayback cli entry point with waitress.serve stubbed out:
    first with no args, then with '-a' before and after a 'collections'
    dir is created under the test root
    """
    from pywb.apps.cli import wayback

    # plain start-up, auto-indexing not requested
    wayback([])

    # Nothing to auto-index.. yet, so '-a' is expected to exit
    with raises(SystemExit):
        wayback(['-a'])

    os.mkdir(os.path.join(self.root_dir, 'collections'))

    # clear the module-level autoindex flag before retrying with '-a'
    pywb.manager.autoindex.keep_running = False

    wayback(['-a'])
def test_create_first_coll(self):
""" Test first collection creation, with all required dirs
"""
@ -452,6 +465,8 @@ class TestManagedColls(object):
archive_sub_dir = os.path.join(archive_dir, 'sub')
os.makedirs(archive_sub_dir)
pywb.manager.autoindex.keep_running = True
def do_copy():
try:
time.sleep(1)
@ -459,7 +474,6 @@ class TestManagedColls(object):
shutil.copy(self._get_sample_warc('example-extra.warc'), archive_sub_dir)
time.sleep(1)
finally:
import pywb.manager.autoindex
pywb.manager.autoindex.keep_running = False
thread = threading.Thread(target=do_copy)
@ -480,7 +494,6 @@ class TestManagedColls(object):
mtime = os.path.getmtime(index_file)
# Update
import pywb.manager.autoindex
pywb.manager.autoindex.keep_running = True
os.remove(index_file)