mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
autoindex and cli: add autoindex to cli with 'wayback -a' option, #81
This commit is contained in:
parent
e8db31d066
commit
ae363ad368
@ -1,22 +1,38 @@
|
||||
import os
|
||||
import logging
|
||||
from argparse import ArgumentParser, RawTextHelpFormatter
|
||||
|
||||
|
||||
#=================================================================
|
||||
def wayback(args=None):
|
||||
from argparse import ArgumentParser, RawTextHelpFormatter
|
||||
|
||||
parser = ArgumentParser('pywb Wayback Web Archive Replay')
|
||||
parser.add_argument('-p', '--port', type=int, default=8080)
|
||||
parser.add_argument('-t', '--threads', type=int, default=4)
|
||||
parser.add_argument('-a', '--autoindex', action='store_true')
|
||||
|
||||
help_dir='Specify root archive dir (default is current working directory)'
|
||||
parser.add_argument('-d', '--directory', help=help_dir)
|
||||
|
||||
r = parser.parse_args(args)
|
||||
if r.directory: #pragma: no cover
|
||||
import os
|
||||
os.chdir(r.directory)
|
||||
|
||||
# Load App
|
||||
from pywb.apps.wayback import application
|
||||
|
||||
if r.autoindex:
|
||||
from pywb.manager.manager import CollectionsManager
|
||||
m = CollectionsManager('', must_exist=False)
|
||||
if not os.path.isdir(m.colls_dir):
|
||||
msg = 'No managed directory "{0}" for auto-indexing'
|
||||
logging.error(msg.format(m.colls_dir))
|
||||
import sys
|
||||
sys.exit(2)
|
||||
else:
|
||||
msg = 'Auto-Indexing Enabled on "{0}"'
|
||||
logging.info(msg.format(m.colls_dir))
|
||||
m.autoindex(do_loop=False)
|
||||
|
||||
try:
|
||||
from waitress import serve
|
||||
serve(application, port=r.port, threads=r.threads)
|
||||
|
@ -23,11 +23,12 @@ class CDXAutoIndexer(RegexMatchingEventHandler):
|
||||
def on_modified(self, event):
|
||||
self.updater(event.src_path)
|
||||
|
||||
def do_watch(self, sleep_time=1):
|
||||
observer = Observer()
|
||||
observer.schedule(self, self.cdx_path, recursive=True)
|
||||
observer.start()
|
||||
def start_watch(self):
|
||||
self.observer = Observer()
|
||||
self.observer.schedule(self, self.cdx_path, recursive=True)
|
||||
self.observer.start()
|
||||
|
||||
def do_loop(self, sleep_time=1):
|
||||
try:
|
||||
while keep_running:
|
||||
time.sleep(sleep_time)
|
||||
|
@ -318,7 +318,7 @@ directory structure expected by pywb
|
||||
|
||||
migrate.convert_to_cdxj()
|
||||
|
||||
def autoindex(self):
|
||||
def autoindex(self, do_loop=True):
|
||||
from autoindex import CDXAutoIndexer
|
||||
|
||||
if self.coll_name:
|
||||
@ -340,7 +340,9 @@ directory structure expected by pywb
|
||||
|
||||
|
||||
indexer = CDXAutoIndexer(do_index, path)
|
||||
indexer.do_watch()
|
||||
indexer.start_watch()
|
||||
if do_loop:
|
||||
indexer.do_loop()
|
||||
|
||||
|
||||
#=============================================================================
|
||||
@ -455,7 +457,7 @@ Create manage file based web archive collections
|
||||
# Auto Index
|
||||
def do_autoindex(r):
|
||||
m = CollectionsManager(r.coll_name, must_exist=False)
|
||||
m.autoindex()
|
||||
m.autoindex(True)
|
||||
|
||||
autoindex_help = 'Automatically index any change archive files'
|
||||
autoindex = subparsers.add_parser('autoindex', help=autoindex_help)
|
||||
|
@ -13,6 +13,8 @@ from io import BytesIO
|
||||
from pywb.webapp.pywb_init import create_wb_router
|
||||
from pywb.manager.manager import main
|
||||
|
||||
import pywb.manager.autoindex
|
||||
|
||||
from pywb.warc.cdxindexer import main as cdxindexer_main
|
||||
|
||||
from pywb import get_test_dir
|
||||
@ -73,10 +75,21 @@ class TestManagedColls(object):
|
||||
@patch('waitress.serve', lambda *args, **kwargs: None)
|
||||
def test_run_cli(self):
|
||||
""" test new wayback cli interface
|
||||
test autoindex error before collections inited
|
||||
"""
|
||||
from pywb.apps.cli import wayback
|
||||
wayback([])
|
||||
|
||||
# Nothing to auto-index.. yet
|
||||
with raises(SystemExit):
|
||||
wayback(['-a'])
|
||||
|
||||
colls = os.path.join(self.root_dir, 'collections')
|
||||
os.mkdir(colls)
|
||||
|
||||
pywb.manager.autoindex.keep_running = False
|
||||
wayback(['-a'])
|
||||
|
||||
def test_create_first_coll(self):
|
||||
""" Test first collection creation, with all required dirs
|
||||
"""
|
||||
@ -452,6 +465,8 @@ class TestManagedColls(object):
|
||||
archive_sub_dir = os.path.join(archive_dir, 'sub')
|
||||
os.makedirs(archive_sub_dir)
|
||||
|
||||
pywb.manager.autoindex.keep_running = True
|
||||
|
||||
def do_copy():
|
||||
try:
|
||||
time.sleep(1)
|
||||
@ -459,7 +474,6 @@ class TestManagedColls(object):
|
||||
shutil.copy(self._get_sample_warc('example-extra.warc'), archive_sub_dir)
|
||||
time.sleep(1)
|
||||
finally:
|
||||
import pywb.manager.autoindex
|
||||
pywb.manager.autoindex.keep_running = False
|
||||
|
||||
thread = threading.Thread(target=do_copy)
|
||||
@ -480,7 +494,6 @@ class TestManagedColls(object):
|
||||
mtime = os.path.getmtime(index_file)
|
||||
|
||||
# Update
|
||||
import pywb.manager.autoindex
|
||||
pywb.manager.autoindex.keep_running = True
|
||||
|
||||
os.remove(index_file)
|
||||
|
Loading…
x
Reference in New Issue
Block a user