mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-28 00:25:21 +01:00
autoindex and cli: add autoindex to cli with 'wayback -a' option, #81
This commit is contained in:
parent
e8db31d066
commit
ae363ad368
@ -1,22 +1,38 @@
|
|||||||
|
import os
|
||||||
|
import logging
|
||||||
|
from argparse import ArgumentParser, RawTextHelpFormatter
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
def wayback(args=None):
|
def wayback(args=None):
|
||||||
from argparse import ArgumentParser, RawTextHelpFormatter
|
|
||||||
|
|
||||||
parser = ArgumentParser('pywb Wayback Web Archive Replay')
|
parser = ArgumentParser('pywb Wayback Web Archive Replay')
|
||||||
parser.add_argument('-p', '--port', type=int, default=8080)
|
parser.add_argument('-p', '--port', type=int, default=8080)
|
||||||
parser.add_argument('-t', '--threads', type=int, default=4)
|
parser.add_argument('-t', '--threads', type=int, default=4)
|
||||||
|
parser.add_argument('-a', '--autoindex', action='store_true')
|
||||||
|
|
||||||
help_dir='Specify root archive dir (default is current working directory)'
|
help_dir='Specify root archive dir (default is current working directory)'
|
||||||
parser.add_argument('-d', '--directory', help=help_dir)
|
parser.add_argument('-d', '--directory', help=help_dir)
|
||||||
|
|
||||||
r = parser.parse_args(args)
|
r = parser.parse_args(args)
|
||||||
if r.directory: #pragma: no cover
|
if r.directory: #pragma: no cover
|
||||||
import os
|
|
||||||
os.chdir(r.directory)
|
os.chdir(r.directory)
|
||||||
|
|
||||||
# Load App
|
# Load App
|
||||||
from pywb.apps.wayback import application
|
from pywb.apps.wayback import application
|
||||||
|
|
||||||
|
if r.autoindex:
|
||||||
|
from pywb.manager.manager import CollectionsManager
|
||||||
|
m = CollectionsManager('', must_exist=False)
|
||||||
|
if not os.path.isdir(m.colls_dir):
|
||||||
|
msg = 'No managed directory "{0}" for auto-indexing'
|
||||||
|
logging.error(msg.format(m.colls_dir))
|
||||||
|
import sys
|
||||||
|
sys.exit(2)
|
||||||
|
else:
|
||||||
|
msg = 'Auto-Indexing Enabled on "{0}"'
|
||||||
|
logging.info(msg.format(m.colls_dir))
|
||||||
|
m.autoindex(do_loop=False)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from waitress import serve
|
from waitress import serve
|
||||||
serve(application, port=r.port, threads=r.threads)
|
serve(application, port=r.port, threads=r.threads)
|
||||||
|
@ -23,11 +23,12 @@ class CDXAutoIndexer(RegexMatchingEventHandler):
|
|||||||
def on_modified(self, event):
|
def on_modified(self, event):
|
||||||
self.updater(event.src_path)
|
self.updater(event.src_path)
|
||||||
|
|
||||||
def do_watch(self, sleep_time=1):
|
def start_watch(self):
|
||||||
observer = Observer()
|
self.observer = Observer()
|
||||||
observer.schedule(self, self.cdx_path, recursive=True)
|
self.observer.schedule(self, self.cdx_path, recursive=True)
|
||||||
observer.start()
|
self.observer.start()
|
||||||
|
|
||||||
|
def do_loop(self, sleep_time=1):
|
||||||
try:
|
try:
|
||||||
while keep_running:
|
while keep_running:
|
||||||
time.sleep(sleep_time)
|
time.sleep(sleep_time)
|
||||||
|
@ -318,7 +318,7 @@ directory structure expected by pywb
|
|||||||
|
|
||||||
migrate.convert_to_cdxj()
|
migrate.convert_to_cdxj()
|
||||||
|
|
||||||
def autoindex(self):
|
def autoindex(self, do_loop=True):
|
||||||
from autoindex import CDXAutoIndexer
|
from autoindex import CDXAutoIndexer
|
||||||
|
|
||||||
if self.coll_name:
|
if self.coll_name:
|
||||||
@ -340,7 +340,9 @@ directory structure expected by pywb
|
|||||||
|
|
||||||
|
|
||||||
indexer = CDXAutoIndexer(do_index, path)
|
indexer = CDXAutoIndexer(do_index, path)
|
||||||
indexer.do_watch()
|
indexer.start_watch()
|
||||||
|
if do_loop:
|
||||||
|
indexer.do_loop()
|
||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
#=============================================================================
|
||||||
@ -455,7 +457,7 @@ Create manage file based web archive collections
|
|||||||
# Auto Index
|
# Auto Index
|
||||||
def do_autoindex(r):
|
def do_autoindex(r):
|
||||||
m = CollectionsManager(r.coll_name, must_exist=False)
|
m = CollectionsManager(r.coll_name, must_exist=False)
|
||||||
m.autoindex()
|
m.autoindex(True)
|
||||||
|
|
||||||
autoindex_help = 'Automatically index any change archive files'
|
autoindex_help = 'Automatically index any change archive files'
|
||||||
autoindex = subparsers.add_parser('autoindex', help=autoindex_help)
|
autoindex = subparsers.add_parser('autoindex', help=autoindex_help)
|
||||||
|
@ -13,6 +13,8 @@ from io import BytesIO
|
|||||||
from pywb.webapp.pywb_init import create_wb_router
|
from pywb.webapp.pywb_init import create_wb_router
|
||||||
from pywb.manager.manager import main
|
from pywb.manager.manager import main
|
||||||
|
|
||||||
|
import pywb.manager.autoindex
|
||||||
|
|
||||||
from pywb.warc.cdxindexer import main as cdxindexer_main
|
from pywb.warc.cdxindexer import main as cdxindexer_main
|
||||||
|
|
||||||
from pywb import get_test_dir
|
from pywb import get_test_dir
|
||||||
@ -73,10 +75,21 @@ class TestManagedColls(object):
|
|||||||
@patch('waitress.serve', lambda *args, **kwargs: None)
|
@patch('waitress.serve', lambda *args, **kwargs: None)
|
||||||
def test_run_cli(self):
|
def test_run_cli(self):
|
||||||
""" test new wayback cli interface
|
""" test new wayback cli interface
|
||||||
|
test autoindex error before collections inited
|
||||||
"""
|
"""
|
||||||
from pywb.apps.cli import wayback
|
from pywb.apps.cli import wayback
|
||||||
wayback([])
|
wayback([])
|
||||||
|
|
||||||
|
# Nothing to auto-index.. yet
|
||||||
|
with raises(SystemExit):
|
||||||
|
wayback(['-a'])
|
||||||
|
|
||||||
|
colls = os.path.join(self.root_dir, 'collections')
|
||||||
|
os.mkdir(colls)
|
||||||
|
|
||||||
|
pywb.manager.autoindex.keep_running = False
|
||||||
|
wayback(['-a'])
|
||||||
|
|
||||||
def test_create_first_coll(self):
|
def test_create_first_coll(self):
|
||||||
""" Test first collection creation, with all required dirs
|
""" Test first collection creation, with all required dirs
|
||||||
"""
|
"""
|
||||||
@ -452,6 +465,8 @@ class TestManagedColls(object):
|
|||||||
archive_sub_dir = os.path.join(archive_dir, 'sub')
|
archive_sub_dir = os.path.join(archive_dir, 'sub')
|
||||||
os.makedirs(archive_sub_dir)
|
os.makedirs(archive_sub_dir)
|
||||||
|
|
||||||
|
pywb.manager.autoindex.keep_running = True
|
||||||
|
|
||||||
def do_copy():
|
def do_copy():
|
||||||
try:
|
try:
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
@ -459,7 +474,6 @@ class TestManagedColls(object):
|
|||||||
shutil.copy(self._get_sample_warc('example-extra.warc'), archive_sub_dir)
|
shutil.copy(self._get_sample_warc('example-extra.warc'), archive_sub_dir)
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
finally:
|
finally:
|
||||||
import pywb.manager.autoindex
|
|
||||||
pywb.manager.autoindex.keep_running = False
|
pywb.manager.autoindex.keep_running = False
|
||||||
|
|
||||||
thread = threading.Thread(target=do_copy)
|
thread = threading.Thread(target=do_copy)
|
||||||
@ -480,7 +494,6 @@ class TestManagedColls(object):
|
|||||||
mtime = os.path.getmtime(index_file)
|
mtime = os.path.getmtime(index_file)
|
||||||
|
|
||||||
# Update
|
# Update
|
||||||
import pywb.manager.autoindex
|
|
||||||
pywb.manager.autoindex.keep_running = True
|
pywb.manager.autoindex.keep_running = True
|
||||||
|
|
||||||
os.remove(index_file)
|
os.remove(index_file)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user