mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Refactor autoindex: switch to simple gevent-based polling, since watchdog does not work with gevent (#200)
This commit is contained in:
parent
fa247b8fe5
commit
ab77c1b6d9
@ -1,45 +1,60 @@
|
||||
import sys
|
||||
import gevent
|
||||
import time
|
||||
from watchdog.observers import Observer
|
||||
from watchdog.events import RegexMatchingEventHandler
|
||||
import re
|
||||
import os
|
||||
|
||||
|
||||
#=============================================================================
|
||||
EXT_REGEX = '.*\.w?arc(\.gz)?$'
|
||||
EXT_RX = re.compile('.*\.w?arc(\.gz)?$')
|
||||
|
||||
keep_running = True
|
||||
|
||||
|
||||
#=============================================================================
|
||||
class CDXAutoIndexer(RegexMatchingEventHandler):
|
||||
class CDXAutoIndexer(object):
|
||||
def __init__(self, updater, path):
|
||||
super(CDXAutoIndexer, self).__init__(regexes=[EXT_REGEX],
|
||||
ignore_directories=True)
|
||||
self.updater = updater
|
||||
self.cdx_path = path
|
||||
self.root_path = path
|
||||
|
||||
def on_created(self, event):
|
||||
self.updater(event.src_path)
|
||||
self.mtimes = {}
|
||||
|
||||
def on_modified(self, event):
|
||||
self.updater(event.src_path)
|
||||
def has_changed(self, *paths):
|
||||
full_path = os.path.join(*paths)
|
||||
try:
|
||||
mtime = os.path.getmtime(full_path)
|
||||
except:
|
||||
return False
|
||||
|
||||
def start_watch(self):
|
||||
self.observer = Observer()
|
||||
self.observer.schedule(self, self.cdx_path, recursive=True)
|
||||
self.observer.start()
|
||||
if mtime == self.mtimes.get(full_path):
|
||||
return False
|
||||
|
||||
def do_loop(self, sleep_time=1):
|
||||
self.mtimes[full_path] = mtime
|
||||
return full_path
|
||||
|
||||
def check_path(self):
|
||||
for dirName, subdirList, fileList in os.walk(self.root_path):
|
||||
if not subdirList and not self.has_changed(dirName):
|
||||
return False
|
||||
|
||||
for filename in fileList:
|
||||
if not EXT_RX.match(filename):
|
||||
continue
|
||||
|
||||
path = self.has_changed(self.root_path, dirName, filename)
|
||||
if not path:
|
||||
continue
|
||||
|
||||
self.updater(os.path.join(dirName, filename))
|
||||
|
||||
def do_loop(self, interval):
|
||||
try:
|
||||
while keep_running:
|
||||
time.sleep(sleep_time)
|
||||
self.check_path()
|
||||
time.sleep(interval)
|
||||
except KeyboardInterrupt: # pragma: no cover
|
||||
self.observer.stop()
|
||||
self.observer.join()
|
||||
return
|
||||
|
||||
def start(self, interval):
|
||||
self.ge = gevent.spawn(self.do_loop, interval)
|
||||
|
||||
|
||||
#=============================================================================
|
||||
if __name__ == "__main__":
|
||||
w = Watcher(sys.argv[1] if len(sys.argv) > 1 else '.')
|
||||
def p(x):
|
||||
print(x)
|
||||
w.run(p)
|
||||
|
@ -327,7 +327,7 @@ directory structure expected by pywb
|
||||
|
||||
migrate.convert_to_cdxj()
|
||||
|
||||
def autoindex(self, do_loop=True):
|
||||
def autoindex(self, interval=30.0, do_loop=True):
|
||||
from pywb.manager.autoindex import CDXAutoIndexer
|
||||
|
||||
if self.coll_name:
|
||||
@ -351,9 +351,10 @@ directory structure expected by pywb
|
||||
|
||||
|
||||
indexer = CDXAutoIndexer(do_index, path)
|
||||
indexer.start_watch()
|
||||
indexer.start(interval)
|
||||
#indexer.start_watch()
|
||||
if do_loop:
|
||||
indexer.do_loop()
|
||||
indexer.do_loop(interval)
|
||||
|
||||
|
||||
#=============================================================================
|
||||
@ -468,11 +469,12 @@ Create manage file based web archive collections
|
||||
# Auto Index
|
||||
def do_autoindex(r):
|
||||
m = CollectionsManager(r.coll_name, must_exist=False)
|
||||
m.autoindex(True)
|
||||
m.autoindex(r.interval, True)
|
||||
|
||||
autoindex_help = 'Automatically index any change archive files'
|
||||
autoindex = subparsers.add_parser('autoindex', help=autoindex_help)
|
||||
autoindex.add_argument('coll_name', nargs='?', default='')
|
||||
autoindex.add_argument('--interval', type=float, default=30.0)
|
||||
autoindex.set_defaults(func=do_autoindex)
|
||||
|
||||
r = parser.parse_args(args=args)
|
||||
|
@ -1,3 +1,5 @@
|
||||
from gevent.monkey import patch_all; patch_all()
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
import shutil
|
||||
@ -6,7 +8,8 @@ import sys
|
||||
import webtest
|
||||
|
||||
import time
|
||||
import threading
|
||||
#import threading
|
||||
import gevent
|
||||
|
||||
from six import StringIO
|
||||
|
||||
@ -488,20 +491,22 @@ class TestManagedColls(object):
|
||||
|
||||
def do_copy():
|
||||
try:
|
||||
time.sleep(1)
|
||||
time.sleep(1.0)
|
||||
shutil.copy(self._get_sample_warc('example.warc.gz'), archive_dir)
|
||||
shutil.copy(self._get_sample_warc('example-extra.warc'), archive_sub_dir)
|
||||
time.sleep(1)
|
||||
time.sleep(1.0)
|
||||
finally:
|
||||
pywb.manager.autoindex.keep_running = False
|
||||
|
||||
thread = threading.Thread(target=do_copy)
|
||||
thread.daemon = True
|
||||
thread.start()
|
||||
#thread = threading.Thread(target=do_copy)
|
||||
#thread.daemon = True
|
||||
#thread.start()
|
||||
ge = gevent.spawn(do_copy)
|
||||
|
||||
main(['autoindex'])
|
||||
main(['autoindex', 'auto', '--interval', '0.25'])
|
||||
|
||||
thread.join()
|
||||
#thread.join()
|
||||
ge.join()
|
||||
|
||||
index_file = os.path.join(auto_dir, INDEX_DIR, AUTOINDEX_FILE)
|
||||
assert os.path.isfile(index_file)
|
||||
@ -519,13 +524,15 @@ class TestManagedColls(object):
|
||||
|
||||
os.remove(index_file)
|
||||
|
||||
thread = threading.Thread(target=do_copy)
|
||||
thread.daemon = True
|
||||
thread.start()
|
||||
#thread = threading.Thread(target=do_copy)
|
||||
#thread.daemon = True
|
||||
#thread.start()
|
||||
ge = gevent.spawn(do_copy)
|
||||
|
||||
main(['autoindex', 'auto'])
|
||||
main(['autoindex', 'auto', '--interval', '0.25'])
|
||||
|
||||
thread.join()
|
||||
#thread.join()
|
||||
ge.join()
|
||||
|
||||
# assert file was updated
|
||||
assert os.path.getmtime(index_file) > mtime
|
||||
|
Loading…
x
Reference in New Issue
Block a user