mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-28 00:25:21 +01:00
refactor autoindex: switch to gevent-based simple polling, as watchdog doesn't work with gevent #200
This commit is contained in:
parent
fa247b8fe5
commit
ab77c1b6d9
@ -1,45 +1,60 @@
|
|||||||
import sys
|
import gevent
|
||||||
import time
|
import time
|
||||||
from watchdog.observers import Observer
|
import re
|
||||||
from watchdog.events import RegexMatchingEventHandler
|
import os
|
||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
#=============================================================================
|
||||||
EXT_REGEX = '.*\.w?arc(\.gz)?$'
|
EXT_RX = re.compile('.*\.w?arc(\.gz)?$')
|
||||||
|
|
||||||
keep_running = True
|
keep_running = True
|
||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
#=============================================================================
|
||||||
class CDXAutoIndexer(RegexMatchingEventHandler):
|
class CDXAutoIndexer(object):
|
||||||
def __init__(self, updater, path):
|
def __init__(self, updater, path):
|
||||||
super(CDXAutoIndexer, self).__init__(regexes=[EXT_REGEX],
|
|
||||||
ignore_directories=True)
|
|
||||||
self.updater = updater
|
self.updater = updater
|
||||||
self.cdx_path = path
|
self.root_path = path
|
||||||
|
|
||||||
def on_created(self, event):
|
self.mtimes = {}
|
||||||
self.updater(event.src_path)
|
|
||||||
|
|
||||||
def on_modified(self, event):
|
def has_changed(self, *paths):
|
||||||
self.updater(event.src_path)
|
full_path = os.path.join(*paths)
|
||||||
|
try:
|
||||||
|
mtime = os.path.getmtime(full_path)
|
||||||
|
except:
|
||||||
|
return False
|
||||||
|
|
||||||
def start_watch(self):
|
if mtime == self.mtimes.get(full_path):
|
||||||
self.observer = Observer()
|
return False
|
||||||
self.observer.schedule(self, self.cdx_path, recursive=True)
|
|
||||||
self.observer.start()
|
|
||||||
|
|
||||||
def do_loop(self, sleep_time=1):
|
self.mtimes[full_path] = mtime
|
||||||
|
return full_path
|
||||||
|
|
||||||
|
def check_path(self):
|
||||||
|
for dirName, subdirList, fileList in os.walk(self.root_path):
|
||||||
|
if not subdirList and not self.has_changed(dirName):
|
||||||
|
return False
|
||||||
|
|
||||||
|
for filename in fileList:
|
||||||
|
if not EXT_RX.match(filename):
|
||||||
|
continue
|
||||||
|
|
||||||
|
path = self.has_changed(self.root_path, dirName, filename)
|
||||||
|
if not path:
|
||||||
|
continue
|
||||||
|
|
||||||
|
self.updater(os.path.join(dirName, filename))
|
||||||
|
|
||||||
|
def do_loop(self, interval):
|
||||||
try:
|
try:
|
||||||
while keep_running:
|
while keep_running:
|
||||||
time.sleep(sleep_time)
|
self.check_path()
|
||||||
|
time.sleep(interval)
|
||||||
except KeyboardInterrupt: # pragma: no cover
|
except KeyboardInterrupt: # pragma: no cover
|
||||||
self.observer.stop()
|
return
|
||||||
self.observer.join()
|
|
||||||
|
def start(self, interval):
|
||||||
|
self.ge = gevent.spawn(self.do_loop, interval)
|
||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
|
||||||
if __name__ == "__main__":
|
|
||||||
w = Watcher(sys.argv[1] if len(sys.argv) > 1 else '.')
|
|
||||||
def p(x):
|
|
||||||
print(x)
|
|
||||||
w.run(p)
|
|
||||||
|
@ -327,7 +327,7 @@ directory structure expected by pywb
|
|||||||
|
|
||||||
migrate.convert_to_cdxj()
|
migrate.convert_to_cdxj()
|
||||||
|
|
||||||
def autoindex(self, do_loop=True):
|
def autoindex(self, interval=30.0, do_loop=True):
|
||||||
from pywb.manager.autoindex import CDXAutoIndexer
|
from pywb.manager.autoindex import CDXAutoIndexer
|
||||||
|
|
||||||
if self.coll_name:
|
if self.coll_name:
|
||||||
@ -351,9 +351,10 @@ directory structure expected by pywb
|
|||||||
|
|
||||||
|
|
||||||
indexer = CDXAutoIndexer(do_index, path)
|
indexer = CDXAutoIndexer(do_index, path)
|
||||||
indexer.start_watch()
|
indexer.start(interval)
|
||||||
|
#indexer.start_watch()
|
||||||
if do_loop:
|
if do_loop:
|
||||||
indexer.do_loop()
|
indexer.do_loop(interval)
|
||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
#=============================================================================
|
||||||
@ -468,11 +469,12 @@ Create manage file based web archive collections
|
|||||||
# Auto Index
|
# Auto Index
|
||||||
def do_autoindex(r):
|
def do_autoindex(r):
|
||||||
m = CollectionsManager(r.coll_name, must_exist=False)
|
m = CollectionsManager(r.coll_name, must_exist=False)
|
||||||
m.autoindex(True)
|
m.autoindex(r.interval, True)
|
||||||
|
|
||||||
autoindex_help = 'Automatically index any change archive files'
|
autoindex_help = 'Automatically index any change archive files'
|
||||||
autoindex = subparsers.add_parser('autoindex', help=autoindex_help)
|
autoindex = subparsers.add_parser('autoindex', help=autoindex_help)
|
||||||
autoindex.add_argument('coll_name', nargs='?', default='')
|
autoindex.add_argument('coll_name', nargs='?', default='')
|
||||||
|
autoindex.add_argument('--interval', type=float, default=30.0)
|
||||||
autoindex.set_defaults(func=do_autoindex)
|
autoindex.set_defaults(func=do_autoindex)
|
||||||
|
|
||||||
r = parser.parse_args(args=args)
|
r = parser.parse_args(args=args)
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
from gevent.monkey import patch_all; patch_all()
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
import shutil
|
import shutil
|
||||||
@ -6,7 +8,8 @@ import sys
|
|||||||
import webtest
|
import webtest
|
||||||
|
|
||||||
import time
|
import time
|
||||||
import threading
|
#import threading
|
||||||
|
import gevent
|
||||||
|
|
||||||
from six import StringIO
|
from six import StringIO
|
||||||
|
|
||||||
@ -488,20 +491,22 @@ class TestManagedColls(object):
|
|||||||
|
|
||||||
def do_copy():
|
def do_copy():
|
||||||
try:
|
try:
|
||||||
time.sleep(1)
|
time.sleep(1.0)
|
||||||
shutil.copy(self._get_sample_warc('example.warc.gz'), archive_dir)
|
shutil.copy(self._get_sample_warc('example.warc.gz'), archive_dir)
|
||||||
shutil.copy(self._get_sample_warc('example-extra.warc'), archive_sub_dir)
|
shutil.copy(self._get_sample_warc('example-extra.warc'), archive_sub_dir)
|
||||||
time.sleep(1)
|
time.sleep(1.0)
|
||||||
finally:
|
finally:
|
||||||
pywb.manager.autoindex.keep_running = False
|
pywb.manager.autoindex.keep_running = False
|
||||||
|
|
||||||
thread = threading.Thread(target=do_copy)
|
#thread = threading.Thread(target=do_copy)
|
||||||
thread.daemon = True
|
#thread.daemon = True
|
||||||
thread.start()
|
#thread.start()
|
||||||
|
ge = gevent.spawn(do_copy)
|
||||||
|
|
||||||
main(['autoindex'])
|
main(['autoindex', 'auto', '--interval', '0.25'])
|
||||||
|
|
||||||
thread.join()
|
#thread.join()
|
||||||
|
ge.join()
|
||||||
|
|
||||||
index_file = os.path.join(auto_dir, INDEX_DIR, AUTOINDEX_FILE)
|
index_file = os.path.join(auto_dir, INDEX_DIR, AUTOINDEX_FILE)
|
||||||
assert os.path.isfile(index_file)
|
assert os.path.isfile(index_file)
|
||||||
@ -519,13 +524,15 @@ class TestManagedColls(object):
|
|||||||
|
|
||||||
os.remove(index_file)
|
os.remove(index_file)
|
||||||
|
|
||||||
thread = threading.Thread(target=do_copy)
|
#thread = threading.Thread(target=do_copy)
|
||||||
thread.daemon = True
|
#thread.daemon = True
|
||||||
thread.start()
|
#thread.start()
|
||||||
|
ge = gevent.spawn(do_copy)
|
||||||
|
|
||||||
main(['autoindex', 'auto'])
|
main(['autoindex', 'auto', '--interval', '0.25'])
|
||||||
|
|
||||||
thread.join()
|
#thread.join()
|
||||||
|
ge.join()
|
||||||
|
|
||||||
# assert file was updated
|
# assert file was updated
|
||||||
assert os.path.getmtime(index_file) > mtime
|
assert os.path.getmtime(index_file) > mtime
|
||||||
|
Loading…
x
Reference in New Issue
Block a user