1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 06:59:52 +01:00

Recorder App Support (#241)

recording support: now available for dynamic collections via config
- config.yaml 'recorder: live' entry enables /record/ subpath which records into any dynamic collection (the recording source can be any collection, though it is usually live)
- autoindex refactor: simplified, standalone AutoIndexer() -- indexes any changed warc files to autoindex.cdxj
- windows autoindex support: also check for changed file size, as the last-modified time may not change
- manager: remove autoindex, now part of main cli
- tests: updated test_auto_colls with autoindex changes
- tests: add record/replay tests covering recording into a collection and replaying from it
This commit is contained in:
Ilya Kreymer 2017-09-21 22:12:57 -07:00 committed by GitHub
parent a05916617d
commit 93921aadb7
11 changed files with 288 additions and 147 deletions

View File

@ -88,19 +88,19 @@ class ReplayCli(BaseCli):
def run(self): def run(self):
if self.r.autoindex: if self.r.autoindex:
from pywb.manager.manager import CollectionsManager from pywb.manager.autoindex import AutoIndexer
import os import os
m = CollectionsManager('', must_exist=False) indexer = AutoIndexer(interval=self.r.auto_interval)
if not os.path.isdir(m.colls_dir): if not os.path.isdir(indexer.root_path):
msg = 'No managed directory "{0}" for auto-indexing' msg = 'No managed directory "{0}" for auto-indexing'
logging.error(msg.format(m.colls_dir)) logging.error(msg.format(indexer.root_path))
import sys import sys
sys.exit(2) sys.exit(2)
else:
msg = 'Auto-Indexing Enabled on "{0}", checking every {1} secs' msg = 'Auto-Indexing Enabled on "{0}", checking every {1} secs'
logging.info(msg.format(m.colls_dir, self.r.auto_interval)) logging.info(msg.format(indexer.root_path, self.r.auto_interval))
m.autoindex(interval=self.r.auto_interval, do_loop=False) indexer.start()
super(ReplayCli, self).run() super(ReplayCli, self).run()

View File

@ -9,6 +9,9 @@ from six import iteritems
from warcio.utils import to_native_str from warcio.utils import to_native_str
from pywb.recorder.multifilewarcwriter import MultiFileWARCWriter
from pywb.recorder.recorderapp import RecorderApp
from pywb.utils.loaders import load_yaml_config from pywb.utils.loaders import load_yaml_config
from pywb.utils.geventserver import GeventServer from pywb.utils.geventserver import GeventServer
@ -32,10 +35,14 @@ class FrontEndApp(object):
self.warcserver = WarcServer(config_file=config_file, self.warcserver = WarcServer(config_file=config_file,
custom_config=custom_config) custom_config=custom_config)
framed_replay = self.warcserver.config.get('framed_replay', True) config = self.warcserver.config
framed_replay = config.get('framed_replay', True)
self.warcserver_server = GeventServer(self.warcserver, port=0) self.warcserver_server = GeventServer(self.warcserver, port=0)
self.init_recorder(config)
self.static_handler = StaticHandler('pywb/static/') self.static_handler = StaticHandler('pywb/static/')
self.url_map = Map() self.url_map = Map()
@ -44,36 +51,59 @@ class FrontEndApp(object):
self.url_map.add(Rule('/collinfo.json', endpoint=self.serve_listing)) self.url_map.add(Rule('/collinfo.json', endpoint=self.serve_listing))
if self.is_valid_coll('$root'): if self.is_valid_coll('$root'):
self.url_map.add(Rule('/', endpoint=self.serve_coll_page)) coll_prefix = ''
self.url_map.add(Rule('/timemap/<timemap_output>/<path:url>', endpoint=self.serve_content))
self.url_map.add(Rule('/cdx', endpoint=self.serve_cdx))
self.url_map.add(Rule('/<path:url>', endpoint=self.serve_content))
else: else:
self.url_map.add(Rule('/<coll>/', endpoint=self.serve_coll_page)) coll_prefix = '/<coll>'
self.url_map.add(Rule('/<coll>/timemap/<timemap_output>/<path:url>', endpoint=self.serve_content))
self.url_map.add(Rule('/<coll>/cdx', endpoint=self.serve_cdx))
self.url_map.add(Rule('/<coll>/<path:url>', endpoint=self.serve_content))
self.url_map.add(Rule('/', endpoint=self.serve_home)) self.url_map.add(Rule('/', endpoint=self.serve_home))
self.url_map.add(Rule(coll_prefix + '/', endpoint=self.serve_coll_page))
self.url_map.add(Rule(coll_prefix + '/timemap/<timemap_output>/<path:url>', endpoint=self.serve_content))
self.url_map.add(Rule(coll_prefix + '/cdx', endpoint=self.serve_cdx))
if self.recorder:
self.url_map.add(Rule(coll_prefix + '/record/<path:url>', endpoint=self.serve_record))
self.url_map.add(Rule(coll_prefix + '/<path:url>', endpoint=self.serve_content))
upstream_paths = self.get_upstream_paths(self.warcserver_server.port) upstream_paths = self.get_upstream_paths(self.warcserver_server.port)
self.rewriterapp = RewriterApp(framed_replay, self.rewriterapp = RewriterApp(framed_replay,
config=self.warcserver.config, config=config,
paths=upstream_paths) paths=upstream_paths)
self.templates_dir = self.warcserver.config.get('templates_dir', 'templates') self.templates_dir = config.get('templates_dir', 'templates')
self.static_dir = self.warcserver.config.get('static_dir', 'static') self.static_dir = config.get('static_dir', 'static')
metadata_templ = os.path.join(self.warcserver.root_dir, '{coll}', 'metadata.yaml') metadata_templ = os.path.join(self.warcserver.root_dir, '{coll}', 'metadata.yaml')
self.metadata_cache = MetadataCache(metadata_templ) self.metadata_cache = MetadataCache(metadata_templ)
def get_upstream_paths(self, port): def get_upstream_paths(self, port):
return { base_paths = {
'replay': 'http://localhost:%s/{coll}/resource/postreq' % port, 'replay': 'http://localhost:%s/{coll}/resource/postreq' % port,
'cdx-server': 'http://localhost:%s/{coll}/index' % port, 'cdx-server': 'http://localhost:%s/{coll}/index' % port,
} }
if self.recorder:
base_paths['record'] = 'http://localhost:%s/%s/resource/postreq?param.recorder.coll={coll}' % (self.recorder_port, self.recorder_source)
return base_paths
def init_recorder(self, config):
    """Set up the optional recorder app from the ``recorder`` config entry.

    Always assigns ``recorder_source``, ``recorder``, ``recorder_server``
    and ``recorder_port``. When the config has no (or an empty)
    ``recorder`` entry, recording is disabled: the recorder and its server
    are ``None`` and the port is ``0``.

    :param config: mapping supporting ``.get()``; its ``recorder`` value
        is stored as the recording source.
    """
    source = config.get('recorder')
    self.recorder_source = source

    if not source:
        # Recording disabled: leave placeholder values behind.
        self.recorder, self.recorder_server, self.recorder_port = None, None, 0
        return

    # WARCs are written under the warcserver's archive path template;
    # no dedup index is passed.
    writer = MultiFileWARCWriter(self.warcserver.archive_templ,
                                 max_size=1000000000,
                                 max_idle_secs=600,
                                 dedup_index=None)

    # The recorder fetches through the already-started warcserver.
    upstream_url = 'http://localhost:' + str(self.warcserver_server.port)
    self.recorder = RecorderApp(upstream_url, writer)

    # port=0 is passed through as-is; the server's resulting port is kept.
    server = GeventServer(self.recorder, port=0)
    self.recorder_server = server
    self.recorder_port = server.port
def serve_home(self, environ): def serve_home(self, environ):
home_view = BaseInsertView(self.rewriterapp.jinja_env, 'index.html') home_view = BaseInsertView(self.rewriterapp.jinja_env, 'index.html')
fixed_routes = self.warcserver.list_fixed_routes() fixed_routes = self.warcserver.list_fixed_routes()
@ -150,13 +180,19 @@ class FrontEndApp(object):
return WbResponse.bin_stream(res.raw, content_type=res.headers.get('Content-Type')) return WbResponse.bin_stream(res.raw, content_type=res.headers.get('Content-Type'))
except Exception as e: except Exception as e:
return WbResponse.text_content('Error: ' + str(e), status='400 Bad Request') return WbResponse.text_response('Error: ' + str(e), status='400 Bad Request')
def serve_content(self, environ, coll='$root', url='', timemap_output=''): def serve_record(self, environ, coll='$root', url=''):
if coll in self.warcserver.list_fixed_routes():
return WbResponse.text_response('Error: Can Not Record Into Custom Collection "{0}"'.format(coll))
return self.serve_content(environ, coll, url, record=True)
def serve_content(self, environ, coll='$root', url='', timemap_output='', record=False):
if not self.is_valid_coll(coll): if not self.is_valid_coll(coll):
self.raise_not_found(environ, 'No handler for "/{0}"'.format(coll)) self.raise_not_found(environ, 'No handler for "/{0}"'.format(coll))
self.setup_paths(environ, coll) self.setup_paths(environ, coll, record)
wb_url_str = to_native_str(url) wb_url_str = to_native_str(url)
@ -164,6 +200,10 @@ class FrontEndApp(object):
wb_url_str += '?' + environ.get('QUERY_STRING') wb_url_str += '?' + environ.get('QUERY_STRING')
metadata = self.get_metadata(coll) metadata = self.get_metadata(coll)
if record:
metadata['type'] = 'record'
print('RECORD')
if timemap_output: if timemap_output:
metadata['output'] = timemap_output metadata['output'] = timemap_output
@ -175,12 +215,14 @@ class FrontEndApp(object):
return response return response
def setup_paths(self, environ, coll): def setup_paths(self, environ, coll, record=False):
if not coll or not self.warcserver.root_dir: if not coll or not self.warcserver.root_dir:
return return
if coll != '$root': if coll != '$root':
pop_path_info(environ) pop_path_info(environ)
if record:
pop_path_info(environ)
paths = [self.warcserver.root_dir] paths = [self.warcserver.root_dir]

View File

@ -2,59 +2,106 @@ import gevent
import time import time
import re import re
import os import os
import logging
from pywb.manager.manager import CollectionsManager
#============================================================================= #=============================================================================
EXT_RX = re.compile('.*\.w?arc(\.gz)?$') class AutoIndexer(object):
EXT_RX = re.compile('.*\.w?arc(\.gz)?$')
AUTO_INDEX_FILE = 'autoindex.cdxj'
keep_running = True def __init__(self, interval=30, keep_running=True):
self.manager = CollectionsManager('', must_exist=False)
self.root_path = self.manager.colls_dir
#============================================================================= self.keep_running = keep_running
class CDXAutoIndexer(object):
def __init__(self, updater, path):
self.updater = updater
self.root_path = path
self.mtimes = {} self.interval = interval
def has_changed(self, *paths): self.last_size = {}
full_path = os.path.join(*paths)
def is_newer_than(self, path1, path2, track=False):
try: try:
mtime = os.path.getmtime(full_path) mtime1 = os.path.getmtime(path1)
mtime2 = os.path.getmtime(path2)
newer = mtime1 > mtime2
except: except:
return False newer = True
if mtime == self.mtimes.get(full_path): if track:
return False size = os.path.getsize(path1)
try:
if size != self.last_size[path1]:
newer = True
except:
pass
self.mtimes[full_path] = mtime self.last_size[path1] = size
return full_path
return newer
def do_index(self, files):
logging.info('Auto-Indexing... ' + str(files))
self.manager.index_merge(files, self.AUTO_INDEX_FILE)
logging.info('...Done')
def check_path(self): def check_path(self):
for dirName, subdirList, fileList in os.walk(self.root_path): for coll in os.listdir(self.root_path):
if not subdirList and not self.has_changed(dirName): coll_dir = os.path.join(self.root_path, coll)
return False if not os.path.isdir(coll_dir):
continue
for filename in fileList: self.manager.change_collection(coll)
if not EXT_RX.match(filename):
archive_dir = self.manager.archive_dir
if not os.path.isdir(archive_dir):
continue
index_file = os.path.join(self.manager.indexes_dir, self.AUTO_INDEX_FILE)
if os.path.isfile(index_file):
if os.name != 'nt' and self.is_newer_than(archive_dir, index_file):
continue continue
else:
try:
os.makedirs(self.manager.indexes_dir)
except Exception as e:
pass
path = self.has_changed(self.root_path, dirName, filename) logging.info('Collection Possibly Changed: ' + coll)
if not path: to_index = []
continue for dirpath, dirnames, filenames in os.walk(archive_dir):
for filename in filenames:
if not self.EXT_RX.match(filename):
continue
self.updater(os.path.join(dirName, filename)) full_filename = os.path.join(dirpath, filename)
def do_loop(self, interval): if self.is_newer_than(full_filename, index_file, True):
to_index.append(full_filename)
if to_index:
self.do_index(to_index)
def run(self):
try: try:
while keep_running: while self.keep_running:
self.check_path() self.check_path()
time.sleep(interval) if not self.interval:
break
time.sleep(self.interval)
except KeyboardInterrupt: # pragma: no cover except KeyboardInterrupt: # pragma: no cover
return return
def start(self, interval): def start(self):
self.ge = gevent.spawn(self.do_loop, interval) self.ge = gevent.spawn(self.run)
def stop(self):
self.interval = 0
self.keep_running = False

View File

@ -33,7 +33,6 @@ It may be used via cmdline to setup and maintain the
directory structure expected by pywb directory structure expected by pywb
""" """
DEF_INDEX_FILE = 'index.cdxj' DEF_INDEX_FILE = 'index.cdxj'
AUTO_INDEX_FILE = 'autoindex.cdxj'
COLL_RX = re.compile('^[\w][-\w]*$') COLL_RX = re.compile('^[\w][-\w]*$')
@ -48,12 +47,12 @@ directory structure expected by pywb
self.colls_dir = os.path.join(os.getcwd(), colls_dir) self.colls_dir = os.path.join(os.getcwd(), colls_dir)
self._set_coll_dirs(coll_name) self.change_collection(coll_name)
if must_exist: if must_exist:
self._assert_coll_exists() self._assert_coll_exists()
def _set_coll_dirs(self, coll_name): def change_collection(self, coll_name):
self.coll_name = coll_name self.coll_name = coll_name
self.curr_coll_dir = os.path.join(self.colls_dir, coll_name) self.curr_coll_dir = os.path.join(self.colls_dir, coll_name)
@ -330,35 +329,6 @@ directory structure expected by pywb
migrate.convert_to_cdxj() migrate.convert_to_cdxj()
def autoindex(self, interval=30.0, do_loop=True):
from pywb.manager.autoindex import CDXAutoIndexer
if self.coll_name:
any_coll = False
path = self.archive_dir
else:
path = self.colls_dir
any_coll = True
def do_index(warc):
if any_coll:
coll_name = warc.split(self.colls_dir + os.path.sep)
coll_name = coll_name[-1].split(os.path.sep)[0]
if coll_name != self.coll_name:
self._set_coll_dirs(coll_name)
print('Auto-Indexing: ' + warc)
self.index_merge([warc], self.AUTO_INDEX_FILE)
print('Done.. Waiting for file updates')
indexer = CDXAutoIndexer(do_index, path)
indexer.start(interval)
#indexer.start_watch()
if do_loop:
indexer.do_loop(interval)
#============================================================================= #=============================================================================
def main(args=None): def main(args=None):
@ -469,17 +439,7 @@ Create manage file based web archive collections
migrate.add_argument('-f', '--force', action='store_true') migrate.add_argument('-f', '--force', action='store_true')
migrate.set_defaults(func=do_migrate) migrate.set_defaults(func=do_migrate)
# Auto Index # Parse
def do_autoindex(r):
m = CollectionsManager(r.coll_name, must_exist=False)
m.autoindex(r.interval, True)
autoindex_help = 'Automatically index any change archive files'
autoindex = subparsers.add_parser('autoindex', help=autoindex_help)
autoindex.add_argument('coll_name', nargs='?', default='')
autoindex.add_argument('--interval', type=float, default=30.0)
autoindex.set_defaults(func=do_autoindex)
r = parser.parse_args(args=args) r = parser.parse_args(args=args)
r.func(r) r.func(r)

View File

@ -58,7 +58,7 @@ class BaseLoader(object):
cdx['recorder_skip'] = '1' cdx['recorder_skip'] = '1'
out_headers['WebAgg-Cdx'] = to_native_str(cdx.to_cdxj().rstrip()) out_headers['WebAgg-Cdx'] = to_native_str(cdx.to_cdxj().rstrip())
out_headers['WebAgg-Source-Coll'] = source out_headers['WebAgg-Source-Coll'] = to_native_str(source)
if not warc_headers: if not warc_headers:
if other_headers: if other_headers:

View File

@ -85,8 +85,8 @@ class FakeRedisTests(object):
# ============================================================================ # ============================================================================
class TempDirTests(object): class TempDirTests(object):
@classmethod @classmethod
def setup_class(cls): def setup_class(cls, *args, **kwargs):
super(TempDirTests, cls).setup_class() super(TempDirTests, cls).setup_class(*args, **kwargs)
cls.root_dir = tempfile.mkdtemp() cls.root_dir = tempfile.mkdtemp()
@classmethod @classmethod

View File

@ -57,6 +57,9 @@ class WarcServer(BaseWarcServer):
self.fixed_routes = self.load_colls() self.fixed_routes = self.load_colls()
self.archive_templ = None
self.indexes_templ = None
for name, route in iteritems(self.fixed_routes): for name, route in iteritems(self.fixed_routes):
self.add_route('/' + name, route) self.add_route('/' + name, route)
@ -82,13 +85,13 @@ class WarcServer(BaseWarcServer):
return return
#indexes_templ = os.path.join('{coll}', 'indexes') + os.path.sep #indexes_templ = os.path.join('{coll}', 'indexes') + os.path.sep
indexes_templ = self.AUTO_DIR_INDEX_PATH.replace('/', os.path.sep) self.indexes_templ = self.AUTO_DIR_INDEX_PATH.replace('/', os.path.sep)
dir_source = CacheDirectoryIndexSource(self.root_dir, indexes_templ) dir_source = CacheDirectoryIndexSource(self.root_dir, self.indexes_templ)
archive_templ = self.AUTO_DIR_ARCHIVE_PATH.replace('/', os.path.sep) self.archive_templ = self.AUTO_DIR_ARCHIVE_PATH.replace('/', os.path.sep)
archive_templ = os.path.join(self.root_dir, archive_templ) self.archive_templ = os.path.join(self.root_dir, self.archive_templ)
handler = DefaultResourceHandler(dir_source, archive_templ) handler = DefaultResourceHandler(dir_source, self.archive_templ)
return handler return handler

View File

@ -3,7 +3,8 @@ from gevent import monkey; monkey.patch_all(thread=False)
import pytest import pytest
import webtest import webtest
from pywb.warcserver.test.testutils import BaseTestClass from pywb.warcserver.test.testutils import BaseTestClass, TempDirTests
from pywb.manager.manager import main, CollectionsManager
from pywb.apps.frontendapp import FrontEndApp from pywb.apps.frontendapp import FrontEndApp
import os import os
@ -24,16 +25,28 @@ class BaseConfigTest(BaseTestClass):
@classmethod @classmethod
def get_test_app(cls, config_file, override=None): def get_test_app(cls, config_file, override=None):
config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), config_file) config_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), config_file)
return webtest.TestApp(FrontEndApp(config_file=config_file, custom_config=override)) app = FrontEndApp(config_file=config_file, custom_config=override)
return app, webtest.TestApp(app)
@classmethod @classmethod
def setup_class(cls, config_file, include_non_frame=True): def setup_class(cls, config_file, include_non_frame=True):
super(BaseConfigTest, cls).setup_class() super(BaseConfigTest, cls).setup_class()
cls.testapp = cls.get_test_app(config_file) cls.app, cls.testapp = cls.get_test_app(config_file)
if include_non_frame: if include_non_frame:
cls.testapp_non_frame = cls.get_test_app(config_file, cls.app_non_frame, cls.testapp_non_frame = cls.get_test_app(config_file,
override={'framed_replay': False}) override={'framed_replay': False})
@classmethod
def teardown_class(cls):
    """Close recorder WARC writers held by the test apps, then run the parent teardown.

    ``app_non_frame`` is only assigned by ``setup_class`` when
    ``include_non_frame`` is true, and ``app`` is absent if setup failed
    before creating it. Both are looked up defensively so that teardown
    never raises ``AttributeError`` and the parent teardown always runs.
    """
    for attr_name in ('app', 'app_non_frame'):
        app = getattr(cls, attr_name, None)
        # Only apps configured with a recorder have a writer to close.
        if app is not None and app.recorder:
            app.recorder.writer.close()

    super(BaseConfigTest, cls).teardown_class()
def _assert_basic_html(self, resp): def _assert_basic_html(self, resp):
assert resp.status_int == 200 assert resp.status_int == 200
assert resp.content_type == 'text/html' assert resp.content_type == 'text/html'
@ -61,3 +74,21 @@ class BaseConfigTest(BaseTestClass):
return app.head(url.format(fmod), *args, **kwargs) return app.head(url.format(fmod), *args, **kwargs)
#=============================================================================
class CollsDirMixin(TempDirTests):
    """Test mixin that runs a test class from inside its temp root dir.

    During the class's lifetime the working directory is the (real path
    of the) temp root and ``CollectionsManager.COLLS_DIR`` is overridden
    with this class's ``COLLS_DIR``; both are restored on teardown.
    """
    COLLS_DIR = '_test_colls'

    @classmethod
    def setup_class(cls, *args, **kwargs):
        """Run the parent setup, then chdir into the temp root and override COLLS_DIR."""
        super(CollsDirMixin, cls).setup_class(*args, **kwargs)

        # Remember where we started and resolve the temp root to its real path.
        cls.orig_cwd, cls.root_dir = os.getcwd(), os.path.realpath(cls.root_dir)
        os.chdir(cls.root_dir)

        # Swap in the test collections dir, keeping the previous value for teardown.
        cls.orig_collections, CollectionsManager.COLLS_DIR = (
            CollectionsManager.COLLS_DIR, cls.COLLS_DIR)

    @classmethod
    def teardown_class(cls):
        """Restore the working directory and COLLS_DIR, then run the parent teardown."""
        os.chdir(cls.orig_cwd)
        CollectionsManager.COLLS_DIR = cls.orig_collections
        super(CollsDirMixin, cls).teardown_class()

View File

@ -0,0 +1,10 @@
debug: true
collections_root: _test_colls
recorder: live
collections:
'live': '$live'

View File

@ -1,4 +1,4 @@
from gevent.monkey import patch_all; patch_all() from .base_config_test import CollsDirMixin
import os import os
import tempfile import tempfile
@ -17,11 +17,10 @@ from pytest import raises
from mock import patch from mock import patch
from pywb import get_test_dir from pywb import get_test_dir
from pywb.warcserver.test.testutils import TempDirTests, BaseTestClass from pywb.warcserver.test.testutils import BaseTestClass
from pywb.manager.manager import main, CollectionsManager from pywb.manager.autoindex import AutoIndexer
from pywb.manager.manager import main
import pywb.manager.autoindex
from pywb.indexer.cdxindexer import main as cdxindexer_main from pywb.indexer.cdxindexer import main as cdxindexer_main
from pywb.warcserver.index.cdxobject import CDXObject from pywb.warcserver.index.cdxobject import CDXObject
@ -34,26 +33,12 @@ ARCHIVE_DIR = 'archive'
INDEX_DIR = 'indexes' INDEX_DIR = 'indexes'
COLLECTIONS = '_test_colls' COLLECTIONS = '_test_colls'
CollectionsManager.COLLS_DIR = COLLECTIONS
INDEX_FILE = 'index.cdxj' INDEX_FILE = 'index.cdxj'
AUTOINDEX_FILE = 'autoindex.cdxj' AUTOINDEX_FILE = 'autoindex.cdxj'
#============================================================================= #=============================================================================
class TestManagedColls(TempDirTests, BaseTestClass): class TestManagedColls(CollsDirMixin, BaseTestClass):
@classmethod
def setup_class(cls):
super(TestManagedColls, cls).setup_class()
cls.orig_cwd = os.getcwd()
cls.root_dir = os.path.realpath(cls.root_dir)
os.chdir(cls.root_dir)
@classmethod
def teardown_class(cls):
os.chdir(cls.orig_cwd)
super(TestManagedColls, cls).teardown_class()
def _check_dirs(self, base, dirlist): def _check_dirs(self, base, dirlist):
for dir_ in dirlist: for dir_ in dirlist:
assert os.path.isdir(os.path.join(base, dir_)) assert os.path.isdir(os.path.join(base, dir_))
@ -82,8 +67,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
colls = os.path.join(self.root_dir, COLLECTIONS) colls = os.path.join(self.root_dir, COLLECTIONS)
os.mkdir(colls) os.mkdir(colls)
pywb.manager.autoindex.keep_running = False wayback(['-a', '-p', '0', '--auto-interval', '0'])
wayback(['-a', '-p', '0'])
def test_create_first_coll(self): def test_create_first_coll(self):
""" Test first collection creation, with all required dirs """ Test first collection creation, with all required dirs
@ -472,8 +456,6 @@ class TestManagedColls(TempDirTests, BaseTestClass):
archive_sub_dir = os.path.join(archive_dir, 'sub') archive_sub_dir = os.path.join(archive_dir, 'sub')
os.makedirs(archive_sub_dir) os.makedirs(archive_sub_dir)
pywb.manager.autoindex.keep_running = True
def do_copy(): def do_copy():
try: try:
time.sleep(1.0) time.sleep(1.0)
@ -481,16 +463,12 @@ class TestManagedColls(TempDirTests, BaseTestClass):
shutil.copy(self._get_sample_warc('example-extra.warc'), archive_sub_dir) shutil.copy(self._get_sample_warc('example-extra.warc'), archive_sub_dir)
time.sleep(1.0) time.sleep(1.0)
finally: finally:
pywb.manager.autoindex.keep_running = False indexer.interval = 0
indexer = AutoIndexer(interval=0.25)
indexer.start()
#thread = threading.Thread(target=do_copy)
#thread.daemon = True
#thread.start()
ge = gevent.spawn(do_copy) ge = gevent.spawn(do_copy)
main(['autoindex', 'auto', '--interval', '0.25'])
#thread.join()
ge.join() ge.join()
index_file = os.path.join(auto_dir, INDEX_DIR, AUTOINDEX_FILE) index_file = os.path.join(auto_dir, INDEX_DIR, AUTOINDEX_FILE)
@ -505,7 +483,8 @@ class TestManagedColls(TempDirTests, BaseTestClass):
mtime = os.path.getmtime(index_file) mtime = os.path.getmtime(index_file)
# Update # Update
pywb.manager.autoindex.keep_running = True indexer.interval = 0.25
indexer.start()
os.remove(index_file) os.remove(index_file)
@ -514,7 +493,7 @@ class TestManagedColls(TempDirTests, BaseTestClass):
#thread.start() #thread.start()
ge = gevent.spawn(do_copy) ge = gevent.spawn(do_copy)
main(['autoindex', 'auto', '--interval', '0.25']) #wayback(['-p', '0', '-a', '--auto-interval', '0.25'])
#thread.join() #thread.join()
ge.join() ge.join()

View File

@ -0,0 +1,69 @@
from .base_config_test import BaseConfigTest, fmod, CollsDirMixin
from pywb.manager.manager import main as manager
from pywb.manager.autoindex import AutoIndexer
import os
import time
# ============================================================================
class TestRecordReplay(CollsDirMixin, BaseConfigTest):
    """Record-then-replay tests against an app loaded from config_test_record.yaml.

    The tests are order-dependent: collections are created first, each
    ``test_record_*`` fetches a URL through the ``/record/`` subpath, and the
    following ``test_replay_*`` requests the same URL from the collection.
    NOTE(review): the record tests request http://httpbin.org, so they
    presumably need network access -- confirm.
    """
    @classmethod
    def setup_class(cls):
        # Load the recorder-enabled config, then start a background
        # auto-indexer with a 0.25 interval.
        super(TestRecordReplay, cls).setup_class('config_test_record.yaml')
        cls.indexer = AutoIndexer(interval=0.25)
        cls.indexer.start()

    @classmethod
    def teardown_class(cls):
        # Stop the indexer before the parent teardown runs.
        cls.indexer.stop()
        super(TestRecordReplay, cls).teardown_class()

    def test_init_coll(self):
        # Create the two collections used below and check their archive dirs exist.
        manager(['init', 'test'])
        assert os.path.isdir(os.path.join(self.root_dir, '_test_colls', 'test', 'archive'))
        manager(['init', 'test2'])
        assert os.path.isdir(os.path.join(self.root_dir, '_test_colls', 'test2', 'archive'))

    def test_record_1(self, fmod):
        fmod_slash = fmod + '/' if fmod else ''
        # NOTE(review): this URL hard-codes 'mp_/' and has no '{0}' placeholder,
        # unlike the later tests; how fmod_slash is applied depends on self.get,
        # which is not visible here -- confirm this is intended.
        res = self.get('/test/record/mp_/http://httpbin.org/get?A=B', fmod_slash)
        assert '"A": "B"' in res.text

    def test_replay_1(self, fmod):
        # Presumably waits for the auto-indexer (0.25 interval) to index the
        # newly recorded WARC -- TODO confirm.
        time.sleep(0.5)
        fmod_slash = fmod + '/' if fmod else ''
        res = self.get('/test/mp_/http://httpbin.org/get?A=B', fmod_slash)
        assert '"A": "B"' in res.text

    def test_record_2(self, fmod):
        # Record into the second collection.
        fmod_slash = fmod + '/' if fmod else ''
        res = self.get('/test2/record/{0}http://httpbin.org/get?C=D', fmod_slash)
        assert '"C": "D"' in res.text

    def test_replay_2(self, fmod):
        time.sleep(0.5)
        fmod_slash = fmod + '/' if fmod else ''
        res = self.get('/test2/{0}http://httpbin.org/get?C=D', fmod_slash)
        assert '"C": "D"' in res.text

    def test_record_again_1(self, fmod):
        # Record a second URL into the first collection.
        fmod_slash = fmod + '/' if fmod else ''
        res = self.get('/test/record/{0}http://httpbin.org/get?C=D', fmod_slash)
        assert '"C": "D"' in res.text

    def test_replay_again_1(self, fmod):
        time.sleep(0.5)
        fmod_slash = fmod + '/' if fmod else ''
        res = self.get('/test/{0}http://httpbin.org/get?C=D', fmod_slash)
        assert '"C": "D"' in res.text
        # two warcs, for framed and non-framed capture
        assert len(os.listdir(os.path.join(self.root_dir, '_test_colls', 'test', 'archive'))) == 2
        # a single file is expected in the collection's indexes dir
        assert len(os.listdir(os.path.join(self.root_dir, '_test_colls', 'test', 'indexes'))) == 1