1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

move sample/test data to ./sample_archive/warcs and ./sample_archive/cdx

pywb_init now driven by config.yaml! (#14)

Not yet supporting customized handlers, views, etc...
This commit is contained in:
Ilya Kreymer 2014-01-28 22:03:01 -08:00
parent 35f7cb0477
commit 43a46b373d
15 changed files with 142 additions and 15 deletions

18
config.yaml Normal file
View File

@ -0,0 +1,18 @@
# Each entry under 'routes' maps a route name to its handler definition
routes:
    pywb:
        # cdx index source(s); a list implies local sources
        index_paths:
            - ./sample_archive/cdx/

        # locations from which the warcs named in the cdx are loaded
        archive_paths:
            - ./sample_archive/warcs/

        # template file rendered as the head insert
        head_insert_template: ./ui/head_insert.html

        # template file for the html query (calendar) view
        html_query_template: ./ui/query.html

# Top-level list of host paths passed to the request router
# (a list, matching the in-code default ['http://localhost:8080/'])
hostpaths:
    - http://localhost:8080/

View File

@ -380,7 +380,7 @@ if __name__ == "__main__" or utils.enable_doctests():
testloader = ArchiveLoader() testloader = ArchiveLoader()
def load_test_archive(test_file, offset, length): def load_test_archive(test_file, offset, length):
path = os.path.dirname(os.path.realpath(__file__)) + '/../test/' + test_file path = utils.test_data_dir() + 'warcs/' + test_file
archive = testloader.load(path, offset, length) archive = testloader.load(path, offset, length)
pprint.pprint((archive.type, archive.rec_headers, archive.status_headers)) pprint.pprint((archive.type, archive.rec_headers, archive.status_headers))

View File

@ -127,7 +127,7 @@ import utils
if __name__ == "__main__" or utils.enable_doctests(): if __name__ == "__main__" or utils.enable_doctests():
def create_test_cdx(test_file): def create_test_cdx(test_file):
path = os.path.dirname(os.path.realpath(__file__)) + '/../test/' + test_file path = utils.test_data_dir() + 'cdx/' + test_file
return FileReader(path) return FileReader(path)
test_cdx = create_test_cdx('iana.cdx') test_cdx = create_test_cdx('iana.cdx')

View File

@ -332,7 +332,7 @@ if __name__ == "__main__" or utils.enable_doctests():
import os import os
import sys import sys
test_dir = os.path.dirname(os.path.realpath(__file__)) + '/../test/' test_dir = utils.test_data_dir() + 'cdx/'
def test_cdx(key, match_func = binsearch.iter_exact, sources = [test_dir + 'iana.cdx'], **kwparams): def test_cdx(key, match_func = binsearch.iter_exact, sources = [test_dir + 'iana.cdx'], **kwparams):
for x in cdx_serve(key, kwparams, sources, match_func): for x in cdx_serve(key, kwparams, sources, match_func):

View File

@ -251,7 +251,7 @@ import utils
if __name__ == "__main__" or utils.enable_doctests(): if __name__ == "__main__" or utils.enable_doctests():
from pprint import pprint from pprint import pprint
test_dir = os.path.dirname(os.path.realpath(__file__)) + '/../test/' test_dir = utils.test_data_dir() + 'cdx/'
import doctest import doctest
doctest.testmod() doctest.testmod()

View File

@ -7,7 +7,9 @@ import replay_resolvers
import cdxserve import cdxserve
from archivalrouter import ArchivalRequestRouter, Route from archivalrouter import ArchivalRequestRouter, Route
import os import os
import yaml
import utils
import logging
## =========== ## ===========
default_head_insert = """ default_head_insert = """
@ -18,9 +20,10 @@ default_head_insert = """
<!-- End WB Insert --> <!-- End WB Insert -->
""" """
def pywb_config(): def pywb_config2():
# Current test dir # Current test dir
test_dir = os.path.dirname(os.path.realpath(__file__)) + '/../test/' #test_dir = utils.test_data_dir()
test_dir = './sample_archive/'
# Standard loader which supports WARC/ARC files # Standard loader which supports WARC/ARC files
aloader = archiveloader.ArchiveLoader() aloader = archiveloader.ArchiveLoader()
@ -28,19 +31,19 @@ def pywb_config():
# Source for cdx source # Source for cdx source
#query_h = query.QueryHandler(indexreader.RemoteCDXServer('http://cdx.example.com/cdx')) #query_h = query.QueryHandler(indexreader.RemoteCDXServer('http://cdx.example.com/cdx'))
#test_cdx = [test_dir + 'iana.cdx', test_dir + 'example.cdx', test_dir + 'dupes.cdx'] #test_cdx = [test_dir + 'iana.cdx', test_dir + 'example.cdx', test_dir + 'dupes.cdx']
indexs = indexreader.LocalCDXServer([test_dir]) indexs = indexreader.LocalCDXServer([test_dir + 'cdx/'])
# Loads warcs specified in cdx from these locations # Loads warcs specified in cdx from these locations
prefixes = [replay_resolvers.PrefixResolver(test_dir)] prefixes = [replay_resolvers.PrefixResolver(test_dir + 'warcs/')]
# Jinja2 head insert # Jinja2 head insert
head_insert = views.J2HeadInsertView('./ui/', 'head_insert.html') head_insert = views.J2HeadInsertView('./ui/head_insert.html')
# Create rewriting replay handler to rewrite records # Create rewriting replay handler to rewrite records
replayer = replay_views.RewritingReplayView(resolvers = prefixes, archiveloader = aloader, head_insert = head_insert, buffer_response = True) replayer = replay_views.RewritingReplayView(resolvers = prefixes, archiveloader = aloader, head_insert = head_insert, buffer_response = True)
# Create Jinja2 based html query view # Create Jinja2 based html query view
html_view = views.J2QueryView('./ui/', 'query.html') html_view = views.J2QueryView('./ui/query.html')
# WB handler which uses the index reader, replayer, and html_view # WB handler which uses the index reader, replayer, and html_view
wb_handler = handlers.WBHandler(indexs, replayer, html_view) wb_handler = handlers.WBHandler(indexs, replayer, html_view)
@ -61,3 +64,97 @@ def pywb_config():
hostpaths = ['http://localhost:8080/']) hostpaths = ['http://localhost:8080/'])
def pywb_config(filename = './pywb/config.yaml'):
    """
    Build the top-level request router from a YAML config file.

    The file must contain a 'routes' mapping of route name -> handler
    definition, and may contain a top-level 'hostpaths' entry
    (defaults to ['http://localhost:8080/']).

    :param filename: path of the YAML config file to read
    :return: an ArchivalRequestRouter built from the configured routes
    :raises KeyError: if the config has no 'routes' entry
    """
    # Close the config file deterministically instead of leaking the handle
    with open(filename) as config_file:
        # NOTE(review): yaml.load can construct arbitrary python objects;
        # consider yaml.safe_load if a config may come from an untrusted source
        config = yaml.load(config_file)

    routes = [yaml_parse_route(route_item) for route_item in config['routes'].items()]

    hostpaths = config.get('hostpaths', ['http://localhost:8080/'])

    # A scalar 'hostpaths: http://...' entry is loaded as a plain string;
    # wrap it so the router always receives a list, like the default above
    if isinstance(hostpaths, str):
        hostpaths = [hostpaths]

    return ArchivalRequestRouter(routes, hostpaths)
def yaml_parse_route(route_item):
    """
    Build a Route from one item of the config's 'routes' mapping.

    :param route_item: a (route_name, handler_def) pair
    :return: a Route for route_name, using the handler built from handler_def
    """
    # Unpack in the body: tuple parameters in a signature are python 2 only
    # (removed by PEP 3113), while this form is valid on both
    route_name, handler_def = route_item
    return Route(route_name, yaml_parse_handler(handler_def))
def yaml_parse_index_loader(index_config):
    """
    Create the cdx index reader described by an 'index_paths' config value.

    Accepted forms:
      - list: passed as-is to a LocalCDXServer
      - str:  a single uri, no cookie
      - dict: {'url': ..., 'cookie': ...}, where 'cookie' is optional

    An http(s) uri that does not end in '.cdx' is treated as a remote
    cdx server; any other uri is treated as a single local source.

    :param index_config: list, str or dict as described above
    :return: an indexreader.LocalCDXServer or indexreader.RemoteCDXServer
    :raises Exception: if index_config is none of the accepted types
    :raises KeyError: if a dict config has no 'url' entry
    """
    # support mixed cdx streams and remote servers?
    # for now, list implies local sources
    if isinstance(index_config, list):
        return indexreader.LocalCDXServer(index_config)

    if isinstance(index_config, str):
        uri = index_config
        cookie = None
    elif isinstance(index_config, dict):
        uri = index_config['url']
        # cookie is optional, same as for the plain string form
        cookie = index_config.get('cookie')
    else:
        raise Exception('Invalid Index Reader Config: ' + str(index_config))

    # Check for remote cdx server
    is_remote = uri.startswith(('http://', 'https://')) and not uri.endswith('.cdx')
    if is_remote:
        return indexreader.RemoteCDXServer(uri, cookie = cookie)

    return indexreader.LocalCDXServer([uri])
def yaml_parse_archive_resolvers(archive_paths):
    """
    Create one archive resolver per configured archive path.

    Resolver selection, per path:
      - starts with 'redis://'  -> RedisResolver
      - is an existing file     -> PathIndexResolver
      - anything else           -> PrefixResolver

    :param archive_paths: a list of paths, or a single path string
    :return: list of resolvers, in the same order as the paths
    """
    #TODO: more options (remote files, contains param, etc..)
    def make_resolver(path):
        if path.startswith('redis://'):
            return replay_resolvers.RedisResolver(path)

        if os.path.isfile(path):
            return replay_resolvers.PathIndexResolver(path)

        logging.info('Adding Archive Source: ' + path)
        return replay_resolvers.PrefixResolver(path)

    # A lone string would otherwise be iterated one character at a time,
    # producing a resolver per character; treat it as a single path
    if isinstance(archive_paths, str):
        archive_paths = [archive_paths]

    # Always return a real list (map() is lazy on python 3)
    return [make_resolver(path) for path in archive_paths]
def yaml_parse_head_insert(handler_def):
    """
    Pick the head insert for a handler definition.

    A non-empty 'head_insert_template' entry wins and is wrapped in a
    views.J2HeadInsertView; otherwise the static 'head_insert_text'
    entry (default: empty string) is returned unchanged.

    :param handler_def: handler definition mapping from the config
    :return: a J2HeadInsertView, or the static head insert text
    """
    template_path = handler_def.get('head_insert_template')

    # No template configured: fall back to static head_insert text
    if not template_path:
        static_text = handler_def.get('head_insert_text', '')
        logging.info('Adding Head-Insert Text: ' + static_text)
        return static_text

    logging.info('Adding Head-Insert Template: ' + template_path)
    return views.J2HeadInsertView(template_path)
def yaml_parse_handler(handler_def):
    """
    Assemble a WBHandler from a single handler definition.

    Reads 'index_paths' and 'archive_paths' (both required), the head
    insert settings, and the optional 'buffer_response' (default False)
    and 'html_query_template' entries.

    :param handler_def: handler definition mapping from the config
    :return: a handlers.WBHandler wired to the index reader, the
             rewriting replay view and the html query view (or None)
    :raises KeyError: if 'index_paths' or 'archive_paths' is missing
    """
    loader = archiveloader.ArchiveLoader()

    cdx_source = yaml_parse_index_loader(handler_def['index_paths'])

    resolvers = yaml_parse_archive_resolvers(handler_def['archive_paths'])

    insert_view = yaml_parse_head_insert(handler_def)

    replay_view = replay_views.RewritingReplayView(
        resolvers = resolvers,
        archiveloader = loader,
        head_insert = insert_view,
        buffer_response = handler_def.get('buffer_response', False))

    # The html query view is optional; without a template there is none
    query_template = handler_def.get('html_query_template')
    if query_template:
        logging.info('Adding HTML Calendar Template: ' + query_template)
        query_view = views.J2QueryView(query_template)
    else:
        logging.info('No HTML Calendar View Present')
        query_view = None

    return handlers.WBHandler(cdx_source, replay_view, query_view)
# Placeholder for doctests / direct execution: nothing is run here yet.
# The commented-out line below is a manual check that prints the router
# built from a config file.
if __name__ == "__main__" or utils.enable_doctests():
    pass
    #print pywb_config('config.yaml')

View File

@ -203,6 +203,10 @@ is_in_nose = sys.argv[0].endswith('nosetests')
def enable_doctests(): def enable_doctests():
return is_in_nose return is_in_nose
def test_data_dir():
    """
    Return the path of the sample archive data directory.

    The result is the real directory of this module followed by
    '/../sample_archive/' (trailing slash included), so callers can
    append 'cdx/' or 'warcs/' plus a file name directly.
    """
    import os
    module_dir = os.path.dirname(os.path.realpath(__file__))
    return module_dir + '/../sample_archive/'
#============================================ #============================================
if __name__ == "__main__" or enable_doctests(): if __name__ == "__main__" or enable_doctests():

View File

@ -4,6 +4,7 @@ import wbrequestresponse
import wbexceptions import wbexceptions
import time import time
from os import path
from itertools import imap from itertools import imap
from jinja2 import Environment, FileSystemLoader from jinja2 import Environment, FileSystemLoader
@ -16,7 +17,9 @@ class TextQueryView:
#================================================================= #=================================================================
class J2QueryView: class J2QueryView:
def __init__(self, template_dir, template_file, buffer_index = True): def __init__(self, filename, buffer_index = True):
template_dir, template_file = path.split(filename)
self.template_file = template_file self.template_file = template_file
self.buffer_index = buffer_index self.buffer_index = buffer_index
@ -41,7 +44,8 @@ class J2QueryView:
# Render the head insert (eg. banner) # Render the head insert (eg. banner)
#================================================================= #=================================================================
class J2HeadInsertView: class J2HeadInsertView:
def __init__(self, template_dir, template_file, buffer_index = True): def __init__(self, filename, buffer_index = True):
template_dir, template_file = path.split(filename)
self.template_file = template_file self.template_file = template_file
self.jinja_env = make_jinja_env(template_dir) self.jinja_env = make_jinja_env(template_dir)

View File

@ -77,6 +77,8 @@ def handle_exception(env, exc):
#================================================================= #=================================================================
DEFAULT_CONFIG_FILE = 'config.yaml'
def main(): def main():
try: try:
# Attempt to load real settings from globalwb module # Attempt to load real settings from globalwb module
@ -91,7 +93,9 @@ def main():
module = importlib.import_module(config_name) module = importlib.import_module(config_name)
app = create_wb_app(module.pywb_config()) config_file = DEFAULT_CONFIG_FILE
app = create_wb_app(module.pywb_config(config_file))
logging.info('') logging.info('')
logging.info('*** pywb inited with settings from {0}.pywb_config()!\n'.format(config_name)) logging.info('*** pywb inited with settings from {0}.pywb_config()!\n'.format(config_name))
return app return app
@ -105,6 +109,6 @@ def main():
if __name__ == "__main__" or utils.enable_doctests(): if __name__ == "__main__" or utils.enable_doctests():
import pywb_init import pywb_init
# Test sample settings # Test sample settings
application = create_wb_app(pywb_init.pywb_config()) application = create_wb_app(pywb_init.pywb_config('../' + DEFAULT_CONFIG_FILE))
else: else:
application = main() application = main()