1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

move sample/test data to ./sample_archive/warcs and ./sample_archive/cdx

pywb_init now driven by config.yaml! (#14)

Not yet supporting customized handlers, views, etc...
This commit is contained in:
Ilya Kreymer 2014-01-28 22:03:01 -08:00
parent 35f7cb0477
commit 43a46b373d
15 changed files with 142 additions and 15 deletions

18
config.yaml Normal file
View File

@ -0,0 +1,18 @@
routes:
pywb:
index_paths:
- ./sample_archive/cdx/
archive_paths:
- ./sample_archive/warcs/
head_insert_template: ./ui/head_insert.html
html_query_template: ./ui/query.html
hostpaths: http://localhost:8080/

View File

@ -380,7 +380,7 @@ if __name__ == "__main__" or utils.enable_doctests():
testloader = ArchiveLoader()
def load_test_archive(test_file, offset, length):
path = os.path.dirname(os.path.realpath(__file__)) + '/../test/' + test_file
path = utils.test_data_dir() + 'warcs/' + test_file
archive = testloader.load(path, offset, length)
pprint.pprint((archive.type, archive.rec_headers, archive.status_headers))

View File

@ -127,7 +127,7 @@ import utils
if __name__ == "__main__" or utils.enable_doctests():
def create_test_cdx(test_file):
path = os.path.dirname(os.path.realpath(__file__)) + '/../test/' + test_file
path = utils.test_data_dir() + 'cdx/' + test_file
return FileReader(path)
test_cdx = create_test_cdx('iana.cdx')

View File

@ -332,7 +332,7 @@ if __name__ == "__main__" or utils.enable_doctests():
import os
import sys
test_dir = os.path.dirname(os.path.realpath(__file__)) + '/../test/'
test_dir = utils.test_data_dir() + 'cdx/'
def test_cdx(key, match_func = binsearch.iter_exact, sources = [test_dir + 'iana.cdx'], **kwparams):
for x in cdx_serve(key, kwparams, sources, match_func):

View File

@ -251,7 +251,7 @@ import utils
if __name__ == "__main__" or utils.enable_doctests():
from pprint import pprint
test_dir = os.path.dirname(os.path.realpath(__file__)) + '/../test/'
test_dir = utils.test_data_dir() + 'cdx/'
import doctest
doctest.testmod()

View File

@ -7,7 +7,9 @@ import replay_resolvers
import cdxserve
from archivalrouter import ArchivalRequestRouter, Route
import os
import yaml
import utils
import logging
## ===========
default_head_insert = """
@ -18,9 +20,10 @@ default_head_insert = """
<!-- End WB Insert -->
"""
def pywb_config():
def pywb_config2():
# Current test dir
test_dir = os.path.dirname(os.path.realpath(__file__)) + '/../test/'
#test_dir = utils.test_data_dir()
test_dir = './sample_archive/'
# Standard loader which supports WARC/ARC files
aloader = archiveloader.ArchiveLoader()
@ -28,19 +31,19 @@ def pywb_config():
# Source for cdx source
#query_h = query.QueryHandler(indexreader.RemoteCDXServer('http://cdx.example.com/cdx'))
#test_cdx = [test_dir + 'iana.cdx', test_dir + 'example.cdx', test_dir + 'dupes.cdx']
indexs = indexreader.LocalCDXServer([test_dir])
indexs = indexreader.LocalCDXServer([test_dir + 'cdx/'])
# Loads warcs specified in cdx from these locations
prefixes = [replay_resolvers.PrefixResolver(test_dir)]
prefixes = [replay_resolvers.PrefixResolver(test_dir + 'warcs/')]
# Jinja2 head insert
head_insert = views.J2HeadInsertView('./ui/', 'head_insert.html')
head_insert = views.J2HeadInsertView('./ui/head_insert.html')
# Create rewriting replay handler to rewrite records
replayer = replay_views.RewritingReplayView(resolvers = prefixes, archiveloader = aloader, head_insert = head_insert, buffer_response = True)
# Create Jinja2 based html query view
html_view = views.J2QueryView('./ui/', 'query.html')
html_view = views.J2QueryView('./ui/query.html')
# WB handler which uses the index reader, replayer, and html_view
wb_handler = handlers.WBHandler(indexs, replayer, html_view)
@ -61,3 +64,97 @@ def pywb_config():
hostpaths = ['http://localhost:8080/'])
def pywb_config(filename = './pywb/config.yaml'):
    """
    Build the archival request router described by a YAML config file.

    :param filename: path of the YAML file to read. Its 'routes' mapping
        (route name -> handler definition) is required; 'hostpaths' is
        optional and defaults to ['http://localhost:8080/'].
    :return: an ArchivalRequestRouter built from the parsed routes and
        the host paths.
    :raises IOError: (OSError on Python 3) if the file cannot be opened.
    :raises KeyError: if the loaded config has no 'routes' entry.
    """
    # Use a context manager so the file handle is closed as soon as the
    # config has been parsed, instead of being left for the GC.
    with open(filename) as config_file:
        # NOTE(review): yaml.load can construct arbitrary Python objects.
        # Switch to yaml.safe_load if this file can ever come from an
        # untrusted source.
        config = yaml.load(config_file)

    # items() + a list comprehension yields a real list on both
    # Python 2 and Python 3 (iteritems() and a list-returning map()
    # exist only on Python 2).
    routes = [yaml_parse_route(route_item)
              for route_item in config['routes'].items()]

    hostpaths = config.get('hostpaths', ['http://localhost:8080/'])

    return ArchivalRequestRouter(routes, hostpaths)
def yaml_parse_route(route_item):
    """
    Convert one entry of the config 'routes' mapping into a Route.

    :param route_item: a (route_name, handler_def) pair, as produced by
        iterating over the items of the 'routes' mapping.
    :return: a Route for route_name whose handler is built from
        handler_def by yaml_parse_handler.
    :raises ValueError: if route_item does not contain exactly two
        elements.
    """
    # Unpack inside the body: tuple parameters in a signature are a
    # SyntaxError on Python 3 (removed by PEP 3113). Callers still pass
    # a single pair, so the calling convention is unchanged.
    route_name, handler_def = route_item
    return Route(route_name, yaml_parse_handler(handler_def))
def yaml_parse_index_loader(index_config):
    """
    Create the CDX index reader described by an 'index_paths' config value.

    Accepted forms of index_config:
      * list -- passed as-is to indexreader.LocalCDXServer
      * str  -- a single uri, with no cookie
      * dict -- must contain 'url'; 'cookie' is optional

    A uri starting with http:// or https:// that does not end in '.cdx'
    is served through indexreader.RemoteCDXServer; any other uri is
    wrapped in a one-element list for indexreader.LocalCDXServer.

    :param index_config: the parsed config value (list, str or dict).
    :return: a LocalCDXServer or RemoteCDXServer instance.
    :raises KeyError: if a dict config has no 'url' entry.
    :raises Exception: if index_config is none of the accepted types.
    """
    # support mixed cdx streams and remote servers?
    # for now, list implies local sources
    if isinstance(index_config, list):
        return indexreader.LocalCDXServer(index_config)

    if isinstance(index_config, str):
        uri = index_config
        cookie = None
    elif isinstance(index_config, dict):
        uri = index_config['url']
        # The cookie is optional (the plain-string form has none either),
        # so a missing key must not raise KeyError.
        cookie = index_config.get('cookie')
    else:
        raise Exception('Invalid Index Reader Config: ' + str(index_config))

    # Check for remote cdx server
    is_remote = (uri.startswith(('http://', 'https://')) and
                 not uri.endswith('.cdx'))
    if is_remote:
        return indexreader.RemoteCDXServer(uri, cookie = cookie)

    return indexreader.LocalCDXServer([uri])
def yaml_parse_archive_resolvers(archive_paths):
    """
    Map every configured archive path to a replay resolver.

    For each path:
      * a 'redis://' prefix selects replay_resolvers.RedisResolver
      * a path naming an existing file selects
        replay_resolvers.PathIndexResolver
      * anything else is logged and selects
        replay_resolvers.PrefixResolver

    :param archive_paths: iterable of path strings from the config.
    :return: the result of map() over archive_paths, one resolver per
        path, in the same order.
    """
    #TODO: more options (remote files, contains param, etc..)
    def resolver_for(location):
        if location.startswith('redis://'):
            return replay_resolvers.RedisResolver(location)

        if os.path.isfile(location):
            return replay_resolvers.PathIndexResolver(location)

        # Only the prefix fallback is announced in the log
        logging.info('Adding Archive Source: ' + location)
        return replay_resolvers.PrefixResolver(location)

    return map(resolver_for, archive_paths)
def yaml_parse_head_insert(handler_def):
    """
    Resolve the head insert configured for a handler.

    A non-empty 'head_insert_template' entry takes precedence and yields
    a views.J2HeadInsertView for that template file. Otherwise the static
    'head_insert_text' entry is returned, defaulting to ''.

    :param handler_def: mapping holding the handler's config entries.
    :return: a J2HeadInsertView, or the static head-insert string.
    """
    template_path = handler_def.get('head_insert_template')

    # No template file configured: fall back to static head_insert text
    if not template_path:
        static_text = handler_def.get('head_insert_text', '')
        logging.info('Adding Head-Insert Text: ' + static_text)
        return static_text

    logging.info('Adding Head-Insert Template: ' + template_path)
    return views.J2HeadInsertView(template_path)
def yaml_parse_handler(handler_def):
    """
    Assemble a handlers.WBHandler from one route's handler definition.

    Reads 'index_paths' and 'archive_paths' (both required), plus the
    optional 'buffer_response' (default False), 'html_query_template'
    and the head-insert entries consumed by yaml_parse_head_insert.

    :param handler_def: mapping holding the handler's config entries.
    :return: a WBHandler wired with the index loader, a rewriting replay
        view and the HTML query view (None when no template is set).
    :raises KeyError: if 'index_paths' or 'archive_paths' is missing.
    """
    loader = archiveloader.ArchiveLoader()

    cdx_source = yaml_parse_index_loader(handler_def['index_paths'])

    resolvers = yaml_parse_archive_resolvers(handler_def['archive_paths'])

    banner = yaml_parse_head_insert(handler_def)

    should_buffer = handler_def.get('buffer_response', False)

    replayer = replay_views.RewritingReplayView(resolvers = resolvers,
                                                archiveloader = loader,
                                                head_insert = banner,
                                                buffer_response = should_buffer)

    # The calendar/query view is optional; log which way it went
    query_template = handler_def.get('html_query_template')
    if query_template:
        logging.info('Adding HTML Calendar Template: ' + query_template)
        html_view = views.J2QueryView(query_template)
    else:
        logging.info('No HTML Calendar View Present')
        html_view = None

    return handlers.WBHandler(cdx_source, replayer, html_view)
if __name__ == "__main__" or utils.enable_doctests():
pass
#print pywb_config('config.yaml')

View File

@ -203,6 +203,10 @@ is_in_nose = sys.argv[0].endswith('nosetests')
def enable_doctests():
    """
    Report whether doctest-only code should be set up.

    :return: the module-level is_in_nose flag, which is set when
        sys.argv[0] ends with 'nosetests'.
    """
    return is_in_nose
def test_data_dir():
    """
    Return the path of the bundled sample archive directory.

    The path is the resolved directory of this module followed by
    '/../sample_archive/' (the '..' segment is left un-normalized and
    the trailing slash is included).
    """
    import os
    module_dir = os.path.dirname(os.path.realpath(__file__))
    return module_dir + '/../sample_archive/'
#============================================
if __name__ == "__main__" or enable_doctests():

View File

@ -4,6 +4,7 @@ import wbrequestresponse
import wbexceptions
import time
from os import path
from itertools import imap
from jinja2 import Environment, FileSystemLoader
@ -16,7 +17,9 @@ class TextQueryView:
#=================================================================
class J2QueryView:
def __init__(self, template_dir, template_file, buffer_index = True):
def __init__(self, filename, buffer_index = True):
template_dir, template_file = path.split(filename)
self.template_file = template_file
self.buffer_index = buffer_index
@ -41,7 +44,8 @@ class J2QueryView:
# Render the head insert (eg. banner)
#=================================================================
class J2HeadInsertView:
def __init__(self, template_dir, template_file, buffer_index = True):
def __init__(self, filename, buffer_index = True):
template_dir, template_file = path.split(filename)
self.template_file = template_file
self.jinja_env = make_jinja_env(template_dir)

View File

@ -77,6 +77,8 @@ def handle_exception(env, exc):
#=================================================================
DEFAULT_CONFIG_FILE = 'config.yaml'
def main():
try:
# Attempt to load real settings from globalwb module
@ -91,7 +93,9 @@ def main():
module = importlib.import_module(config_name)
app = create_wb_app(module.pywb_config())
config_file = DEFAULT_CONFIG_FILE
app = create_wb_app(module.pywb_config(config_file))
logging.info('')
logging.info('*** pywb inited with settings from {0}.pywb_config()!\n'.format(config_name))
return app
@ -105,6 +109,6 @@ def main():
if __name__ == "__main__" or utils.enable_doctests():
import pywb_init
# Test sample settings
application = create_wb_app(pywb_init.pywb_config())
application = create_wb_app(pywb_init.pywb_config('../' + DEFAULT_CONFIG_FILE))
else:
application = main()