mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
move sample/test data to ./sample_archive/warcs and ./sample_archive/cdx
pywb_init is now driven by config.yaml (#14). Customized handlers, views, etc. are not yet supported.
This commit is contained in:
parent
35f7cb0477
commit
43a46b373d
18
config.yaml
Normal file
18
config.yaml
Normal file
@ -0,0 +1,18 @@
|
||||
routes:
|
||||
pywb:
|
||||
index_paths:
|
||||
- ./sample_archive/cdx/
|
||||
|
||||
archive_paths:
|
||||
- ./sample_archive/warcs/
|
||||
|
||||
head_insert_template: ./ui/head_insert.html
|
||||
|
||||
html_query_template: ./ui/query.html
|
||||
|
||||
|
||||
hostpaths: http://localhost:8080/
|
||||
|
||||
|
||||
|
||||
|
@ -380,7 +380,7 @@ if __name__ == "__main__" or utils.enable_doctests():
|
||||
testloader = ArchiveLoader()
|
||||
|
||||
def load_test_archive(test_file, offset, length):
|
||||
path = os.path.dirname(os.path.realpath(__file__)) + '/../test/' + test_file
|
||||
path = utils.test_data_dir() + 'warcs/' + test_file
|
||||
|
||||
archive = testloader.load(path, offset, length)
|
||||
pprint.pprint((archive.type, archive.rec_headers, archive.status_headers))
|
||||
|
@ -127,7 +127,7 @@ import utils
|
||||
if __name__ == "__main__" or utils.enable_doctests():
|
||||
|
||||
def create_test_cdx(test_file):
|
||||
path = os.path.dirname(os.path.realpath(__file__)) + '/../test/' + test_file
|
||||
path = utils.test_data_dir() + 'cdx/' + test_file
|
||||
return FileReader(path)
|
||||
|
||||
test_cdx = create_test_cdx('iana.cdx')
|
||||
|
@ -332,7 +332,7 @@ if __name__ == "__main__" or utils.enable_doctests():
|
||||
import os
|
||||
import sys
|
||||
|
||||
test_dir = os.path.dirname(os.path.realpath(__file__)) + '/../test/'
|
||||
test_dir = utils.test_data_dir() + 'cdx/'
|
||||
|
||||
def test_cdx(key, match_func = binsearch.iter_exact, sources = [test_dir + 'iana.cdx'], **kwparams):
|
||||
for x in cdx_serve(key, kwparams, sources, match_func):
|
||||
|
@ -251,7 +251,7 @@ import utils
|
||||
if __name__ == "__main__" or utils.enable_doctests():
|
||||
from pprint import pprint
|
||||
|
||||
test_dir = os.path.dirname(os.path.realpath(__file__)) + '/../test/'
|
||||
test_dir = utils.test_data_dir() + 'cdx/'
|
||||
|
||||
import doctest
|
||||
doctest.testmod()
|
||||
|
@ -7,7 +7,9 @@ import replay_resolvers
|
||||
import cdxserve
|
||||
from archivalrouter import ArchivalRequestRouter, Route
|
||||
import os
|
||||
|
||||
import yaml
|
||||
import utils
|
||||
import logging
|
||||
|
||||
## ===========
|
||||
default_head_insert = """
|
||||
@ -18,9 +20,10 @@ default_head_insert = """
|
||||
<!-- End WB Insert -->
|
||||
"""
|
||||
|
||||
def pywb_config():
|
||||
def pywb_config2():
|
||||
# Current test dir
|
||||
test_dir = os.path.dirname(os.path.realpath(__file__)) + '/../test/'
|
||||
#test_dir = utils.test_data_dir()
|
||||
test_dir = './sample_archive/'
|
||||
|
||||
# Standard loader which supports WARC/ARC files
|
||||
aloader = archiveloader.ArchiveLoader()
|
||||
@ -28,19 +31,19 @@ def pywb_config():
|
||||
# Source for cdx source
|
||||
#query_h = query.QueryHandler(indexreader.RemoteCDXServer('http://cdx.example.com/cdx'))
|
||||
#test_cdx = [test_dir + 'iana.cdx', test_dir + 'example.cdx', test_dir + 'dupes.cdx']
|
||||
indexs = indexreader.LocalCDXServer([test_dir])
|
||||
indexs = indexreader.LocalCDXServer([test_dir + 'cdx/'])
|
||||
|
||||
# Loads warcs specified in cdx from these locations
|
||||
prefixes = [replay_resolvers.PrefixResolver(test_dir)]
|
||||
prefixes = [replay_resolvers.PrefixResolver(test_dir + 'warcs/')]
|
||||
|
||||
# Jinja2 head insert
|
||||
head_insert = views.J2HeadInsertView('./ui/', 'head_insert.html')
|
||||
head_insert = views.J2HeadInsertView('./ui/head_insert.html')
|
||||
|
||||
# Create rewriting replay handler to rewrite records
|
||||
replayer = replay_views.RewritingReplayView(resolvers = prefixes, archiveloader = aloader, head_insert = head_insert, buffer_response = True)
|
||||
|
||||
# Create Jinja2 based html query view
|
||||
html_view = views.J2QueryView('./ui/', 'query.html')
|
||||
html_view = views.J2QueryView('./ui/query.html')
|
||||
|
||||
# WB handler which uses the index reader, replayer, and html_view
|
||||
wb_handler = handlers.WBHandler(indexs, replayer, html_view)
|
||||
@ -61,3 +64,97 @@ def pywb_config():
|
||||
hostpaths = ['http://localhost:8080/'])
|
||||
|
||||
|
||||
def pywb_config(filename = './pywb/config.yaml'):
    """Build the top-level request router from a YAML config file.

    The file must contain a ``routes`` mapping (route name -> handler
    definition); each entry is turned into a ``Route`` by
    ``yaml_parse_route``. An optional ``hostpaths`` entry overrides the
    default of ``['http://localhost:8080/']``.

    :param filename: path to the YAML config file
    :return: an ``ArchivalRequestRouter`` for the configured routes
    :raises KeyError: if the config has no ``routes`` entry
    """
    # NOTE(review): yaml.load can construct arbitrary Python objects from
    # tagged input. The config is presumably operator-controlled; consider
    # yaml.safe_load if that assumption does not hold.
    # The 'with' block makes sure the file handle is closed after parsing.
    with open(filename) as config_stream:
        config = yaml.load(config_stream)

    routes = [yaml_parse_route(item) for item in config['routes'].items()]

    hostpaths = config.get('hostpaths', ['http://localhost:8080/'])

    # A scalar 'hostpaths: http://...' entry in YAML loads as a plain string,
    # while the default above is a list; wrap it so the router always
    # receives a list of host prefixes.
    if isinstance(hostpaths, str):
        hostpaths = [hostpaths]

    return ArchivalRequestRouter(routes, hostpaths)
|
||||
|
||||
|
||||
|
||||
def yaml_parse_route(route):
    """Create a ``Route`` from one ``(route_name, handler_def)`` pair.

    The pair is taken as a single argument so the function can be mapped
    directly over the items of the ``routes`` config mapping. It is unpacked
    in the body rather than in the signature, because tuple parameters were
    removed in Python 3 (PEP 3113); positional callers are unaffected.

    :param route: 2-tuple of (route name, handler definition dict)
    :return: ``Route`` bound to the handler built by ``yaml_parse_handler``
    """
    route_name, handler_def = route
    return Route(route_name, yaml_parse_handler(handler_def))
|
||||
|
||||
|
||||
def yaml_parse_index_loader(index_config):
    """Create the CDX index reader described by an ``index_paths`` entry.

    Accepted forms:

    * list -- passed as-is to ``indexreader.LocalCDXServer``
    * str  -- a single uri/path, no cookie
    * dict -- ``{'url': ..., 'cookie': ...}``; ``cookie`` is optional

    A uri starting with ``http://`` or ``https://`` that does not end in
    ``.cdx`` selects ``indexreader.RemoteCDXServer``; anything else is
    wrapped in a one-element list for ``indexreader.LocalCDXServer``.

    :param index_config: list, str or dict as described above
    :raises KeyError: if a dict config has no ``url`` key
    :raises Exception: if ``index_config`` is none of the accepted types
    """
    # support mixed cdx streams and remote servers?
    # for now, list implies local sources
    if isinstance(index_config, list):
        return indexreader.LocalCDXServer(index_config)

    if isinstance(index_config, str):
        uri = index_config
        cookie = None
    elif isinstance(index_config, dict):
        uri = index_config['url']
        # The cookie is optional (the plain-string form has none), so a
        # dict that only specifies 'url' must not raise KeyError.
        cookie = index_config.get('cookie')
    else:
        raise Exception('Invalid Index Reader Config: ' + str(index_config))

    # Check for remote cdx server
    is_remote = uri.startswith(('http://', 'https://')) and not uri.endswith('.cdx')

    if is_remote:
        return indexreader.RemoteCDXServer(uri, cookie = cookie)

    return indexreader.LocalCDXServer([uri])
|
||||
|
||||
|
||||
def yaml_parse_archive_resolvers(archive_paths):
    """Create one archive-file resolver per configured archive path.

    Resolver selection, per path:

    * starts with ``redis://``  -> ``replay_resolvers.RedisResolver``
    * is an existing file       -> ``replay_resolvers.PathIndexResolver``
    * anything else             -> ``replay_resolvers.PrefixResolver``

    :param archive_paths: a list of paths, or a single path string
    :return: list of resolver objects, in the order the paths were given
    """

    #TODO: more options (remote files, contains param, etc..)
    def make_resolver(path):
        if path.startswith('redis://'):
            return replay_resolvers.RedisResolver(path)

        if os.path.isfile(path):
            return replay_resolvers.PathIndexResolver(path)

        logging.info('Adding Archive Source: ' + path)
        return replay_resolvers.PrefixResolver(path)

    # A scalar 'archive_paths: ./dir/' entry loads as a plain string; without
    # this wrap it would be iterated character by character. This mirrors
    # index_paths, which already accepts a single string.
    if isinstance(archive_paths, str):
        archive_paths = [archive_paths]

    # Build a concrete list (not a lazy map object) so the result can be
    # iterated more than once.
    return [make_resolver(path) for path in archive_paths]
|
||||
|
||||
def yaml_parse_head_insert(handler_def):
    """Return the head-insert for a handler definition.

    If ``head_insert_template`` is set (non-empty), a
    ``views.J2HeadInsertView`` for that template file is returned.
    Otherwise the static ``head_insert_text`` string is returned, or ``''``
    when that key is missing or empty.

    :param handler_def: handler definition dict from the config
    :return: a ``J2HeadInsertView`` or a plain string
    """
    # First, try a template file
    head_insert_file = handler_def.get('head_insert_template')
    if head_insert_file:
        logging.info('Adding Head-Insert Template: %s', head_insert_file)
        return views.J2HeadInsertView(head_insert_file)

    # Then, static head_insert text.
    # An empty 'head_insert_text:' key in YAML loads as None; fall back to ''
    # so callers always get a string and the log call below cannot fail.
    head_insert_text = handler_def.get('head_insert_text') or ''
    logging.info('Adding Head-Insert Text: %s', head_insert_text)
    return head_insert_text
|
||||
|
||||
|
||||
def yaml_parse_handler(handler_def):
    """Assemble a ``handlers.WBHandler`` from one handler definition.

    Reads ``index_paths`` and ``archive_paths`` (both required), plus the
    optional head-insert settings, ``buffer_response`` (default False) and
    ``html_query_template``. When no query template is configured, the
    handler is created with ``None`` as its html view.

    :param handler_def: handler definition dict from the config
    :return: the configured ``WBHandler``
    :raises KeyError: if ``index_paths`` or ``archive_paths`` is missing
    """
    loader = archiveloader.ArchiveLoader()

    cdx_source = yaml_parse_index_loader(handler_def['index_paths'])
    resolvers = yaml_parse_archive_resolvers(handler_def['archive_paths'])
    insert_view = yaml_parse_head_insert(handler_def)

    replay_view = replay_views.RewritingReplayView(
        resolvers = resolvers,
        archiveloader = loader,
        head_insert = insert_view,
        buffer_response = handler_def.get('buffer_response', False))

    # Optional Jinja2 calendar/query page
    query_template = handler_def.get('html_query_template')

    if not query_template:
        logging.info('No HTML Calendar View Present')
        query_view = None
    else:
        logging.info('Adding HTML Calendar Template: ' + query_template)
        query_view = views.J2QueryView(query_template)

    return handlers.WBHandler(cdx_source, replay_view, query_view)
|
||||
|
||||
if __name__ == "__main__" or utils.enable_doctests():
|
||||
pass
|
||||
#print pywb_config('config.yaml')
|
||||
|
||||
|
||||
|
@ -203,6 +203,10 @@ is_in_nose = sys.argv[0].endswith('nosetests')
|
||||
def enable_doctests():
|
||||
return is_in_nose
|
||||
|
||||
def test_data_dir():
    """Return the sample archive directory path, with a trailing slash.

    The path is the resolved directory of this module followed by
    ``/../sample_archive/``; the ``..`` component is left unnormalized.
    """
    import os

    module_dir = os.path.dirname(os.path.realpath(__file__))
    return module_dir + '/../sample_archive/'
|
||||
|
||||
#============================================
|
||||
|
||||
if __name__ == "__main__" or enable_doctests():
|
||||
|
@ -4,6 +4,7 @@ import wbrequestresponse
|
||||
import wbexceptions
|
||||
import time
|
||||
|
||||
from os import path
|
||||
from itertools import imap
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
|
||||
@ -16,7 +17,9 @@ class TextQueryView:
|
||||
|
||||
#=================================================================
|
||||
class J2QueryView:
|
||||
def __init__(self, template_dir, template_file, buffer_index = True):
|
||||
def __init__(self, filename, buffer_index = True):
|
||||
template_dir, template_file = path.split(filename)
|
||||
|
||||
self.template_file = template_file
|
||||
self.buffer_index = buffer_index
|
||||
|
||||
@ -41,7 +44,8 @@ class J2QueryView:
|
||||
# Render the head insert (eg. banner)
|
||||
#=================================================================
|
||||
class J2HeadInsertView:
|
||||
def __init__(self, template_dir, template_file, buffer_index = True):
|
||||
def __init__(self, filename, buffer_index = True):
|
||||
template_dir, template_file = path.split(filename)
|
||||
self.template_file = template_file
|
||||
|
||||
self.jinja_env = make_jinja_env(template_dir)
|
||||
|
@ -77,6 +77,8 @@ def handle_exception(env, exc):
|
||||
|
||||
|
||||
#=================================================================
|
||||
DEFAULT_CONFIG_FILE = 'config.yaml'
|
||||
|
||||
def main():
|
||||
try:
|
||||
# Attempt to load real settings from globalwb module
|
||||
@ -91,7 +93,9 @@ def main():
|
||||
|
||||
module = importlib.import_module(config_name)
|
||||
|
||||
app = create_wb_app(module.pywb_config())
|
||||
config_file = DEFAULT_CONFIG_FILE
|
||||
|
||||
app = create_wb_app(module.pywb_config(config_file))
|
||||
logging.info('')
|
||||
logging.info('*** pywb inited with settings from {0}.pywb_config()!\n'.format(config_name))
|
||||
return app
|
||||
@ -105,6 +109,6 @@ def main():
|
||||
if __name__ == "__main__" or utils.enable_doctests():
|
||||
import pywb_init
|
||||
# Test sample settings
|
||||
application = create_wb_app(pywb_init.pywb_config())
|
||||
application = create_wb_app(pywb_init.pywb_config('../' + DEFAULT_CONFIG_FILE))
|
||||
else:
|
||||
application = main()
|
||||
|
Loading…
x
Reference in New Issue
Block a user