mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
move sample/test data to ./sample_archive/warcs and ./sample_archive/cdx
pywb_init now driven by config.yaml! (#14) Not yet supporting customized handlers, views, etc...
This commit is contained in:
parent
35f7cb0477
commit
43a46b373d
18
config.yaml
Normal file
18
config.yaml
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
routes:
|
||||||
|
pywb:
|
||||||
|
index_paths:
|
||||||
|
- ./sample_archive/cdx/
|
||||||
|
|
||||||
|
archive_paths:
|
||||||
|
- ./sample_archive/warcs/
|
||||||
|
|
||||||
|
head_insert_template: ./ui/head_insert.html
|
||||||
|
|
||||||
|
html_query_template: ./ui/query.html
|
||||||
|
|
||||||
|
|
||||||
|
hostpaths: http://localhost:8080/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -380,7 +380,7 @@ if __name__ == "__main__" or utils.enable_doctests():
|
|||||||
testloader = ArchiveLoader()
|
testloader = ArchiveLoader()
|
||||||
|
|
||||||
def load_test_archive(test_file, offset, length):
|
def load_test_archive(test_file, offset, length):
|
||||||
path = os.path.dirname(os.path.realpath(__file__)) + '/../test/' + test_file
|
path = utils.test_data_dir() + 'warcs/' + test_file
|
||||||
|
|
||||||
archive = testloader.load(path, offset, length)
|
archive = testloader.load(path, offset, length)
|
||||||
pprint.pprint((archive.type, archive.rec_headers, archive.status_headers))
|
pprint.pprint((archive.type, archive.rec_headers, archive.status_headers))
|
||||||
|
@ -127,7 +127,7 @@ import utils
|
|||||||
if __name__ == "__main__" or utils.enable_doctests():
|
if __name__ == "__main__" or utils.enable_doctests():
|
||||||
|
|
||||||
def create_test_cdx(test_file):
|
def create_test_cdx(test_file):
|
||||||
path = os.path.dirname(os.path.realpath(__file__)) + '/../test/' + test_file
|
path = utils.test_data_dir() + 'cdx/' + test_file
|
||||||
return FileReader(path)
|
return FileReader(path)
|
||||||
|
|
||||||
test_cdx = create_test_cdx('iana.cdx')
|
test_cdx = create_test_cdx('iana.cdx')
|
||||||
|
@ -332,7 +332,7 @@ if __name__ == "__main__" or utils.enable_doctests():
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
test_dir = os.path.dirname(os.path.realpath(__file__)) + '/../test/'
|
test_dir = utils.test_data_dir() + 'cdx/'
|
||||||
|
|
||||||
def test_cdx(key, match_func = binsearch.iter_exact, sources = [test_dir + 'iana.cdx'], **kwparams):
|
def test_cdx(key, match_func = binsearch.iter_exact, sources = [test_dir + 'iana.cdx'], **kwparams):
|
||||||
for x in cdx_serve(key, kwparams, sources, match_func):
|
for x in cdx_serve(key, kwparams, sources, match_func):
|
||||||
|
@ -251,7 +251,7 @@ import utils
|
|||||||
if __name__ == "__main__" or utils.enable_doctests():
|
if __name__ == "__main__" or utils.enable_doctests():
|
||||||
from pprint import pprint
|
from pprint import pprint
|
||||||
|
|
||||||
test_dir = os.path.dirname(os.path.realpath(__file__)) + '/../test/'
|
test_dir = utils.test_data_dir() + 'cdx/'
|
||||||
|
|
||||||
import doctest
|
import doctest
|
||||||
doctest.testmod()
|
doctest.testmod()
|
||||||
|
@ -7,7 +7,9 @@ import replay_resolvers
|
|||||||
import cdxserve
|
import cdxserve
|
||||||
from archivalrouter import ArchivalRequestRouter, Route
|
from archivalrouter import ArchivalRequestRouter, Route
|
||||||
import os
|
import os
|
||||||
|
import yaml
|
||||||
|
import utils
|
||||||
|
import logging
|
||||||
|
|
||||||
## ===========
|
## ===========
|
||||||
default_head_insert = """
|
default_head_insert = """
|
||||||
@ -18,9 +20,10 @@ default_head_insert = """
|
|||||||
<!-- End WB Insert -->
|
<!-- End WB Insert -->
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def pywb_config():
|
def pywb_config2():
|
||||||
# Current test dir
|
# Current test dir
|
||||||
test_dir = os.path.dirname(os.path.realpath(__file__)) + '/../test/'
|
#test_dir = utils.test_data_dir()
|
||||||
|
test_dir = './sample_archive/'
|
||||||
|
|
||||||
# Standard loader which supports WARC/ARC files
|
# Standard loader which supports WARC/ARC files
|
||||||
aloader = archiveloader.ArchiveLoader()
|
aloader = archiveloader.ArchiveLoader()
|
||||||
@ -28,19 +31,19 @@ def pywb_config():
|
|||||||
# Source for cdx source
|
# Source for cdx source
|
||||||
#query_h = query.QueryHandler(indexreader.RemoteCDXServer('http://cdx.example.com/cdx'))
|
#query_h = query.QueryHandler(indexreader.RemoteCDXServer('http://cdx.example.com/cdx'))
|
||||||
#test_cdx = [test_dir + 'iana.cdx', test_dir + 'example.cdx', test_dir + 'dupes.cdx']
|
#test_cdx = [test_dir + 'iana.cdx', test_dir + 'example.cdx', test_dir + 'dupes.cdx']
|
||||||
indexs = indexreader.LocalCDXServer([test_dir])
|
indexs = indexreader.LocalCDXServer([test_dir + 'cdx/'])
|
||||||
|
|
||||||
# Loads warcs specified in cdx from these locations
|
# Loads warcs specified in cdx from these locations
|
||||||
prefixes = [replay_resolvers.PrefixResolver(test_dir)]
|
prefixes = [replay_resolvers.PrefixResolver(test_dir + 'warcs/')]
|
||||||
|
|
||||||
# Jinja2 head insert
|
# Jinja2 head insert
|
||||||
head_insert = views.J2HeadInsertView('./ui/', 'head_insert.html')
|
head_insert = views.J2HeadInsertView('./ui/head_insert.html')
|
||||||
|
|
||||||
# Create rewriting replay handler to rewrite records
|
# Create rewriting replay handler to rewrite records
|
||||||
replayer = replay_views.RewritingReplayView(resolvers = prefixes, archiveloader = aloader, head_insert = head_insert, buffer_response = True)
|
replayer = replay_views.RewritingReplayView(resolvers = prefixes, archiveloader = aloader, head_insert = head_insert, buffer_response = True)
|
||||||
|
|
||||||
# Create Jinja2 based html query view
|
# Create Jinja2 based html query view
|
||||||
html_view = views.J2QueryView('./ui/', 'query.html')
|
html_view = views.J2QueryView('./ui/query.html')
|
||||||
|
|
||||||
# WB handler which uses the index reader, replayer, and html_view
|
# WB handler which uses the index reader, replayer, and html_view
|
||||||
wb_handler = handlers.WBHandler(indexs, replayer, html_view)
|
wb_handler = handlers.WBHandler(indexs, replayer, html_view)
|
||||||
@ -61,3 +64,97 @@ def pywb_config():
|
|||||||
hostpaths = ['http://localhost:8080/'])
|
hostpaths = ['http://localhost:8080/'])
|
||||||
|
|
||||||
|
|
||||||
|
def pywb_config(filename = './pywb/config.yaml'):
    """
    Build the archival request router described by a YAML config file.

    The config must contain a ``routes`` mapping of route name -> handler
    definition; each entry is converted with yaml_parse_route(). It may
    also contain ``hostpaths``; when absent, ['http://localhost:8080/']
    is used.

    :param filename: path of the YAML config file to read
    :return: ArchivalRequestRouter built from the parsed routes and hostpaths
    :raises KeyError: if the config has no ``routes`` entry
    """
    # Context manager guarantees the config file handle is closed,
    # even if parsing fails.
    with open(filename) as config_file:
        # NOTE(review): yaml.load can instantiate arbitrary tagged objects;
        # consider yaml.safe_load if this file could ever be untrusted.
        config = yaml.load(config_file)

    # Build a concrete list: map() is lazy on newer Pythons and
    # dict.iteritems() only exists on Python 2.
    routes = [yaml_parse_route(route_def)
              for route_def in config['routes'].items()]

    hostpaths = config.get('hostpaths', ['http://localhost:8080/'])

    # A single scalar value (e.g. ``hostpaths: http://localhost:8080/``)
    # is normalized to the same list shape as the default.
    if not isinstance(hostpaths, list):
        hostpaths = [hostpaths]

    return ArchivalRequestRouter(routes, hostpaths)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def yaml_parse_route(route_def):
    """
    Convert one entry of the config ``routes`` mapping into a Route.

    :param route_def: a ``(route_name, handler_def)`` pair, as produced by
                      iterating over the items of the routes mapping
    :return: Route for route_name, wrapping the handler built by
             yaml_parse_handler(handler_def)
    """
    # Unpack in the body rather than in the signature: tuple parameters
    # are not valid syntax on Python 3 (PEP 3113).
    route_name, handler_def = route_def
    return Route(route_name, yaml_parse_handler(handler_def))
|
||||||
|
|
||||||
|
|
||||||
|
def yaml_parse_index_loader(index_config):
    """
    Create the cdx index server described by the ``index_paths`` config.

    Accepted forms of index_config:
      * list -- passed as-is to a LocalCDXServer
      * str  -- a single uri, no cookie
      * dict -- must contain ``url``; ``cookie`` is optional

    A uri starting with http:// or https:// that does not end in '.cdx'
    selects a RemoteCDXServer; any other uri is wrapped in a one-element
    list for a LocalCDXServer.

    :raises Exception: if index_config is not a list, str or dict
    :raises KeyError: if a dict config has no ``url`` entry
    """
    # support mixed cdx streams and remote servers?
    # for now, list implies local sources
    if isinstance(index_config, list):
        return indexreader.LocalCDXServer(index_config)

    if isinstance(index_config, str):
        uri = index_config
        cookie = None
    elif isinstance(index_config, dict):
        uri = index_config['url']
        # The cookie is optional, matching the plain-string form above
        cookie = index_config.get('cookie')
    else:
        raise Exception('Invalid Index Reader Config: ' + str(index_config))

    # Check for remote cdx server
    is_remote = (uri.startswith(('http://', 'https://')) and
                 not uri.endswith('.cdx'))

    if is_remote:
        return indexreader.RemoteCDXServer(uri, cookie = cookie)

    return indexreader.LocalCDXServer([uri])
|
||||||
|
|
||||||
|
|
||||||
|
def yaml_parse_archive_resolvers(archive_paths):
    """
    Create one archive resolver per configured archive path.

    For each path:
      * 'redis://...'          -> RedisResolver
      * path of an existing file -> PathIndexResolver
      * anything else          -> PrefixResolver (logged at info level)

    :param archive_paths: list of path strings; a single string is
                          treated as a one-element list
    :return: list of resolvers, in the same order as archive_paths
    """

    #TODO: more options (remote files, contains param, etc..)
    def make_resolver(path):
        if path.startswith('redis://'):
            return replay_resolvers.RedisResolver(path)
        elif os.path.isfile(path):
            return replay_resolvers.PathIndexResolver(path)
        else:
            logging.info('Adding Archive Source: ' + path)
            return replay_resolvers.PrefixResolver(path)

    # A bare string would otherwise be iterated character by character
    if isinstance(archive_paths, str):
        archive_paths = [archive_paths]

    # Return a real list rather than a lazy map object, so the result
    # can be iterated more than once.
    return [make_resolver(path) for path in archive_paths]
|
||||||
|
|
||||||
|
def yaml_parse_head_insert(handler_def):
    """
    Determine the head insert for a handler definition.

    If ``head_insert_template`` is set, a J2HeadInsertView for that
    template file is returned. Otherwise the static ``head_insert_text``
    string is returned, or '' when it is missing or empty.

    :param handler_def: dict-like handler definition from the config
    """
    # First, try a template file
    head_insert_file = handler_def.get('head_insert_template')
    if head_insert_file:
        logging.info('Adding Head-Insert Template: ' + head_insert_file)
        return views.J2HeadInsertView(head_insert_file)

    # Then, static head_insert text
    # A key present with an empty YAML value loads as None; fall back to ''
    # so the log concatenation below cannot raise TypeError.
    head_insert_text = handler_def.get('head_insert_text') or ''
    logging.info('Adding Head-Insert Text: ' + head_insert_text)
    return head_insert_text
|
||||||
|
|
||||||
|
|
||||||
|
def yaml_parse_handler(handler_def):
    """
    Assemble a WBHandler from a single handler definition.

    Reads ``index_paths`` and ``archive_paths`` (both required), plus the
    optional head insert settings, ``buffer_response`` (default False)
    and ``html_query_template``. Without a query template the handler is
    created with no html view.
    """
    warc_loader = archiveloader.ArchiveLoader()

    cdx_server = yaml_parse_index_loader(handler_def['index_paths'])

    resolvers = yaml_parse_archive_resolvers(handler_def['archive_paths'])

    insert = yaml_parse_head_insert(handler_def)

    buffer_response = handler_def.get('buffer_response', False)

    replay_view = replay_views.RewritingReplayView(resolvers = resolvers,
                                                   archiveloader = warc_loader,
                                                   head_insert = insert,
                                                   buffer_response = buffer_response)

    # The html query (calendar) view is optional
    template_path = handler_def.get('html_query_template')

    if not template_path:
        logging.info('No HTML Calendar View Present')
        query_view = None
    else:
        logging.info('Adding HTML Calendar Template: ' + template_path)
        query_view = views.J2QueryView(template_path)

    return handlers.WBHandler(cdx_server, replay_view, query_view)
|
||||||
|
|
||||||
|
if __name__ == "__main__" or utils.enable_doctests():
|
||||||
|
pass
|
||||||
|
#print pywb_config('config.yaml')
|
||||||
|
|
||||||
|
|
||||||
|
@ -203,6 +203,10 @@ is_in_nose = sys.argv[0].endswith('nosetests')
|
|||||||
def enable_doctests():
|
def enable_doctests():
|
||||||
return is_in_nose
|
return is_in_nose
|
||||||
|
|
||||||
|
def test_data_dir():
    """
    Return the path of the sample_archive directory, located one level
    above the directory containing this module. The result always ends
    with a trailing '/', so callers can append 'cdx/' or 'warcs/' directly.
    """
    import os
    module_dir = os.path.dirname(os.path.realpath(__file__))
    return module_dir + '/../sample_archive/'
|
||||||
|
|
||||||
#============================================
|
#============================================
|
||||||
|
|
||||||
if __name__ == "__main__" or enable_doctests():
|
if __name__ == "__main__" or enable_doctests():
|
||||||
|
@ -4,6 +4,7 @@ import wbrequestresponse
|
|||||||
import wbexceptions
|
import wbexceptions
|
||||||
import time
|
import time
|
||||||
|
|
||||||
|
from os import path
|
||||||
from itertools import imap
|
from itertools import imap
|
||||||
from jinja2 import Environment, FileSystemLoader
|
from jinja2 import Environment, FileSystemLoader
|
||||||
|
|
||||||
@ -16,7 +17,9 @@ class TextQueryView:
|
|||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class J2QueryView:
|
class J2QueryView:
|
||||||
def __init__(self, template_dir, template_file, buffer_index = True):
|
def __init__(self, filename, buffer_index = True):
|
||||||
|
template_dir, template_file = path.split(filename)
|
||||||
|
|
||||||
self.template_file = template_file
|
self.template_file = template_file
|
||||||
self.buffer_index = buffer_index
|
self.buffer_index = buffer_index
|
||||||
|
|
||||||
@ -41,7 +44,8 @@ class J2QueryView:
|
|||||||
# Render the head insert (eg. banner)
|
# Render the head insert (eg. banner)
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class J2HeadInsertView:
|
class J2HeadInsertView:
|
||||||
def __init__(self, template_dir, template_file, buffer_index = True):
|
def __init__(self, filename, buffer_index = True):
|
||||||
|
template_dir, template_file = path.split(filename)
|
||||||
self.template_file = template_file
|
self.template_file = template_file
|
||||||
|
|
||||||
self.jinja_env = make_jinja_env(template_dir)
|
self.jinja_env = make_jinja_env(template_dir)
|
||||||
|
@ -77,6 +77,8 @@ def handle_exception(env, exc):
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
|
DEFAULT_CONFIG_FILE = 'config.yaml'
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
try:
|
try:
|
||||||
# Attempt to load real settings from globalwb module
|
# Attempt to load real settings from globalwb module
|
||||||
@ -91,7 +93,9 @@ def main():
|
|||||||
|
|
||||||
module = importlib.import_module(config_name)
|
module = importlib.import_module(config_name)
|
||||||
|
|
||||||
app = create_wb_app(module.pywb_config())
|
config_file = DEFAULT_CONFIG_FILE
|
||||||
|
|
||||||
|
app = create_wb_app(module.pywb_config(config_file))
|
||||||
logging.info('')
|
logging.info('')
|
||||||
logging.info('*** pywb inited with settings from {0}.pywb_config()!\n'.format(config_name))
|
logging.info('*** pywb inited with settings from {0}.pywb_config()!\n'.format(config_name))
|
||||||
return app
|
return app
|
||||||
@ -105,6 +109,6 @@ def main():
|
|||||||
if __name__ == "__main__" or utils.enable_doctests():
|
if __name__ == "__main__" or utils.enable_doctests():
|
||||||
import pywb_init
|
import pywb_init
|
||||||
# Test sample settings
|
# Test sample settings
|
||||||
application = create_wb_app(pywb_init.pywb_config())
|
application = create_wb_app(pywb_init.pywb_config('../' + DEFAULT_CONFIG_FILE))
|
||||||
else:
|
else:
|
||||||
application = main()
|
application = main()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user