mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
wsgi wrapper reorg!
support pluggable wsgi apps
utils: BlockLoader() supports loading from package
exceptions: base WbException moved to utils
This commit is contained in:
parent
47271bbfab
commit
f1acad53fc
0
pywb/apps/__init__.py
Normal file
0
pywb/apps/__init__.py
Normal file
10
pywb/apps/wayback.py
Normal file
10
pywb/apps/wayback.py
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
from pywb.bootstrap.wsgi_wrappers import init_app, start_wsgi_server
|
||||||
|
from pywb.bootstrap.pywb_init import create_wb_router
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
# init pywb app
|
||||||
|
#=================================================================
|
||||||
|
application = init_app(create_wb_router, load_yaml=True)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
start_wsgi_server(application)
|
@ -1,56 +0,0 @@
|
|||||||
import logging
|
|
||||||
|
|
||||||
from pywb.warc.recordloader import ArcWarcRecordLoader
|
|
||||||
from pywb.warc.resolvingloader import ResolvingLoader
|
|
||||||
from pywb.rewrite.rewrite_content import RewriteContent
|
|
||||||
from pywb.core.views import J2TemplateView, J2HtmlCapturesView
|
|
||||||
from pywb.core.handlers import WBHandler
|
|
||||||
from pywb.core.replay_views import ReplayView
|
|
||||||
|
|
||||||
#=================================================================
|
|
||||||
# Config Loading
|
|
||||||
#=================================================================
|
|
||||||
def load_template_file(file, desc = None, view_class = J2TemplateView):
|
|
||||||
if file:
|
|
||||||
logging.debug('Adding {0}: {1}'.format(desc if desc else name, file))
|
|
||||||
file = view_class(file)
|
|
||||||
|
|
||||||
return file
|
|
||||||
|
|
||||||
#=================================================================
|
|
||||||
def create_wb_handler(cdx_server, config, ds_rules_file=None):
|
|
||||||
|
|
||||||
record_loader = ArcWarcRecordLoader(cookie_maker = config.get('cookie_maker'))
|
|
||||||
paths = config.get('archive_paths')
|
|
||||||
|
|
||||||
resolving_loader = ResolvingLoader(paths=paths,
|
|
||||||
cdx_server=cdx_server,
|
|
||||||
record_loader=record_loader)
|
|
||||||
|
|
||||||
replayer = ReplayView(
|
|
||||||
content_loader = resolving_loader,
|
|
||||||
|
|
||||||
content_rewriter = RewriteContent(ds_rules_file=ds_rules_file),
|
|
||||||
|
|
||||||
head_insert_view = load_template_file(config.get('head_insert_html'), 'Head Insert'),
|
|
||||||
|
|
||||||
buffer_response = config.get('buffer_response', True),
|
|
||||||
|
|
||||||
redir_to_exact = config.get('redir_to_exact', True),
|
|
||||||
|
|
||||||
reporter = config.get('reporter')
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
wb_handler = WBHandler(
|
|
||||||
cdx_server,
|
|
||||||
|
|
||||||
replayer,
|
|
||||||
|
|
||||||
html_view = load_template_file(config.get('query_html'), 'Captures Page', J2HtmlCapturesView),
|
|
||||||
|
|
||||||
search_view = load_template_file(config.get('search_html'), 'Search Page'),
|
|
||||||
)
|
|
||||||
|
|
||||||
return wb_handler
|
|
||||||
|
|
@ -1,10 +1,20 @@
|
|||||||
from pywb.core.handlers import CDXHandler, StaticHandler
|
|
||||||
from pywb.core.handlers import DebugEchoHandler, DebugEchoEnvHandler
|
|
||||||
from pywb.dispatch.archivalrouter import ArchivalRouter, Route
|
from pywb.dispatch.archivalrouter import ArchivalRouter, Route
|
||||||
from pywb.dispatch.proxy import ProxyArchivalRouter
|
from pywb.dispatch.proxy import ProxyArchivalRouter
|
||||||
from pywb.core.indexreader import IndexReader
|
|
||||||
|
|
||||||
import config_utils
|
from pywb.warc.recordloader import ArcWarcRecordLoader
|
||||||
|
from pywb.warc.resolvingloader import ResolvingLoader
|
||||||
|
|
||||||
|
from pywb.rewrite.rewrite_content import RewriteContent
|
||||||
|
|
||||||
|
from pywb.core.indexreader import IndexReader
|
||||||
|
from pywb.core.views import J2TemplateView, J2HtmlCapturesView
|
||||||
|
from pywb.core.handlers import WBHandler
|
||||||
|
from pywb.core.replay_views import ReplayView
|
||||||
|
|
||||||
|
from pywb.core.handlers import CDXHandler, StaticHandler
|
||||||
|
from pywb.core.handlers import DebugEchoHandler, DebugEchoEnvHandler
|
||||||
|
|
||||||
|
from pywb.utils.loaders import BlockLoader
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import yaml
|
import yaml
|
||||||
@ -27,6 +37,7 @@ DEFAULTS = {
|
|||||||
'domain_specific_rules': 'rules.yaml',
|
'domain_specific_rules': 'rules.yaml',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
class DictChain:
|
class DictChain:
|
||||||
def __init__(self, *dicts):
|
def __init__(self, *dicts):
|
||||||
self.dicts = dicts
|
self.dicts = dicts
|
||||||
@ -40,9 +51,63 @@ class DictChain:
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
## Reference non-YAML config
|
def load_template_file(file, desc=None, view_class=J2TemplateView):
    """
    Wrap a configured template path in a view object.

    :param file: template path; a falsy value is returned unchanged
    :param desc: optional label used only in the debug log message
    :param view_class: callable invoked as view_class(file) to build the view
    :return: view_class(file) when file is truthy, otherwise file as passed in
    """
    if file:
        # The original fell back to an undefined `name`, raising NameError
        # whenever desc was omitted; use the view class name as the label.
        label = desc if desc else getattr(view_class, '__name__', 'template')
        logging.debug('Adding {0}: {1}'.format(label, file))
        file = view_class(file)

    return file
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
def pywb_config_manual(passed_config = {}):
|
def create_wb_handler(cdx_server, config, ds_rules_file=None):
    """
    Build the WBHandler for one route from its configuration.

    :param cdx_server: index object passed to both ResolvingLoader and
        WBHandler
    :param config: object queried via .get() for 'cookie_maker',
        'archive_paths', 'head_insert_html', 'buffer_response',
        'redir_to_exact', 'reporter', 'query_html' and 'search_html'
    :param ds_rules_file: forwarded to RewriteContent
    :return: the assembled WBHandler
    """
    record_loader = ArcWarcRecordLoader(
        cookie_maker=config.get('cookie_maker'))

    resolving_loader = ResolvingLoader(
        paths=config.get('archive_paths'),
        cdx_server=cdx_server,
        record_loader=record_loader)

    # Optional template inserted by the replay view (None/falsy if unset)
    head_insert = load_template_file(config.get('head_insert_html'),
                                     'Head Insert')

    # 'buffer_response' and 'redir_to_exact' default to True when absent
    replay_settings = {
        'content_loader': resolving_loader,
        'content_rewriter': RewriteContent(ds_rules_file=ds_rules_file),
        'head_insert_view': head_insert,
        'buffer_response': config.get('buffer_response', True),
        'redir_to_exact': config.get('redir_to_exact', True),
        'reporter': config.get('reporter'),
    }
    replayer = ReplayView(**replay_settings)

    captures_view = load_template_file(config.get('query_html'),
                                       'Captures Page',
                                       J2HtmlCapturesView)

    search_page = load_template_file(config.get('search_html'),
                                     'Search Page')

    return WBHandler(cdx_server,
                     replayer,
                     html_view=captures_view,
                     search_view=search_page)
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
def create_wb_router(passed_config = {}):
|
||||||
|
|
||||||
config = DictChain(passed_config, DEFAULTS)
|
config = DictChain(passed_config, DEFAULTS)
|
||||||
|
|
||||||
@ -62,7 +127,7 @@ def pywb_config_manual(passed_config = {}):
|
|||||||
ds_rules_file = route_config.get('domain_specific_rules', None)
|
ds_rules_file = route_config.get('domain_specific_rules', None)
|
||||||
cdx_server = IndexReader(route_config, ds_rules_file)
|
cdx_server = IndexReader(route_config, ds_rules_file)
|
||||||
|
|
||||||
wb_handler = config_utils.create_wb_handler(
|
wb_handler = create_wb_handler(
|
||||||
cdx_server=cdx_server,
|
cdx_server=cdx_server,
|
||||||
config=route_config,
|
config=route_config,
|
||||||
ds_rules_file=ds_rules_file,
|
ds_rules_file=ds_rules_file,
|
||||||
@ -107,24 +172,6 @@ def pywb_config_manual(passed_config = {}):
|
|||||||
|
|
||||||
abs_path = config.get('absolute_paths', True),
|
abs_path = config.get('absolute_paths', True),
|
||||||
|
|
||||||
home_view = config_utils.load_template_file(config.get('home_html'), 'Home Page'),
|
home_view = load_template_file(config.get('home_html'), 'Home Page'),
|
||||||
error_view = config_utils.load_template_file(config.get('error_html'), 'Error Page')
|
error_view = load_template_file(config.get('error_html'), 'Error Page')
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
|
||||||
# YAML config loader
|
|
||||||
#=================================================================
|
|
||||||
DEFAULT_CONFIG_FILE = 'config.yaml'
|
|
||||||
|
|
||||||
|
|
||||||
def pywb_config(config_file = None):
|
|
||||||
if not config_file:
|
|
||||||
config_file = os.environ.get('PYWB_CONFIG', DEFAULT_CONFIG_FILE)
|
|
||||||
|
|
||||||
with open(config_file) as fh:
|
|
||||||
config = yaml.load(fh)
|
|
||||||
|
|
||||||
return pywb_config_manual(config)
|
|
||||||
|
|
||||||
|
@ -1,20 +1,19 @@
|
|||||||
from pywb.core.wbexceptions import WbException, NotFoundException, InternalRedirect
|
from pywb.utils.wbexception import WbException
|
||||||
|
from pywb.core.wbexceptions import NotFoundException, InternalRedirect
|
||||||
from pywb.core.wbrequestresponse import WbResponse, StatusAndHeaders
|
from pywb.core.wbrequestresponse import WbResponse, StatusAndHeaders
|
||||||
|
|
||||||
from pywb.cdx.cdxserver import CDXException
|
from pywb.utils.loaders import BlockLoader
|
||||||
from pywb.utils.canonicalize import UrlCanonicalizeException
|
|
||||||
from pywb.warc.recordloader import ArchiveLoadFailed
|
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import importlib
|
import importlib
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# adapted -from wsgiref.request_uri, but doesn't include domain name and allows all characters
|
# adapted from wsgiref.request_uri, but doesn't include domain name and allows all characters
|
||||||
# allowed in the path segment according to: http://tools.ietf.org/html/rfc3986#section-3.3
|
# allowed in the path segment according to: http://tools.ietf.org/html/rfc3986#section-3.3
|
||||||
# explained here: http://stackoverflow.com/questions/4669692/valid-characters-for-directory-part-of-a-url-for-short-links
|
# explained here:
|
||||||
|
# http://stackoverflow.com/questions/4669692/valid-characters-for-directory-part-of-a-url-for-short-links
|
||||||
def rel_request_uri(environ, include_query=1):
|
def rel_request_uri(environ, include_query=1):
|
||||||
"""
|
"""
|
||||||
Return the requested path, optionally including the query string
|
Return the requested path, optionally including the query string
|
||||||
@ -35,9 +34,9 @@ def rel_request_uri(environ, include_query=1):
|
|||||||
|
|
||||||
return url
|
return url
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
def create_wb_app(wb_router):
|
def create_wb_app(wb_router):
|
||||||
|
|
||||||
# Top-level wsgi application
|
# Top-level wsgi application
|
||||||
def application(env, start_response):
|
def application(env, start_response):
|
||||||
if env.get('SCRIPT_NAME') or not env.get('REQUEST_URI'):
|
if env.get('SCRIPT_NAME') or not env.get('REQUEST_URI'):
|
||||||
@ -56,8 +55,7 @@ def create_wb_app(wb_router):
|
|||||||
except InternalRedirect as ir:
|
except InternalRedirect as ir:
|
||||||
response = WbResponse(StatusAndHeaders(ir.status, ir.httpHeaders))
|
response = WbResponse(StatusAndHeaders(ir.status, ir.httpHeaders))
|
||||||
|
|
||||||
except (WbException, CDXException,
|
except WbException as e:
|
||||||
UrlCanonicalizeException, ArchiveLoadFailed) as e:
|
|
||||||
response = handle_exception(env, wb_router.error_view, e, False)
|
response = handle_exception(env, wb_router.error_view, e, False)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@ -69,6 +67,7 @@ def create_wb_app(wb_router):
|
|||||||
return application
|
return application
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
def handle_exception(env, error_view, exc, print_trace):
|
def handle_exception(env, error_view, exc, print_trace):
|
||||||
if hasattr(exc, 'status'):
|
if hasattr(exc, 'status'):
|
||||||
status = exc.status()
|
status = exc.status()
|
||||||
@ -85,44 +84,82 @@ def handle_exception(env, error_view, exc, print_trace):
|
|||||||
|
|
||||||
if error_view:
|
if error_view:
|
||||||
import traceback
|
import traceback
|
||||||
return error_view.render_response(err_msg = str(exc), err_details = err_details, status = status)
|
return error_view.render_response(err_msg=str(exc),
|
||||||
|
err_details=err_details,
|
||||||
|
status=status)
|
||||||
else:
|
else:
|
||||||
return WbResponse.text_response(status + ' Error: ' + str(exc), status = status)
|
return WbResponse.text_response(status + ' Error: ' + str(exc),
|
||||||
|
status=status)
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
DEFAULT_CONFIG_FILE = 'config.yaml'
|
DEFAULT_CONFIG_FILE = 'config.yaml'
|
||||||
|
|
||||||
DEFAULT_INIT_MODULE = 'pywb.bootstrap.pywb_init'
|
def load_yaml_config(config_file=None):
    """
    Load and parse a YAML config.

    :param config_file: location handed to BlockLoader().load(); when falsy,
        DEFAULT_CONFIG_FILE is used instead
    :return: whatever yaml.load() produces for the file contents
    """
    import yaml

    if not config_file:
        config_file = DEFAULT_CONFIG_FILE

    configdata = BlockLoader().load(config_file)
    try:
        # NOTE(review): yaml.load() without an explicit Loader can construct
        # arbitrary Python objects; acceptable only while the config file is
        # operator-controlled. Consider yaml.safe_load() if configs never
        # rely on python-specific tags.
        config = yaml.load(configdata)
    finally:
        # The original never released the underlying file/resource stream.
        # Guarded with getattr since the reader type depends on the loader.
        close = getattr(configdata, 'close', None)
        if close:
            close()

    return config
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
def main():
|
def init_app(init_func, load_yaml=True, config_file=None):
|
||||||
|
logging.basicConfig(format='%(asctime)s: [%(levelname)s]: %(message)s',
|
||||||
|
level=logging.DEBUG)
|
||||||
|
logging.info('')
|
||||||
|
|
||||||
|
if load_yaml:
|
||||||
|
if not config_file:
|
||||||
|
config_file = os.environ.get('PYWB_CONFIG_FILE')
|
||||||
|
config = load_yaml_config(config_file)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
logging.basicConfig(format = '%(asctime)s: [%(levelname)s]: %(message)s', level = logging.DEBUG)
|
if load_yaml:
|
||||||
|
wb_router = init_func(config)
|
||||||
# see if there's a custom init module
|
else:
|
||||||
config_name = os.environ.get('PYWB_CONFIG_MODULE')
|
wb_router = init_func()
|
||||||
|
except:
|
||||||
if not config_name:
|
msg = '*** pywb app init FAILED config from "%s"!\n'
|
||||||
# use default module
|
logging.exception(msg, init_func.__name__)
|
||||||
config_name = DEFAULT_INIT_MODULE
|
|
||||||
logging.info('Loading from default config module "{0}"'.format(config_name))
|
|
||||||
logging.info('')
|
|
||||||
|
|
||||||
module = importlib.import_module(config_name)
|
|
||||||
|
|
||||||
app = create_wb_app(module.pywb_config())
|
|
||||||
logging.info('')
|
|
||||||
logging.info('*** pywb inited with settings from {0}.pywb_config()!\n'.format(config_name))
|
|
||||||
return app
|
|
||||||
|
|
||||||
except Exception:
|
|
||||||
logging.exception('*** pywb could not init with settings from {0}.pywb_config()!\n'.format(config_name))
|
|
||||||
raise
|
raise
|
||||||
|
else:
|
||||||
|
msg = '*** pywb app inited with config from "%s"!\n'
|
||||||
|
logging.info(msg, init_func.__name__)
|
||||||
|
|
||||||
|
return create_wb_app(wb_router)
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
if __name__ == "__main__":
|
DEFAULT_PORT = 8080
|
||||||
pass
|
|
||||||
else:
|
def start_wsgi_server(the_app):
    """
    Serve the_app with wsgiref's simple server until interrupted.

    The port is taken from the -p/--port command line option; if not given,
    from the 'port' key of the YAML config; otherwise DEFAULT_PORT.

    :param the_app: WSGI application callable passed to make_server()
    """
    from wsgiref.simple_server import make_server
    from optparse import OptionParser

    opt = OptionParser('%prog [OPTIONS]')
    opt.add_option('-p', '--port', type='int', default=None)

    options, args = opt.parse_args()

    port = options.port

    if port is None:
        try:
            # The original called load_default_config(), which does not
            # exist; the NameError was swallowed by a bare except, so the
            # configured port was never honoured. Resolve the config file
            # the same way init_app() does.
            config = load_yaml_config(os.environ.get('PYWB_CONFIG_FILE'))
            port = config.get('port', DEFAULT_PORT)
        except Exception:
            # best-effort: any problem reading the config falls back to the
            # default port, but KeyboardInterrupt/SystemExit still propagate
            port = DEFAULT_PORT

    logging.debug('Starting CDX Server on port %s', port)

    try:
        httpd = make_server('', port, the_app)
        httpd.serve_forever()
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the development server
        pass

    logging.debug('Stopping CDX Server')
|
@ -9,6 +9,7 @@ from pywb.utils.canonicalize import unsurt, UrlCanonicalizer
|
|||||||
|
|
||||||
from query import CDXQuery
|
from query import CDXQuery
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
def load_domain_specific_cdx_rules(ds_rules_file, surt_ordered):
|
def load_domain_specific_cdx_rules(ds_rules_file, surt_ordered):
|
||||||
"""
|
"""
|
||||||
|
@ -4,9 +4,11 @@ import itertools
|
|||||||
from urllib import urlencode
|
from urllib import urlencode
|
||||||
from urlparse import parse_qs
|
from urlparse import parse_qs
|
||||||
|
|
||||||
|
from pywb.utils.wbexception import WbException
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class CDXException(Exception):
|
class CDXException(WbException):
|
||||||
def status(self):
|
def status(self):
|
||||||
return '400 Bad Request'
|
return '400 Bad Request'
|
||||||
|
|
||||||
|
@ -33,6 +33,7 @@ def cdx_load(sources, query, perms_checker=None, process=True):
|
|||||||
|
|
||||||
return cdx_iter
|
return cdx_iter
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
def restrict_cdx(cdx_iter, query, perms_checker):
|
def restrict_cdx(cdx_iter, query, perms_checker):
|
||||||
"""
|
"""
|
||||||
@ -56,6 +57,7 @@ def restrict_cdx(cdx_iter, query, perms_checker):
|
|||||||
|
|
||||||
yield cdx
|
yield cdx
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
def process_cdx(cdx_iter, query):
|
def process_cdx(cdx_iter, query):
|
||||||
if query.resolve_revisits:
|
if query.resolve_revisits:
|
||||||
@ -255,7 +257,6 @@ def cdx_resolve_revisits(cdx_iter):
|
|||||||
originals = {}
|
originals = {}
|
||||||
|
|
||||||
for cdx in cdx_iter:
|
for cdx in cdx_iter:
|
||||||
|
|
||||||
is_revisit = cdx.is_revisit()
|
is_revisit = cdx.is_revisit()
|
||||||
|
|
||||||
digest = cdx['digest']
|
digest = cdx['digest']
|
||||||
|
@ -126,14 +126,19 @@ class CDXServer(BaseCDXServer):
|
|||||||
logging.warn('No CDX Sources configured from paths=%s', paths)
|
logging.warn('No CDX Sources configured from paths=%s', paths)
|
||||||
|
|
||||||
def _add_cdx_source(self, source):
|
def _add_cdx_source(self, source):
|
||||||
if source is None: return
|
if source is None:
|
||||||
|
return
|
||||||
|
|
||||||
logging.debug('Adding CDX Source: %s', source)
|
logging.debug('Adding CDX Source: %s', source)
|
||||||
self.sources.append(source)
|
self.sources.append(source)
|
||||||
|
|
||||||
def add_cdx_source(self, source, config):
|
def add_cdx_source(self, source, config):
|
||||||
if source is None: return
|
if source is None:
|
||||||
|
return
|
||||||
|
|
||||||
if isinstance(source, CDXSource):
|
if isinstance(source, CDXSource):
|
||||||
self._add_cdx_source(source)
|
self._add_cdx_source(source)
|
||||||
|
|
||||||
elif isinstance(source, str):
|
elif isinstance(source, str):
|
||||||
if os.path.isdir(source):
|
if os.path.isdir(source):
|
||||||
for fn in os.listdir(source):
|
for fn in os.listdir(source):
|
||||||
@ -213,5 +218,3 @@ def create_cdx_server(config, ds_rules_file=None):
|
|||||||
surt_ordered=surt_ordered,
|
surt_ordered=surt_ordered,
|
||||||
ds_rules_file=ds_rules_file,
|
ds_rules_file=ds_rules_file,
|
||||||
perms_checker=perms_checker)
|
perms_checker=perms_checker)
|
||||||
|
|
||||||
|
|
||||||
|
@ -8,6 +8,7 @@ import urllib
|
|||||||
import urllib2
|
import urllib2
|
||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class CDXSource(object):
|
class CDXSource(object):
|
||||||
"""
|
"""
|
||||||
@ -92,7 +93,6 @@ class RedisCDXSource(CDXSource):
|
|||||||
if config:
|
if config:
|
||||||
self.key_prefix = config.get('redis_key_prefix', self.key_prefix)
|
self.key_prefix = config.get('redis_key_prefix', self.key_prefix)
|
||||||
|
|
||||||
|
|
||||||
def load_cdx(self, query):
|
def load_cdx(self, query):
|
||||||
"""
|
"""
|
||||||
Load cdx from redis cache, from an ordered list
|
Load cdx from redis cache, from an ordered list
|
||||||
|
@ -1,8 +1,6 @@
|
|||||||
|
from pywb.utils.wbexception import WbException
|
||||||
|
|
||||||
|
|
||||||
class WbException(Exception):
|
|
||||||
pass
|
|
||||||
|
|
||||||
class NotFoundException(WbException):
|
class NotFoundException(WbException):
|
||||||
def status(self):
|
def status(self):
|
||||||
return '404 Not Found'
|
return '404 Not Found'
|
||||||
|
@ -4,6 +4,9 @@
|
|||||||
import surt
|
import surt
|
||||||
import urlparse
|
import urlparse
|
||||||
|
|
||||||
|
from wbexception import WbException
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class UrlCanonicalizer(object):
|
class UrlCanonicalizer(object):
|
||||||
def __init__(self, surt_ordered=True):
|
def __init__(self, surt_ordered=True):
|
||||||
@ -14,7 +17,7 @@ class UrlCanonicalizer(object):
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class UrlCanonicalizeException(Exception):
|
class UrlCanonicalizeException(WbException):
|
||||||
def status(self):
|
def status(self):
|
||||||
return '400 Bad Request'
|
return '400 Bad Request'
|
||||||
|
|
||||||
@ -164,7 +167,8 @@ def calc_search_range(url, match_type, surt_ordered=True, url_canon=None):
|
|||||||
|
|
||||||
elif match_type == 'domain':
|
elif match_type == 'domain':
|
||||||
if not surt_ordered:
|
if not surt_ordered:
|
||||||
raise UrlCanonicalizeException('matchType=domain unsupported for non-surt')
|
msg = 'matchType=domain unsupported for non-surt'
|
||||||
|
raise UrlCanonicalizeException(msg)
|
||||||
|
|
||||||
host = start_key.split(')/')[0]
|
host = start_key.split(')/')[0]
|
||||||
|
|
||||||
|
@ -7,6 +7,7 @@ import os
|
|||||||
import hmac
|
import hmac
|
||||||
import urllib2
|
import urllib2
|
||||||
import time
|
import time
|
||||||
|
from pkg_resources import resource_stream
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
@ -24,16 +25,16 @@ class BlockLoader(object):
|
|||||||
def __init__(self, cookie_maker=None):
|
def __init__(self, cookie_maker=None):
|
||||||
self.cookie_maker = cookie_maker
|
self.cookie_maker = cookie_maker
|
||||||
|
|
||||||
def load(self, url, offset, length):
|
def load(self, url, offset=0, length=-1):
|
||||||
"""
|
"""
|
||||||
Determine loading method based on uri
|
Determine loading method based on uri
|
||||||
"""
|
"""
|
||||||
if is_http(url):
|
if is_http(url):
|
||||||
return self.load_http(url, offset, length)
|
return self.load_http(url, offset, length)
|
||||||
else:
|
else:
|
||||||
return self.load_file(url, offset, length)
|
return self.load_file_or_resource(url, offset, length)
|
||||||
|
|
||||||
def load_file(self, url, offset, length):
|
def load_file_or_resource(self, url, offset, length):
|
||||||
"""
|
"""
|
||||||
Load a file-like reader from the local file system
|
Load a file-like reader from the local file system
|
||||||
"""
|
"""
|
||||||
@ -41,10 +42,18 @@ class BlockLoader(object):
|
|||||||
if url.startswith('file://'):
|
if url.startswith('file://'):
|
||||||
url = url[len('file://'):]
|
url = url[len('file://'):]
|
||||||
|
|
||||||
afile = open(url, 'rb')
|
try:
|
||||||
afile.seek(offset)
|
# first, try as file
|
||||||
|
afile = open(url, 'rb')
|
||||||
|
except IOError as file_err:
|
||||||
|
# then, try as package.path/file
|
||||||
|
pkg_split = url.split('/', 1)
|
||||||
|
afile = resource_stream(pkg_split[0], pkg_split[1])
|
||||||
|
|
||||||
if length > 0:
|
if offset > 0:
|
||||||
|
afile.seek(offset)
|
||||||
|
|
||||||
|
if length >= 0:
|
||||||
return LimitReader(afile, length)
|
return LimitReader(afile, length)
|
||||||
else:
|
else:
|
||||||
return afile
|
return afile
|
||||||
|
@ -171,7 +171,6 @@ def timestamp_to_datetime(string):
|
|||||||
# pad to 6 digits
|
# pad to 6 digits
|
||||||
string = _pad_timestamp(string, PAD_6)
|
string = _pad_timestamp(string, PAD_6)
|
||||||
|
|
||||||
|
|
||||||
def clamp(val, min_, max_):
|
def clamp(val, min_, max_):
|
||||||
try:
|
try:
|
||||||
val = int(val)
|
val = int(val)
|
||||||
|
3
pywb/utils/wbexception.py
Normal file
3
pywb/utils/wbexception.py
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
class WbException(Exception):
    """
    Base exception for pywb errors that expose an HTTP status line
    through status().
    """
    def status(self):
        """Return the HTTP status line for this error."""
        return '500 Internal Server Error'
|
@ -9,6 +9,9 @@ from pywb.utils.statusandheaders import StatusAndHeadersParserException
|
|||||||
from pywb.utils.loaders import BlockLoader
|
from pywb.utils.loaders import BlockLoader
|
||||||
from pywb.utils.bufferedreaders import DecompressingBufferedReader
|
from pywb.utils.bufferedreaders import DecompressingBufferedReader
|
||||||
|
|
||||||
|
from pywb.utils.wbexception import WbException
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
ArcWarcRecord = collections.namedtuple('ArchiveRecord',
|
ArcWarcRecord = collections.namedtuple('ArchiveRecord',
|
||||||
'type, rec_headers, ' +
|
'type, rec_headers, ' +
|
||||||
@ -16,7 +19,7 @@ ArcWarcRecord = collections.namedtuple('ArchiveRecord',
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class ArchiveLoadFailed(Exception):
|
class ArchiveLoadFailed(WbException):
|
||||||
def __init__(self, reason, filename=''):
|
def __init__(self, reason, filename=''):
|
||||||
super(ArchiveLoadFailed, self).__init__(filename + ':' + str(reason))
|
super(ArchiveLoadFailed, self).__init__(filename + ':' + str(reason))
|
||||||
#self.filename = filename
|
#self.filename = filename
|
||||||
@ -62,9 +65,9 @@ class ArcWarcRecordLoader:
|
|||||||
decomp_type = 'gzip'
|
decomp_type = 'gzip'
|
||||||
|
|
||||||
# Create decompressing stream
|
# Create decompressing stream
|
||||||
stream = DecompressingBufferedReader(stream = raw,
|
stream = DecompressingBufferedReader(stream=raw,
|
||||||
decomp_type = decomp_type,
|
decomp_type=decomp_type,
|
||||||
block_size = self.block_size)
|
block_size=self.block_size)
|
||||||
|
|
||||||
(the_format, rec_headers) = self._detect_type_load_headers(stream)
|
(the_format, rec_headers) = self._detect_type_load_headers(stream)
|
||||||
|
|
||||||
|
2
run.sh
2
run.sh
@ -10,7 +10,7 @@ mypath=$(cd `dirname $0` && pwd)
|
|||||||
# ex: my_pywb.pywb_config()
|
# ex: my_pywb.pywb_config()
|
||||||
#export 'PYWB_CONFIG=my_pywb'
|
#export 'PYWB_CONFIG=my_pywb'
|
||||||
|
|
||||||
app="pywb.bootstrap.wbapp"
|
app="pywb.apps.wayback"
|
||||||
|
|
||||||
params="--http-socket :8080 -b 65536"
|
params="--http-socket :8080 -b 65536"
|
||||||
#params="--static-map /static=$mypath/static --http-socket :8080 -b 65536"
|
#params="--static-map /static=$mypath/static --http-socket :8080 -b 65536"
|
||||||
|
2
setup.py
2
setup.py
@ -22,6 +22,7 @@ setup(
|
|||||||
'pywb.core',
|
'pywb.core',
|
||||||
'pywb.dispatch',
|
'pywb.dispatch',
|
||||||
'pywb.bootstrap'
|
'pywb.bootstrap',  # comma required: adjacent string literals would otherwise concatenate with the next entry
|
||||||
|
'pywb.apps'
|
||||||
],
|
],
|
||||||
package_data={
|
package_data={
|
||||||
'pywb': ['ui/*', 'static/*', '*.yaml'],
|
'pywb': ['ui/*', 'static/*', '*.yaml'],
|
||||||
@ -41,7 +42,6 @@ setup(
|
|||||||
'pyyaml',
|
'pyyaml',
|
||||||
'WebTest',
|
'WebTest',
|
||||||
'pytest',
|
'pytest',
|
||||||
'werkzeug>=0.9.4',
|
|
||||||
],
|
],
|
||||||
# tests_require=['WebTest', 'pytest'],
|
# tests_require=['WebTest', 'pytest'],
|
||||||
zip_safe=False
|
zip_safe=False
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
import webtest
|
import webtest
|
||||||
from pywb.bootstrap.pywb_init import pywb_config
|
from pywb.bootstrap.pywb_init import create_wb_router
|
||||||
from pywb.bootstrap.wbapp import create_wb_app
|
from pywb.bootstrap.wsgi_wrappers import init_app
|
||||||
from pywb.cdx.cdxobject import CDXObject
|
from pywb.cdx.cdxobject import CDXObject
|
||||||
|
|
||||||
from fixture import TestExclusionPerms
|
from fixture import TestExclusionPerms
|
||||||
@ -11,8 +11,13 @@ class TestWb:
|
|||||||
def setup(self):
|
def setup(self):
|
||||||
#self.app = pywb.wbapp.create_wb_app(pywb.pywb_init.pywb_config())
|
#self.app = pywb.wbapp.create_wb_app(pywb.pywb_init.pywb_config())
|
||||||
# save it in self - useful for debugging
|
# save it in self - useful for debugging
|
||||||
self.router = pywb_config(self.TEST_CONFIG)
|
self.app = init_app(create_wb_router,
|
||||||
self.app = create_wb_app(self.router)
|
load_yaml=True,
|
||||||
|
config_file=self.TEST_CONFIG)
|
||||||
|
|
||||||
|
#self.router = pywb_config(self.TEST_CONFIG)
|
||||||
|
#self.app = create_wb_app(self.router)
|
||||||
|
|
||||||
self.testapp = webtest.TestApp(self.app)
|
self.testapp = webtest.TestApp(self.app)
|
||||||
|
|
||||||
def _assert_basic_html(self, resp):
|
def _assert_basic_html(self, resp):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user