mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
more refactoring!
Create 'framework' subpackage for general-purpose components; it contains routing, request/response, exceptions and WSGI wrappers. Update framework package for PEP 8. dsrules: use load_yaml_config() (moved to pywb.utils.loaders) to init the default config.
This commit is contained in:
parent
f1acad53fc
commit
f0a0976038
@ -1,5 +1,5 @@
|
|||||||
from pywb.bootstrap.wsgi_wrappers import init_app, start_wsgi_server
|
from pywb.framework.wsgi_wrappers import init_app, start_wsgi_server
|
||||||
from pywb.bootstrap.pywb_init import create_wb_router
|
from pywb.core.pywb_init import create_wb_router
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# init pywb app
|
# init pywb app
|
||||||
|
@ -5,7 +5,7 @@ import time
|
|||||||
|
|
||||||
from pywb.rewrite.wburl import WbUrl
|
from pywb.rewrite.wburl import WbUrl
|
||||||
from pywb.cdx.query import CDXQuery
|
from pywb.cdx.query import CDXQuery
|
||||||
from wbrequestresponse import WbResponse
|
from pywb.framework.wbrequestresponse import WbResponse
|
||||||
from wbexceptions import WbException, NotFoundException
|
from wbexceptions import WbException, NotFoundException
|
||||||
from views import TextCapturesView
|
from views import TextCapturesView
|
||||||
|
|
||||||
|
@ -1,25 +1,25 @@
|
|||||||
from pywb.dispatch.archivalrouter import ArchivalRouter, Route
|
from pywb.framework.archivalrouter import ArchivalRouter, Route
|
||||||
from pywb.dispatch.proxy import ProxyArchivalRouter
|
from pywb.framework.proxy import ProxyArchivalRouter
|
||||||
|
|
||||||
from pywb.warc.recordloader import ArcWarcRecordLoader
|
from pywb.warc.recordloader import ArcWarcRecordLoader
|
||||||
from pywb.warc.resolvingloader import ResolvingLoader
|
from pywb.warc.resolvingloader import ResolvingLoader
|
||||||
|
|
||||||
from pywb.rewrite.rewrite_content import RewriteContent
|
from pywb.rewrite.rewrite_content import RewriteContent
|
||||||
|
|
||||||
from pywb.core.indexreader import IndexReader
|
from indexreader import IndexReader
|
||||||
from pywb.core.views import J2TemplateView, J2HtmlCapturesView
|
from views import J2TemplateView, J2HtmlCapturesView
|
||||||
from pywb.core.handlers import WBHandler
|
from replay_views import ReplayView
|
||||||
from pywb.core.replay_views import ReplayView
|
|
||||||
|
|
||||||
from pywb.core.handlers import CDXHandler, StaticHandler
|
from handlers import WBHandler
|
||||||
from pywb.core.handlers import DebugEchoHandler, DebugEchoEnvHandler
|
from handlers import CDXHandler, StaticHandler
|
||||||
|
from handlers import DebugEchoHandler, DebugEchoEnvHandler
|
||||||
|
|
||||||
from pywb.utils.loaders import BlockLoader
|
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import yaml
|
import yaml
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
DEFAULTS = {
|
DEFAULTS = {
|
||||||
'hostpaths': ['http://localhost:8080'],
|
'hostpaths': ['http://localhost:8080'],
|
||||||
@ -34,7 +34,7 @@ DEFAULTS = {
|
|||||||
|
|
||||||
'static_routes': {'static/default': 'static/'},
|
'static_routes': {'static/default': 'static/'},
|
||||||
|
|
||||||
'domain_specific_rules': 'rules.yaml',
|
'domain_specific_rules': 'pywb/rules.yaml',
|
||||||
}
|
}
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
@ -2,9 +2,9 @@ import StringIO
|
|||||||
|
|
||||||
from pywb.rewrite.url_rewriter import UrlRewriter
|
from pywb.rewrite.url_rewriter import UrlRewriter
|
||||||
from pywb.utils.bufferedreaders import ChunkedDataReader
|
from pywb.utils.bufferedreaders import ChunkedDataReader
|
||||||
from wbrequestresponse import WbResponse
|
from pywb.framework.wbrequestresponse import WbResponse
|
||||||
|
|
||||||
from wbexceptions import CaptureException, InternalRedirect
|
from pywb.framework.wbexceptions import CaptureException, InternalRedirect
|
||||||
from pywb.warc.recordloader import ArchiveLoadFailed
|
from pywb.warc.recordloader import ArchiveLoadFailed
|
||||||
|
|
||||||
from pywb.utils.loaders import LimitReader
|
from pywb.utils.loaders import LimitReader
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
from pywb.utils.timeutils import timestamp_to_datetime
|
from pywb.utils.timeutils import timestamp_to_datetime
|
||||||
from wbrequestresponse import WbResponse
|
from pywb.framework.wbrequestresponse import WbResponse
|
||||||
|
|
||||||
import urlparse
|
import urlparse
|
||||||
import time
|
import time
|
||||||
|
@ -1,15 +1,17 @@
|
|||||||
import urlparse
|
import urlparse
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from pywb.core.wbrequestresponse import WbRequest, WbResponse
|
|
||||||
from pywb.rewrite.url_rewriter import UrlRewriter
|
from pywb.rewrite.url_rewriter import UrlRewriter
|
||||||
|
from wbrequestresponse import WbRequest, WbResponse
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# ArchivalRouter -- route WB requests in archival mode
|
# ArchivalRouter -- route WB requests in archival mode
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class ArchivalRouter:
|
class ArchivalRouter(object):
|
||||||
def __init__(self, routes, hostpaths=None, abs_path=True, home_view=None, error_view=None):
|
def __init__(self, routes, hostpaths=None, abs_path=True,
|
||||||
|
home_view=None, error_view=None):
|
||||||
|
|
||||||
self.routes = routes
|
self.routes = routes
|
||||||
self.fallback = ReferRedirect(hostpaths)
|
self.fallback = ReferRedirect(hostpaths)
|
||||||
self.abs_path = abs_path
|
self.abs_path = abs_path
|
||||||
@ -29,26 +31,27 @@ class ArchivalRouter:
|
|||||||
|
|
||||||
return self.fallback(env, self.routes) if self.fallback else None
|
return self.fallback(env, self.routes) if self.fallback else None
|
||||||
|
|
||||||
|
|
||||||
def render_home_page(self):
|
def render_home_page(self):
|
||||||
# render the homepage!
|
# render the homepage!
|
||||||
if self.home_view:
|
if self.home_view:
|
||||||
return self.home_view.render_response(routes = self.routes)
|
return self.home_view.render_response(routes=self.routes)
|
||||||
else:
|
else:
|
||||||
# default home page template
|
# default home page template
|
||||||
text = '\n'.join(map(str, self.routes))
|
text = '\n'.join(map(str, self.routes))
|
||||||
return WbResponse.text_response(text)
|
return WbResponse.text_response(text)
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# Route by matching regex (or fixed prefix)
|
# Route by matching regex (or fixed prefix)
|
||||||
# of request uri (excluding first '/')
|
# of request uri (excluding first '/')
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class Route:
|
class Route(object):
|
||||||
# match upto next / or ? or end
|
# match upto next / or ? or end
|
||||||
SLASH_QUERY_LOOKAHEAD ='(?=/|$|\?)'
|
SLASH_QUERY_LOOKAHEAD = '(?=/|$|\?)'
|
||||||
|
|
||||||
|
def __init__(self, regex, handler, coll_group=0, config={},
|
||||||
|
lookahead=SLASH_QUERY_LOOKAHEAD):
|
||||||
|
|
||||||
def __init__(self, regex, handler, coll_group = 0, config = {}, lookahead = SLASH_QUERY_LOOKAHEAD):
|
|
||||||
self.path = regex
|
self.path = regex
|
||||||
if regex:
|
if regex:
|
||||||
self.regex = re.compile(regex + lookahead)
|
self.regex = re.compile(regex + lookahead)
|
||||||
@ -59,12 +62,11 @@ class Route:
|
|||||||
self.coll_group = coll_group
|
self.coll_group = coll_group
|
||||||
self._custom_init(config)
|
self._custom_init(config)
|
||||||
|
|
||||||
|
|
||||||
def __call__(self, env, use_abs_prefix):
|
def __call__(self, env, use_abs_prefix):
|
||||||
wbrequest = self.parse_request(env, use_abs_prefix)
|
wbrequest = self.parse_request(env, use_abs_prefix)
|
||||||
return self.handler(wbrequest) if wbrequest else None
|
return self.handler(wbrequest) if wbrequest else None
|
||||||
|
|
||||||
def parse_request(self, env, use_abs_prefix, request_uri = None):
|
def parse_request(self, env, use_abs_prefix, request_uri=None):
|
||||||
if not request_uri:
|
if not request_uri:
|
||||||
request_uri = env['REL_REQUEST_URI']
|
request_uri = env['REL_REQUEST_URI']
|
||||||
|
|
||||||
@ -75,10 +77,12 @@ class Route:
|
|||||||
matched_str = matcher.group(0)
|
matched_str = matcher.group(0)
|
||||||
if matched_str:
|
if matched_str:
|
||||||
rel_prefix = env['SCRIPT_NAME'] + '/' + matched_str + '/'
|
rel_prefix = env['SCRIPT_NAME'] + '/' + matched_str + '/'
|
||||||
wb_url_str = request_uri[len(matched_str) + 2:] # remove the '/' + rel_prefix part of uri
|
# remove the '/' + rel_prefix part of uri
|
||||||
|
wb_url_str = request_uri[len(matched_str) + 2:]
|
||||||
else:
|
else:
|
||||||
rel_prefix = env['SCRIPT_NAME'] + '/'
|
rel_prefix = env['SCRIPT_NAME'] + '/'
|
||||||
wb_url_str = request_uri[1:] # the request_uri is the wb_url, since no coll
|
# the request_uri is the wb_url, since no coll
|
||||||
|
wb_url_str = request_uri[1:]
|
||||||
|
|
||||||
coll = matcher.group(self.coll_group)
|
coll = matcher.group(self.coll_group)
|
||||||
|
|
||||||
@ -88,20 +92,19 @@ class Route:
|
|||||||
rel_prefix=rel_prefix,
|
rel_prefix=rel_prefix,
|
||||||
coll=coll,
|
coll=coll,
|
||||||
use_abs_prefix=use_abs_prefix,
|
use_abs_prefix=use_abs_prefix,
|
||||||
wburl_class = self.handler.get_wburl_type(),
|
wburl_class=self.handler.get_wburl_type(),
|
||||||
urlrewriter_class=UrlRewriter)
|
urlrewriter_class=UrlRewriter)
|
||||||
|
|
||||||
|
|
||||||
# Allow for applying of additional filters
|
# Allow for applying of additional filters
|
||||||
self._apply_filters(wbrequest, matcher)
|
self._apply_filters(wbrequest, matcher)
|
||||||
|
|
||||||
return wbrequest
|
return wbrequest
|
||||||
|
|
||||||
|
|
||||||
def _apply_filters(self, wbrequest, matcher):
|
def _apply_filters(self, wbrequest, matcher):
|
||||||
for filter in self.filters:
|
for filter in self.filters:
|
||||||
last_grp = len(matcher.groups())
|
last_grp = len(matcher.groups())
|
||||||
wbrequest.query_filter.append(filter.format(matcher.group(last_grp)))
|
filter_str = filter.format(matcher.group(last_grp))
|
||||||
|
wbrequest.query_filter.append(filter_str)
|
||||||
|
|
||||||
def _custom_init(self, config):
|
def _custom_init(self, config):
|
||||||
self.filters = config.get('filters', [])
|
self.filters = config.get('filters', [])
|
||||||
@ -112,7 +115,8 @@ class Route:
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# ReferRedirect -- redirect urls that have 'fallen through' based on the referrer settings
|
# ReferRedirect -- redirect urls that have 'fallen through'
|
||||||
|
# based on the referrer settings
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class ReferRedirect:
|
class ReferRedirect:
|
||||||
def __init__(self, match_prefixs):
|
def __init__(self, match_prefixs):
|
||||||
@ -121,7 +125,6 @@ class ReferRedirect:
|
|||||||
else:
|
else:
|
||||||
self.match_prefixs = [match_prefixs]
|
self.match_prefixs = [match_prefixs]
|
||||||
|
|
||||||
|
|
||||||
def __call__(self, env, routes):
|
def __call__(self, env, routes):
|
||||||
referrer = env.get('HTTP_REFERER')
|
referrer = env.get('HTTP_REFERER')
|
||||||
|
|
||||||
@ -133,7 +136,7 @@ class ReferRedirect:
|
|||||||
ref_split = urlparse.urlsplit(referrer)
|
ref_split = urlparse.urlsplit(referrer)
|
||||||
|
|
||||||
# ensure referrer starts with one of allowed hosts
|
# ensure referrer starts with one of allowed hosts
|
||||||
if not any (referrer.startswith(i) for i in self.match_prefixs):
|
if not any(referrer.startswith(i) for i in self.match_prefixs):
|
||||||
if ref_split.netloc != env.get('HTTP_HOST'):
|
if ref_split.netloc != env.get('HTTP_HOST'):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@ -144,13 +147,12 @@ class ReferRedirect:
|
|||||||
if app_path:
|
if app_path:
|
||||||
# must start with current app name, if not root
|
# must start with current app name, if not root
|
||||||
if not path.startswith(app_path):
|
if not path.startswith(app_path):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
path = path[len(app_path):]
|
path = path[len(app_path):]
|
||||||
|
|
||||||
|
|
||||||
for route in routes:
|
for route in routes:
|
||||||
ref_request = route.parse_request(env, False, request_uri = path)
|
ref_request = route.parse_request(env, False, request_uri=path)
|
||||||
if ref_request:
|
if ref_request:
|
||||||
break
|
break
|
||||||
|
|
||||||
@ -174,6 +176,10 @@ class ReferRedirect:
|
|||||||
# 2013/path.html -> /path.html
|
# 2013/path.html -> /path.html
|
||||||
rel_request_uri = rel_request_uri[len(timestamp_path) - 1:]
|
rel_request_uri = rel_request_uri[len(timestamp_path) - 1:]
|
||||||
|
|
||||||
final_url = urlparse.urlunsplit((ref_split.scheme, ref_split.netloc, rewriter.rewrite(rel_request_uri), '', ''))
|
final_url = urlparse.urlunsplit((ref_split.scheme,
|
||||||
|
ref_split.netloc,
|
||||||
|
rewriter.rewrite(rel_request_uri),
|
||||||
|
'',
|
||||||
|
''))
|
||||||
|
|
||||||
return WbResponse.redir_response(final_url)
|
return WbResponse.redir_response(final_url)
|
@ -1,15 +1,19 @@
|
|||||||
from pywb.core.wbrequestresponse import WbResponse, WbRequest
|
from wbrequestresponse import WbResponse, WbRequest
|
||||||
from archivalrouter import ArchivalRouter
|
from archivalrouter import ArchivalRouter
|
||||||
import urlparse
|
import urlparse
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# An experimental router which combines both archival and proxy modes
|
# An experimental router which combines both archival and proxy modes
|
||||||
# http proxy mode support is very simple: only latest capture is available currently
|
# http proxy mode support is very simple so far:
|
||||||
|
# only latest capture is available currently
|
||||||
#=================================================================
|
#=================================================================
|
||||||
|
|
||||||
class ProxyArchivalRouter:
|
class ProxyArchivalRouter:
|
||||||
def __init__(self, routes, hostpaths = None, abs_path = True, home_view = None, error_view = None):
|
def __init__(self, routes, hostpaths=None, abs_path=True,
|
||||||
self.archival = ArchivalRouter(routes, hostpaths, abs_path, home_view, error_view)
|
home_view=None, error_view=None):
|
||||||
|
|
||||||
|
self.archival = ArchivalRouter(routes, hostpaths, abs_path,
|
||||||
|
home_view, error_view)
|
||||||
self.proxy = ProxyRouter(routes[0].handler, hostpaths, error_view)
|
self.proxy = ProxyRouter(routes[0].handler, hostpaths, error_view)
|
||||||
self.error_view = error_view
|
self.error_view = error_view
|
||||||
|
|
||||||
@ -29,7 +33,7 @@ class ProxyArchivalRouter:
|
|||||||
# Only supports latest capture replay at the moment
|
# Only supports latest capture replay at the moment
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class ProxyRouter:
|
class ProxyRouter:
|
||||||
def __init__(self, handler, hostpaths = None, error_view = None):
|
def __init__(self, handler, hostpaths=None, error_view=None):
|
||||||
self.handler = handler
|
self.handler = handler
|
||||||
self.hostpaths = hostpaths
|
self.hostpaths = hostpaths
|
||||||
|
|
||||||
@ -56,27 +60,26 @@ class ProxyRouter:
|
|||||||
|
|
||||||
return self.handler(wbrequest)
|
return self.handler(wbrequest)
|
||||||
|
|
||||||
|
|
||||||
# Proxy Auto-Config (PAC) script for the proxy
|
# Proxy Auto-Config (PAC) script for the proxy
|
||||||
def make_pac_response(self, env):
|
def make_pac_response(self, env):
|
||||||
server_hostport = env['SERVER_NAME'] + ':' + env['SERVER_PORT']
|
server_hostport = env['SERVER_NAME'] + ':' + env['SERVER_PORT']
|
||||||
|
|
||||||
buff = 'function FindProxyForURL (url, host) {\n'
|
buff = 'function FindProxyForURL (url, host) {\n'
|
||||||
|
|
||||||
direct_cond =' if (shExpMatch(host, "{0}")) {{ return "DIRECT"; }}\n'
|
direct = ' if (shExpMatch(host, "{0}")) {{ return "DIRECT"; }}\n'
|
||||||
|
|
||||||
for hostpath in self.hostpaths:
|
for hostpath in self.hostpaths:
|
||||||
parts = urlparse.urlsplit(hostpath).netloc.split(':')
|
parts = urlparse.urlsplit(hostpath).netloc.split(':')
|
||||||
buff += direct_cond.format(parts[0])
|
buff += direct.format(parts[0])
|
||||||
|
|
||||||
buff += direct_cond.format(env['SERVER_NAME'])
|
buff += direct.format(env['SERVER_NAME'])
|
||||||
|
|
||||||
#buff += '\n return "PROXY {0}";\n}}\n'.format(self.hostpaths[0])
|
#buff += '\n return "PROXY {0}";\n}}\n'.format(self.hostpaths[0])
|
||||||
buff += '\n return "PROXY {0}";\n}}\n'.format(server_hostport)
|
buff += '\n return "PROXY {0}";\n}}\n'.format(server_hostport)
|
||||||
|
|
||||||
return WbResponse.text_response(buff, content_type = 'application/x-ns-proxy-autoconfig')
|
content_type = 'application/x-ns-proxy-autoconfig'
|
||||||
|
|
||||||
|
|
||||||
|
return WbResponse.text_response(buff, content_type=content_type)
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
@ -85,10 +88,11 @@ class ProxyRouter:
|
|||||||
class ProxyHttpsUrlRewriter:
|
class ProxyHttpsUrlRewriter:
|
||||||
HTTP = 'http://'
|
HTTP = 'http://'
|
||||||
HTTPS = 'https://'
|
HTTPS = 'https://'
|
||||||
|
|
||||||
def __init__(self, wbrequest, prefix):
|
def __init__(self, wbrequest, prefix):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def rewrite(self, url, mod = None):
|
def rewrite(self, url, mod=None):
|
||||||
if url.startswith(self.HTTPS):
|
if url.startswith(self.HTTPS):
|
||||||
return self.HTTP + url[len(self.HTTPS):]
|
return self.HTTP + url[len(self.HTTPS):]
|
||||||
else:
|
else:
|
||||||
@ -97,6 +101,5 @@ class ProxyHttpsUrlRewriter:
|
|||||||
def get_timestamp_url(self, timestamp, url):
|
def get_timestamp_url(self, timestamp, url):
|
||||||
return url
|
return url
|
||||||
|
|
||||||
def get_abs_url(self, url = ''):
|
def get_abs_url(self, url=''):
|
||||||
return url
|
return url
|
||||||
|
|
@ -84,7 +84,7 @@ False
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from pywb.dispatch.archivalrouter import Route, ReferRedirect
|
from pywb.framework.archivalrouter import Route, ReferRedirect
|
||||||
from pywb.core.handlers import BaseHandler, WbUrlHandler
|
from pywb.core.handlers import BaseHandler, WbUrlHandler
|
||||||
import pprint
|
import pprint
|
||||||
|
|
@ -41,7 +41,7 @@ from pywb.rewrite.wburl import WbUrl
|
|||||||
from pywb.rewrite.url_rewriter import UrlRewriter
|
from pywb.rewrite.url_rewriter import UrlRewriter
|
||||||
from pywb.utils.statusandheaders import StatusAndHeaders
|
from pywb.utils.statusandheaders import StatusAndHeaders
|
||||||
|
|
||||||
from pywb.core.wbrequestresponse import WbRequest, WbResponse
|
from pywb.framework.wbrequestresponse import WbRequest, WbResponse
|
||||||
|
|
||||||
|
|
||||||
def print_req_from_uri(request_uri, env={}, use_abs_prefix=False):
|
def print_req_from_uri(request_uri, env={}, use_abs_prefix=False):
|
@ -26,7 +26,6 @@ class WbRequest:
|
|||||||
except KeyError:
|
except KeyError:
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, env,
|
def __init__(self, env,
|
||||||
request_uri=None,
|
request_uri=None,
|
||||||
rel_prefix='',
|
rel_prefix='',
|
||||||
@ -40,7 +39,10 @@ class WbRequest:
|
|||||||
|
|
||||||
self.env = env
|
self.env = env
|
||||||
|
|
||||||
self.request_uri = request_uri if request_uri else env.get('REL_REQUEST_URI')
|
if request_uri:
|
||||||
|
self.request_uri = request_uri
|
||||||
|
else:
|
||||||
|
self.request_uri = env.get('REL_REQUEST_URI')
|
||||||
|
|
||||||
self.coll = coll
|
self.coll = coll
|
||||||
|
|
||||||
@ -55,7 +57,6 @@ class WbRequest:
|
|||||||
else:
|
else:
|
||||||
self.wb_prefix = rel_prefix
|
self.wb_prefix = rel_prefix
|
||||||
|
|
||||||
|
|
||||||
if not wb_url_str:
|
if not wb_url_str:
|
||||||
wb_url_str = '/'
|
wb_url_str = '/'
|
||||||
|
|
||||||
@ -83,7 +84,6 @@ class WbRequest:
|
|||||||
# PERF
|
# PERF
|
||||||
env['X_PERF'] = {}
|
env['X_PERF'] = {}
|
||||||
|
|
||||||
|
|
||||||
def _is_ajax(self):
|
def _is_ajax(self):
|
||||||
value = self.env.get('HTTP_X_REQUESTED_WITH')
|
value = self.env.get('HTTP_X_REQUESTED_WITH')
|
||||||
if not value:
|
if not value:
|
||||||
@ -96,7 +96,6 @@ class WbRequest:
|
|||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
varlist = vars(self)
|
varlist = vars(self)
|
||||||
varstr = pprint.pformat(varlist)
|
varstr = pprint.pformat(varlist)
|
||||||
@ -111,32 +110,39 @@ class WbResponse:
|
|||||||
Holds a status_headers object and a response iter, to be
|
Holds a status_headers object and a response iter, to be
|
||||||
returned to wsgi container.
|
returned to wsgi container.
|
||||||
"""
|
"""
|
||||||
def __init__(self, status_headers, value = []):
|
def __init__(self, status_headers, value=[]):
|
||||||
self.status_headers = status_headers
|
self.status_headers = status_headers
|
||||||
self.body = value
|
self.body = value
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def text_stream(text, status = '200 OK', content_type = 'text/plain'):
|
def text_stream(stream, status='200 OK', content_type='text/plain'):
|
||||||
return WbResponse(StatusAndHeaders(status, [('Content-Type', content_type)]), value = text)
|
status_headers = StatusAndHeaders(status,
|
||||||
|
[('Content-Type', content_type)])
|
||||||
|
|
||||||
|
return WbResponse(status_headers, value=stream)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def text_response(text, status = '200 OK', content_type = 'text/plain'):
|
def text_response(text, status='200 OK', content_type='text/plain'):
|
||||||
return WbResponse(StatusAndHeaders(status, [('Content-Type', content_type)]), value = [text])
|
status_headers = StatusAndHeaders(status,
|
||||||
|
[('Content-Type', content_type)])
|
||||||
|
|
||||||
|
return WbResponse(status_headers, value=[text])
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def redir_response(location, status = '302 Redirect'):
|
def redir_response(location, status='302 Redirect'):
|
||||||
return WbResponse(StatusAndHeaders(status, [('Location', location)]))
|
return WbResponse(StatusAndHeaders(status,
|
||||||
|
[('Location', location)]))
|
||||||
|
|
||||||
def __call__(self, env, start_response):
|
def __call__(self, env, start_response):
|
||||||
|
|
||||||
# PERF
|
# PERF
|
||||||
perfstats = env.get('X_PERF')
|
perfstats = env.get('X_PERF')
|
||||||
if perfstats:
|
if perfstats:
|
||||||
self.status_headers.headers.append(('X-Archive-Perf-Stats', str(perfstats)))
|
self.status_headers.headers.append(('X-Archive-Perf-Stats',
|
||||||
|
str(perfstats)))
|
||||||
|
|
||||||
|
start_response(self.status_headers.statusline,
|
||||||
start_response(self.status_headers.statusline, self.status_headers.headers)
|
self.status_headers.headers)
|
||||||
|
|
||||||
if env['REQUEST_METHOD'] == 'HEAD':
|
if env['REQUEST_METHOD'] == 'HEAD':
|
||||||
if hasattr(self.body, 'close'):
|
if hasattr(self.body, 'close'):
|
||||||
@ -148,6 +154,5 @@ class WbResponse:
|
|||||||
else:
|
else:
|
||||||
return [str(self.body)]
|
return [str(self.body)]
|
||||||
|
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return str(vars(self))
|
return str(vars(self))
|
@ -1,8 +1,9 @@
|
|||||||
from pywb.utils.wbexception import WbException
|
from pywb.utils.wbexception import WbException
|
||||||
from pywb.core.wbexceptions import NotFoundException, InternalRedirect
|
from pywb.utils.loaders import load_yaml_config
|
||||||
from pywb.core.wbrequestresponse import WbResponse, StatusAndHeaders
|
|
||||||
|
from wbexceptions import NotFoundException, InternalRedirect
|
||||||
|
from wbrequestresponse import WbResponse, StatusAndHeaders
|
||||||
|
|
||||||
from pywb.utils.loaders import BlockLoader
|
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import importlib
|
import importlib
|
||||||
@ -10,10 +11,13 @@ import logging
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# adapted from wsgiref.request_uri, but doesn't include domain name and allows all characters
|
# adapted from wsgiref.request_uri, but doesn't include domain name
|
||||||
# allowed in the path segment according to: http://tools.ietf.org/html/rfc3986#section-3.3
|
# and allows all characters which are allowed in the path segment
|
||||||
|
# according to: http://tools.ietf.org/html/rfc3986#section-3.3
|
||||||
# explained here:
|
# explained here:
|
||||||
# http://stackoverflow.com/questions/4669692/valid-characters-for-directory-part-of-a-url-for-short-links
|
# http://stackoverflow.com/questions/4669692/
|
||||||
|
# valid-characters-for-directory-part-of-a-url-for-short-links
|
||||||
|
|
||||||
def rel_request_uri(environ, include_query=1):
|
def rel_request_uri(environ, include_query=1):
|
||||||
"""
|
"""
|
||||||
Return the requested path, optionally including the query string
|
Return the requested path, optionally including the query string
|
||||||
@ -28,7 +32,7 @@ def rel_request_uri(environ, include_query=1):
|
|||||||
"/web/example.com/0~!+$&'()*+,;=:%22"
|
"/web/example.com/0~!+$&'()*+,;=:%22"
|
||||||
"""
|
"""
|
||||||
from urllib import quote
|
from urllib import quote
|
||||||
url = quote(environ.get('PATH_INFO',''), safe='/~!$&\'()*+,;=:@')
|
url = quote(environ.get('PATH_INFO', ''), safe='/~!$&\'()*+,;=:@')
|
||||||
if include_query and environ.get('QUERY_STRING'):
|
if include_query and environ.get('QUERY_STRING'):
|
||||||
url += '?' + environ['QUERY_STRING']
|
url += '?' + environ['QUERY_STRING']
|
||||||
|
|
||||||
@ -50,7 +54,8 @@ def create_wb_app(wb_router):
|
|||||||
response = wb_router(env)
|
response = wb_router(env)
|
||||||
|
|
||||||
if not response:
|
if not response:
|
||||||
raise NotFoundException('No handler for "{0}"'.format(env['REL_REQUEST_URI']))
|
msg = 'No handler for "{0}"'.format(env['REL_REQUEST_URI'])
|
||||||
|
raise NotFoundException(msg)
|
||||||
|
|
||||||
except InternalRedirect as ir:
|
except InternalRedirect as ir:
|
||||||
response = WbResponse(StatusAndHeaders(ir.status, ir.httpHeaders))
|
response = WbResponse(StatusAndHeaders(ir.status, ir.httpHeaders))
|
||||||
@ -63,7 +68,6 @@ def create_wb_app(wb_router):
|
|||||||
|
|
||||||
return response(env, start_response)
|
return response(env, start_response)
|
||||||
|
|
||||||
|
|
||||||
return application
|
return application
|
||||||
|
|
||||||
|
|
||||||
@ -94,16 +98,6 @@ def handle_exception(env, error_view, exc, print_trace):
|
|||||||
#=================================================================
|
#=================================================================
|
||||||
DEFAULT_CONFIG_FILE = 'config.yaml'
|
DEFAULT_CONFIG_FILE = 'config.yaml'
|
||||||
|
|
||||||
def load_yaml_config(config_file=None):
|
|
||||||
import yaml
|
|
||||||
|
|
||||||
if not config_file:
|
|
||||||
config_file = DEFAULT_CONFIG_FILE
|
|
||||||
|
|
||||||
configdata = BlockLoader().load(config_file)
|
|
||||||
config = yaml.load(configdata)
|
|
||||||
return config
|
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
def init_app(init_func, load_yaml=True, config_file=None):
|
def init_app(init_func, load_yaml=True, config_file=None):
|
||||||
@ -114,6 +108,9 @@ def init_app(init_func, load_yaml=True, config_file=None):
|
|||||||
if load_yaml:
|
if load_yaml:
|
||||||
if not config_file:
|
if not config_file:
|
||||||
config_file = os.environ.get('PYWB_CONFIG_FILE')
|
config_file = os.environ.get('PYWB_CONFIG_FILE')
|
||||||
|
if not config_file:
|
||||||
|
config_file = DEFAULT_CONFIG_FILE
|
||||||
|
|
||||||
config = load_yaml_config(config_file)
|
config = load_yaml_config(config_file)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -135,6 +132,7 @@ def init_app(init_func, load_yaml=True, config_file=None):
|
|||||||
#=================================================================
|
#=================================================================
|
||||||
DEFAULT_PORT = 8080
|
DEFAULT_PORT = 8080
|
||||||
|
|
||||||
|
|
||||||
def start_wsgi_server(the_app):
|
def start_wsgi_server(the_app):
|
||||||
from wsgiref.simple_server import make_server
|
from wsgiref.simple_server import make_server
|
||||||
from optparse import OptionParser
|
from optparse import OptionParser
|
||||||
@ -153,7 +151,6 @@ def start_wsgi_server(the_app):
|
|||||||
except:
|
except:
|
||||||
port = DEFAULT_PORT
|
port = DEFAULT_PORT
|
||||||
|
|
||||||
|
|
||||||
logging.debug('Starting CDX Server on port %s', port)
|
logging.debug('Starting CDX Server on port %s', port)
|
||||||
|
|
||||||
try:
|
try:
|
@ -1,11 +1,10 @@
|
|||||||
import yaml
|
|
||||||
import pkgutil
|
import pkgutil
|
||||||
|
from loaders import load_yaml_config
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
|
|
||||||
DEFAULT_RULES_FILE = 'rules.yaml'
|
DEFAULT_RULES_FILE = 'pywb/rules.yaml'
|
||||||
DEFAULT_RULES_PKG = 'pywb'
|
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class RuleSet(object):
|
class RuleSet(object):
|
||||||
@ -23,10 +22,14 @@ class RuleSet(object):
|
|||||||
|
|
||||||
self.rules = []
|
self.rules = []
|
||||||
|
|
||||||
ds_rules_file = kwargs.get('ds_rules_file')
|
|
||||||
default_rule_config = kwargs.get('default_rule_config')
|
default_rule_config = kwargs.get('default_rule_config')
|
||||||
|
|
||||||
config = self.load_default_rules(ds_rules_file)
|
ds_rules_file = kwargs.get('ds_rules_file')
|
||||||
|
|
||||||
|
if not ds_rules_file:
|
||||||
|
ds_rules_file = DEFAULT_RULES_FILE
|
||||||
|
|
||||||
|
config = load_yaml_config(ds_rules_file)
|
||||||
|
|
||||||
rulesmap = config.get('rules') if config else None
|
rulesmap = config.get('rules') if config else None
|
||||||
|
|
||||||
@ -53,22 +56,6 @@ class RuleSet(object):
|
|||||||
if not def_key_found and default_rule_config is not None:
|
if not def_key_found and default_rule_config is not None:
|
||||||
self.rules.append(rule_cls(self.DEFAULT_KEY, default_rule_config))
|
self.rules.append(rule_cls(self.DEFAULT_KEY, default_rule_config))
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def load_default_rules(filename=None, pkg=None):
|
|
||||||
config = None
|
|
||||||
|
|
||||||
if not filename:
|
|
||||||
filename = DEFAULT_RULES_FILE
|
|
||||||
|
|
||||||
if not pkg:
|
|
||||||
pkg = DEFAULT_RULES_PKG
|
|
||||||
|
|
||||||
if filename:
|
|
||||||
yaml_str = pkgutil.get_data(pkg, filename)
|
|
||||||
config = yaml.load(yaml_str)
|
|
||||||
|
|
||||||
return config
|
|
||||||
|
|
||||||
def iter_matching(self, urlkey):
|
def iter_matching(self, urlkey):
|
||||||
"""
|
"""
|
||||||
Iterate over all matching rules for given urlkey
|
Iterate over all matching rules for given urlkey
|
||||||
|
@ -7,12 +7,20 @@ import os
|
|||||||
import hmac
|
import hmac
|
||||||
import urllib2
|
import urllib2
|
||||||
import time
|
import time
|
||||||
from pkg_resources import resource_stream
|
import pkg_resources
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
def is_http(filename):
|
def is_http(filename):
|
||||||
return any(filename.startswith(x) for x in ['http://', 'https://'])
|
return filename.startswith(('http://', 'https://'))
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
def load_yaml_config(config_file):
|
||||||
|
import yaml
|
||||||
|
configdata = BlockLoader().load(config_file)
|
||||||
|
config = yaml.load(configdata)
|
||||||
|
return config
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
@ -39,16 +47,27 @@ class BlockLoader(object):
|
|||||||
Load a file-like reader from the local file system
|
Load a file-like reader from the local file system
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
file_only = False
|
||||||
|
|
||||||
if url.startswith('file://'):
|
if url.startswith('file://'):
|
||||||
url = url[len('file://'):]
|
url = url[len('file://'):]
|
||||||
|
file_only = True
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# first, try as file
|
# first, try as file
|
||||||
afile = open(url, 'rb')
|
afile = open(url, 'rb')
|
||||||
except IOError as file_err:
|
|
||||||
|
except IOError:
|
||||||
|
#if file_only:
|
||||||
|
# raise
|
||||||
|
|
||||||
# then, try as package.path/file
|
# then, try as package.path/file
|
||||||
pkg_split = url.split('/', 1)
|
pkg_split = url.split('/', 1)
|
||||||
afile = resource_stream(pkg_split[0], pkg_split[1])
|
#if len(pkg_split) == 1:
|
||||||
|
# raise
|
||||||
|
|
||||||
|
afile = pkg_resources.resource_stream(pkg_split[0],
|
||||||
|
pkg_split[1])
|
||||||
|
|
||||||
if offset > 0:
|
if offset > 0:
|
||||||
afile.seek(offset)
|
afile.seek(offset)
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
import webtest
|
import webtest
|
||||||
from pywb.bootstrap.pywb_init import create_wb_router
|
from pywb.core.pywb_init import create_wb_router
|
||||||
from pywb.bootstrap.wsgi_wrappers import init_app
|
from pywb.framework.wsgi_wrappers import init_app
|
||||||
from pywb.cdx.cdxobject import CDXObject
|
from pywb.cdx.cdxobject import CDXObject
|
||||||
|
|
||||||
from fixture import TestExclusionPerms
|
from fixture import TestExclusionPerms
|
||||||
|
Loading…
x
Reference in New Issue
Block a user