mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
new-wayback cli script, using new FrontEndApp (rewriting) + AutoConfigApp (config-driven aggregator)
support for dynamic collections: check all .cdxj files in /<coll>/indexes/*.cdxj when accessing /<coll>
support for fixed routes: specified in config.yaml as per https://github.com/ikreymer/pywb/wiki/Distributed-Archive-Config
werkzeug routing in FrontEndApp: default query, replay, search pages working
route listing: /_coll_info.json for listing fixed + dynamic routes
autoindexing enabled: WARCs added to the archives directory are indexed to a .cdxj index
Addresses #196
This commit is contained in:
parent
531422fc1b
commit
31bf7a47f1
@ -1,4 +1,6 @@
|
|||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
|
import logging
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
def cdx_server(args=None): #pragma: no cover
|
def cdx_server(args=None): #pragma: no cover
|
||||||
@ -26,6 +28,11 @@ def webagg():
|
|||||||
WebaggCli().run()
|
WebaggCli().run()
|
||||||
|
|
||||||
|
|
||||||
|
#=============================================================================
|
||||||
|
def new_wayback():
|
||||||
|
NewWaybackCli().run()
|
||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
#=============================================================================
|
||||||
class BaseCli(object):
|
class BaseCli(object):
|
||||||
def __init__(self, args=None, default_port=8080, desc=''):
|
def __init__(self, args=None, default_port=8080, desc=''):
|
||||||
@ -33,6 +40,7 @@ class BaseCli(object):
|
|||||||
parser.add_argument('-p', '--port', type=int, default=default_port)
|
parser.add_argument('-p', '--port', type=int, default=default_port)
|
||||||
parser.add_argument('-t', '--threads', type=int, default=4)
|
parser.add_argument('-t', '--threads', type=int, default=4)
|
||||||
parser.add_argument('-s', '--server', default='gevent')
|
parser.add_argument('-s', '--server', default='gevent')
|
||||||
|
parser.add_argument('--debug', action='store_true')
|
||||||
|
|
||||||
self.desc = desc
|
self.desc = desc
|
||||||
|
|
||||||
@ -40,12 +48,15 @@ class BaseCli(object):
|
|||||||
|
|
||||||
self.r = parser.parse_args(args)
|
self.r = parser.parse_args(args)
|
||||||
|
|
||||||
|
logging.basicConfig(format='%(asctime)s: [%(levelname)s]: %(message)s',
|
||||||
|
level=logging.DEBUG if self.r.debug else logging.INFO)
|
||||||
|
|
||||||
if self.r.server == 'gevent':
|
if self.r.server == 'gevent':
|
||||||
try:
|
try:
|
||||||
from gevent.monkey import patch_all; patch_all()
|
from gevent.monkey import patch_all; patch_all()
|
||||||
print('Using Gevent')
|
logging.debug('Using Gevent')
|
||||||
except:
|
except:
|
||||||
print('No Gevent')
|
logging.debug('No Gevent')
|
||||||
self.r.server = 'wsgiref'
|
self.r.server = 'wsgiref'
|
||||||
|
|
||||||
from pywb.framework.wsgi_wrappers import init_app
|
from pywb.framework.wsgi_wrappers import init_app
|
||||||
@ -69,7 +80,7 @@ class BaseCli(object):
|
|||||||
|
|
||||||
def run_waitress(self): #pragma: no cover
|
def run_waitress(self): #pragma: no cover
|
||||||
from waitress import serve
|
from waitress import serve
|
||||||
print(self.desc)
|
logging.debug(str(self.desc))
|
||||||
serve(self.application, port=self.r.port, threads=self.r.threads)
|
serve(self.application, port=self.r.port, threads=self.r.threads)
|
||||||
|
|
||||||
def run_wsgiref(self): #pragma: no cover
|
def run_wsgiref(self): #pragma: no cover
|
||||||
@ -78,7 +89,7 @@ class BaseCli(object):
|
|||||||
|
|
||||||
def run_gevent(self):
|
def run_gevent(self):
|
||||||
from gevent.pywsgi import WSGIServer
|
from gevent.pywsgi import WSGIServer
|
||||||
print('Starting Gevent Server on ' + str(self.r.port))
|
logging.info('Starting Gevent Server on ' + str(self.r.port))
|
||||||
WSGIServer(('', self.r.port), self.application).serve_forever()
|
WSGIServer(('', self.r.port), self.application).serve_forever()
|
||||||
|
|
||||||
|
|
||||||
@ -105,6 +116,7 @@ class LiveCli(BaseCli):
|
|||||||
class ReplayCli(BaseCli):
|
class ReplayCli(BaseCli):
|
||||||
def _extend_parser(self, parser):
|
def _extend_parser(self, parser):
|
||||||
parser.add_argument('-a', '--autoindex', action='store_true')
|
parser.add_argument('-a', '--autoindex', action='store_true')
|
||||||
|
parser.add_argument('--auto-interval', type=int, default=30)
|
||||||
|
|
||||||
help_dir='Specify root archive dir (default is current working directory)'
|
help_dir='Specify root archive dir (default is current working directory)'
|
||||||
parser.add_argument('-d', '--directory', help=help_dir)
|
parser.add_argument('-d', '--directory', help=help_dir)
|
||||||
@ -118,7 +130,6 @@ class ReplayCli(BaseCli):
|
|||||||
if self.r.autoindex:
|
if self.r.autoindex:
|
||||||
from pywb.manager.manager import CollectionsManager
|
from pywb.manager.manager import CollectionsManager
|
||||||
import os
|
import os
|
||||||
import logging
|
|
||||||
|
|
||||||
m = CollectionsManager('', must_exist=False)
|
m = CollectionsManager('', must_exist=False)
|
||||||
if not os.path.isdir(m.colls_dir):
|
if not os.path.isdir(m.colls_dir):
|
||||||
@ -127,12 +138,13 @@ class ReplayCli(BaseCli):
|
|||||||
import sys
|
import sys
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
else:
|
else:
|
||||||
msg = 'Auto-Indexing Enabled on "{0}"'
|
msg = 'Auto-Indexing Enabled on "{0}", checking every {1} secs'
|
||||||
logging.info(msg.format(m.colls_dir))
|
logging.info(msg.format(m.colls_dir, self.r.auto_interval))
|
||||||
m.autoindex(do_loop=False)
|
m.autoindex(interval=self.r.auto_interval, do_loop=False)
|
||||||
|
|
||||||
super(ReplayCli, self).run()
|
super(ReplayCli, self).run()
|
||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
#=============================================================================
|
||||||
class CdxCli(ReplayCli): #pragma: no cover
|
class CdxCli(ReplayCli): #pragma: no cover
|
||||||
def load(self):
|
def load(self):
|
||||||
@ -161,6 +173,18 @@ class WebaggCli(BaseCli):
|
|||||||
self.run_gevent()
|
self.run_gevent()
|
||||||
|
|
||||||
|
|
||||||
|
#=============================================================================
|
||||||
|
class NewWaybackCli(ReplayCli):
|
||||||
|
def load(self):
|
||||||
|
from pywb.apps.newwayback import application
|
||||||
|
return application
|
||||||
|
|
||||||
|
def run(self):
|
||||||
|
self.r.server = 'gevent'
|
||||||
|
super(NewWaybackCli, self).run()
|
||||||
|
#self.run_gevent()
|
||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
#=============================================================================
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
wayback()
|
wayback()
|
||||||
|
6
pywb/apps/newwayback.py
Normal file
6
pywb/apps/newwayback.py
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
from gevent.monkey import patch_all; patch_all()
|
||||||
|
from pywb.urlrewrite.frontendapp import FrontEndApp
|
||||||
|
|
||||||
|
application = FrontEndApp()
|
||||||
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
|||||||
from gevent.monkey import patch_all; patch_all()
|
from gevent.monkey import patch_all; patch_all()
|
||||||
from pywb.webagg.autoapp import AutoConfigApp
|
from pywb.webagg.autoapp import AutoConfigApp
|
||||||
|
|
||||||
application = AutoConfigApp().init()
|
application = AutoConfigApp()
|
||||||
|
|
||||||
|
|
||||||
|
@ -4,6 +4,7 @@ from pywb.utils.loaders import extract_post_query, append_post_query
|
|||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
import pprint
|
import pprint
|
||||||
import re
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
@ -246,6 +247,10 @@ class WbResponse(object):
|
|||||||
|
|
||||||
return WbResponse(status_headers, value=[encoded_text])
|
return WbResponse(status_headers, value=[encoded_text])
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def json_response(obj, status='200 OK', content_type='application/json; charset=utf-8'):
|
||||||
|
return WbResponse.text_response(json.dumps(obj), status, content_type)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def redir_response(location, status='302 Redirect', headers=None):
|
def redir_response(location, status='302 Redirect', headers=None):
|
||||||
redir_headers = [('Location', location), ('Content-Length', '0')]
|
redir_headers = [('Location', location), ('Content-Length', '0')]
|
||||||
|
@ -133,10 +133,6 @@ DEFAULT_CONFIG_FILE = 'config.yaml'
|
|||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
def init_app(init_func, load_yaml=True, config_file=None, config=None):
|
def init_app(init_func, load_yaml=True, config_file=None, config=None):
|
||||||
logging.basicConfig(format='%(asctime)s: [%(levelname)s]: %(message)s',
|
|
||||||
level=logging.DEBUG)
|
|
||||||
logging.debug('')
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
config = config or {}
|
config = config or {}
|
||||||
if load_yaml:
|
if load_yaml:
|
||||||
|
123
pywb/urlrewrite/frontendapp.py
Normal file
123
pywb/urlrewrite/frontendapp.py
Normal file
@ -0,0 +1,123 @@
|
|||||||
|
from gevent.monkey import patch_all; patch_all()
|
||||||
|
|
||||||
|
#from bottle import run, Bottle, request, response, debug
|
||||||
|
from werkzeug.routing import Map, Rule
|
||||||
|
from werkzeug.exceptions import HTTPException
|
||||||
|
from werkzeug.wsgi import pop_path_info
|
||||||
|
|
||||||
|
from pywb.webagg.autoapp import AutoConfigApp
|
||||||
|
from pywb.webapp.handlers import StaticHandler
|
||||||
|
|
||||||
|
from pywb.framework.wbrequestresponse import WbResponse
|
||||||
|
|
||||||
|
from pywb.urlrewrite.geventserver import GeventServer
|
||||||
|
from pywb.urlrewrite.templateview import BaseInsertView
|
||||||
|
|
||||||
|
from pywb.urlrewrite.rewriterapp import RewriterApp, UpstreamException
|
||||||
|
import traceback
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
class NewWbRequest(object):
|
||||||
|
def __init__(self, env, wb_url_str, full_prefix):
|
||||||
|
self.env = env
|
||||||
|
self.wb_url_str = wb_url_str
|
||||||
|
self.full_prefix = full_prefix
|
||||||
|
self.user_metadata = {}
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
class FrontEndApp(RewriterApp):
|
||||||
|
def __init__(self, config_file='./config.yaml', custom_config=None):
|
||||||
|
super(FrontEndApp, self).__init__(True)
|
||||||
|
|
||||||
|
self.debug = True
|
||||||
|
self.webagg = AutoConfigApp(config_file=config_file,
|
||||||
|
custom_config=custom_config)
|
||||||
|
|
||||||
|
self.webagg_server = GeventServer(self.webagg, port=0)
|
||||||
|
|
||||||
|
self.static_handler = StaticHandler('pywb/static/')
|
||||||
|
|
||||||
|
self.url_map = Map()
|
||||||
|
self.url_map.add(Rule('/static/__pywb/<path:filepath>', endpoint=self.serve_static))
|
||||||
|
self.url_map.add(Rule('/<coll>/', endpoint=self.serve_coll_page))
|
||||||
|
self.url_map.add(Rule('/<coll>/<path:url>', endpoint=self.serve_content))
|
||||||
|
self.url_map.add(Rule('/_coll_info.json', endpoint=self.serve_listing))
|
||||||
|
|
||||||
|
self.paths = self.get_upstream_paths(self.webagg_server.port)
|
||||||
|
|
||||||
|
def get_upstream_paths(self, port):
|
||||||
|
return {'replay-dyn': 'http://localhost:%s/_/resource/postreq?param.coll={coll}' % port,
|
||||||
|
'replay-fixed': 'http://localhost:%s/{coll}/resource/postreq' % port
|
||||||
|
}
|
||||||
|
|
||||||
|
def serve_static(self, environ, filepath=''):
|
||||||
|
return self.static_handler(NewWbRequest(environ, filepath, ''))
|
||||||
|
|
||||||
|
def serve_coll_page(self, environ, coll):
|
||||||
|
view = BaseInsertView(self.jinja_env, 'search.html')
|
||||||
|
wbrequest = NewWbRequest(environ, '', '/')
|
||||||
|
return WbResponse.text_response(view.render_to_string(environ, wbrequest=wbrequest),
|
||||||
|
content_type='text/html; charset="utf-8"')
|
||||||
|
|
||||||
|
def serve_listing(self, environ):
|
||||||
|
result = {'fixed': self.webagg.list_fixed_routes(),
|
||||||
|
'dynamic': self.webagg.list_dynamic_routes()
|
||||||
|
}
|
||||||
|
|
||||||
|
return WbResponse.json_response(result)
|
||||||
|
|
||||||
|
def serve_content(self, environ, coll='', url=''):
|
||||||
|
pop_path_info(environ)
|
||||||
|
wb_url = self.get_wburl(environ)
|
||||||
|
|
||||||
|
kwargs = {'coll': coll}
|
||||||
|
|
||||||
|
if coll in self.webagg.list_fixed_routes():
|
||||||
|
kwargs['type'] = 'replay-fixed'
|
||||||
|
else:
|
||||||
|
kwargs['type'] = 'replay-dyn'
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = self.render_content(wb_url, kwargs, environ)
|
||||||
|
except UpstreamException as ue:
|
||||||
|
response = self.handle_error(environ, ue)
|
||||||
|
|
||||||
|
return response
|
||||||
|
|
||||||
|
def __call__(self, environ, start_response):
|
||||||
|
urls = self.url_map.bind_to_environ(environ)
|
||||||
|
try:
|
||||||
|
endpoint, args = urls.match()
|
||||||
|
except HTTPException as e:
|
||||||
|
return e(environ, start_response)
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = endpoint(environ, **args)
|
||||||
|
|
||||||
|
return response(environ, start_response)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
if self.debug:
|
||||||
|
traceback.print_exc()
|
||||||
|
|
||||||
|
#message = 'Internal Error: ' + str(e)
|
||||||
|
#status = 500
|
||||||
|
#return self.send_error({}, start_response,
|
||||||
|
# message=message,
|
||||||
|
# status=status)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def create_app(cls, port):
|
||||||
|
app = FrontEndApp()
|
||||||
|
app_server = GeventServer(app, port=port, hostname='0.0.0.0')
|
||||||
|
return app_server
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
if __name__ == "__main__":
|
||||||
|
app_server = FrontEndApp.create_app(port=8080)
|
||||||
|
app_server.join()
|
||||||
|
|
||||||
|
|
36
pywb/urlrewrite/geventserver.py
Normal file
36
pywb/urlrewrite/geventserver.py
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
from gevent.wsgi import WSGIServer
|
||||||
|
from gevent import spawn
|
||||||
|
import logging
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
class GeventServer(object):
|
||||||
|
def __init__(self, app, port=0, hostname='localhost', handler_class=None):
|
||||||
|
self.port = port
|
||||||
|
self.make_server(app, port, hostname, handler_class)
|
||||||
|
|
||||||
|
def stop(self):
|
||||||
|
if self.server:
|
||||||
|
logging.debug('stopping server on ' + str(self.port))
|
||||||
|
self.server.stop()
|
||||||
|
|
||||||
|
def _run(self, server, port):
|
||||||
|
logging.debug('starting server on ' + str(port))
|
||||||
|
try:
|
||||||
|
server.serve_forever()
|
||||||
|
except Exception as e:
|
||||||
|
logging.debug('server failed to start on ' + str(port))
|
||||||
|
traceback.print_exc()
|
||||||
|
|
||||||
|
def make_server(self, app, port, hostname, handler_class):
|
||||||
|
server = WSGIServer((hostname, port), app, handler_class=handler_class)
|
||||||
|
server.init_socket()
|
||||||
|
self.port = server.address[1]
|
||||||
|
|
||||||
|
self.server = server
|
||||||
|
self.ge = spawn(self._run, server, self.port)
|
||||||
|
|
||||||
|
def join(self):
|
||||||
|
self.ge.join()
|
||||||
|
|
||||||
|
|
@ -41,7 +41,7 @@ class RewriterApp(object):
|
|||||||
self.loader = ArcWarcRecordLoader()
|
self.loader = ArcWarcRecordLoader()
|
||||||
|
|
||||||
config = config or {}
|
config = config or {}
|
||||||
self.paths = config['url_templates']
|
self.paths = {}
|
||||||
|
|
||||||
self.framed_replay = framed_replay
|
self.framed_replay = framed_replay
|
||||||
self.frame_mod = ''
|
self.frame_mod = ''
|
||||||
@ -395,13 +395,14 @@ class RewriterApp(object):
|
|||||||
|
|
||||||
def get_base_url(self, wb_url, kwargs):
|
def get_base_url(self, wb_url, kwargs):
|
||||||
type = kwargs.get('type')
|
type = kwargs.get('type')
|
||||||
return self.paths[type]
|
return self.paths[type].format(**kwargs)
|
||||||
|
|
||||||
def get_upstream_url(self, wb_url, kwargs, params):
|
def get_upstream_url(self, wb_url, kwargs, params):
|
||||||
base_url = self.get_base_url(wb_url, kwargs)
|
base_url = self.get_base_url(wb_url, kwargs)
|
||||||
param_str = urlencode(params, True)
|
param_str = urlencode(params, True)
|
||||||
if param_str:
|
if param_str:
|
||||||
base_url += '&' + param_str
|
q_char = '&' if '?' in base_url else '?'
|
||||||
|
base_url += q_char + param_str
|
||||||
return base_url
|
return base_url
|
||||||
|
|
||||||
def get_cookie_key(self, kwargs):
|
def get_cookie_key(self, kwargs):
|
||||||
|
@ -1,74 +0,0 @@
|
|||||||
from gevent.monkey import patch_all; patch_all()
|
|
||||||
|
|
||||||
from bottle import run, Bottle, request, response, debug
|
|
||||||
|
|
||||||
from six.moves.urllib.parse import quote
|
|
||||||
|
|
||||||
from pywb.utils.loaders import LocalFileLoader
|
|
||||||
|
|
||||||
import mimetypes
|
|
||||||
import redis
|
|
||||||
|
|
||||||
from pywb.urlrewrite.rewriterapp import RewriterApp
|
|
||||||
from pywb.urlrewrite.cookies import CookieTracker
|
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
class RWApp(RewriterApp):
|
|
||||||
def __init__(self, upstream_urls, cookie_key_templ, redis):
|
|
||||||
config = {}
|
|
||||||
config['url_templates'] = upstream_urls
|
|
||||||
|
|
||||||
self.cookie_key_templ = cookie_key_templ
|
|
||||||
self.app = Bottle()
|
|
||||||
self.block_loader = LocalFileLoader()
|
|
||||||
self.init_routes()
|
|
||||||
|
|
||||||
super(RWApp, self).__init__(True, config=config)
|
|
||||||
|
|
||||||
self.cookie_tracker = CookieTracker(redis)
|
|
||||||
|
|
||||||
self.orig_error_handler = self.app.default_error_handler
|
|
||||||
self.app.default_error_handler = self.err_handler
|
|
||||||
|
|
||||||
def err_handler(self, exc):
|
|
||||||
print(exc)
|
|
||||||
import traceback
|
|
||||||
traceback.print_exc()
|
|
||||||
return self.orig_error_handler(exc)
|
|
||||||
|
|
||||||
def get_cookie_key(self, kwargs):
|
|
||||||
return self.cookie_key_templ.format(**kwargs)
|
|
||||||
|
|
||||||
def init_routes(self):
|
|
||||||
@self.app.get('/static/__pywb/<filepath:path>')
|
|
||||||
def server_static(filepath):
|
|
||||||
data = self.block_loader.load('pywb/static/' + filepath)
|
|
||||||
guessed = mimetypes.guess_type(filepath)
|
|
||||||
if guessed[0]:
|
|
||||||
response.headers['Content-Type'] = guessed[0]
|
|
||||||
|
|
||||||
return data
|
|
||||||
|
|
||||||
self.app.mount('/live/', self.call_with_params(type='live'))
|
|
||||||
self.app.mount('/record/', self.call_with_params(type='record'))
|
|
||||||
self.app.mount('/replay/', self.call_with_params(type='replay'))
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def create_app(replay_port=8080, record_port=8010):
|
|
||||||
upstream_urls = {'live': 'http://localhost:%s/live/resource/postreq?' % replay_port,
|
|
||||||
'record': 'http://localhost:%s/live/resource/postreq?' % record_port,
|
|
||||||
'replay': 'http://localhost:%s/replay/resource/postreq?' % replay_port,
|
|
||||||
}
|
|
||||||
|
|
||||||
r = redis.StrictRedis.from_url('redis://localhost/2')
|
|
||||||
rwapp = RWApp(upstream_urls, 'cookies:', r)
|
|
||||||
return rwapp
|
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
if __name__ == "__main__":
|
|
||||||
application = RWApp.create_app()
|
|
||||||
application.app.run(port=8090, server='gevent')
|
|
||||||
|
|
||||||
|
|
@ -1,24 +1,26 @@
|
|||||||
|
from gevent import monkey; monkey.patch_all(thread=False)
|
||||||
|
|
||||||
from pywb.webagg.test.testutils import LiveServerTests, BaseTestClass
|
from pywb.webagg.test.testutils import LiveServerTests, BaseTestClass
|
||||||
from pywb.webagg.test.testutils import FakeRedisTests
|
from pywb.webagg.test.testutils import FakeRedisTests
|
||||||
|
|
||||||
from .simpleapp import RWApp, debug
|
from pywb.urlrewrite.frontendapp import FrontEndApp
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import webtest
|
import webtest
|
||||||
|
|
||||||
|
|
||||||
class TestRewriter(LiveServerTests, FakeRedisTests, BaseTestClass):
|
LIVE_CONFIG = {'collections': {'live': '$live'}}
|
||||||
|
|
||||||
|
|
||||||
|
class TestRewriter(FakeRedisTests, BaseTestClass):
|
||||||
@classmethod
|
@classmethod
|
||||||
def setup_class(cls):
|
def setup_class(cls):
|
||||||
super(TestRewriter, cls).setup_class()
|
super(TestRewriter, cls).setup_class()
|
||||||
#cls.upstream_url = 'http://localhost:{0}'.format(cls.server.port)
|
|
||||||
#cls.upstream_url += '/{type}/resource/postreq?url={url}&closest={closest}'
|
|
||||||
#cls.app = RWApp(cls.upstream_url)
|
|
||||||
|
|
||||||
cls.app = RWApp.create_app(replay_port=cls.server.port)
|
#cls.app = RWApp.create_app(replay_port=cls.server.port)
|
||||||
cls.testapp = webtest.TestApp(cls.app.app)
|
#cls.testapp = webtest.TestApp(cls.app.app)
|
||||||
debug(True)
|
cls.testapp = webtest.TestApp(FrontEndApp(custom_config=LIVE_CONFIG,
|
||||||
|
config_file=None))
|
||||||
|
|
||||||
def test_replay(self):
|
def test_replay(self):
|
||||||
resp = self.testapp.get('/live/mp_/http://example.com/')
|
resp = self.testapp.get('/live/mp_/http://example.com/')
|
||||||
@ -36,8 +38,8 @@ class TestRewriter(LiveServerTests, FakeRedisTests, BaseTestClass):
|
|||||||
|
|
||||||
assert 'wbinfo.capture_url = "http://example.com/"' in resp.text
|
assert 'wbinfo.capture_url = "http://example.com/"' in resp.text
|
||||||
|
|
||||||
def test_cookie_track_1(self):
|
#def test_cookie_track_1(self):
|
||||||
resp = self.testapp.get('/live/mp_/https://twitter.com/')
|
# resp = self.testapp.get('/live/mp_/https://twitter.com/')
|
||||||
|
|
||||||
assert resp.headers['set-cookie'] != None
|
# assert resp.headers['set-cookie'] != None
|
||||||
|
|
||||||
|
@ -31,34 +31,37 @@ SOURCE_LIST = [LiveIndexSource,
|
|||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
class AutoConfigApp(ResAggApp):
|
class AutoConfigApp(ResAggApp):
|
||||||
def __init__(self, config_file='./config.yaml'):
|
AUTO_DIR_INDEX_PATH = '{coll}/indexes/'
|
||||||
|
AUTO_DIR_ARCHIVE_PATH = '{coll}/archive/'
|
||||||
|
|
||||||
|
def __init__(self, config_file='./config.yaml', custom_config=None):
|
||||||
config = load_yaml_config(DEFAULT_CONFIG)
|
config = load_yaml_config(DEFAULT_CONFIG)
|
||||||
|
|
||||||
try:
|
if config_file:
|
||||||
new_config = load_config('PYWB_CONFIG_FILE', config_file)
|
try:
|
||||||
except Exception as e:
|
custom_config = load_config('PYWB_CONFIG_FILE', config_file)
|
||||||
new_config = {}
|
except Exception as e:
|
||||||
print(e)
|
if not custom_config:
|
||||||
|
custom_config = {'debug': True}
|
||||||
|
print(e)
|
||||||
|
|
||||||
if new_config:
|
if custom_config:
|
||||||
config.update(new_config)
|
config.update(custom_config)
|
||||||
|
|
||||||
super(AutoConfigApp, self).__init__(debug=config.get('debug', False))
|
super(AutoConfigApp, self).__init__(debug=config.get('debug', False))
|
||||||
self.config = config
|
self.config = config
|
||||||
|
|
||||||
def init(self):
|
|
||||||
if self.config.get('enable_auto_colls', True):
|
if self.config.get('enable_auto_colls', True):
|
||||||
auto_handler = self.load_auto_colls()
|
auto_handler = self.load_auto_colls()
|
||||||
self.add_route('/_', auto_handler)
|
self.add_route('/_', auto_handler)
|
||||||
|
|
||||||
routes = self.load_colls()
|
self.fixed_routes = self.load_colls()
|
||||||
for name, route in iteritems(routes):
|
|
||||||
|
for name, route in iteritems(self.fixed_routes):
|
||||||
self.add_route('/' + name, route)
|
self.add_route('/' + name, route)
|
||||||
|
|
||||||
self._add_simple_route('/<coll>-cdx', self.cdx_compat)
|
self._add_simple_route('/<coll>-cdx', self.cdx_compat)
|
||||||
|
|
||||||
return self
|
|
||||||
|
|
||||||
def _lookup(self, environ, path):
|
def _lookup(self, environ, path):
|
||||||
urls = self.url_map.bind(environ['HTTP_HOST'], path_info=path)
|
urls = self.url_map.bind(environ['HTTP_HOST'], path_info=path)
|
||||||
|
|
||||||
@ -82,21 +85,37 @@ class AutoConfigApp(ResAggApp):
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
def load_auto_colls(self):
|
def load_auto_colls(self):
|
||||||
root_dir = self.config.get('collections_root', '')
|
self.root_dir = self.config.get('collections_root', '')
|
||||||
if not root_dir:
|
if not self.root_dir:
|
||||||
print('No Root Dir, Skip Auto Colls!')
|
print('No Root Dir, Skip Auto Colls!')
|
||||||
return
|
return
|
||||||
|
|
||||||
indexes_templ = os.path.join('{coll}', 'indexes') + os.path.sep
|
#indexes_templ = os.path.join('{coll}', 'indexes') + os.path.sep
|
||||||
dir_source = CacheDirectoryIndexSource(root_dir, indexes_templ)
|
indexes_templ = self.AUTO_DIR_INDEX_PATH.replace('/', os.path.sep)
|
||||||
|
dir_source = CacheDirectoryIndexSource(self.root_dir, indexes_templ)
|
||||||
|
|
||||||
archive_templ = self.config.get('archive_paths')
|
archive_templ = self.config.get('archive_paths')
|
||||||
if not archive_templ:
|
if not archive_templ:
|
||||||
archive_templ = os.path.join('.', root_dir, '{coll}', 'archive') + os.path.sep
|
archive_templ = self.AUTO_DIR_ARCHIVE_PATH.replace('/', os.path.sep)
|
||||||
|
archive_templ = os.path.join(self.root_dir, archive_templ)
|
||||||
|
#archive_templ = os.path.join('.', root_dir, '{coll}', 'archive') + os.path.sep
|
||||||
|
|
||||||
handler = DefaultResourceHandler(dir_source, archive_templ)
|
handler = DefaultResourceHandler(dir_source, archive_templ)
|
||||||
|
|
||||||
return handler
|
return handler
|
||||||
|
|
||||||
|
def list_fixed_routes(self):
|
||||||
|
return list(self.fixed_routes.keys())
|
||||||
|
|
||||||
|
def list_dynamic_routes(self):
|
||||||
|
if not self.root_dir:
|
||||||
|
return []
|
||||||
|
|
||||||
|
try:
|
||||||
|
return os.listdir(self.root_dir)
|
||||||
|
except IOError:
|
||||||
|
return []
|
||||||
|
|
||||||
def load_colls(self):
|
def load_colls(self):
|
||||||
routes = {}
|
routes = {}
|
||||||
|
|
||||||
|
@ -17,6 +17,10 @@ class TestAutoConfigApp(TempDirTests, BaseTestClass):
|
|||||||
os.mkdir('./local')
|
os.mkdir('./local')
|
||||||
os.mkdir('./local/indexes')
|
os.mkdir('./local/indexes')
|
||||||
|
|
||||||
|
os.mkdir('collections')
|
||||||
|
os.mkdir('collections/auto1')
|
||||||
|
os.mkdir('collections/auto2')
|
||||||
|
|
||||||
with open(os.path.join('local', 'indexes', 'file.cdxj'), 'a') as fh:
|
with open(os.path.join('local', 'indexes', 'file.cdxj'), 'a') as fh:
|
||||||
fh.write('foo')
|
fh.write('foo')
|
||||||
|
|
||||||
@ -28,8 +32,6 @@ class TestAutoConfigApp(TempDirTests, BaseTestClass):
|
|||||||
|
|
||||||
cls.loader = AutoConfigApp(os.path.join(cls.get_curr_dir(), 'test_autoapp.yaml'))
|
cls.loader = AutoConfigApp(os.path.join(cls.get_curr_dir(), 'test_autoapp.yaml'))
|
||||||
|
|
||||||
cls.colls = cls.loader.load_colls()
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def teardown_class(cls):
|
def teardown_class(cls):
|
||||||
os.chdir(cls.orig_cwd)
|
os.chdir(cls.orig_cwd)
|
||||||
@ -41,11 +43,17 @@ class TestAutoConfigApp(TempDirTests, BaseTestClass):
|
|||||||
|
|
||||||
def _get_sources(self, coll_name='', handler=None):
|
def _get_sources(self, coll_name='', handler=None):
|
||||||
if not handler:
|
if not handler:
|
||||||
handler = self.colls.get(coll_name)
|
handler = self.loader.fixed_routes.get(coll_name)
|
||||||
assert isinstance(handler, ResourceHandler)
|
assert isinstance(handler, ResourceHandler)
|
||||||
assert isinstance(handler.index_source, BaseSourceListAggregator)
|
assert isinstance(handler.index_source, BaseSourceListAggregator)
|
||||||
return handler.index_source.sources
|
return handler.index_source.sources
|
||||||
|
|
||||||
|
def test_list_static(self):
|
||||||
|
assert len(self.loader.list_fixed_routes()) == 12
|
||||||
|
|
||||||
|
def test_list_dynamic(self):
|
||||||
|
assert self.loader.list_dynamic_routes() == ['auto1', 'auto2']
|
||||||
|
|
||||||
def test_remote_cdx(self):
|
def test_remote_cdx(self):
|
||||||
sources = self._get_sources('ait')
|
sources = self._get_sources('ait')
|
||||||
assert isinstance(sources['ait'], RemoteIndexSource)
|
assert isinstance(sources['ait'], RemoteIndexSource)
|
||||||
@ -90,7 +98,7 @@ class TestAutoConfigApp(TempDirTests, BaseTestClass):
|
|||||||
assert isinstance(sources['local_file'], FileIndexSource)
|
assert isinstance(sources['local_file'], FileIndexSource)
|
||||||
|
|
||||||
def test_sequence(self):
|
def test_sequence(self):
|
||||||
seq = self.colls.get('many_seq')
|
seq = self.loader.fixed_routes.get('many_seq')
|
||||||
assert isinstance(seq, HandlerSeq)
|
assert isinstance(seq, HandlerSeq)
|
||||||
|
|
||||||
assert len(seq.handlers) == 3
|
assert len(seq.handlers) == 3
|
||||||
|
1
setup.py
1
setup.py
@ -107,6 +107,7 @@ setup(
|
|||||||
cdx-indexer = pywb.warc.cdxindexer:main
|
cdx-indexer = pywb.warc.cdxindexer:main
|
||||||
wb-manager = pywb.manager.manager:main_wrap_exc
|
wb-manager = pywb.manager.manager:main_wrap_exc
|
||||||
webagg-server = pywb.apps.cli:webagg
|
webagg-server = pywb.apps.cli:webagg
|
||||||
|
new-wayback = pywb.apps.cli:new_wayback
|
||||||
""",
|
""",
|
||||||
classifiers=[
|
classifiers=[
|
||||||
'Development Status :: 4 - Beta',
|
'Development Status :: 4 - Beta',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user