diff --git a/CHANGES.rst b/CHANGES.rst
index c5f2f7ba..547a8f52 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -4,14 +4,12 @@ pywb 0.2.2 changelist
* Generate cdx indexs via command-line `cdx-indexer` script. Optionally sorting, and output to either a single combined file or a file per-directory.
Refer to ``cdx-indexer -h`` for more info.
-* Initial support for prefix url queries, eg: http://localhost:8080/pywb/*/http://example.com\* to query all captures from http://example.com
+* Initial support for prefix url queries, eg: http://localhost:8080/pywb/\*/http://example.com\* to query all captures from http://example.com
* Support for optional LXML html-based parser for fastest possible parsing. If lxml is installed on the system and via ``pip install lxml``, lxml parser is enabled by default.
(This can be turned off by setting ``use_lxml_parser: false`` in the config)
-* Memento: TimeMaps in ``application/link-format`` provided via the ``/timemap/*/`` query.. eg: http://localhost:8080/pywb/timemap/\*/http://example.com
-
-* Basic support for `Memento Protocol RFC7089 `_ Memento, TimeGate and now TimeMaps.
+* Full support for `Memento Protocol RFC7089 `_ Memento, TimeGate and TimeMaps. TimeMaps in ``application/link-format`` are provided via the ``/timemap/*/`` query, e.g.: http://localhost:8080/pywb/timemap/\*/http://example.com
* pywb now features new `domain-specific rules `_ which are applied to resolve and render certain difficult and dynamic content, in order to make accurate web replay work.
This ruleset will be under further iteration to address further challenges as the web evoles.
diff --git a/INSTALL.rst b/INSTALL.rst
index 59a4c930..db36704b 100644
--- a/INSTALL.rst
+++ b/INSTALL.rst
@@ -132,7 +132,7 @@ Sample Setup
pywb is configurable via yaml.
-The simplest `config.yaml `_ is roughly as follows:
+The simplest `config.yaml `_ is roughly as follows:
::
@@ -145,7 +145,7 @@ The simplest `config.yaml `_ contains
+(The latest version of `config.yaml `_ contains
additional documentation and specifies all the optional properties, such
as ui filenames for Jinja2/html template files.)
diff --git a/README.rst b/README.rst
index 0e574021..15cf7eab 100644
--- a/README.rst
+++ b/README.rst
@@ -43,7 +43,7 @@ Given an archive of warcs at ``myarchive/warcs``
2. Run ``cdx-indexer --sort myarchive/cdx myarchive/warcs`` to generate .cdx files for each
warc/arc file in ``myarchive/warcs``
-3. Edit ``config.yaml`` to contain the following. You may replace ``pywb`` with
+3. Edit ``config.yaml`` to contain the following. You may replace ``pywb`` with
a name of your choice -- it will be the path to your collection. (Multiple collections can be added
for different sets of .cdx files as well)
@@ -71,7 +71,7 @@ If you already have .cdx files for your archive, you can skip the first two step
pywb recommends using `SURT `_ (Sort-friendly URI Reordering Transform)
sorted urls and the ``cdx-indexer`` automatically generates indexs in this format.
-However, pywb is compatible with regular url keyed indexs.
+However, pywb is compatible with regular url keyed indexes also.
If you would like to use non-SURT ordered .cdx files, simply add this field to the config:
::
@@ -112,7 +112,7 @@ Additional Documentation
~~~~~~~~~~~~~~~~~~~~~~~~
- For additional/up-to-date configuration details, consult the current
- `config.yaml `_
+ `config.yaml `_
- The `wiki `_ will have
additional technical documentation about various aspects of pywb
diff --git a/pywb/configs/config.yaml b/config.yaml
similarity index 97%
rename from pywb/configs/config.yaml
rename to config.yaml
index 3a535454..d17d4ff4 100644
--- a/pywb/configs/config.yaml
+++ b/config.yaml
@@ -98,9 +98,6 @@ enable_cdx_api: true
# set to false to disable
#domain_specific_rules: rules.yaml
-# Permissions checker
-#perms_checker: !!python/object/new:pywb.cdx.perms.AllowAllPerms []
-
# Memento support, enable
enable_memento: true
diff --git a/pywb/apps/cdx_server.py b/pywb/apps/cdx_server.py
index 8dcc4849..c87ca3e2 100644
--- a/pywb/apps/cdx_server.py
+++ b/pywb/apps/cdx_server.py
@@ -1,21 +1,18 @@
from pywb.framework.wsgi_wrappers import init_app, start_wsgi_server
-from pywb.core.cdx_api_handler import create_cdx_server_app
+#from pywb.core.cdx_api_handler import create_cdx_server_app
+from pywb.core.pywb_init import create_cdx_server_app
#=================================================================
# init cdx server app
#=================================================================
-# cdx-server only config
-DEFAULT_CONFIG = 'pywb/configs/cdx-server-config.yaml'
-
application = init_app(create_cdx_server_app,
- load_yaml=True,
- config_file=DEFAULT_CONFIG)
+ load_yaml=True)
def main():
- start_wsgi_server(application, 'CDX Server')
+ start_wsgi_server(application, 'CDX Server', default_port=8090)
if __name__ == "__main__":
main()
diff --git a/pywb/cdx/test/test_cdxserver.py b/pywb/cdx/test/test_cdxserver.py
index 61a4cce8..3514efb6 100644
--- a/pywb/cdx/test/test_cdxserver.py
+++ b/pywb/cdx/test/test_cdxserver.py
@@ -14,7 +14,7 @@ from pywb import get_test_dir
TEST_CDX_DIR = get_test_dir() + 'cdx/'
-CDX_SERVER_URL = 'http://localhost/cdx'
+CDX_SERVER_URL = 'http://localhost/pywb-cdx'
CDX_RESULT = [
('urlkey', 'com,example)/'),
diff --git a/pywb/configs/cdx-server-config.yaml b/pywb/configs/cdx-server-config.yaml
deleted file mode 100644
index 293e71a1..00000000
--- a/pywb/configs/cdx-server-config.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-#CDX Server WSGI App Config
-index_paths: ./sample_archive/cdx/
-port: 8090
diff --git a/pywb/core/cdx_api_handler.py b/pywb/core/cdx_api_handler.py
index ca5f317c..e54de959 100644
--- a/pywb/core/cdx_api_handler.py
+++ b/pywb/core/cdx_api_handler.py
@@ -49,17 +49,3 @@ class CDXAPIHandler(BaseHandler):
params['output'] = 'text'
return params
-
-
-#=================================================================
-def create_cdx_server_app(config):
- """
- Create a cdx server config to be wrapped in a wsgi app
- Currently using single access point '/cdx' to expose the api
- TODO: more complex example with multiple collections?
- """
- query_handler = QueryHandler.init_from_config(config)
-
- port = config.get('port')
- routes = [Route('cdx', CDXAPIHandler(query_handler))]
- return ArchivalRouter(routes, port=port)
diff --git a/pywb/core/pywb_init.py b/pywb/core/pywb_init.py
index 3b63f759..520d742b 100644
--- a/pywb/core/pywb_init.py
+++ b/pywb/core/pywb_init.py
@@ -96,13 +96,67 @@ def create_wb_handler(query_handler, config, ds_rules_file=None):
wb_handler = wb_handler_class(
query_handler,
replayer,
- #html_view=html_view,
search_view=search_view,
)
return wb_handler
+#=================================================================
+def init_collection(value, config):  # build one collection's config chain and its QueryHandler
+ if isinstance(value, str):  # a bare string is shorthand for the collection's index path(s)
+ value = {'index_paths': value}
+
+ route_config = DictChain(value, config)  # presumably collection keys shadow the global config -- confirm in DictChain
+
+ ds_rules_file = route_config.get('domain_specific_rules', None)
+
+ html_view = load_query_template(config.get('query_html'),  # read from the global config, not route_config
+ 'Captures Page')
+
+ query_handler = QueryHandler.init_from_config(route_config,
+ ds_rules_file,
+ html_view)
+
+ return route_config, query_handler, ds_rules_file  # callers unpack this 3-tuple
+
+
+#=================================================================
+def add_cdx_api_handler(name, cdx_api_suffix, routes, query_handler):  # append a cdx api Route to `routes` in place
+ # if bool, use -cdx suffix, else use custom string
+ # as the suffix
+ if isinstance(cdx_api_suffix, bool):  # type check, not truthiness: False also yields '-cdx'
+ name += '-cdx'
+ else:
+ name += str(cdx_api_suffix)
+
+ routes.append(Route(name, CDXAPIHandler(query_handler)))  # nothing is returned; the caller's list is mutated
+
+
+#=================================================================
+def create_cdx_server_app(passed_config):
+ """
+ Create a cdx server api-only app
+ For each collection, create a /<coll>-cdx access point
+ which follows the cdx api
+ """
+ config = DictChain(passed_config, DEFAULTS)  # presumably falls back to DEFAULTS for unset keys -- confirm
+
+ collections = config.get('collections')  # NOTE(review): assumed to be a dict; a missing key is not handled here
+
+ routes = []
+
+ for name, value in collections.iteritems():  # dict.iteritems() exists on Python 2 only
+ result = init_collection(value, config)
+ route_config, query_handler, ds_rules_file = result  # ds_rules_file is not used further in this function
+
+ cdx_api_suffix = route_config.get('enable_cdx_api', True)  # defaults to True here (create_wb_router defaults to False)
+
+ add_cdx_api_handler(name, cdx_api_suffix, routes, query_handler)  # called unconditionally for every collection
+
+ return ArchivalRouter(routes)  # no port argument is passed to the router
+
+
#=================================================================
def create_wb_router(passed_config={}):
@@ -131,24 +185,9 @@ def create_wb_router(passed_config={}):
use_lxml_parser()
for name, value in collections.iteritems():
- if isinstance(value, str):
- value = {'index_paths': value}
- route_config = DictChain(value, config)
-
- ds_rules_file = route_config.get('domain_specific_rules', None)
-
- #perms_policy = route_config.get('perms_policy', None)
- #
- #cdx_server = create_cdx_server(route_config,
- # ds_rules_file)
- #
- html_view = load_query_template(config.get('query_html'),
- 'Captures Page')
-
- query_handler = QueryHandler.init_from_config(route_config,
- ds_rules_file,
- html_view)
+ result = init_collection(value, config)
+ route_config, query_handler, ds_rules_file = result
wb_handler = create_wb_handler(
query_handler=query_handler,
@@ -168,13 +207,7 @@ def create_wb_router(passed_config={}):
cdx_api_suffix = route_config.get('enable_cdx_api', False)
if cdx_api_suffix:
- # if bool, use -cdx suffix, else use custom string
- # as the suffix
- if isinstance(cdx_api_suffix, bool):
- cdx_api_suffix = '-cdx'
-
- routes.append(Route(name + str(cdx_api_suffix),
- CDXAPIHandler(query_handler)))
+ add_cdx_api_handler(name, cdx_api_suffix, routes, query_handler)
if config.get('debug_echo_env', False):
routes.append(Route('echo_env', DebugEchoEnvHandler()))
diff --git a/pywb/framework/wsgi_wrappers.py b/pywb/framework/wsgi_wrappers.py
index 8a42e101..7401f89e 100644
--- a/pywb/framework/wsgi_wrappers.py
+++ b/pywb/framework/wsgi_wrappers.py
@@ -103,7 +103,7 @@ def handle_exception(env, wb_router, exc, print_trace):
status=status)
#=================================================================
-DEFAULT_CONFIG_FILE = 'pywb/configs/config.yaml'
+DEFAULT_CONFIG_FILE = 'config.yaml'
#=================================================================
@@ -139,7 +139,7 @@ def init_app(init_func, load_yaml=True, config_file=None):
#=================================================================
-def start_wsgi_server(the_app, name): # pragma: no cover
+def start_wsgi_server(the_app, name, default_port=None): # pragma: no cover
from wsgiref.simple_server import make_server
from optparse import OptionParser
@@ -154,7 +154,10 @@ def start_wsgi_server(the_app, name): # pragma: no cover
port = the_app.port
if not port:
- port = DEFAULT_PORT
+ if default_port:
+ port = default_port
+ else:
+ port = DEFAULT_PORT
logging.info('Starting %s on port %s', name, port)
diff --git a/setup.py b/setup.py
index c6ecb656..ab9f6867 100755
--- a/setup.py
+++ b/setup.py
@@ -54,7 +54,7 @@ setup(
'pywb.apps'
],
package_data={
- 'pywb': ['ui/*', 'static/*', '*.yaml', 'configs/*'],
+ 'pywb': ['ui/*', 'static/*', '*.yaml'],
},
data_files=[
('sample_archive/cdx/', glob.glob('sample_archive/cdx/*')),
diff --git a/tests/test_cdx_server_app.py b/tests/test_cdx_server_app.py
index 613273b5..889f8512 100644
--- a/tests/test_cdx_server_app.py
+++ b/tests/test_cdx_server_app.py
@@ -17,7 +17,7 @@ def client():
#================================================================
def query(client, url, is_error=False, **params):
params['url'] = url
- return client.get('/cdx?' + urlencode(params, doseq=1), expect_errors=is_error)
+ return client.get('/pywb-cdx?' + urlencode(params, doseq=1), expect_errors=is_error)
#================================================================