diff --git a/README.rst b/README.rst index 15cf7eab..03309398 100644 --- a/README.rst +++ b/README.rst @@ -9,8 +9,12 @@ PyWb 0.2.2 pywb is a python implementation of web archival replay tools, sometimes also known as 'Wayback Machine'. -The software includes wsgi apps and other tools which 'replay' archived web data -stored in standard `ARC `_ and `WARC `_ files and can provide additional information about the archived captures. +pywb allows high-fidelity replay (browsing) of archived web data stored in standardized `ARC `_ and `WARC `_ files. + + +Latest Changes +"""""""""""""" +See `CHANGES.rst `_ for an up-to-date changelist. Quick Install & Run Samples @@ -32,8 +36,11 @@ installation and testing examples.) Configure to Replay Archived Content ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If you have existing WARC or ARC files (.warc, .warc.gz, .arc, .arc.gz), you should be able -to replay them in pywb after creating sorted indexs with the ``cdx-indexer`` script. +If you have existing WARC or ARC files (.warc, .warc.gz, .arc, .arc.gz), you should be able to view +their contents in pywb after creating sorted .cdx index files of their contents. +This process can be done by running the ``cdx-indexer`` script and only needs to be done once. + +(See the note below if you already have .cdx files for your archives.) Given an archive of warcs at ``myarchive/warcs`` @@ -63,6 +70,9 @@ Given an archive of warcs at ``myarchive/warcs`` (You can also ./run-uwsgi.sh for running with those WSGI containers) +See `INSTALL.rst `_ for additional installation info. + + Use existing .cdx index files """"""""""""""""""""""""""""" @@ -79,13 +89,6 @@ If you would like to use non-SURT ordered .cdx files, simply add this field to t surt_ordered: false - -Latest Changes -~~~~~~~~~~~~~~ -See `CHANGES.rst `_ for up-to-date changelist. 
- - - About Wayback ~~~~~~~~~~~~~ diff --git a/pywb/apps/cdx_server.py b/pywb/apps/cdx_server.py index c87ca3e2..b234e445 100644 --- a/pywb/apps/cdx_server.py +++ b/pywb/apps/cdx_server.py @@ -11,7 +11,7 @@ application = init_app(create_cdx_server_app, load_yaml=True) -def main(): +def main(): # pragma: no cover start_wsgi_server(application, 'CDX Server', default_port=8090) if __name__ == "__main__": diff --git a/pywb/apps/wayback.py b/pywb/apps/wayback.py index 6d276b66..70d15537 100644 --- a/pywb/apps/wayback.py +++ b/pywb/apps/wayback.py @@ -7,7 +7,7 @@ from pywb.core.pywb_init import create_wb_router application = init_app(create_wb_router, load_yaml=True) -def main(): +def main(): # pragma: no cover start_wsgi_server(application, 'Wayback') if __name__ == "__main__":