1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

New Documentation (#252)

* docs work:
- remove old doc folder
- generate new sphinx docs
rewrite: fix existing docstrings for rst
add 'make apidoc' to rerun apidoc on pywb root
apidocs in docs/code
first pass on usage manual in docs/manual

* use default theme

* docs config work:
- remove modules.rst, use pywb toc directly
- make apidoc force rebuild
- comment out alabaster theme config

* Update usage.rst with working dir info

* docs: add configuring web archive page, ui customizations, custom collections explanations

* work on 'custom collections' section

* docs: update dir tree, switch recording/proxy order

* docs: improve framed vs frameless intro
add 'custom outer replay frame' section
This commit is contained in:
Ilya Kreymer 2017-10-04 22:02:03 -07:00 committed by GitHub
parent b631a24a0e
commit 31209db311
35 changed files with 1283 additions and 940 deletions

4
.gitignore vendored
View File

@ -19,6 +19,7 @@ develop-eggs
lib
lib64
__pycache__
.DS_Store
# ignore auto-gen certs
ca/pywb-ca.pem
@ -47,3 +48,6 @@ node_modules/
# git_hash
git_hash.py
# Sphinx documentation
docs/_build/*

View File

@ -1,177 +0,0 @@
# Makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
PAPER         =
BUILDDIR      = _build

# User-friendly check for sphinx-build: stop at parse time with a readable
# message when `which` cannot find the configured executable (exit status 1).
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif

# Internal variables.
# PAPEROPT_$(PAPER) selects one of the two definitions below; it expands to
# nothing when PAPER is left empty.
PAPEROPT_a4     = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .

# Every target defined in this file is a command rather than a file, so all of
# them are declared phony (including latexpdfja, texinfo, info, xml and
# pseudoxml). Otherwise a stray file or directory with the same name would make
# the target look up to date and silently skip the build.
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf latexpdfja text man texinfo info gettext changes xml pseudoxml linkcheck doctest
# Default target (first rule in the file): print the list of supported build
# targets with a one-line description of each. Runs no Sphinx command.
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo " html to make standalone HTML files"
	@echo " dirhtml to make HTML files named index.html in directories"
	@echo " singlehtml to make a single large HTML file"
	@echo " pickle to make pickle files"
	@echo " json to make JSON files"
	@echo " htmlhelp to make HTML files and a HTML help project"
	@echo " qthelp to make HTML files and a qthelp project"
	@echo " devhelp to make HTML files and a Devhelp project"
	@echo " epub to make an epub"
	@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo " latexpdf to make LaTeX files and run them through pdflatex"
	@echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
	@echo " text to make text files"
	@echo " man to make manual pages"
	@echo " texinfo to make Texinfo files"
	@echo " info to make Texinfo files and run them through makeinfo"
	@echo " gettext to make PO message catalogs"
	@echo " changes to make an overview of all changed/added/deprecated items"
	@echo " xml to make Docutils-native XML files"
	@echo " pseudoxml to make pseudoxml-XML files for display purposes"
	@echo " linkcheck to check all external links for integrity"
	@echo " doctest to run all doctests embedded in the documentation (if enabled)"
# Remove everything under the build directory.
# The first recipe line aborts when BUILDDIR expands to nothing (for example
# `make clean BUILDDIR=`), because the rm below would otherwise become
# `rm -rf /*`.
clean:
	$(if $(strip $(BUILDDIR)),,$(error BUILDDIR is empty; refusing to run 'rm -rf /*'))
	rm -rf $(BUILDDIR)/*
# Each target below runs $(SPHINXBUILD) with the builder named after the target
# (-b <builder>), passes the shared $(ALLSPHINXOPTS), writes into
# $(BUILDDIR)/<builder>, and then prints where the output can be found.

# Standalone HTML pages.
html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

# HTML pages written as index.html files inside per-document directories.
dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

# One single large HTML file.
singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

# Pickle files for further processing.
pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."

# JSON files for further processing.
json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."

# HTML files plus an HTML Help project (.hhp).
htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/htmlhelp."

# HTML files plus a qthelp project (.qhcp); prints the follow-up commands.
# NOTE(review): the inner double quotes around qcollectiongenerator close and
# reopen the shell string, so the printed message shows the word unquoted.
qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pywb.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pywb.qhc"

# HTML files plus a Devhelp project; prints the commands needed to install it.
# $$HOME is escaped so that the shell, not make, expands it.
devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/pywb"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pywb"
	@echo "# devhelp"

# An epub file.
epub:
	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."

# LaTeX sources only; PAPER=a4 or PAPER=letter selects the paper size.
latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	      "(use \`make latexpdf' here to do that automatically)."

# LaTeX sources, then the generated Makefile's all-pdf target via $(MAKE).
latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

# LaTeX sources, then the generated Makefile's all-pdf-ja target via $(MAKE).
# NOTE(review): the final message still says "pdflatex" although the step above
# announces platex and dvipdfmx.
latexpdfja:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through platex and dvipdfmx..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

# Plain text files.
text:
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."

# Manual pages.
man:
	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

# Texinfo sources only.
texinfo:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	      "(use \`make info' here to do that automatically)."
# Build the Texinfo sources, then run the generated Makefile's `info` target to
# turn them into Info files.
# The sub-make is invoked through $(MAKE), as latexpdf does, so that command
# line flags such as -n and -j (and the jobserver) propagate to it.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
# PO message catalogs. Uses $(I18NSPHINXOPTS) instead of $(ALLSPHINXOPTS), i.e.
# without the shared -d doctrees directory, and writes to $(BUILDDIR)/locale.
gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

# Overview of changed/added/deprecated items.
changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

# Check external links; results are reported in linkcheck/output.txt.
linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in $(BUILDDIR)/linkcheck/output.txt."

# Run the doctests embedded in the documentation; results are reported in
# doctest/output.txt.
doctest:
	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
	@echo "Testing of doctests in the sources finished, look at the " \
	      "results in $(BUILDDIR)/doctest/output.txt."

# Docutils-native XML files.
xml:
	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
	@echo
	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."

# Pseudo-XML files for display purposes.
pseudoxml:
	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
	@echo
	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."

View File

@ -1,266 +0,0 @@
# -*- coding: utf-8 -*-
#
# pywb documentation build configuration file, created by
# sphinx-quickstart on Thu Feb 27 21:08:12 2014.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
import sys
import os
# Put the directory that contains this conf.py at the front of the import
# search path ('.' is not included by default).
# NOTE(review): the original comment calls this directory the project root --
# confirm that conf.py really lives there.
sys.path[0:0] = [os.path.dirname(__file__)]
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.'))
# -- General configuration ------------------------------------------------

# Commented-out assignments in this section show Sphinx defaults that are
# deliberately left untouched.

#needs_sphinx = '1.0'            # minimal Sphinx version required, if any

# Sphinx extension modules to load, as dotted names ('sphinx.ext.*' ship with
# Sphinx; custom ones can be listed too).
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo',
              'sphinx.ext.coverage', 'sphinx.ext.viewcode']

# Source layout: template directories (relative to this directory), source
# file suffix, and the document holding the master toctree.
templates_path = ['_templates']
source_suffix = '.rst'
#source_encoding = 'utf-8-sig'   # encoding of source files
master_doc = 'index'

# Files and directories (patterns relative to the source directory) that are
# skipped when looking for source files.
exclude_patterns = ['_build']

# General information about the project.
project = u'pywb'
copyright = u'2014, Ilya Kreymer'

# Substituted for |version| (short X.Y) and |release| (full version including
# alpha/beta/rc tags) throughout the built documents; both are the same here.
version = release = '0.2'

#language = None                 # language for Sphinx-generated content

# |today| is either the fixed value of `today` (when non-false) or the current
# date formatted with `today_fmt` as a strftime pattern.
#today = ''
#today_fmt = '%B %d, %Y'

#default_role = None             # reST default role used for `text` markup
#add_function_parentheses = True # append '()' to :func: etc. cross-references
#add_module_names = True         # prefix description unit titles with the module
#show_authors = False            # render sectionauthor/moduleauthor directives

# Pygments style used for syntax highlighting.
pygments_style = 'sphinx'

#modindex_common_prefix = []     # prefixes ignored when sorting the module index
#keep_warnings = False           # keep warnings as "system message" paragraphs
# -- Options for HTML output ----------------------------------------------

# Theme for HTML and HTML Help pages (see the Sphinx docs for builtin themes).
html_theme = 'default'

# Directories with custom static files (such as style sheets), relative to
# this directory. They are copied after the builtin static files, so a file
# named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Output file base name for the HTML help builder.
htmlhelp_basename = 'pywbdoc'

# Remaining HTML options, left commented out to show their defaults.
#html_theme_options = {}         # theme-specific look-and-feel options
#html_theme_path = []            # extra directories containing custom themes
#html_title = None               # defaults to "<project> v<release> documentation"
#html_short_title = None         # navigation bar title; defaults to html_title
#html_logo = None                # image shown at the top of the sidebar
#html_favicon = None             # .ico file (16x16 or 32x32) in the static path
#html_extra_path = []            # files copied verbatim to the docs root (robots.txt, .htaccess)
#html_last_updated_fmt = '%b %d, %Y'  # strftime format of the 'Last updated on:' stamp
#html_use_smartypants = True     # typographically correct quotes and dashes
#html_sidebars = {}              # document name -> sidebar template names
#html_additional_pages = {}      # page name -> template name for extra pages
#html_domain_indices = True      # generate the module index
#html_use_index = True           # generate the general index
#html_split_index = False        # one index page per letter
#html_show_sourcelink = True     # link to the reST sources
#html_show_sphinx = True         # "Created using Sphinx" in the footer
#html_show_copyright = True      # "(C) Copyright ..." in the footer
#html_use_opensearch = ''        # base URL the finished HTML is served from
#html_file_suffix = None         # file name suffix for HTML files, e.g. ".xhtml"
# -- Options for LaTeX output ---------------------------------------------

# No LaTeX element overrides are active. Keys that could be set here:
#   'papersize': 'letterpaper'   -- paper size ('letterpaper' or 'a4paper')
#   'pointsize': '10pt'          -- font size ('10pt', '11pt' or '12pt')
#   'preamble': ''               -- additional stuff for the LaTeX preamble
latex_elements = {}

# One tuple per LaTeX output file:
# (source start file, target name, title, author, documentclass), where the
# documentclass is howto, manual, or an own class.
latex_documents = [
    ('index', 'pywb.tex', u'pywb Documentation', u'Ilya Kreymer', 'manual'),
]

# Remaining LaTeX options, left commented out to show their defaults.
#latex_logo = None               # image placed at the top of the title page
#latex_use_parts = False         # "manual" documents: toplevel headings are parts
#latex_show_pagerefs = False     # page references after internal links
#latex_show_urls = False         # URL addresses after external links
#latex_appendices = []           # documents appended as appendices to all manuals
#latex_domain_indices = True     # generate the module index
# -- Options for manual page output ---------------------------------------

# One tuple per manual page:
# (source start file, name, description, authors, manual section).
man_pages = [('index', 'pywb', u'pywb Documentation', [u'Ilya Kreymer'], 1)]

#man_show_urls = False           # URL addresses after external links
# -- Options for Texinfo output -------------------------------------------

# One tuple per Texinfo output file:
# (source start file, target name, title, author,
#  dir menu entry, description, category)
# NOTE(review): the description is still the sphinx-quickstart placeholder.
texinfo_documents = [
    (
        'index',
        'pywb',
        u'pywb Documentation',
        u'Ilya Kreymer',
        'pywb',
        'One line description of project.',
        'Miscellaneous',
    ),
]

# Remaining Texinfo options, left commented out to show their defaults.
#texinfo_appendices = []         # documents appended as appendices to all manuals
#texinfo_domain_indices = True   # generate the module index
#texinfo_show_urls = 'footnote'  # URL display: 'footnote', 'no', or 'inline'
#texinfo_no_detailmenu = False   # omit the @detailmenu in the "Top" node's menu

View File

@ -1,44 +0,0 @@
.. pywb documentation master file, created by
sphinx-quickstart on Thu Feb 27 21:08:12 2014.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to pywb's documentation!
================================
Pywb is a pure-Python implementation of the Wayback Machine.
.. toctree::
:maxdepth: 2
Installing Pywb
---------------
Pywb QuickStart
---------------
The pywb tool suite currently includes two runnable applications in the
:mod:`pywb.apps` package::
python -m pywb.apps.wayback
starts the full wayback on port 8080, and::
python -m pywb.apps.cdx_server
starts the standalone CDX server on port 8090.
Customizing Pywb
----------------
Access Control
~~~~~~~~~~~~~~
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

View File

@ -1,7 +0,0 @@
pywb
====
.. toctree::
:maxdepth: 4
pywb

View File

@ -1,30 +0,0 @@
pywb.apps package
=================
Submodules
----------
pywb.apps.cdx_server module
---------------------------
.. automodule:: pywb.apps.cdx_server
:members:
:undoc-members:
:show-inheritance:
pywb.apps.wayback module
------------------------
.. automodule:: pywb.apps.wayback
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: pywb.apps
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,76 +0,0 @@
pywb.cdx package
================
Submodules
----------
pywb.cdx.cdxdomainspecific module
---------------------------------
.. automodule:: pywb.cdx.cdxdomainspecific
:members:
:undoc-members:
:show-inheritance:
:special-members: __call__
pywb.cdx.cdxobject module
-------------------------
.. automodule:: pywb.cdx.cdxobject
:members:
:undoc-members:
:show-inheritance:
:special-members: __call__
pywb.cdx.cdxops module
----------------------
.. automodule:: pywb.cdx.cdxops
:members:
:undoc-members:
:show-inheritance:
:special-members: __call__
pywb.cdx.cdxserver module
-------------------------
.. automodule:: pywb.cdx.cdxserver
:members:
:undoc-members:
:show-inheritance:
:special-members: __call__
pywb.cdx.cdxsource module
-------------------------
.. automodule:: pywb.cdx.cdxsource
:members:
:undoc-members:
:show-inheritance:
:special-members: __call__
pywb.cdx.query module
---------------------
.. automodule:: pywb.cdx.query
:members:
:undoc-members:
:show-inheritance:
:special-members: __call__
pywb.cdx.zipnum module
----------------------
.. automodule:: pywb.cdx.zipnum
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: pywb.cdx
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,65 +0,0 @@
pywb.core package
=================
Submodules
----------
pywb.core.cdx_handler module
----------------------------
.. automodule:: pywb.core.cdx_handler
:members:
:undoc-members:
:show-inheritance:
:special-members: __call__
pywb.core.handlers module
-------------------------
.. automodule:: pywb.core.handlers
:members:
:undoc-members:
:show-inheritance:
:special-members: __call__
pywb.core.indexreader module
----------------------------
.. automodule:: pywb.core.indexreader
:members:
:undoc-members:
:show-inheritance:
pywb.core.pywb_init module
--------------------------
.. automodule:: pywb.core.pywb_init
:members:
:undoc-members:
:show-inheritance:
pywb.core.replay_views module
-----------------------------
.. automodule:: pywb.core.replay_views
:members:
:undoc-members:
:show-inheritance:
:special-members: __call__
pywb.core.views module
----------------------
.. automodule:: pywb.core.views
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: pywb.core
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,67 +0,0 @@
pywb.framework package
======================
Submodules
----------
pywb.framework.archivalrouter module
------------------------------------
.. automodule:: pywb.framework.archivalrouter
:members:
:undoc-members:
:show-inheritance:
:special-members: __call__
pywb.framework.basehandlers module
----------------------------------
.. automodule:: pywb.framework.basehandlers
:members:
:undoc-members:
:show-inheritance:
:special-members: __call__
pywb.framework.proxy module
---------------------------
.. automodule:: pywb.framework.proxy
:members:
:undoc-members:
:show-inheritance:
:special-members: __call__
pywb.framework.wbexceptions module
----------------------------------
.. automodule:: pywb.framework.wbexceptions
:members:
:undoc-members:
:show-inheritance:
pywb.framework.wbrequestresponse module
---------------------------------------
.. automodule:: pywb.framework.wbrequestresponse
:members:
:undoc-members:
:show-inheritance:
:special-members: __call__
pywb.framework.wsgi_wrappers module
-----------------------------------
.. automodule:: pywb.framework.wsgi_wrappers
:members:
:undoc-members:
:show-inheritance:
:special-members: __call__
Module contents
---------------
.. automodule:: pywb.framework
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,38 +0,0 @@
pywb.perms package
==================
Subpackages
-----------
.. toctree::
pywb.perms.test
Submodules
----------
pywb.perms.perms_filter module
------------------------------
.. automodule:: pywb.perms.perms_filter
:members:
:undoc-members:
:show-inheritance:
pywb.perms.perms_handler module
-------------------------------
.. automodule:: pywb.perms.perms_handler
:members:
:undoc-members:
:show-inheritance:
:special-members: __call__
Module contents
---------------
.. automodule:: pywb.perms
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,79 +0,0 @@
pywb.rewrite package
====================
Submodules
----------
pywb.rewrite.header_rewriter module
-----------------------------------
.. automodule:: pywb.rewrite.header_rewriter
:members:
:undoc-members:
:show-inheritance:
pywb.rewrite.html_rewriter module
---------------------------------
.. automodule:: pywb.rewrite.html_rewriter
:members:
:undoc-members:
:show-inheritance:
pywb.rewrite.regex_rewriters module
-----------------------------------
.. automodule:: pywb.rewrite.regex_rewriters
:members:
:undoc-members:
:show-inheritance:
:special-members: __call__
pywb.rewrite.rewrite_content module
-----------------------------------
.. automodule:: pywb.rewrite.rewrite_content
:members:
:undoc-members:
:show-inheritance:
pywb.rewrite.rewrite_live module
--------------------------------
.. automodule:: pywb.rewrite.rewrite_live
:members:
:undoc-members:
:show-inheritance:
pywb.rewrite.rewriterules module
--------------------------------
.. automodule:: pywb.rewrite.rewriterules
:members:
:undoc-members:
:show-inheritance:
pywb.rewrite.url_rewriter module
--------------------------------
.. automodule:: pywb.rewrite.url_rewriter
:members:
:undoc-members:
:show-inheritance:
pywb.rewrite.wburl module
-------------------------
.. automodule:: pywb.rewrite.wburl
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: pywb.rewrite
:members:
:undoc-members:
:show-inheritance:

View File

@ -1,39 +0,0 @@
pywb.warc package
=================
Submodules
----------
pywb.warc.pathresolvers module
------------------------------
.. automodule:: pywb.warc.pathresolvers
:members:
:undoc-members:
:show-inheritance:
:special-members: __call__
pywb.warc.recordloader module
-----------------------------
.. automodule:: pywb.warc.recordloader
:members:
:undoc-members:
:show-inheritance:
pywb.warc.resolvingloader module
--------------------------------
.. automodule:: pywb.warc.resolvingloader
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: pywb.warc
:members:
:undoc-members:
:show-inheritance:

23
docs/Makefile Normal file
View File

@ -0,0 +1,23 @@
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = python -msphinx
SPHINXPROJ    = pywb
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# Re-run sphinx-apidoc on the pywb package root (../pywb/), writing the
# generated .rst stubs into the code/ directory. -f forces existing stubs to be
# overwritten and -T skips the modules.rst table-of-contents file; the trailing
# quoted patterns exclude test and git_hash paths from the generated docs.
apidoc:
	@sphinx-apidoc -f -T -o code ../pywb/ "../*test*" "../*git_hash*"

# help and apidoc are commands, not files; declaring them phony keeps a stray
# file or directory of the same name from making them look up to date.
# Makefile is listed so the catch-all rule below never tries to rebuild it.
.PHONY: help apidoc Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

78
docs/code/pywb.apps.rst Normal file
View File

@ -0,0 +1,78 @@
pywb\.apps package
==================
Submodules
----------
pywb\.apps\.cli module
----------------------
.. automodule:: pywb.apps.cli
:members:
:undoc-members:
:show-inheritance:
pywb\.apps\.frontendapp module
------------------------------
.. automodule:: pywb.apps.frontendapp
:members:
:undoc-members:
:show-inheritance:
pywb\.apps\.live module
-----------------------
.. automodule:: pywb.apps.live
:members:
:undoc-members:
:show-inheritance:
pywb\.apps\.rewriterapp module
------------------------------
.. automodule:: pywb.apps.rewriterapp
:members:
:undoc-members:
:show-inheritance:
pywb\.apps\.static\_handler module
----------------------------------
.. automodule:: pywb.apps.static_handler
:members:
:undoc-members:
:show-inheritance:
pywb\.apps\.warcserverapp module
--------------------------------
.. automodule:: pywb.apps.warcserverapp
:members:
:undoc-members:
:show-inheritance:
pywb\.apps\.wayback module
--------------------------
.. automodule:: pywb.apps.wayback
:members:
:undoc-members:
:show-inheritance:
pywb\.apps\.wbrequestresponse module
------------------------------------
.. automodule:: pywb.apps.wbrequestresponse
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: pywb.apps
:members:
:undoc-members:
:show-inheritance:

View File

@ -0,0 +1,30 @@
pywb\.indexer package
=====================
Submodules
----------
pywb\.indexer\.archiveindexer module
------------------------------------
.. automodule:: pywb.indexer.archiveindexer
:members:
:undoc-members:
:show-inheritance:
pywb\.indexer\.cdxindexer module
--------------------------------
.. automodule:: pywb.indexer.cdxindexer
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: pywb.indexer
:members:
:undoc-members:
:show-inheritance:

View File

@ -0,0 +1,38 @@
pywb\.manager package
=====================
Submodules
----------
pywb\.manager\.autoindex module
-------------------------------
.. automodule:: pywb.manager.autoindex
:members:
:undoc-members:
:show-inheritance:
pywb\.manager\.manager module
-----------------------------
.. automodule:: pywb.manager.manager
:members:
:undoc-members:
:show-inheritance:
pywb\.manager\.migrate module
-----------------------------
.. automodule:: pywb.manager.migrate
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: pywb.manager
:members:
:undoc-members:
:show-inheritance:

View File

@ -0,0 +1,46 @@
pywb\.recorder package
======================
Submodules
----------
pywb\.recorder\.filters module
------------------------------
.. automodule:: pywb.recorder.filters
:members:
:undoc-members:
:show-inheritance:
pywb\.recorder\.multifilewarcwriter module
------------------------------------------
.. automodule:: pywb.recorder.multifilewarcwriter
:members:
:undoc-members:
:show-inheritance:
pywb\.recorder\.recorderapp module
----------------------------------
.. automodule:: pywb.recorder.recorderapp
:members:
:undoc-members:
:show-inheritance:
pywb\.recorder\.redisindexer module
-----------------------------------
.. automodule:: pywb.recorder.redisindexer
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: pywb.recorder
:members:
:undoc-members:
:show-inheritance:

142
docs/code/pywb.rewrite.rst Normal file
View File

@ -0,0 +1,142 @@
pywb\.rewrite package
=====================
Submodules
----------
pywb\.rewrite\.content\_rewriter module
---------------------------------------
.. automodule:: pywb.rewrite.content_rewriter
:members:
:undoc-members:
:show-inheritance:
pywb\.rewrite\.cookie\_rewriter module
--------------------------------------
.. automodule:: pywb.rewrite.cookie_rewriter
:members:
:undoc-members:
:show-inheritance:
pywb\.rewrite\.cookies module
-----------------------------
.. automodule:: pywb.rewrite.cookies
:members:
:undoc-members:
:show-inheritance:
pywb\.rewrite\.default\_rewriter module
---------------------------------------
.. automodule:: pywb.rewrite.default_rewriter
:members:
:undoc-members:
:show-inheritance:
pywb\.rewrite\.header\_rewriter module
--------------------------------------
.. automodule:: pywb.rewrite.header_rewriter
:members:
:undoc-members:
:show-inheritance:
pywb\.rewrite\.html\_insert\_rewriter module
--------------------------------------------
.. automodule:: pywb.rewrite.html_insert_rewriter
:members:
:undoc-members:
:show-inheritance:
pywb\.rewrite\.html\_rewriter module
------------------------------------
.. automodule:: pywb.rewrite.html_rewriter
:members:
:undoc-members:
:show-inheritance:
pywb\.rewrite\.jsonp\_rewriter module
-------------------------------------
.. automodule:: pywb.rewrite.jsonp_rewriter
:members:
:undoc-members:
:show-inheritance:
pywb\.rewrite\.regex\_rewriters module
--------------------------------------
.. automodule:: pywb.rewrite.regex_rewriters
:members:
:undoc-members:
:show-inheritance:
pywb\.rewrite\.rewrite\_amf module
----------------------------------
.. automodule:: pywb.rewrite.rewrite_amf
:members:
:undoc-members:
:show-inheritance:
pywb\.rewrite\.rewrite\_dash module
-----------------------------------
.. automodule:: pywb.rewrite.rewrite_dash
:members:
:undoc-members:
:show-inheritance:
pywb\.rewrite\.rewrite\_hls module
----------------------------------
.. automodule:: pywb.rewrite.rewrite_hls
:members:
:undoc-members:
:show-inheritance:
pywb\.rewrite\.rewriteinputreq module
-------------------------------------
.. automodule:: pywb.rewrite.rewriteinputreq
:members:
:undoc-members:
:show-inheritance:
pywb\.rewrite\.templateview module
----------------------------------
.. automodule:: pywb.rewrite.templateview
:members:
:undoc-members:
:show-inheritance:
pywb\.rewrite\.url\_rewriter module
-----------------------------------
.. automodule:: pywb.rewrite.url_rewriter
:members:
:undoc-members:
:show-inheritance:
pywb\.rewrite\.wburl module
---------------------------
.. automodule:: pywb.rewrite.wburl
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: pywb.rewrite
:members:
:undoc-members:
:show-inheritance:

View File

@ -7,13 +7,12 @@ Subpackages
.. toctree::
pywb.apps
pywb.cdx
pywb.core
pywb.framework
pywb.perms
pywb.indexer
pywb.manager
pywb.recorder
pywb.rewrite
pywb.utils
pywb.warc
pywb.warcserver
Module contents
---------------

View File

@ -1,68 +1,67 @@
pywb.utils package
==================
pywb\.utils package
===================
Submodules
----------
pywb.utils.binsearch module
---------------------------
pywb\.utils\.binsearch module
-----------------------------
.. automodule:: pywb.utils.binsearch
:members:
:undoc-members:
:show-inheritance:
pywb.utils.bufferedreaders module
---------------------------------
.. automodule:: pywb.utils.bufferedreaders
:members:
:undoc-members:
:show-inheritance:
pywb.utils.canonicalize module
------------------------------
pywb\.utils\.canonicalize module
--------------------------------
.. automodule:: pywb.utils.canonicalize
:members:
:undoc-members:
:show-inheritance:
:special-members: __call__
pywb.utils.dsrules module
-------------------------
pywb\.utils\.format module
--------------------------
.. automodule:: pywb.utils.dsrules
.. automodule:: pywb.utils.format
:members:
:undoc-members:
:show-inheritance:
pywb.utils.loaders module
-------------------------
pywb\.utils\.geventserver module
--------------------------------
.. automodule:: pywb.utils.geventserver
:members:
:undoc-members:
:show-inheritance:
pywb\.utils\.io module
----------------------
.. automodule:: pywb.utils.io
:members:
:undoc-members:
:show-inheritance:
pywb\.utils\.loaders module
---------------------------
.. automodule:: pywb.utils.loaders
:members:
:undoc-members:
:show-inheritance:
pywb.utils.statusandheaders module
----------------------------------
.. automodule:: pywb.utils.statusandheaders
:members:
:undoc-members:
:show-inheritance:
pywb.utils.timeutils module
pywb\.utils\.memento module
---------------------------
.. automodule:: pywb.utils.timeutils
.. automodule:: pywb.utils.memento
:members:
:undoc-members:
:show-inheritance:
pywb.utils.wbexception module
-----------------------------
pywb\.utils\.wbexception module
-------------------------------
.. automodule:: pywb.utils.wbexception
:members:

View File

@ -0,0 +1,70 @@
pywb\.warcserver\.index package
===============================
Submodules
----------
pywb\.warcserver\.index\.aggregator module
------------------------------------------
.. automodule:: pywb.warcserver.index.aggregator
:members:
:undoc-members:
:show-inheritance:
pywb\.warcserver\.index\.cdxobject module
-----------------------------------------
.. automodule:: pywb.warcserver.index.cdxobject
:members:
:undoc-members:
:show-inheritance:
pywb\.warcserver\.index\.cdxops module
--------------------------------------
.. automodule:: pywb.warcserver.index.cdxops
:members:
:undoc-members:
:show-inheritance:
pywb\.warcserver\.index\.fuzzymatcher module
--------------------------------------------
.. automodule:: pywb.warcserver.index.fuzzymatcher
:members:
:undoc-members:
:show-inheritance:
pywb\.warcserver\.index\.indexsource module
-------------------------------------------
.. automodule:: pywb.warcserver.index.indexsource
:members:
:undoc-members:
:show-inheritance:
pywb\.warcserver\.index\.query module
-------------------------------------
.. automodule:: pywb.warcserver.index.query
:members:
:undoc-members:
:show-inheritance:
pywb\.warcserver\.index\.zipnum module
--------------------------------------
.. automodule:: pywb.warcserver.index.zipnum
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: pywb.warcserver.index
:members:
:undoc-members:
:show-inheritance:

View File

@ -0,0 +1,46 @@
pywb\.warcserver\.resource package
==================================
Submodules
----------
pywb\.warcserver\.resource\.blockrecordloader module
----------------------------------------------------
.. automodule:: pywb.warcserver.resource.blockrecordloader
:members:
:undoc-members:
:show-inheritance:
pywb\.warcserver\.resource\.pathresolvers module
------------------------------------------------
.. automodule:: pywb.warcserver.resource.pathresolvers
:members:
:undoc-members:
:show-inheritance:
pywb\.warcserver\.resource\.resolvingloader module
--------------------------------------------------
.. automodule:: pywb.warcserver.resource.resolvingloader
:members:
:undoc-members:
:show-inheritance:
pywb\.warcserver\.resource\.responseloader module
-------------------------------------------------
.. automodule:: pywb.warcserver.resource.responseloader
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: pywb.warcserver.resource
:members:
:undoc-members:
:show-inheritance:

View File

@ -0,0 +1,70 @@
pywb\.warcserver package
========================
Subpackages
-----------
.. toctree::
pywb.warcserver.index
pywb.warcserver.resource
Submodules
----------
pywb\.warcserver\.basewarcserver module
---------------------------------------
.. automodule:: pywb.warcserver.basewarcserver
:members:
:undoc-members:
:show-inheritance:
pywb\.warcserver\.handlers module
---------------------------------
.. automodule:: pywb.warcserver.handlers
:members:
:undoc-members:
:show-inheritance:
pywb\.warcserver\.http module
-----------------------------
.. automodule:: pywb.warcserver.http
:members:
:undoc-members:
:show-inheritance:
pywb\.warcserver\.inputrequest module
-------------------------------------
.. automodule:: pywb.warcserver.inputrequest
:members:
:undoc-members:
:show-inheritance:
pywb\.warcserver\.upstreamindexsource module
--------------------------------------------
.. automodule:: pywb.warcserver.upstreamindexsource
:members:
:undoc-members:
:show-inheritance:
pywb\.warcserver\.warcserver module
-----------------------------------
.. automodule:: pywb.warcserver.warcserver
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: pywb.warcserver
:members:
:undoc-members:
:show-inheritance:

181
docs/conf.py Normal file
View File

@ -0,0 +1,181 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# pywb documentation build configuration file, created by
# sphinx-quickstart on Thu Sep 21 01:58:55 2017.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))
#
# NOTE(review): sys.path is left untouched, so the autodoc pages presumably
# rely on pywb being importable (installed) in the build environment -- confirm.


# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.doctest',
    'sphinx.ext.intersphinx',
    'sphinx.ext.todo',
    'sphinx.ext.coverage',
    'sphinx.ext.ifconfig',
    'sphinx.ext.viewcode',
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = 'pywb'
copyright = 'A Webrecorder Project, Ilya Kreymer, Rhizome'
author = 'Ilya Kreymer'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '2.0'
# The full version, including alpha/beta/rc tags.
release = '2.0'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
#
# Set explicitly to English: newer Sphinx releases warn about ``None`` and
# fall back to 'en' anyway, so this keeps the output the same without the
# warning.
language = 'en'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# These patterns also affect html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = True


# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = 'default'

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# This is required for the alabaster theme
# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
#html_sidebars = {
#    '**': [
#        'about.html',
#        'navigation.html',
#        'relations.html',  # needs 'show_related': True theme option to display
#        'searchbox.html',
#        'donate.html',
#    ]
#}


# -- Options for HTMLHelp output ------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'pywbdoc'


# -- Options for LaTeX output ---------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'pywb.tex', 'pywb Documentation',
     'Ilya Kreymer', 'manual'),
]


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'pywb', 'pywb Documentation',
     [author], 1)
]


# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
#
# The description replaces the sphinx-quickstart placeholder text, which
# would otherwise be emitted verbatim into the Texinfo directory entry.
texinfo_documents = [
    (master_doc, 'pywb', 'pywb Documentation',
     author, 'pywb', 'Web archiving capture and replay framework for Python.',
     'Miscellaneous'),
]


# -- Options for intersphinx ----------------------------------------------

# Refer to the Python standard library. Uses the named
# ``{name: (target, inventory)}`` form; the legacy ``{target: inventory}``
# form is deprecated and no longer accepted by newer Sphinx releases.
intersphinx_mapping = {'python': ('https://docs.python.org/3', None)}

27
docs/index.rst Normal file
View File

@ -0,0 +1,27 @@
.. pywb documentation master file, created by
sphinx-quickstart on Thu Sep 21 01:58:55 2017.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Webrecorder pywb documentation!
================================
Webrecorder (:mod:`pywb`) toolkit is a full-featured, advanced web archiving capture and replay framework for python.
It provides command-line tools and an extensible framework for high-fidelity web archive access and creation.
.. toctree::
:maxdepth: 2
manual/intro
manual/usage
manual/configuring
manual/index
code/pywb
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

36
docs/make.bat Normal file
View File

@ -0,0 +1,36 @@
@ECHO OFF
REM Switch to the directory holding this script so the relative source and
REM build paths set below resolve the same way from any calling directory.
pushd %~dp0
REM Command file for Sphinx documentation
REM Use "python -msphinx" unless the caller already set SPHINXBUILD.
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=python -msphinx
)
set SOURCEDIR=.
set BUILDDIR=_build
set SPHINXPROJ=pywb
REM Without a target argument, show the Sphinx help listing instead.
if "%1" == "" goto help
REM Probe run with all output discarded; only the exit code is of interest.
REM An errorlevel of 9009 or above is treated as "Sphinx command not found".
REM Note that this error path exits before the popd at the end is reached.
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The Sphinx module was not found. Make sure you have Sphinx installed,
echo.then set the SPHINXBUILD environment variable to point to the full
echo.path of the 'sphinx-build' executable. Alternatively you may add the
echo.Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.http://sphinx-doc.org/
exit /b 1
)
REM Pass the requested target to Sphinx via its -M option, together with the
REM source dir, build dir and any extra options supplied in SPHINXOPTS.
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
:end
REM Return to the directory that was current before the pushd above.
popd

269
docs/manual/configuring.rst Normal file
View File

@ -0,0 +1,269 @@
Configuring the Web Archive
===========================
pywb offers an extensible YAML based configuration format via a main ``config.yaml`` at the root of each web archive.
Framed vs Frameless Replay vs HTTPS proxy
-----------------------------------------
pywb supports several modes for serving archived web content.
With **framed replay**, the archived content is loaded into an iframe, and a top frame UI provides info and metadata.
In this mode, the top frame url is for example, ``http://my-archive.example.com/<coll name>/http://example.com/`` while
the actual content is served at ``http://my-archive.example.com/<coll name>/mp_/http://example.com/``
With **frameless replay**, the archived content is loaded directly, and a banner UI is injected into the page.
In this mode, the content is served directly at ``http://my-archive.example.com/<coll name>/http://example.com/``
(pywb also supports HTTP/S **proxy mode**, which requires additional setup. See :ref:`https-proxy` for more details.)
For security reasons, we recommend running pywb in framed mode, because a malicious site
`could tamper with the banner <http://labs.rhizome.org/presentations/security.html#/13>`_.
However, for certain situations, frameless replay may be appropriate.
To disable framed replay add:
``framed_replay: false`` to your config.yaml
Directory Structure
-------------------
The pywb system assumes the following default directory structure for a web archive::
+-- config.yaml (optional)
|
+-- templates (optional)
|
+-- static (optional)
|
+-- collections
|
+-- <coll name>
|
+-- archive
| |
| +-- (WARC or ARC files here)
|
+-- indexes
| |
| +-- (CDXJ index files here)
|
+-- templates
| |
| +-- (optional html templates here)
|
+-- static
|
+-- (optional custom static assets here)
If running with default settings, the ``config.yaml`` can be omitted.
It is possible to configure these paths in the ``config.yaml``.
The following are some of the implicit default settings which can be customized::
collections_root: collections
archive_paths: archive
index_paths: indexes
(For a complete list of defaults, see the ``pywb/default_config.yaml`` file for reference)
Index Paths
^^^^^^^^^^^
The ``index_paths`` key defines the subdirectory for index files (usually CDXJ) and determines the contents of each archive collection.
The index files usually contain a pointer to a WARC file, but not the absolute path.
Archive Paths
^^^^^^^^^^^^^
The ``archive_paths`` key indicates how pywb will resolve WARC files listed in the index.
For example, it is possible to configure multiple archive paths::
archive_paths:
- archive
- http://remote-backup.example.com/collections/
When resolving a ``example.warc.gz``, pywb will then check (in order):
* First, ``collections/<coll name>/example.warc.gz``
* Then, ``http://remote-backup.example.com/collections/<coll name>/example.warc.gz`` (if first lookup unsuccessful)
Custom Defined Collections
--------------------------
While pywb can automatically detect collections following the above directory structure,
it may be useful to declare custom collections explicitly.
In addition, several "special" collection definitions are possible.
All custom defined collections are placed under the ``collections`` key in ``config.yaml``
Live Web Collection
^^^^^^^^^^^^^^^^^^^
The live web collection proxies all data to the live web, and can be defined as follows::
collections:
live: $live
This configures the ``/live/`` route to point to the live web.
(As a shortcut, ``wayback --live`` adds this collection via the command line without modifying the ``config.yaml``.)
This collection can be useful for testing, or even more powerful, when combined with recording.
Auto "All" Aggregate Collection
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The aggregate ``all`` collection automatically aggregates data from all collections in the ``collections`` directory::
collections:
all: $all
Accessing ``/all/<url>`` will cause an aggregate lookup within the collections directory.
Note: It is not (yet) possible to exclude collections from the all collection, although "special" collections are not included.
Generic Collection Definitions
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The collection definition syntax allows for explicitly setting the index, archive paths
and all other templates, per collection, for example::
collections:
custom:
index: ./path/to/indexes
resource: ./some/other/path/to/archive/
query_html: ./path/to/templates/query.html
This configuration supports the full Warcserver config syntax, including
remote archives, aggregation and fallback sequences (link)
This format also makes it easier to move legacy collections that have unique path requirements.
Root Collection
^^^^^^^^^^^^^^^
It is also possible to define a "root" collection, for example, accessible at ``http://my-archive.example.com/<url>``
Such a collection must be defined explicitly using the ``$root`` as collection name::
collections:
$root:
index: ./path/to/indexes
resource: ./path/to/archive/
Note: When a root collection is set, no other collections are currently accessible, they are ignored.
Recording Mode
--------------
TODO
.. _https-proxy:
HTTP/S Proxy Mode
-----------------
TODO
UI Customizations
-----------------
pywb supports UI customizations, either for an entire archive,
or per-collection.
Static Files
^^^^^^^^^^^^
The replay server will automatically support static files placed under the following directories:
* Files under the root ``static`` directory can be accessed via ``http://my-archive.example.com/static/<filename>``
* Files under the per-collection ``./collections/<coll name>/static`` directory can be accessed via ``http://my-archive.example.com/static/_/<coll name>/<filename>``
Templates
^^^^^^^^^
pywb uses Jinja2 templates to render the HTML for all aspects of the application.
A template of the same name placed in the ``templates`` directory, either in the root or per collection, will override the corresponding default template.
To copy the default pywb template to the template directory run:
``wb-manager template --add search_html``
The following templates are available:
* ``home.html`` -- Home Page Template, used for ``http://my-archive.example.com/``
* ``search.html`` -- Collection Template, used for each collection page ``http://my-archive.example.com/<coll name>/``
* ``query.html`` -- Capture Query Page for a given url, used for ``http://my-archive.example.com/<coll name>/*/<url>``
Error Pages:
* ``not_found.html`` -- Page to show when a url is not found in the archive
* ``error.html`` -- Generic Error Page for any error (except not found)
Replay and Banner templates:
* ``frame_insert.html`` -- Top-frame for framed replay mode (not used with frameless mode)
* ``head_insert.html`` -- Rewriting code injected into ``<head>`` of each replayed page.
This template includes the banner template and itself should generally not need to be modified.
* ``banner.html`` -- The banner used for frameless replay. Can be set to blank to disable the banner.
Custom Outer Replay Frame
^^^^^^^^^^^^^^^^^^^^^^^^^
The top-frame used for framed replay can be replaced or augmented
by modifying the ``frame_insert.html``.
To start with modifying the default outer page, you can add it to the current
templates directory by running ``wb-manager template --add frame_insert_html``
To initialize the replay, the outer page should include ``wb_frame.js``,
create an ``<iframe>`` element and pass the id (or element itself) to the ``ContentFrame`` constructor:
.. code-block:: html
<script src='{{ host_prefix }}/{{ static_path }}/wb_frame.js'> </script>
<script>
var cframe = new ContentFrame({"url": "{{ url }}" + window.location.hash,
"prefix": "{{ wb_prefix }}",
"request_ts": "{{ wb_url.timestamp }}",
"iframe": "#replay_iframe"});
</script>
The outer frame can receive notifications of changes to the replay via ``postMessage``.
For example, to detect when the content frame changed and log the new url and timestamp,
add the following script to the outer frame html:
.. code-block:: javascript
// Log the new url and timestamp whenever the content frame reports either a
// fresh page load or a url change.
window.addEventListener("message", function(event) {
    // "load" and "replace-url" are two distinct message types, so a single
    // message can only ever match one of them: test with OR, not AND.
    if (event.data.wb_type == "load" || event.data.wb_type == "replace-url") {
        console.log("New Url: " + event.data.url);
        console.log("New Timestamp: " + event.data.ts);
    }
});
The ``load`` message is sent when a new page is first loaded, while ``replace-url`` is used
for url changes caused by content frame History navigation.

10
docs/manual/index.rst Normal file
View File

@ -0,0 +1,10 @@
Architecture
============
.. toctree::
:maxdepth: 2
warcserver
recorder
rewriter

14
docs/manual/intro.rst Normal file
View File

@ -0,0 +1,14 @@
New Features
============
The 2.0 release of :mod:`pywb` is a significant refactoring over previous versions,
and introduces many new features, including:
* WARC Server and API
* WARC Recorder
* Improved replay fidelity
* Dynamic Collections
* Memento Aggregation Chains
* Customizable Rewriting System

4
docs/manual/recorder.rst Normal file
View File

@ -0,0 +1,4 @@
WARC Recorder
=============

4
docs/manual/rewriter.rst Normal file
View File

@ -0,0 +1,4 @@
Rewriter
========

127
docs/manual/usage.rst Normal file
View File

@ -0,0 +1,127 @@
Usage
=====
Getting Started
---------------
At its core, pywb includes a fully featured web archive replay system, sometimes known as 'wayback machine', to provide the ability to replay,
or view, archived web content in the browser.
If you have existing web archive (WARC or legacy ARC) files, here's how to make them accessible using :mod:`pywb`
(If not, see :ref:`creating-warc` for instructions on how to easily create a WARC file right away)
By default, pywb provides a directory-based collection system to run your own web archive directly from archive collections on disk.
Two command line utilities are provided:
* ``wb-manager`` is a command line tool for managing common collection operations.
* ``wayback`` starts a web server that provides the access to web archives.
(For more details, run ``wb-manager -h`` and ``wayback -h``)
For example, to install pywb and create a new collection "my-web-archive" in ``./collections/my-web-archive``:
.. code:: console
pip install pywb
wb-manager init my-web-archive
wb-manager add my-web-archive <path/to/my_warc.warc.gz>
wayback
Point your browser to ``http://localhost:8080/my-web-archive/<url>/`` where ``<url>`` is a url you recorded before into your WARC/ARC file.
If all worked well, you should see your archived version of ``<url>``. Congrats, you are now running your own web archive!
Using Existing Web Archive Collections
--------------------------------------
Existing archives of WARC/ARC files can be used with pywb with a minimal amount of setup. By using ``wb-manager add``,
WARC/ARC files will automatically be placed in the collection archive directory and indexed.
By default, ``wb-manager`` places new collections in the ``collections/<coll name>`` subdirectory of the current working directory. To specify a different root directory, use ``wb-manager -d <dir>``. Other options can be set in the config file.
If you have a large number of existing CDX index files, pywb will be able to read them as well after running through a simple conversion process.
It is recommended that any index files be converted to the latest CDXJ format, which can be done by running:
``wb-manager cdx-convert <path/to/cdx>``
To setup a collection with existing ARC/WARCs and CDX index files, you can:
1. Run ``wb-manager init <coll name>``. This will initialize all the required collection directories.
2. Copy any archive files (WARCs and ARCs) to ``collections/<coll name>/archive/``
3. Copy any existing cdx indexes to ``collections/<coll name>/indexes/``
4. Run ``wb-manager cdx-convert collections/<coll name>/indexes/``. This is strongly recommended, as it will
ensure that any legacy indexes are updated to the latest CDXJ format.
This will fully migrate your archive and indexes into the collection.
Any new WARCs added with ``wb-manager add`` will be indexed and added to the existing collection.
Dynamic Collections and Automatic Indexing
------------------------------------------
Collections created via ``wb-manager init`` are fully dynamic, and new collections can be added without restarting pywb.
When adding WARCs with ``wb-manager add``, the indexes are also updated automatically. No restart is required, and the
content is instantly available for replay.
For more complex use cases, :mod:`pywb` also includes a background indexer that checks the archives directory and automatically
updates the indexes, if any files have changed or were added.
(Of course, indexing will take some time if adding a large amount of data all at once, but is quite useful for smaller archive updates).
To enable auto-indexing, run with ``wayback -a`` or ``wayback -a --auto-interval 30`` to adjust the frequency of auto-indexing (default is 30 seconds).
.. _creating-warc:
Creating a Web Archive
----------------------
Using Webrecorder
^^^^^^^^^^^^^^^^^
If you do not have a web archive to test, one easy way to create one is to use `Webrecorder <https://webrecorder.io>`_.
After recording, you can click ``Stop`` and then click ``Download Collection`` to receive a WARC (``.warc.gz``) file.
You can then use this file with pywb.
Using pywb Recorder
^^^^^^^^^^^^^^^^^^^
The core recording functionality in Webrecorder is also part of :mod:`pywb`. If you want to create a WARC locally, this can be
done by directly recording into your pywb collection:
1. Edit ``config.yaml`` to add ``recorder: live``
2. Create a collection: ``wb-manager init my-web-archive`` (if you haven't already created a web archive collection)
3. Run: ``wayback --live -a --auto-interval 10``
4. Point your browser to ``http://localhost:8080/my-web-archive/record/<url>``
For example, to record ``http://example.com/``, visit ``http://localhost:8080/my-web-archive/record/http://example.com/``
In this configuration, the indexing happens every 10 seconds. After 10 seconds, the recorded url will be accessible for replay, e.g.:
``http://localhost:8080/my-web-archive/http://example.com/``
(Note: this recorder is still experimental)
HTTP/S Proxy Mode Access
------------------------
It is also possible to access any pywb collection via HTTP/S proxy mode, providing possibly better replay
without client-side url rewriting.
At this time, a single collection for proxy mode access can be specified with the ``--proxy`` flag.
For example, ``wayback --proxy my-web-archive`` will start pywb and enable proxy mode access.
You can then configure the browser's proxy settings to use the host and port ``localhost:8080``; loading any url, e.g. ``http://example.com/``, should then
load the latest copy from the ``my-web-archive`` collection.

View File

@ -0,0 +1,11 @@
WARC Server
===========
CDX Server API
--------------
WARC Server API
---------------

View File

@ -136,7 +136,7 @@ class JSLinkRewriterMixin(object):
class JSLocationRewriterMixin(object):
"""
JS Rewriter mixin which rewrites location and domain to the
specified prefix (default: 'WB_wombat_')
specified prefix (default: ``WB_wombat_``)
"""
def __init__(self, rewriter, rules=[], prefix='WB_wombat_'):

View File

@ -9,27 +9,30 @@ with the wayback machine.
There WbUrl may represent one of the following forms:
query form: [/modifier]/[timestamp][-end_timestamp]*/<url>
query form: ``[/modifier]/[timestamp][-end_timestamp]*/<url>``
modifier, timestamp and end_timestamp are optional
modifier, timestamp and end_timestamp are optional::
*/example.com
20101112030201*/http://example.com
2009-2015*/http://example.com
/cdx/*/http://example.com
*/example.com
20101112030201*/http://example.com
2009-2015*/http://example.com
/cdx/*/http://example.com
url query form: used to indicate query across urls
same as query form but with a final *
*/example.com*
20101112030201*/http://example.com*
same as query form but with a final ``*``::
*/example.com*
20101112030201*/http://example.com*
replay form:
20101112030201/http://example.com
20101112030201im_/http://example.com
replay form::
latest_replay: (no timestamp)
http://example.com
20101112030201/http://example.com
20101112030201im_/http://example.com
latest_replay: (no timestamp)::
http://example.com
Additionally, the BaseWbUrl provides the base components
(url, timestamp, end_timestamp, modifier, type) which