From efc51a43617eb791e9829dd5ecff00e8bbf6a946 Mon Sep 17 00:00:00 2001
From: Noah Levitt <nlevitt@archive.org>
Date: Tue, 22 May 2018 11:59:06 -0700
Subject: [PATCH] stubby api docs

---
 api.rst    | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 readme.rst |  23 ++++++++---
 2 files changed, 127 insertions(+), 6 deletions(-)
 create mode 100644 api.rst

diff --git a/api.rst b/api.rst
new file mode 100644
index 0000000..87b444f
--- /dev/null
+++ b/api.rst
@@ -0,0 +1,110 @@
+warcprox API
+************
+
+Means of interacting with warcprox over HTTP, aside from simply proxying URLs.
+
+`/status` URL
+=============
+
+If warcprox is running at localhost:8000, http://localhost:8000/status returns
+a JSON object containing status information. For example:
+
+::
+
+    $ curl -sS http://localhost:8000/status
+    {
+      "rates_5min": {
+        "warc_bytes_per_sec": 0.0,
+        "urls_per_sec": 0.0,
+        "actual_elapsed": 277.2983281612396
+      },
+      "version": "2.4b2.dev174",
+      "load": 0.0,
+      "seconds_behind": 0.0,
+      "threads": 100,
+      "warc_bytes_written": 0,
+      "port": 8000,
+      "postfetch_chain": [
+        {
+          "queued_urls": 0,
+          "processor": "SkipFacebookCaptchas"
+        },
+        {
+          "queued_urls": 0,
+          "processor": "BatchTroughLoader"
+        },
+        {
+          "queued_urls": 0,
+          "processor": "WarcWriterProcessor"
+        },
+        {
+          "queued_urls": 0,
+          "processor": "BatchTroughStorer"
+        },
+        {
+          "queued_urls": 0,
+          "processor": "RethinkStatsProcessor"
+        },
+        {
+          "queued_urls": 0,
+          "processor": "CrawlLogger"
+        },
+        {
+          "queued_urls": 0,
+          "processor": "TroughFeed"
+        },
+        {
+          "queued_urls": 0,
+          "processor": "RunningStats"
+        }
+      ],
+      "queue_max_size": 500,
+      "role": "warcprox",
+      "queued_urls": 0,
+      "active_requests": 1,
+      "host": "wbgrp-svc405.us.archive.org",
+      "rates_15min": {
+        "warc_bytes_per_sec": 0.0,
+        "urls_per_sec": 0.0,
+        "actual_elapsed": 876.9885368347168
+      },
+      "unaccepted_requests": 0,
+      "urls_processed": 0,
+      "pid": 18841,
+      "address": "127.0.0.1",
+      "rates_1min": {
+        "warc_bytes_per_sec": 0.0,
+        "urls_per_sec": 0.0,
+        "actual_elapsed": 54.92501664161682
+      },
+      "start_time": 1526690353.4060142
+    }
+
+`WARCPROX_WRITE_RECORD` HTTP method
+===================================
+
+Example request and response::
+
+    $ echo -ne 'WARCPROX_WRITE_RECORD special://url/some?thing HTTP/1.1\r\nWARC-Type: resource\r\ncontent-type: text/plain;charset=utf-8\r\ncontent-length: 29\r\n\r\ni am a warc record payload!\r\n' | ncat 127.0.0.1 8000
+    HTTP/1.0 204 OK
+    Server: BaseHTTP/0.6 Python/3.6.3
+    Date: Mon, 21 May 2018 23:33:31 GMT
+
+The resulting WARC record::
+
+    WARC/1.0
+    WARC-Type: resource
+    WARC-Record-ID: <urn:uuid:d0e10852-b18c-4037-a99e-f41915fec5b5>
+    WARC-Date: 2018-05-21T23:33:31Z
+    WARC-Target-URI: special://url/some?thing
+    WARC-Block-Digest: sha1:a282cfe127ab8d51b315ff3d31de18614979d0df
+    WARC-Payload-Digest: sha1:a282cfe127ab8d51b315ff3d31de18614979d0df
+    Content-Type: text/plain;charset=utf-8
+    Content-Length: 29
+
+    i am a warc record payload!
+
+
+`Warcprox-Meta` HTTP request header
+===================================
+
diff --git a/readme.rst b/readme.rst
index 113099b..090130e 100644
--- a/readme.rst
+++ b/readme.rst
@@ -1,5 +1,5 @@
 warcprox - WARC writing MITM HTTP/S proxy
------------------------------------------
+*****************************************
 .. image:: https://travis-ci.org/internetarchive/warcprox.svg?branch=master
     :target: https://travis-ci.org/internetarchive/warcprox
 
@@ -7,7 +7,7 @@ Based on the excellent and simple pymiproxy by Nadeem Douba.
 https://github.com/allfro/pymiproxy
 
 Install
-~~~~~~~
+=======
 
 Warcprox runs on python 3.4+.
 
@@ -26,7 +26,7 @@ You can also install the latest bleeding edge code:
 
 
 Trusting the CA cert
-~~~~~~~~~~~~~~~~~~~~
+====================
 
 For best results while browsing through warcprox, you need to add the CA
 cert as a trusted cert in your browser. If you don't do that, you will
@@ -34,8 +34,19 @@ get the warning when you visit each new site. But worse, any embedded
 https content on a different server will simply fail to load, because
 the browser will reject the certificate without telling you.
 
+API
+===
+
+Warcprox provides an API for interacting with a running instance:
+
+* `/status` URL
+* `WARCPROX_WRITE_RECORD` HTTP method
+* `Warcprox-Meta` HTTP request header
+
+See `<api.rst>`_.
+
 Plugins
-~~~~~~~
+=======
 
 Warcprox supports a limited notion of plugins by way of the `--plugin` command
 line argument. Plugin classes are loaded from the regular python module search
@@ -49,7 +60,7 @@ specifying `--plugin` multiples times.
 `A minimal example <https://github.com/internetarchive/warcprox/blob/318405e795ac0ab8760988a1a482cf0a17697148/warcprox/__init__.py#L165>`__
 
 Usage
-~~~~~
+=====
 
 ::
 
@@ -162,7 +173,7 @@ Usage
       -q, --quiet
 
 License
-~~~~~~~
+=======
 
 Warcprox is a derivative work of pymiproxy, which is GPL. Thus warcprox is also
 GPL.