be clear about timezone in timestamps

This commit is contained in:
Noah Levitt 2018-10-30 13:17:33 -07:00
parent f082db62cf
commit dbf868a74d
3 changed files with 45 additions and 56 deletions

96
api.rst
View File

@ -15,72 +15,60 @@ a json blob with a bunch of status info. For example:
$ curl -sS http://localhost:8000/status
{
"rates_5min": {
"warc_bytes_per_sec": 0.0,
"urls_per_sec": 0.0,
"actual_elapsed": 277.2983281612396
},
"version": "2.4b2.dev174",
"load": 0.0,
"seconds_behind": 0.0,
"threads": 100,
"warc_bytes_written": 0,
"role": "warcprox",
"version": "2.4b3.dev189",
"host": "ayutla.local",
"address": "127.0.0.1",
"port": 8000,
"pid": 60555,
"threads": 100,
"active_requests": 1,
"unaccepted_requests": 0,
"load": 0.0,
"queued_urls": 0,
"queue_max_size": 500,
"urls_processed": 0,
"warc_bytes_written": 0,
"start_time": "2018-10-30T20:15:19.929861Z",
"rates_1min": {
"actual_elapsed": 61.76024103164673,
"urls_per_sec": 0.0,
"warc_bytes_per_sec": 0.0
},
"rates_5min": {
"actual_elapsed": 1.7602601051330566,
"urls_per_sec": 0.0,
"warc_bytes_per_sec": 0.0
},
"rates_15min": {
"actual_elapsed": 1.7602710723876953,
"urls_per_sec": 0.0,
"warc_bytes_per_sec": 0.0
},
"earliest_still_active_fetch_start": "2018-10-30T20:15:21.691467Z",
"seconds_behind": 0.001758,
"postfetch_chain": [
{
"queued_urls": 0,
"processor": "SkipFacebookCaptchas"
"processor": "DedupLoader",
"queued_urls": 0
},
{
"queued_urls": 0,
"processor": "BatchTroughLoader"
"processor": "WarcWriterProcessor",
"queued_urls": 0
},
{
"queued_urls": 0,
"processor": "WarcWriterProcessor"
"processor": "DedupDb",
"queued_urls": 0
},
{
"queued_urls": 0,
"processor": "BatchTroughStorer"
"processor": "StatsProcessor",
"queued_urls": 0
},
{
"queued_urls": 0,
"processor": "RethinkStatsProcessor"
},
{
"queued_urls": 0,
"processor": "CrawlLogger"
},
{
"queued_urls": 0,
"processor": "TroughFeed"
},
{
"queued_urls": 0,
"processor": "RunningStats"
"processor": "RunningStats",
"queued_urls": 0
}
],
"queue_max_size": 500,
"role": "warcprox",
"queued_urls": 0,
"active_requests": 1,
"host": "wbgrp-svc405.us.archive.org",
"rates_15min": {
"warc_bytes_per_sec": 0.0,
"urls_per_sec": 0.0,
"actual_elapsed": 876.9885368347168
},
"unaccepted_requests": 0,
"urls_processed": 0,
"pid": 18841,
"address": "127.0.0.1",
"rates_1min": {
"warc_bytes_per_sec": 0.0,
"urls_per_sec": 0.0,
"actual_elapsed": 54.92501664161682
},
"start_time": 1526690353.4060142
}
]
``WARCPROX_WRITE_RECORD`` http method
=====================================

View File

@ -51,7 +51,7 @@ class Options(_Namespace):
class Jsonner(json.JSONEncoder):
def default(self, o):
if isinstance(o, datetime.datetime):
return o.isoformat()
return o.isoformat() + 'Z' # we use only utc
elif isinstance(o, bytes):
return base64.b64encode(o).decode('ascii')
else:

View File

@ -425,6 +425,7 @@ class SingleThreadedWarcProxy(http_server.HTTPServer, object):
def __init__(
self, stats_db=None, status_callback=None,
options=warcprox.Options()):
self.start_time = datetime.datetime.utcnow()
self.status_callback = status_callback
self.stats_db = stats_db
self.options = options
@ -477,7 +478,7 @@ class SingleThreadedWarcProxy(http_server.HTTPServer, object):
'queue_max_size': self.recorded_url_q.maxsize,
'urls_processed': self.running_stats.urls,
'warc_bytes_written': self.running_stats.warc_bytes,
'start_time': self.running_stats.first_snap_time,
'start_time': self.start_time,
})
elapsed, urls_per_sec, warc_bytes_per_sec = self.running_stats.current_rates(1)
result['rates_1min'] = {