diff --git a/setup.py b/setup.py index df8515f..f7457fe 100755 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ except: setuptools.setup( name='warcprox', - version='2.0b2.dev38', + version='2.0b2.dev39', description='WARC writing MITM HTTP/S proxy', url='https://github.com/internetarchive/warcprox', author='Noah Levitt', diff --git a/warcprox/bigtable.py b/warcprox/bigtable.py index 0b3a4e9..bc049f3 100644 --- a/warcprox/bigtable.py +++ b/warcprox/bigtable.py @@ -168,7 +168,12 @@ class RethinkCaptures: "response_code": recorded_url.status, "http_method": recorded_url.method, "bucket": bucket, - "length": records[0].length, + "record_length": records[0].length, # compressed (or not) length of + # warc record including record + # headers + "wire_bytes": recorded_url.size, # count of bytes transferred over + # the wire, including http headers + # if any } if (recorded_url.warcprox_meta and diff --git a/warcprox/writerthread.py b/warcprox/writerthread.py index 92bd416..a255717 100644 --- a/warcprox/writerthread.py +++ b/warcprox/writerthread.py @@ -1,24 +1,24 @@ -# -# warcprox/writerthread.py - warc writer thread, reads from the recorded url -# queue, writes warc records, runs final tasks after warc records are written -# -# Copyright (C) 2013-2016 Internet Archive -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, -# USA. -# +""" +warcprox/writerthread.py - warc writer thread, reads from the recorded url +queue, writes warc records, runs final tasks after warc records are written + +Copyright (C) 2013-2016 Internet Archive + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +USA. +""" from __future__ import absolute_import