Record two different measures of size in the big captures table: record_length and wire_bytes

This commit is contained in:
Noah Levitt 2016-11-21 15:17:50 -08:00
parent 2918a73a3b
commit d31cae2d51
3 changed files with 28 additions and 23 deletions

View File

@@ -51,7 +51,7 @@ except:
setuptools.setup(
name='warcprox',
version='2.0b2.dev38',
version='2.0b2.dev39',
description='WARC writing MITM HTTP/S proxy',
url='https://github.com/internetarchive/warcprox',
author='Noah Levitt',

View File

@@ -168,7 +168,12 @@ class RethinkCaptures:
"response_code": recorded_url.status,
"http_method": recorded_url.method,
"bucket": bucket,
"length": records[0].length,
"record_length": records[0].length, # compressed (or not) length of
# warc record including record
# headers
"wire_bytes": recorded_url.size, # count of bytes transferred over
# the wire, including http headers
# if any
}
if (recorded_url.warcprox_meta and

View File

@@ -1,24 +1,24 @@
#
# warcprox/writerthread.py - warc writer thread, reads from the recorded url
# queue, writes warc records, runs final tasks after warc records are written
#
# Copyright (C) 2013-2016 Internet Archive
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
# USA.
#
"""
warcprox/writerthread.py - warc writer thread, reads from the recorded url
queue, writes warc records, runs final tasks after warc records are written

Copyright (C) 2013-2016 Internet Archive

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
USA.
"""
from __future__ import absolute_import