diff --git a/setup.py b/setup.py index a93da08..f345120 100755 --- a/setup.py +++ b/setup.py @@ -52,7 +52,7 @@ except: setuptools.setup( name='warcprox', - version='2.4b1.dev139', + version='2.4b1.dev143', description='WARC writing MITM HTTP/S proxy', url='https://github.com/internetarchive/warcprox', author='Noah Levitt', diff --git a/warcprox/warc.py b/warcprox/warc.py index 6b9cbcf..a929a73 100644 --- a/warcprox/warc.py +++ b/warcprox/warc.py @@ -1,23 +1,23 @@ -# -# warcprox/warc.py - assembles warc records -# -# Copyright (C) 2013-2016 Internet Archive -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation; either version 2 -# of the License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, -# USA. -# +''' +warcprox/warc.py - assembles warc records + +Copyright (C) 2013-2018 Internet Archive + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +USA. +''' from __future__ import absolute_import @@ -140,6 +140,13 @@ class WarcRecordBuilder: return record + def _local_address(self): + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + s.connect(('10.255.255.255', 1)) # ip doesn't need to be reachable + output = s.getsockname()[0] + s.close() + return output + def build_warcinfo_record(self, filename): warc_record_date = warctools.warc.warc_datetime_str(datetime.datetime.utcnow()) record_id = warctools.WarcRecord.random_warc_uuid() @@ -154,7 +161,7 @@ class WarcRecordBuilder: warcinfo_fields.append(b'software: warcprox ' + warcprox.__version__.encode('latin1')) hostname = socket.gethostname() warcinfo_fields.append('hostname: {}'.format(hostname).encode('latin1')) - warcinfo_fields.append('ip: {}'.format(socket.gethostbyname(hostname)).encode('latin1')) + warcinfo_fields.append(('ip: %s' % self._local_address()).encode('latin1')) warcinfo_fields.append(b'format: WARC File Format 1.0') # warcinfo_fields.append('robots: ignore') # warcinfo_fields.append('description: {0}'.format(self.description)) diff --git a/warcprox/warcproxy.py b/warcprox/warcproxy.py index 8850821..7ae5ab4 100644 --- a/warcprox/warcproxy.py +++ b/warcprox/warcproxy.py @@ -404,7 +404,8 @@ class SingleThreadedWarcProxy(http_server.HTTPServer, object): ca_name = ('Warcprox CA on %s' % socket.gethostname())[:64] self.ca = CertificateAuthority( - ca_file='warcprox-ca.pem', certs_dir='./warcprox-ca', + ca_file=options.cacert or 'warcprox-ca.pem', + certs_dir=options.certs_dir or './warcprox-ca', ca_name=ca_name) self.recorded_url_q = warcprox.TimestampedQueue( diff --git a/warcprox/writer.py b/warcprox/writer.py index 56ff635..3fd6c7d 100644 --- a/warcprox/writer.py +++ b/warcprox/writer.py @@ -158,7 +158,6 @@ class WarcWriter: with self._lock: writer = self._writer() - recordset_offset = writer.tell() for record in records: offset = writer.tell() diff --git a/warcprox/writerthread.py b/warcprox/writerthread.py index f823cc6..632ea2c 100644 --- a/warcprox/writerthread.py +++ b/warcprox/writerthread.py @@ -89,3 +89,7 @@ class WarcWriterThread(warcprox.BaseStandardPostfetchProcessor): recorded_url.method, recorded_url.url.decode("utf-8"), recorded_url.mimetype, recorded_url.size, payload_digest, type_, filename, offset) + + def _shutdown(self): + self.writer_pool.close_writers() +