mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
fix bugs, improve logging of each warc record
This commit is contained in:
parent
568df5360d
commit
e01691c1f2
13
warcprox.py
13
warcprox.py
@ -225,8 +225,8 @@ class WarcProxyHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
|||||||
|
|
||||||
|
|
||||||
def _transition_to_ssl(self):
|
def _transition_to_ssl(self):
|
||||||
self.connection = ssl.wrap_socket(self.connection, server_side=True,
|
self.request = self.connection = ssl.wrap_socket(self.connection,
|
||||||
certfile=self.server.ca[self.hostname])
|
server_side=True, certfile=self.server.ca[self.hostname])
|
||||||
|
|
||||||
|
|
||||||
def do_CONNECT(self):
|
def do_CONNECT(self):
|
||||||
@ -279,7 +279,7 @@ class WarcProxyHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
|||||||
self.send_error(500, str(e))
|
self.send_error(500, str(e))
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
self.url = _construct_tunneled_url()
|
self.url = self._construct_tunneled_url()
|
||||||
|
|
||||||
# Build request
|
# Build request
|
||||||
req = '%s %s %s\r\n' % (self.command, self.path, self.request_version)
|
req = '%s %s %s\r\n' % (self.command, self.path, self.request_version)
|
||||||
@ -506,8 +506,13 @@ class WarcWriterThread(threading.Thread):
|
|||||||
writer = self._writer()
|
writer = self._writer()
|
||||||
|
|
||||||
for record in recordset:
|
for record in recordset:
|
||||||
|
offset = writer.tell()
|
||||||
record.write_to(writer, gzip=self.gzip)
|
record.write_to(writer, gzip=self.gzip)
|
||||||
logging.info('wrote warc record {}'.format(record))
|
logging.info('wrote warc record: warc_type={} content_length={} url={} warc={} offset={}'.format(
|
||||||
|
record.get_header(warctools.WarcRecord.TYPE),
|
||||||
|
record.get_header(warctools.WarcRecord.CONTENT_LENGTH),
|
||||||
|
record.get_header(warctools.WarcRecord.URL),
|
||||||
|
self._fpath, offset))
|
||||||
|
|
||||||
if record.content_file:
|
if record.content_file:
|
||||||
# XXX now we know we're done with this... messy to
|
# XXX now we know we're done with this... messy to
|
||||||
|
Loading…
x
Reference in New Issue
Block a user