mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
write WARC-IP-Address header on response record
This commit is contained in:
parent
980ba13d10
commit
7367620dae
21
warcprox.py
21
warcprox.py
@ -310,11 +310,13 @@ class WarcProxyHandler(BaseHTTPServer.BaseHTTPRequestHandler):
|
|||||||
while buf != '':
|
while buf != '':
|
||||||
buf = h.read(8192)
|
buf = h.read(8192)
|
||||||
|
|
||||||
|
remote_ip = self._proxy_sock.getpeername()[0]
|
||||||
|
|
||||||
# Let's close off the remote end
|
# Let's close off the remote end
|
||||||
h.close()
|
h.close()
|
||||||
self._proxy_sock.close()
|
self._proxy_sock.close()
|
||||||
|
|
||||||
self.server.recordset_q.create_and_queue(self.url, req, h.recorder)
|
self.server.recordset_q.create_and_queue(self.url, req, h.recorder, remote_ip)
|
||||||
|
|
||||||
|
|
||||||
def __getattr__(self, item):
|
def __getattr__(self, item):
|
||||||
@ -352,13 +354,14 @@ class WarcProxy(SocketServer.ThreadingMixIn, BaseHTTPServer.HTTPServer):
|
|||||||
# consecutively in the same warc.
|
# consecutively in the same warc.
|
||||||
class WarcRecordsetQueue(Queue.Queue):
|
class WarcRecordsetQueue(Queue.Queue):
|
||||||
|
|
||||||
def create_and_queue(self, url, request_data, response_recorder):
|
def create_and_queue(self, url, request_data, response_recorder, remote_ip):
|
||||||
warc_date = warctools.warc.warc_datetime_str(datetime.now())
|
warc_date = warctools.warc.warc_datetime_str(datetime.now())
|
||||||
|
|
||||||
response_record, response_record_id = self.make_record(url=url,
|
response_record, response_record_id = self.make_record(url=url,
|
||||||
warc_date=warc_date, recorder=response_recorder,
|
warc_date=warc_date, recorder=response_recorder,
|
||||||
warc_type=warctools.WarcRecord.RESPONSE,
|
warc_type=warctools.WarcRecord.RESPONSE,
|
||||||
content_type="application/http;msgtype=response")
|
content_type="application/http;msgtype=response",
|
||||||
|
remote_ip=remote_ip)
|
||||||
|
|
||||||
request_record, request_record_id = self.make_record(url=url,
|
request_record, request_record_id = self.make_record(url=url,
|
||||||
warc_date=warc_date, data=request_data,
|
warc_date=warc_date, data=request_data,
|
||||||
@ -372,7 +375,7 @@ class WarcRecordsetQueue(Queue.Queue):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def make_record(url, warc_date=None, recorder=None, data=None,
|
def make_record(url, warc_date=None, recorder=None, data=None,
|
||||||
concurrent_to=None, warc_type=None, content_type=None):
|
concurrent_to=None, warc_type=None, content_type=None, remote_ip=None):
|
||||||
|
|
||||||
if warc_date is None:
|
if warc_date is None:
|
||||||
warc_date = warctools.warc.warc_datetime_str(datetime.now())
|
warc_date = warctools.warc.warc_datetime_str(datetime.now())
|
||||||
@ -380,12 +383,13 @@ class WarcRecordsetQueue(Queue.Queue):
|
|||||||
record_id = warctools.WarcRecord.random_warc_uuid()
|
record_id = warctools.WarcRecord.random_warc_uuid()
|
||||||
|
|
||||||
headers = []
|
headers = []
|
||||||
|
if warc_type is not None:
|
||||||
|
headers.append((warctools.WarcRecord.TYPE, warc_type))
|
||||||
headers.append((warctools.WarcRecord.ID, record_id))
|
headers.append((warctools.WarcRecord.ID, record_id))
|
||||||
headers.append((warctools.WarcRecord.DATE, warc_date))
|
headers.append((warctools.WarcRecord.DATE, warc_date))
|
||||||
headers.append((warctools.WarcRecord.URL, url))
|
headers.append((warctools.WarcRecord.URL, url))
|
||||||
# headers.append((warctools.WarcRecord.IP_ADDRESS, ip))
|
if remote_ip is not None:
|
||||||
if warc_type is not None:
|
headers.append((warctools.WarcRecord.IP_ADDRESS, remote_ip))
|
||||||
headers.append((warctools.WarcRecord.TYPE, warc_type))
|
|
||||||
if concurrent_to is not None:
|
if concurrent_to is not None:
|
||||||
headers.append((warctools.WarcRecord.CONCURRENT_TO, concurrent_to))
|
headers.append((warctools.WarcRecord.CONCURRENT_TO, concurrent_to))
|
||||||
if content_type is not None:
|
if content_type is not None:
|
||||||
@ -457,7 +461,6 @@ class WarcWriterThread(threading.Thread):
|
|||||||
headers.append((warctools.WarcRecord.TYPE, warctools.WarcRecord.WARCINFO))
|
headers.append((warctools.WarcRecord.TYPE, warctools.WarcRecord.WARCINFO))
|
||||||
headers.append((warctools.WarcRecord.FILENAME, filename))
|
headers.append((warctools.WarcRecord.FILENAME, filename))
|
||||||
headers.append((warctools.WarcRecord.DATE, warc_record_date))
|
headers.append((warctools.WarcRecord.DATE, warc_record_date))
|
||||||
# headers.append((warctools.WarcRecord.IP_ADDRESS, ip))
|
|
||||||
|
|
||||||
warcinfo_fields = []
|
warcinfo_fields = []
|
||||||
warcinfo_fields.append('software: warcprox.py https://github.com/nlevitt/warcprox')
|
warcinfo_fields.append('software: warcprox.py https://github.com/nlevitt/warcprox')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user