Check for a content type header consisting only of whitespace, and replace spaces with underscores in exception messages in the JSON section

This commit is contained in:
Adam Miller 2021-03-29 22:22:19 +00:00
parent b8057825d8
commit e0732ffaf4

View File

@ -44,9 +44,9 @@ class CrawlLogger(object):
status = self.get_artificial_status(recorded_url) status = self.get_artificial_status(recorded_url)
extra_info = {'contentSize': recorded_url.size,} if recorded_url.size is not None and recorded_url.size > 0 else {} extra_info = {'contentSize': recorded_url.size,} if recorded_url.size is not None and recorded_url.size > 0 else {}
if hasattr(recorded_url, 'exception') and recorded_url.exception is not None: if hasattr(recorded_url, 'exception') and recorded_url.exception is not None:
extra_info['exception'] = str(recorded_url.exception) extra_info['exception'] = str(recorded_url.exception).replace(" ", "_")
if(hasattr(recorded_url, 'message') and recorded_url.message is not None): if(hasattr(recorded_url, 'message') and recorded_url.message is not None):
extra_info['exceptionMessage'] = str(recorded_url.message) extra_info['exceptionMessage'] = str(recorded_url.message).replace(" ", "_")
if records: if records:
extra_info['warcFilename'] = records[0].warc_filename extra_info['warcFilename'] = records[0].warc_filename
extra_info['warcFileOffset'] = records[0].offset extra_info['warcFileOffset'] = records[0].offset
@ -71,7 +71,7 @@ class CrawlLogger(object):
recorded_url.url, recorded_url.url,
'-', # hop path '-', # hop path
recorded_url.referer or '-', recorded_url.referer or '-',
recorded_url.mimetype if recorded_url.mimetype is not None else '-', recorded_url.mimetype if recorded_url.mimetype is not None and recorded_url.mimetype.strip() else '-',
'-', '-',
'{:%Y%m%d%H%M%S}{:03d}+{:03d}'.format( '{:%Y%m%d%H%M%S}{:03d}+{:03d}'.format(
recorded_url.timestamp, recorded_url.timestamp,