From e0732ffaf497c757baedfa7c05b36acc09abc808 Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Mon, 29 Mar 2021 22:22:19 +0000 Subject: [PATCH] Checking for content type header consisting of only empty spaces and removing spaces from exception messages in json section --- warcprox/crawl_log.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/warcprox/crawl_log.py b/warcprox/crawl_log.py index 6c847bb..4e67723 100644 --- a/warcprox/crawl_log.py +++ b/warcprox/crawl_log.py @@ -44,9 +44,9 @@ class CrawlLogger(object): status = self.get_artificial_status(recorded_url) extra_info = {'contentSize': recorded_url.size,} if recorded_url.size is not None and recorded_url.size > 0 else {} if hasattr(recorded_url, 'exception') and recorded_url.exception is not None: - extra_info['exception'] = str(recorded_url.exception) + extra_info['exception'] = str(recorded_url.exception).replace(" ", "_") if(hasattr(recorded_url, 'message') and recorded_url.message is not None): - extra_info['exceptionMessage'] = str(recorded_url.message) + extra_info['exceptionMessage'] = str(recorded_url.message).replace(" ", "_") if records: extra_info['warcFilename'] = records[0].warc_filename extra_info['warcFileOffset'] = records[0].offset @@ -71,7 +71,7 @@ class CrawlLogger(object): recorded_url.url, '-', # hop path recorded_url.referer or '-', - recorded_url.mimetype if recorded_url.mimetype is not None else '-', + recorded_url.mimetype if recorded_url.mimetype is not None and recorded_url.mimetype.strip() else '-', '-', '{:%Y%m%d%H%M%S}{:03d}+{:03d}'.format( recorded_url.timestamp,