From 46dd01de892215ea08a822e3188097b030204f64 Mon Sep 17 00:00:00 2001
From: Barbara Miller
Date: Wed, 14 Feb 2018 15:48:21 -0800
Subject: [PATCH] add do_not_archive check to should_archive

---
Note (review): the new return expression is wrapped in parentheses. A line
that ends in a bare `and` is a Python SyntaxError unless the expression is
enclosed in brackets or the line ends with a backslash. Context-line
indentation below was reconstructed and should be checked against the
target file before applying.

 warcprox/writerthread.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/warcprox/writerthread.py b/warcprox/writerthread.py
index 1010161..27c5eea 100644
--- a/warcprox/writerthread.py
+++ b/warcprox/writerthread.py
@@ -81,8 +81,12 @@ class WarcWriterProcessor(warcprox.BaseStandardPostfetchProcessor):
                   if recorded_url.warcprox_meta
                      and 'warc-prefix' in recorded_url.warcprox_meta
                   else self.options.prefix)
+        do_not_archive = (recorded_url.do_not_archive
+                if recorded_url.do_not_archive
+                else False)
         # special warc name prefix '-' means "don't archive"
-        return prefix != '-' and self._filter_accepts(recorded_url)
+        return (prefix != '-' and (not do_not_archive)
+                and self._filter_accepts(recorded_url))
 
     def _log(self, recorded_url, records):
         try: