diff --git a/tests/test_writer.py b/tests/test_writer.py index 2675393..ab6d9aa 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -166,8 +166,7 @@ def test_special_dont_write_prefix(): recorder.read() old = datetime.utcnow() - timedelta(0, 3600) ru = RecordedUrl( - url='http://example.com/yes', - # content_type=hanzo.httptools.ResponseMessage.CONTENT_TYPE, + url='http://example.com/dup', content_type='text/plain', status=200, client_ip='127.0.0.2', request_data=b'abc', response_recorder=recorder, remote_ip='127.0.0.3', @@ -181,7 +180,7 @@ def test_special_dont_write_prefix(): recorder.read() recent = datetime.utcnow() - timedelta(0, 5) ru = RecordedUrl( - url='http://example.com/yes', content_type='text/plain', + url='http://example.com/dup', content_type='text/plain', status=200, client_ip='127.0.0.2', request_data=b'abc', response_recorder=recorder, remote_ip='127.0.0.3', timestamp=datetime.utcnow(), diff --git a/warcprox/writerthread.py b/warcprox/writerthread.py index 83f4485..5eef44f 100644 --- a/warcprox/writerthread.py +++ b/warcprox/writerthread.py @@ -114,10 +114,6 @@ class WarcWriterProcessor(warcprox.BaseStandardPostfetchProcessor): if recorded_url.warcprox_meta and 'warc-prefix' in recorded_url.warcprox_meta else self.options.prefix) - res = (prefix != '-' and not recorded_url.do_not_archive - and self._filter_accepts(recorded_url) - and not self._in_blackout(recorded_url)) - # special warc name prefix '-' means "don't archive" return (prefix != '-' and not recorded_url.do_not_archive and self._filter_accepts(recorded_url) @@ -132,7 +128,7 @@ class WarcWriterProcessor(warcprox.BaseStandardPostfetchProcessor): if self.blackout_period and hasattr(recorded_url, "dedup_info") and \ recorded_url.dedup_info: dedup_date = recorded_url.dedup_info.get('date') - if dedup_date: + if dedup_date and recorded_url.dedup_info.get('url') == recorded_url.url: try: dt = datetime.strptime(dedup_date.decode('utf-8'), '%Y-%m-%dT%H:%M:%SZ')