mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Apply blackout only when dedup URL equals request URL
This commit is contained in:
parent
2c2c1d008a
commit
6b1d60c390
@ -166,8 +166,7 @@ def test_special_dont_write_prefix():
|
||||
recorder.read()
|
||||
old = datetime.utcnow() - timedelta(0, 3600)
|
||||
ru = RecordedUrl(
|
||||
url='http://example.com/yes',
|
||||
# content_type=hanzo.httptools.ResponseMessage.CONTENT_TYPE,
|
||||
url='http://example.com/dup',
|
||||
content_type='text/plain',
|
||||
status=200, client_ip='127.0.0.2', request_data=b'abc',
|
||||
response_recorder=recorder, remote_ip='127.0.0.3',
|
||||
@ -181,7 +180,7 @@ def test_special_dont_write_prefix():
|
||||
recorder.read()
|
||||
recent = datetime.utcnow() - timedelta(0, 5)
|
||||
ru = RecordedUrl(
|
||||
url='http://example.com/yes', content_type='text/plain',
|
||||
url='http://example.com/dup', content_type='text/plain',
|
||||
status=200, client_ip='127.0.0.2', request_data=b'abc',
|
||||
response_recorder=recorder, remote_ip='127.0.0.3',
|
||||
timestamp=datetime.utcnow(),
|
||||
|
@ -114,10 +114,6 @@ class WarcWriterProcessor(warcprox.BaseStandardPostfetchProcessor):
|
||||
if recorded_url.warcprox_meta
|
||||
and 'warc-prefix' in recorded_url.warcprox_meta
|
||||
else self.options.prefix)
|
||||
res = (prefix != '-' and not recorded_url.do_not_archive
|
||||
and self._filter_accepts(recorded_url)
|
||||
and not self._in_blackout(recorded_url))
|
||||
|
||||
# special warc name prefix '-' means "don't archive"
|
||||
return (prefix != '-' and not recorded_url.do_not_archive
|
||||
and self._filter_accepts(recorded_url)
|
||||
@ -132,7 +128,7 @@ class WarcWriterProcessor(warcprox.BaseStandardPostfetchProcessor):
|
||||
if self.blackout_period and hasattr(recorded_url, "dedup_info") and \
|
||||
recorded_url.dedup_info:
|
||||
dedup_date = recorded_url.dedup_info.get('date')
|
||||
if dedup_date:
|
||||
if dedup_date and recorded_url.dedup_info.get('url') == recorded_url.url:
|
||||
try:
|
||||
dt = datetime.strptime(dedup_date.decode('utf-8'),
|
||||
'%Y-%m-%dT%H:%M:%SZ')
|
||||
|
Loading…
x
Reference in New Issue
Block a user