Python 3.5-compatible version, mostly (f-strings replaced with str.format())

This commit is contained in:
Barbara Miller 2021-12-02 11:46:30 -08:00
parent 1476bfec8c
commit e744075913

View File

@@ -418,8 +418,8 @@ class BatchTroughLoader(warcprox.BaseBatchPostfetchProcessor):
if (recorded_url.response_recorder if (recorded_url.response_recorder
and recorded_url.payload_digest and recorded_url.payload_digest
and self.trough_dedup_db.should_dedup(recorded_url) and self.trough_dedup_db.should_dedup(recorded_url)
and f'{recorded_url.payload_digest}{recorded_url.url}' not in hash_plus_urls): and '{}{}'.format(recorded_url.payload_digest, recorded_url.url) not in hash_plus_urls):
hash_plus_urls.add(f'{recorded_url.payload_digest}{recorded_url.url}') hash_plus_urls.add('{}{}'.format(recorded_url.payload_digest, recorded_url.url))
if (recorded_url.warcprox_meta if (recorded_url.warcprox_meta
and 'dedup-buckets' in recorded_url.warcprox_meta): and 'dedup-buckets' in recorded_url.warcprox_meta):
for bucket, bucket_mode in recorded_url.warcprox_meta["dedup-buckets"].items(): for bucket, bucket_mode in recorded_url.warcprox_meta["dedup-buckets"].items():
@@ -427,9 +427,9 @@ class BatchTroughLoader(warcprox.BaseBatchPostfetchProcessor):
else: else:
buckets['__unspecified__'].append(recorded_url) buckets['__unspecified__'].append(recorded_url)
else: else:
if f'{recorded_url.payload_digest}{recorded_url.url}' in hash_plus_urls: if recorded_url.payload_digest and '{}{}'.format(recorded_url.payload_digest, recorded_url.url) in hash_plus_urls:
self.logger.debug( self.logger.debug(
f'discarding duplicate {recorded_url.payload_digest} {recorded_url.url}') 'discarding duplicate {} {}'.format(recorded_url.payload_digest, recorded_url.url))
discards.append( discards.append(
warcprox.digest_str( warcprox.digest_str(
recorded_url.payload_digest, self.options.base32) recorded_url.payload_digest, self.options.base32)