mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Merge branch 'master' into qa
* master: bump version number; make sure we always format WARC-Date properly; bump version after merge
This commit is contained in:
commit
4dd789aa78
2
setup.py
2
setup.py
@@ -40,7 +40,7 @@ except:
|
|||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name='warcprox',
|
name='warcprox',
|
||||||
version='2.4b3.dev188',
|
version='2.4b3.dev190',
|
||||||
description='WARC writing MITM HTTP/S proxy',
|
description='WARC writing MITM HTTP/S proxy',
|
||||||
url='https://github.com/internetarchive/warcprox',
|
url='https://github.com/internetarchive/warcprox',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
@@ -34,6 +34,9 @@ class WarcRecordBuilder:
|
|||||||
self.digest_algorithm = digest_algorithm
|
self.digest_algorithm = digest_algorithm
|
||||||
self.base32 = base32
|
self.base32 = base32
|
||||||
|
|
||||||
|
def format_warc_date(self, dt):
|
||||||
|
return dt.strftime('%Y-%m-%dT%H:%M:%SZ').encode('ascii')
|
||||||
|
|
||||||
def _build_response_principal_record(self, recorded_url, warc_date):
|
def _build_response_principal_record(self, recorded_url, warc_date):
|
||||||
"""Builds response or revisit record, whichever is appropriate."""
|
"""Builds response or revisit record, whichever is appropriate."""
|
||||||
if hasattr(recorded_url, "dedup_info") and recorded_url.dedup_info:
|
if hasattr(recorded_url, "dedup_info") and recorded_url.dedup_info:
|
||||||
@@ -70,7 +73,7 @@ class WarcRecordBuilder:
|
|||||||
|
|
||||||
def build_warc_records(self, recorded_url):
|
def build_warc_records(self, recorded_url):
|
||||||
"""Returns a tuple of hanzo.warctools.warc.WarcRecord (principal_record, ...)"""
|
"""Returns a tuple of hanzo.warctools.warc.WarcRecord (principal_record, ...)"""
|
||||||
warc_date = warctools.warc.warc_datetime_str(recorded_url.timestamp)
|
warc_date = self.format_warc_date(recorded_url.timestamp)
|
||||||
|
|
||||||
if recorded_url.response_recorder:
|
if recorded_url.response_recorder:
|
||||||
principal_record = self._build_response_principal_record(recorded_url, warc_date)
|
principal_record = self._build_response_principal_record(recorded_url, warc_date)
|
||||||
@@ -98,7 +101,7 @@ class WarcRecordBuilder:
|
|||||||
content_length=None):
|
content_length=None):
|
||||||
|
|
||||||
if warc_date is None:
|
if warc_date is None:
|
||||||
warc_date = warctools.warc.warc_datetime_str(datetime.datetime.utcnow())
|
warc_date = self.format_warc_date(datetime.datetime.utcnow())
|
||||||
|
|
||||||
record_id = warctools.WarcRecord.random_warc_uuid()
|
record_id = warctools.WarcRecord.random_warc_uuid()
|
||||||
|
|
||||||
@@ -175,7 +178,7 @@ class WarcRecordBuilder:
|
|||||||
return output
|
return output
|
||||||
|
|
||||||
def build_warcinfo_record(self, filename):
|
def build_warcinfo_record(self, filename):
|
||||||
warc_record_date = warctools.warc.warc_datetime_str(datetime.datetime.utcnow())
|
warc_record_date = self.format_warc_date(datetime.datetime.utcnow())
|
||||||
record_id = warctools.WarcRecord.random_warc_uuid()
|
record_id = warctools.WarcRecord.random_warc_uuid()
|
||||||
|
|
||||||
headers = []
|
headers = []
|
||||||
|
Loading…
x
Reference in New Issue
Block a user