mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
shorten warc filename template
This commit is contained in:
parent
0ce8022ea9
commit
d3df48b97e
@ -9,6 +9,8 @@ import time
|
|||||||
import warcprox
|
import warcprox
|
||||||
import os
|
import os
|
||||||
import socket
|
import socket
|
||||||
|
import string
|
||||||
|
import random
|
||||||
|
|
||||||
class WarcWriter:
|
class WarcWriter:
|
||||||
logger = logging.getLogger("warcprox.writer.WarcWriter")
|
logger = logging.getLogger("warcprox.writer.WarcWriter")
|
||||||
@ -35,6 +37,8 @@ class WarcWriter:
|
|||||||
self._f_finalname = None
|
self._f_finalname = None
|
||||||
self._serial = 0
|
self._serial = 0
|
||||||
|
|
||||||
|
self._randomtoken = "".join(random.Random().sample(string.digits + string.ascii_lowercase, 8))
|
||||||
|
|
||||||
if not os.path.exists(directory):
|
if not os.path.exists(directory):
|
||||||
self.logger.info("warc destination directory {} doesn't exist, creating it".format(directory))
|
self.logger.info("warc destination directory {} doesn't exist, creating it".format(directory))
|
||||||
os.mkdir(directory)
|
os.mkdir(directory)
|
||||||
@ -53,15 +57,15 @@ class WarcWriter:
|
|||||||
self._fpath = None
|
self._fpath = None
|
||||||
self._f = None
|
self._f = None
|
||||||
|
|
||||||
# <!-- <property name="template" value="${prefix}-${timestamp17}-${serialno}-${heritrix.pid}~${heritrix.hostname}~${heritrix.port}" /> -->
|
# h3 default <!-- <property name="template" value="${prefix}-${timestamp17}-${serialno}-${heritrix.pid}~${heritrix.hostname}~${heritrix.port}" /> -->
|
||||||
|
# ${prefix}-${timestamp17}-${serialno}-${randomtoken}.warc[.gz]
|
||||||
def _writer(self):
|
def _writer(self):
|
||||||
if self._fpath and os.path.getsize(self._fpath) > self.rollover_size:
|
if self._fpath and os.path.getsize(self._fpath) > self.rollover_size:
|
||||||
self.close_writer()
|
self.close_writer()
|
||||||
|
|
||||||
if self._f == None:
|
if self._f == None:
|
||||||
self._f_finalname = '{}-{}-{:05d}-{}-{}-{}.warc{}'.format(
|
self._f_finalname = '{}-{}-{:05d}-{}.warc{}'.format(
|
||||||
self.prefix, self.timestamp17(), self._serial, os.getpid(),
|
self.prefix, self.timestamp17(), self._serial, self._randomtoken, '.gz' if self.gzip else '')
|
||||||
socket.gethostname(), self.port, '.gz' if self.gzip else '')
|
|
||||||
self._fpath = os.path.sep.join([self.directory, self._f_finalname + '.open'])
|
self._fpath = os.path.sep.join([self.directory, self._f_finalname + '.open'])
|
||||||
|
|
||||||
self._f = open(self._fpath, 'wb')
|
self._f = open(self._fpath, 'wb')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user