mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
writing: add write_stream_to_file() function to be able to write an existing input stream to a WARC
refactor _do_write_req_resp to pass a callback that does the actual writing to a shared helper (e.g. _write_to_file)
This commit is contained in:
parent
1b09015954
commit
db3b92e228
@ -7,6 +7,7 @@ import zlib
|
|||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import six
|
import six
|
||||||
|
import shutil
|
||||||
|
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
@ -410,6 +411,26 @@ class MultiFileWARCWriter(BaseWARCWriter):
|
|||||||
self._do_write_req_resp(None, record, params)
|
self._do_write_req_resp(None, record, params)
|
||||||
|
|
||||||
def _do_write_req_resp(self, req, resp, params):
|
def _do_write_req_resp(self, req, resp, params):
|
||||||
|
def write_callback(out, filename):
|
||||||
|
url = resp.rec_headers.get('WARC-Target-URI')
|
||||||
|
print('Writing req/resp {0} to {1} '.format(url, filename))
|
||||||
|
|
||||||
|
if resp and self._is_write_resp(resp, params):
|
||||||
|
self._write_warc_record(out, resp)
|
||||||
|
|
||||||
|
if req and self._is_write_req(req, params):
|
||||||
|
self._write_warc_record(out, req)
|
||||||
|
|
||||||
|
return self._write_to_file(params, write_callback)
|
||||||
|
|
||||||
|
def write_stream_to_file(self, params, stream):
|
||||||
|
def write_callback(out, filename):
|
||||||
|
print('Writing stream to {0}'.format(filename))
|
||||||
|
shutil.copyfileobj(stream, out)
|
||||||
|
|
||||||
|
return self._write_to_file(params, write_callback)
|
||||||
|
|
||||||
|
def _write_to_file(self, params, write_callback):
|
||||||
full_dir = res_template(self.dir_template, params)
|
full_dir = res_template(self.dir_template, params)
|
||||||
dir_key = self.get_dir_key(params)
|
dir_key = self.get_dir_key(params)
|
||||||
|
|
||||||
@ -424,23 +445,16 @@ class MultiFileWARCWriter(BaseWARCWriter):
|
|||||||
filename = self.get_new_filename(full_dir, params)
|
filename = self.get_new_filename(full_dir, params)
|
||||||
|
|
||||||
if not self.allow_new_file(filename, params):
|
if not self.allow_new_file(filename, params):
|
||||||
return
|
return False
|
||||||
|
|
||||||
out = self._open_file(filename, params)
|
out = self._open_file(filename, params)
|
||||||
|
|
||||||
is_new = True
|
is_new = True
|
||||||
|
|
||||||
try:
|
try:
|
||||||
url = resp.rec_headers.get('WARC-Target-URI')
|
|
||||||
print('Writing req/resp {0} to {1} '.format(url, filename))
|
|
||||||
|
|
||||||
start = out.tell()
|
start = out.tell()
|
||||||
|
|
||||||
if resp and self._is_write_resp(resp, params):
|
write_callback(out, filename)
|
||||||
self._write_warc_record(out, resp)
|
|
||||||
|
|
||||||
if req and self._is_write_req(req, params):
|
|
||||||
self._write_warc_record(out, req)
|
|
||||||
|
|
||||||
out.flush()
|
out.flush()
|
||||||
|
|
||||||
@ -453,9 +467,12 @@ class MultiFileWARCWriter(BaseWARCWriter):
|
|||||||
filename,
|
filename,
|
||||||
new_size - start)
|
new_size - start)
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
close_file = True
|
close_file = True
|
||||||
|
return False
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
# check for rollover
|
# check for rollover
|
||||||
|
Loading…
x
Reference in New Issue
Block a user