1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

misc improvements:

redis multi-key source: store the member listing (fetched via smembers in this diff) in params under the 'scan:<key>' key
also add 'recorder_skip' to the cdx line when the Recorder-Skip header is set
use the latest warcio (1.3.3; previously pinned at 1.3)
This commit is contained in:
Ilya Kreymer 2017-05-31 16:05:57 -07:00
parent 481bc40ccc
commit f2c2829f49
3 changed files with 6 additions and 4 deletions

View File

@ -259,6 +259,7 @@ class RedisIndexSource(BaseIndexSource):
key = res_template(member_key, params) key = res_template(member_key, params)
keys = self.redis.smembers(key) keys = self.redis.smembers(key)
params['scan:' + key] = keys
match_templ = match_templ.encode('utf-8') match_templ = match_templ.encode('utf-8')

View File

@ -48,11 +48,12 @@ class BaseLoader(object):
out_headers['WebAgg-Type'] = 'warc' out_headers['WebAgg-Type'] = 'warc'
out_headers['Content-Type'] = 'application/warc-record' out_headers['Content-Type'] = 'application/warc-record'
out_headers['WebAgg-Cdx'] = to_native_str(cdx.to_cdxj().rstrip())
out_headers['WebAgg-Source-Coll'] = source
if params.get('recorder_skip'): if params.get('recorder_skip'):
out_headers['Recorder-Skip'] = '1' out_headers['Recorder-Skip'] = '1'
cdx['recorder_skip'] = '1'
out_headers['WebAgg-Cdx'] = to_native_str(cdx.to_cdxj().rstrip())
out_headers['WebAgg-Source-Coll'] = source
if not warc_headers: if not warc_headers:
if other_headers: if other_headers:

View File

@ -1,5 +1,5 @@
six six
warcio==1.3 warcio==1.3.3
chardet chardet
requests requests
redis redis