From f2c2829f494da6873f54060716ce7dc6846d369c Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 31 May 2017 16:05:57 -0700 Subject: [PATCH] misc improvements: redis multi-key source: store member listing from hgetall 'scan:' key add 'recorder-skip' to cdx line also use latest warcio (1.3.3) --- pywb/webagg/indexsource.py | 1 + pywb/webagg/responseloader.py | 7 ++++--- requirements.txt | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pywb/webagg/indexsource.py b/pywb/webagg/indexsource.py index c2a7f673..c8fa4705 100644 --- a/pywb/webagg/indexsource.py +++ b/pywb/webagg/indexsource.py @@ -259,6 +259,7 @@ class RedisIndexSource(BaseIndexSource): key = res_template(member_key, params) keys = self.redis.smembers(key) + params['scan:' + key] = keys match_templ = match_templ.encode('utf-8') diff --git a/pywb/webagg/responseloader.py b/pywb/webagg/responseloader.py index 9e3e5ad2..9153a93d 100644 --- a/pywb/webagg/responseloader.py +++ b/pywb/webagg/responseloader.py @@ -48,11 +48,12 @@ class BaseLoader(object): out_headers['WebAgg-Type'] = 'warc' out_headers['Content-Type'] = 'application/warc-record' - out_headers['WebAgg-Cdx'] = to_native_str(cdx.to_cdxj().rstrip()) - out_headers['WebAgg-Source-Coll'] = source - if params.get('recorder_skip'): out_headers['Recorder-Skip'] = '1' + cdx['recorder_skip'] = '1' + + out_headers['WebAgg-Cdx'] = to_native_str(cdx.to_cdxj().rstrip()) + out_headers['WebAgg-Source-Coll'] = source if not warc_headers: if other_headers: diff --git a/requirements.txt b/requirements.txt index 61b4653a..bf94b46a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ six -warcio==1.3 +warcio==1.3.3 chardet requests redis