mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Decoding and Recorder Fixes (#313)
* redisindex: use decode_responses=True for redisindex * recorder: close_file(): return True if closed, close_key(): return filename if closed * logging: if debug=True, log WARC load failures * appveyor build fix: add pypiwin32 as dependency for Windows build
This commit is contained in:
parent
a9cbdc1bd6
commit
9da5bd1083
@ -17,6 +17,7 @@ install:
|
||||
- "pip install coverage pytest-cov"
|
||||
- "pip install cffi"
|
||||
- "pip install pyopenssl"
|
||||
- "pip install pypiwin32"
|
||||
- "pip install certauth boto3 youtube-dl pysocks"
|
||||
- "pip install codecov"
|
||||
|
||||
|
@ -100,8 +100,10 @@ class MultiFileWARCWriter(BaseWARCWriter):
|
||||
if os.name != 'nt':
|
||||
portalocker.lock(fh, portalocker.LOCK_UN)
|
||||
fh.close()
|
||||
return True
|
||||
except Exception as e:
|
||||
print(e)
|
||||
return False
|
||||
|
||||
def get_dir_key(self, params):
|
||||
return res_template(self.key_template, params)
|
||||
@ -115,7 +117,7 @@ class MultiFileWARCWriter(BaseWARCWriter):
|
||||
return
|
||||
|
||||
out, filename = result
|
||||
self._close_file(out)
|
||||
if self._close_file(out):
|
||||
return filename
|
||||
|
||||
def close_file(self, match_filename):
|
||||
|
@ -8,6 +8,11 @@ from pywb.warcserver.index.fuzzymatcher import FuzzyMatcher
|
||||
from pywb.warcserver.resource.responseloader import WARCPathLoader, LiveWebLoader, VideoLoader
|
||||
|
||||
import six
|
||||
import logging
|
||||
import traceback
|
||||
|
||||
|
||||
logger = logging.getLogger('warcserver')
|
||||
|
||||
|
||||
#=============================================================================
|
||||
@ -123,6 +128,8 @@ class ResourceHandler(IndexHandler):
|
||||
return out_headers, resp, errs
|
||||
except (WbException, ArchiveLoadFailed) as e:
|
||||
last_exc = e
|
||||
if logger.isEnabledFor(logging.DEBUG):
|
||||
traceback.print_exc()
|
||||
errs[str(loader)] = str(e)
|
||||
|
||||
if last_exc:
|
||||
|
@ -370,12 +370,11 @@ class BaseRedisMultiKeyIndexSource(BaseAggregator, RedisIndexSource):
|
||||
redis_key_pattern = res_template(self.redis_key_template, params)
|
||||
|
||||
if '*' not in redis_key_pattern:
|
||||
keys = [redis_key_pattern.encode('utf-8')]
|
||||
keys = [redis_key_pattern]
|
||||
else:
|
||||
keys = self.scan_keys(redis_key_pattern, params)
|
||||
|
||||
for key in keys:
|
||||
key = key.decode('utf-8')
|
||||
res = self._get_source_for_key(key)
|
||||
if res:
|
||||
yield key, res
|
||||
|
@ -282,7 +282,7 @@ class RedisIndexSource(BaseIndexSource):
|
||||
|
||||
redis_key_template = key_prefix
|
||||
if not redis_:
|
||||
redis_ = redis.StrictRedis.from_url(redis_url)
|
||||
redis_ = redis.StrictRedis.from_url(redis_url, decode_responses=True)
|
||||
return redis_, key_prefix
|
||||
|
||||
def scan_keys(self, match_templ, params, member_key=None):
|
||||
@ -301,18 +301,18 @@ class RedisIndexSource(BaseIndexSource):
|
||||
keys = self._load_key_set(key)
|
||||
params[scan_key] = keys
|
||||
|
||||
match_templ = match_templ.encode('utf-8')
|
||||
#match_templ = match_templ.encode('utf-8')
|
||||
|
||||
return [match_templ.replace(b'*', key) for key in keys]
|
||||
return [match_templ.replace('*', key) for key in keys]
|
||||
|
||||
def _load_key_set(self, key):
|
||||
if not self.member_key_type:
|
||||
self.member_key_type = self.redis.type(key)
|
||||
|
||||
if self.member_key_type == b'set':
|
||||
if self.member_key_type == 'set':
|
||||
return self.redis.smembers(key)
|
||||
|
||||
elif self.member_key_type == b'hash':
|
||||
elif self.member_key_type == 'hash':
|
||||
return self.redis.hvals(key)
|
||||
|
||||
# don't cache if any other type
|
||||
@ -332,6 +332,8 @@ class RedisIndexSource(BaseIndexSource):
|
||||
|
||||
def do_load(index_list):
|
||||
for line in index_list:
|
||||
if isinstance(line, str):
|
||||
line = line.encode('utf-8')
|
||||
yield CDXObject(line)
|
||||
|
||||
return do_load(index_list)
|
||||
|
@ -82,15 +82,13 @@ class RedisResolver(RedisIndexSource):
|
||||
|
||||
if '*' in redis_key:
|
||||
for key in self.scan_keys(redis_key, params):
|
||||
#key = key.decode('utf-8')
|
||||
res = self.redis.hget(key, filename)
|
||||
if res:
|
||||
break
|
||||
else:
|
||||
res = self.redis.hget(redis_key, filename)
|
||||
|
||||
if res and six.PY3:
|
||||
res = res.decode('utf-8')
|
||||
res = to_native_str(res, 'utf-8')
|
||||
|
||||
return res
|
||||
|
||||
|
@ -110,7 +110,7 @@ class TestPathIndex(object):
|
||||
assert resolver('example.warc.gz', cdx) == 'some_path/example.warc.gz'
|
||||
assert resolver('example-2.warc.gz', cdx) == 'some_path/example-2.warc.gz'
|
||||
|
||||
assert resolver.member_key_type == b'set'
|
||||
assert resolver.member_key_type == 'set'
|
||||
|
||||
@patch('redis.StrictRedis', FakeStrictRedis)
|
||||
def test_redis_resolver_multi_key_with_member_hash(self):
|
||||
@ -135,7 +135,7 @@ class TestPathIndex(object):
|
||||
assert resolver('example.warc.gz', cdx) == 'some_path/example.warc.gz'
|
||||
assert resolver('example-2.warc.gz', cdx) == 'some_path/example-2.warc.gz'
|
||||
|
||||
assert resolver.member_key_type == b'hash'
|
||||
assert resolver.member_key_type == 'hash'
|
||||
|
||||
def test_make_best_resolver_http(self):
|
||||
res = DefaultResolverMixin.make_best_resolver('http://myhost.example.com/warcs/')
|
||||
|
Loading…
x
Reference in New Issue
Block a user