1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

Decoding and Recorder Fixes (#313)

* redisindex: use decode_responses=True for redisindex
* recorder: close_file() returns True if closed; close_key() returns the filename if closed
* logging: if debug=True, log warc load failures
* appveyor build fix: add pypiwin32 as dependency for windows build
This commit is contained in:
Ilya Kreymer 2018-03-29 13:42:00 -07:00 committed by GitHub
parent a9cbdc1bd6
commit 9da5bd1083
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 23 additions and 14 deletions

View File

@ -17,6 +17,7 @@ install:
- "pip install coverage pytest-cov"
- "pip install cffi"
- "pip install pyopenssl"
- "pip install pypiwin32"
- "pip install certauth boto3 youtube-dl pysocks"
- "pip install codecov"

View File

@ -100,8 +100,10 @@ class MultiFileWARCWriter(BaseWARCWriter):
if os.name != 'nt':
portalocker.lock(fh, portalocker.LOCK_UN)
fh.close()
return True
except Exception as e:
print(e)
return False
def get_dir_key(self, params):
return res_template(self.key_template, params)
@ -115,7 +117,7 @@ class MultiFileWARCWriter(BaseWARCWriter):
return
out, filename = result
self._close_file(out)
if self._close_file(out):
return filename
def close_file(self, match_filename):

View File

@ -8,6 +8,11 @@ from pywb.warcserver.index.fuzzymatcher import FuzzyMatcher
from pywb.warcserver.resource.responseloader import WARCPathLoader, LiveWebLoader, VideoLoader
import six
import logging
import traceback
logger = logging.getLogger('warcserver')
#=============================================================================
@ -123,6 +128,8 @@ class ResourceHandler(IndexHandler):
return out_headers, resp, errs
except (WbException, ArchiveLoadFailed) as e:
last_exc = e
if logger.isEnabledFor(logging.DEBUG):
traceback.print_exc()
errs[str(loader)] = str(e)
if last_exc:

View File

@ -370,12 +370,11 @@ class BaseRedisMultiKeyIndexSource(BaseAggregator, RedisIndexSource):
redis_key_pattern = res_template(self.redis_key_template, params)
if '*' not in redis_key_pattern:
keys = [redis_key_pattern.encode('utf-8')]
keys = [redis_key_pattern]
else:
keys = self.scan_keys(redis_key_pattern, params)
for key in keys:
key = key.decode('utf-8')
res = self._get_source_for_key(key)
if res:
yield key, res

View File

@ -282,7 +282,7 @@ class RedisIndexSource(BaseIndexSource):
redis_key_template = key_prefix
if not redis_:
redis_ = redis.StrictRedis.from_url(redis_url)
redis_ = redis.StrictRedis.from_url(redis_url, decode_responses=True)
return redis_, key_prefix
def scan_keys(self, match_templ, params, member_key=None):
@ -301,18 +301,18 @@ class RedisIndexSource(BaseIndexSource):
keys = self._load_key_set(key)
params[scan_key] = keys
match_templ = match_templ.encode('utf-8')
#match_templ = match_templ.encode('utf-8')
return [match_templ.replace(b'*', key) for key in keys]
return [match_templ.replace('*', key) for key in keys]
def _load_key_set(self, key):
if not self.member_key_type:
self.member_key_type = self.redis.type(key)
if self.member_key_type == b'set':
if self.member_key_type == 'set':
return self.redis.smembers(key)
elif self.member_key_type == b'hash':
elif self.member_key_type == 'hash':
return self.redis.hvals(key)
# don't cache if any other type
@ -332,6 +332,8 @@ class RedisIndexSource(BaseIndexSource):
def do_load(index_list):
for line in index_list:
if isinstance(line, str):
line = line.encode('utf-8')
yield CDXObject(line)
return do_load(index_list)

View File

@ -82,15 +82,13 @@ class RedisResolver(RedisIndexSource):
if '*' in redis_key:
for key in self.scan_keys(redis_key, params):
#key = key.decode('utf-8')
res = self.redis.hget(key, filename)
if res:
break
else:
res = self.redis.hget(redis_key, filename)
if res and six.PY3:
res = res.decode('utf-8')
res = to_native_str(res, 'utf-8')
return res

View File

@ -110,7 +110,7 @@ class TestPathIndex(object):
assert resolver('example.warc.gz', cdx) == 'some_path/example.warc.gz'
assert resolver('example-2.warc.gz', cdx) == 'some_path/example-2.warc.gz'
assert resolver.member_key_type == b'set'
assert resolver.member_key_type == 'set'
@patch('redis.StrictRedis', FakeStrictRedis)
def test_redis_resolver_multi_key_with_member_hash(self):
@ -135,7 +135,7 @@ class TestPathIndex(object):
assert resolver('example.warc.gz', cdx) == 'some_path/example.warc.gz'
assert resolver('example-2.warc.gz', cdx) == 'some_path/example-2.warc.gz'
assert resolver.member_key_type == b'hash'
assert resolver.member_key_type == 'hash'
def test_make_best_resolver_http(self):
res = DefaultResolverMixin.make_best_resolver('http://myhost.example.com/warcs/')