mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Decoding and Recorder Fixes (#313)
* redisindex: use decode_responses=True for redisindex
* recorder: close_file(): return True if closed, close_key(): return filename if closed
* logging: if debug=True, log WARC load failures
* appveyor build fix: add pypiwin32 as dependency for Windows build
This commit is contained in:
parent
a9cbdc1bd6
commit
9da5bd1083
@ -17,6 +17,7 @@ install:
|
|||||||
- "pip install coverage pytest-cov"
|
- "pip install coverage pytest-cov"
|
||||||
- "pip install cffi"
|
- "pip install cffi"
|
||||||
- "pip install pyopenssl"
|
- "pip install pyopenssl"
|
||||||
|
- "pip install pypiwin32"
|
||||||
- "pip install certauth boto3 youtube-dl pysocks"
|
- "pip install certauth boto3 youtube-dl pysocks"
|
||||||
- "pip install codecov"
|
- "pip install codecov"
|
||||||
|
|
||||||
|
@ -100,8 +100,10 @@ class MultiFileWARCWriter(BaseWARCWriter):
|
|||||||
if os.name != 'nt':
|
if os.name != 'nt':
|
||||||
portalocker.lock(fh, portalocker.LOCK_UN)
|
portalocker.lock(fh, portalocker.LOCK_UN)
|
||||||
fh.close()
|
fh.close()
|
||||||
|
return True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print(e)
|
||||||
|
return False
|
||||||
|
|
||||||
def get_dir_key(self, params):
|
def get_dir_key(self, params):
|
||||||
return res_template(self.key_template, params)
|
return res_template(self.key_template, params)
|
||||||
@ -115,8 +117,8 @@ class MultiFileWARCWriter(BaseWARCWriter):
|
|||||||
return
|
return
|
||||||
|
|
||||||
out, filename = result
|
out, filename = result
|
||||||
self._close_file(out)
|
if self._close_file(out):
|
||||||
return filename
|
return filename
|
||||||
|
|
||||||
def close_file(self, match_filename):
|
def close_file(self, match_filename):
|
||||||
for dir_key, out, filename in self.iter_open_files():
|
for dir_key, out, filename in self.iter_open_files():
|
||||||
|
@ -8,6 +8,11 @@ from pywb.warcserver.index.fuzzymatcher import FuzzyMatcher
|
|||||||
from pywb.warcserver.resource.responseloader import WARCPathLoader, LiveWebLoader, VideoLoader
|
from pywb.warcserver.resource.responseloader import WARCPathLoader, LiveWebLoader, VideoLoader
|
||||||
|
|
||||||
import six
|
import six
|
||||||
|
import logging
|
||||||
|
import traceback
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger('warcserver')
|
||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
#=============================================================================
|
||||||
@ -123,6 +128,8 @@ class ResourceHandler(IndexHandler):
|
|||||||
return out_headers, resp, errs
|
return out_headers, resp, errs
|
||||||
except (WbException, ArchiveLoadFailed) as e:
|
except (WbException, ArchiveLoadFailed) as e:
|
||||||
last_exc = e
|
last_exc = e
|
||||||
|
if logger.isEnabledFor(logging.DEBUG):
|
||||||
|
traceback.print_exc()
|
||||||
errs[str(loader)] = str(e)
|
errs[str(loader)] = str(e)
|
||||||
|
|
||||||
if last_exc:
|
if last_exc:
|
||||||
|
@ -370,12 +370,11 @@ class BaseRedisMultiKeyIndexSource(BaseAggregator, RedisIndexSource):
|
|||||||
redis_key_pattern = res_template(self.redis_key_template, params)
|
redis_key_pattern = res_template(self.redis_key_template, params)
|
||||||
|
|
||||||
if '*' not in redis_key_pattern:
|
if '*' not in redis_key_pattern:
|
||||||
keys = [redis_key_pattern.encode('utf-8')]
|
keys = [redis_key_pattern]
|
||||||
else:
|
else:
|
||||||
keys = self.scan_keys(redis_key_pattern, params)
|
keys = self.scan_keys(redis_key_pattern, params)
|
||||||
|
|
||||||
for key in keys:
|
for key in keys:
|
||||||
key = key.decode('utf-8')
|
|
||||||
res = self._get_source_for_key(key)
|
res = self._get_source_for_key(key)
|
||||||
if res:
|
if res:
|
||||||
yield key, res
|
yield key, res
|
||||||
|
@ -282,7 +282,7 @@ class RedisIndexSource(BaseIndexSource):
|
|||||||
|
|
||||||
redis_key_template = key_prefix
|
redis_key_template = key_prefix
|
||||||
if not redis_:
|
if not redis_:
|
||||||
redis_ = redis.StrictRedis.from_url(redis_url)
|
redis_ = redis.StrictRedis.from_url(redis_url, decode_responses=True)
|
||||||
return redis_, key_prefix
|
return redis_, key_prefix
|
||||||
|
|
||||||
def scan_keys(self, match_templ, params, member_key=None):
|
def scan_keys(self, match_templ, params, member_key=None):
|
||||||
@ -301,18 +301,18 @@ class RedisIndexSource(BaseIndexSource):
|
|||||||
keys = self._load_key_set(key)
|
keys = self._load_key_set(key)
|
||||||
params[scan_key] = keys
|
params[scan_key] = keys
|
||||||
|
|
||||||
match_templ = match_templ.encode('utf-8')
|
#match_templ = match_templ.encode('utf-8')
|
||||||
|
|
||||||
return [match_templ.replace(b'*', key) for key in keys]
|
return [match_templ.replace('*', key) for key in keys]
|
||||||
|
|
||||||
def _load_key_set(self, key):
|
def _load_key_set(self, key):
|
||||||
if not self.member_key_type:
|
if not self.member_key_type:
|
||||||
self.member_key_type = self.redis.type(key)
|
self.member_key_type = self.redis.type(key)
|
||||||
|
|
||||||
if self.member_key_type == b'set':
|
if self.member_key_type == 'set':
|
||||||
return self.redis.smembers(key)
|
return self.redis.smembers(key)
|
||||||
|
|
||||||
elif self.member_key_type == b'hash':
|
elif self.member_key_type == 'hash':
|
||||||
return self.redis.hvals(key)
|
return self.redis.hvals(key)
|
||||||
|
|
||||||
# don't cache if any other type
|
# don't cache if any other type
|
||||||
@ -332,6 +332,8 @@ class RedisIndexSource(BaseIndexSource):
|
|||||||
|
|
||||||
def do_load(index_list):
|
def do_load(index_list):
|
||||||
for line in index_list:
|
for line in index_list:
|
||||||
|
if isinstance(line, str):
|
||||||
|
line = line.encode('utf-8')
|
||||||
yield CDXObject(line)
|
yield CDXObject(line)
|
||||||
|
|
||||||
return do_load(index_list)
|
return do_load(index_list)
|
||||||
|
@ -82,15 +82,13 @@ class RedisResolver(RedisIndexSource):
|
|||||||
|
|
||||||
if '*' in redis_key:
|
if '*' in redis_key:
|
||||||
for key in self.scan_keys(redis_key, params):
|
for key in self.scan_keys(redis_key, params):
|
||||||
#key = key.decode('utf-8')
|
|
||||||
res = self.redis.hget(key, filename)
|
res = self.redis.hget(key, filename)
|
||||||
if res:
|
if res:
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
res = self.redis.hget(redis_key, filename)
|
res = self.redis.hget(redis_key, filename)
|
||||||
|
|
||||||
if res and six.PY3:
|
res = to_native_str(res, 'utf-8')
|
||||||
res = res.decode('utf-8')
|
|
||||||
|
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
@ -110,7 +110,7 @@ class TestPathIndex(object):
|
|||||||
assert resolver('example.warc.gz', cdx) == 'some_path/example.warc.gz'
|
assert resolver('example.warc.gz', cdx) == 'some_path/example.warc.gz'
|
||||||
assert resolver('example-2.warc.gz', cdx) == 'some_path/example-2.warc.gz'
|
assert resolver('example-2.warc.gz', cdx) == 'some_path/example-2.warc.gz'
|
||||||
|
|
||||||
assert resolver.member_key_type == b'set'
|
assert resolver.member_key_type == 'set'
|
||||||
|
|
||||||
@patch('redis.StrictRedis', FakeStrictRedis)
|
@patch('redis.StrictRedis', FakeStrictRedis)
|
||||||
def test_redis_resolver_multi_key_with_member_hash(self):
|
def test_redis_resolver_multi_key_with_member_hash(self):
|
||||||
@ -135,7 +135,7 @@ class TestPathIndex(object):
|
|||||||
assert resolver('example.warc.gz', cdx) == 'some_path/example.warc.gz'
|
assert resolver('example.warc.gz', cdx) == 'some_path/example.warc.gz'
|
||||||
assert resolver('example-2.warc.gz', cdx) == 'some_path/example-2.warc.gz'
|
assert resolver('example-2.warc.gz', cdx) == 'some_path/example-2.warc.gz'
|
||||||
|
|
||||||
assert resolver.member_key_type == b'hash'
|
assert resolver.member_key_type == 'hash'
|
||||||
|
|
||||||
def test_make_best_resolver_http(self):
|
def test_make_best_resolver_http(self):
|
||||||
res = DefaultResolverMixin.make_best_resolver('http://myhost.example.com/warcs/')
|
res = DefaultResolverMixin.make_best_resolver('http://myhost.example.com/warcs/')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user