diff --git a/pywb/cdx/cdxobject.py b/pywb/cdx/cdxobject.py index f933d77a..e9174a37 100644 --- a/pywb/cdx/cdxobject.py +++ b/pywb/cdx/cdxobject.py @@ -123,11 +123,11 @@ class CDXObject(OrderedDict): for n, v in six.iteritems(json_fields): n = self.CDX_ALT_FIELDS.get(n, n) - try: - v.encode('ascii') - except UnicodeEncodeError: - parts = v.encode('utf-8').split(b'//', 1) - v = parts[0].decode('utf-8') + '//' + quote(parts[1]) + if n == 'url': + try: + v.encode('ascii') + except UnicodeEncodeError: + v = quote(v, safe=':/') self[n] = v diff --git a/pywb/warc/pathresolvers.py b/pywb/warc/pathresolvers.py index ea9d2119..cc6510b4 100644 --- a/pywb/warc/pathresolvers.py +++ b/pywb/warc/pathresolvers.py @@ -51,7 +51,7 @@ class RedisResolver(object): def __call__(self, filename, cdx=None): redis_val = self.redis.hget(self.key_prefix + filename, 'path') - return [to_native_str(redis_val)] if redis_val else [] + return [to_native_str(redis_val, 'utf-8')] if redis_val else [] def __repr__(self): return "RedisResolver('{0}')".format(self.redis_url) @@ -69,7 +69,7 @@ class PathIndexResolver(object): for pathline in result: paths = pathline.split(b'\t')[1:] for path in paths: - yield to_native_str(path) + yield to_native_str(path, 'utf-8') def __repr__(self): # pragma: no cover return "PathIndexResolver('{0}')".format(self.pathindex_file)