class RedisCDXSource(CDXSource):
    """
    CDX source that reads cdx lines from a redis sorted set.

    Each url key is looked up under ``<key_prefix><key>``; the members
    of that sorted set are the cdx lines with the leading key removed.
    """
    DEFAULT_KEY_PREFIX = 'c:'

    def __init__(self, redis_url, config=None):
        """
        :param redis_url: ``redis://`` url passed to
            ``redis.StrictRedis.from_url``
        :param config: optional dict-like config; ``redis_key_prefix``
            overrides ``DEFAULT_KEY_PREFIX`` when present
        """
        self.redis_url = redis_url
        self.redis = redis.StrictRedis.from_url(redis_url)

        # Always define key_prefix, even when no config is supplied,
        # so load_cdx() never hits a missing attribute.
        self.key_prefix = self.DEFAULT_KEY_PREFIX
        if config:
            self.key_prefix = config.get('redis_key_prefix',
                                         self.DEFAULT_KEY_PREFIX)

    def load_cdx(self, params):
        """
        Load cdx from redis cache, from an ordered list

        Currently, there is no support for range queries
        Only 'exact' matchType is supported

        :param params: query params; only ``params['key']`` is read
        :return: iterator of cdx lines, each prefixed with the key
        """
        # ensure only url/surt is part of key
        key = params['key'].split(' ')[0]

        # the query itself runs now; only the prefixing below is lazy
        cdx_list = self.redis.zrange(self.key_prefix + key, 0, -1)

        # key is not part of list, so prepend to each line
        line_prefix = key + ' '
        return (line_prefix + cdx for cdx in cdx_list)

    def __str__(self):
        return 'Redis CDX Source: ' + self.redis_url