mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
cdx: add prototype support for redis cdx source (needs testing)
This commit is contained in:
parent
9194e867ea
commit
ef062fee7b
@ -1,7 +1,7 @@
|
|||||||
from canonicalize import UrlCanonicalizer, calc_search_range
|
from canonicalize import UrlCanonicalizer, calc_search_range
|
||||||
|
|
||||||
from cdxops import cdx_load
|
from cdxops import cdx_load
|
||||||
from cdxsource import CDXSource, CDXFile, RemoteCDXSource
|
from cdxsource import CDXSource, CDXFile, RemoteCDXSource, RedisCDXSource
|
||||||
from zipnum import ZipNumCluster
|
from zipnum import ZipNumCluster
|
||||||
from cdxobject import CDXObject, CaptureNotFoundException, CDXException
|
from cdxobject import CDXObject, CaptureNotFoundException, CDXException
|
||||||
from cdxdomainspecific import load_domain_specific_cdx_rules
|
from cdxdomainspecific import load_domain_specific_cdx_rules
|
||||||
@ -206,6 +206,9 @@ def create_cdx_source(filename, config):
|
|||||||
if is_http(filename):
|
if is_http(filename):
|
||||||
return RemoteCDXSource(filename)
|
return RemoteCDXSource(filename)
|
||||||
|
|
||||||
|
if filename.startswith('redis://'):
|
||||||
|
return RedisCDXSource(filename, config)
|
||||||
|
|
||||||
if filename.endswith('.cdx'):
|
if filename.endswith('.cdx'):
|
||||||
return CDXFile(filename)
|
return CDXFile(filename)
|
||||||
|
|
||||||
@ -213,9 +216,6 @@ def create_cdx_source(filename, config):
|
|||||||
return ZipNumCluster(filename, config)
|
return ZipNumCluster(filename, config)
|
||||||
|
|
||||||
return None
|
return None
|
||||||
#TODO: support zipnum
|
|
||||||
#elif filename.startswith('redis://')
|
|
||||||
# return RedisCDXSource(filename)
|
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
|
@ -3,6 +3,7 @@ from pywb.utils.loaders import SeekableTextFileReader
|
|||||||
|
|
||||||
import urllib
|
import urllib
|
||||||
import urllib2
|
import urllib2
|
||||||
|
import redis
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
@ -80,3 +81,33 @@ class RemoteCDXSource(CDXSource):
|
|||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return 'Remote CDX Server: ' + self.remote_url
|
return 'Remote CDX Server: ' + self.remote_url
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
class RedisCDXSource(CDXSource):
    """
    CDX source backed by redis.

    The cdx lines for a url key are read from the redis sorted set
    stored under ``<key_prefix><url key>``.
    """

    # prefix used for redis keys when the config does not override it
    DEFAULT_KEY_PREFIX = 'c:'

    def __init__(self, redis_url, config=None):
        """
        :param redis_url: ``redis://`` url passed to
            ``redis.StrictRedis.from_url``
        :param config: optional dict-like config; ``redis_key_prefix``
            overrides ``DEFAULT_KEY_PREFIX`` when present
        """
        self.redis = redis.StrictRedis.from_url(redis_url)

        # Always set the attribute: previously it was assigned only when
        # a non-empty config was passed, so load_cdx() failed with
        # AttributeError for the default config=None.
        key_prefix = self.DEFAULT_KEY_PREFIX
        if config:
            key_prefix = config.get('redis_key_prefix', key_prefix)

        self.key_prefix = key_prefix

    def load_cdx(self, params):
        """
        Load cdx from redis cache, from an ordered list

        Currently, there is no support for range queries
        Only 'exact' matchType is supported

        :param params: query params; ``params['key']`` is the search key
        :return: lazy iterator over cdx lines, each prefixed with the
            url key and a single space
        """
        key = params['key']

        # ensure only url/surt is part of key
        key = key.split(' ')[0]
        cdx_list = self.redis.zrange(self.key_prefix + key, 0, -1)

        # key is not part of list, so prepend to each line
        key += ' '

        # A generator expression keeps the result lazy, as imap did,
        # without depending on the itertools module being imported.
        return (key + cdx for cdx in cdx_list)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user