mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
When given a redis path of redis://<host>/<db>/<key>, use <key> as a
sorted cdx file queried with zrangebylex. Tests have been modified, but finishing them requires zrangebylex() support in fakeredis.
This commit is contained in:
parent
e4262502b0
commit
2b8bea616e
@ -94,22 +94,43 @@ class RedisCDXSource(CDXSource):
|
||||
|
||||
def __init__(self, redis_url, config=None):
    """
    Connect to redis and record how cdx entries are stored there.

    :param redis_url: either ``redis://<host>/<db>`` or
        ``redis://<host>/<db>/<key>``. When the extra ``<key>`` segment
        is present it is stored as ``self.cdx_key``; otherwise
        ``self.cdx_key`` is None.
    :param config: optional dict; its ``redis_key_prefix`` entry, if
        present, replaces ``DEFAULT_KEY_PREFIX`` as ``self.key_prefix``.
    """
    import redis

    # 'redis://host/db/key'.split('/') -> ['redis:', '', host, db, key]
    parts = redis_url.split('/')
    if len(parts) > 4:
        self.cdx_key = parts[4]
        # The key segment is not part of a redis connection url.
        # redis-py derives the db number from the url path, so only
        # scheme://host/db is handed to the client.
        connect_url = '/'.join(parts[:4])
    else:
        self.cdx_key = None
        connect_url = redis_url

    # keep the url exactly as it was given by the caller
    self.redis_url = redis_url
    self.redis = redis.StrictRedis.from_url(connect_url)

    self.key_prefix = self.DEFAULT_KEY_PREFIX
    if config:
        self.key_prefix = config.get('redis_key_prefix', self.key_prefix)
|
||||
|
||||
def load_cdx(self, query):
    """
    Load cdx from redis cache, from an ordered list

    If cdx_key is set, treat it as a cdx file and load using
    zrangebylex (supports all match types).

    Otherwise, assume a key per-url and load all entries for that key.
    (Only exact match supported)
    """
    if self.cdx_key:
        return self.load_sorted_range(query)

    return self.load_single_key(query.key)
|
||||
|
||||
def load_sorted_range(self, query):
    """
    Return the members of the sorted set ``self.cdx_key`` that fall in
    the lexicographic range starting at ``query.key`` and ending before
    ``query.end_key``.

    The result of ``zrangebylex`` is returned as-is.
    """
    range_start = '[' + query.key    # '[' marks an inclusive bound
    range_end = '(' + query.end_key  # '(' marks an exclusive bound

    return self.redis.zrangebylex(self.cdx_key, range_start, range_end)
|
||||
|
||||
|
||||
def load_single_key(self, key):
|
||||
# ensure only url/surt is part of key
|
||||
key = key.split(' ')[0]
|
||||
cdx_list = self.redis.zrange(self.key_prefix + key, 0, -1)
|
||||
|
@ -1,9 +1,12 @@
|
||||
"""
|
||||
>>> redis_cdx('http://example.com')
|
||||
>>> redis_cdx(redis_cdx_server, 'http://example.com')
|
||||
com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 example-url-agnostic-revisit.warc.gz
|
||||
com,example)/ 20140127171200 http://example.com text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1046 334 dupes.warc.gz
|
||||
com,example)/ 20140127171251 http://example.com warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 553 11875 dupes.warc.gz
|
||||
|
||||
|
||||
>>> redis_cdx(redis_cdx_server_key, 'http://example.com')
|
||||
|
||||
"""
|
||||
|
||||
from fakeredis import FakeStrictRedis
|
||||
@ -21,13 +24,17 @@ import os
|
||||
test_cdx_dir = get_test_dir() + 'cdx/'
|
||||
|
||||
|
||||
def load_cdx_into_redis(source, filename, key=None):
    """
    Load every line of a test cdx file into the redis of ``source``.

    :param source: object passed through to ``zadd_cdx`` for each line
    :param filename: name of the file, relative to ``test_cdx_dir``
    :param key: passed through to ``zadd_cdx`` unchanged; defaults to None
    """
    # load a cdx into mock redis
    with open(test_cdx_dir + filename) as fh:
        for line in fh:
            zadd_cdx(source, line, key)
|
||||
|
||||
def zadd_cdx(source, cdx, key):
|
||||
if key:
|
||||
source.redis.zadd(key, 0, cdx)
|
||||
return
|
||||
|
||||
def zadd_cdx(source, cdx):
|
||||
parts = cdx.split(' ', 2)
|
||||
|
||||
key = parts[0]
|
||||
@ -49,9 +56,22 @@ def init_redis_server():
|
||||
|
||||
return CDXServer([source])
|
||||
|
||||
def redis_cdx(url, **params):
|
||||
@patch('redis.StrictRedis', FakeStrictRedis)
def init_redis_server_key_file():
    """
    Build a CDXServer over a RedisCDXSource whose url carries a key
    segment, loading every ``.cdx`` file found in ``test_cdx_dir`` under
    that source's ``cdx_key``.

    ``redis.StrictRedis`` is patched with ``FakeStrictRedis`` for the
    duration of the call.
    """
    source = RedisCDXSource('redis://127.0.0.1:6379/0/key')

    for name in os.listdir(test_cdx_dir):
        if not name.endswith('.cdx'):
            continue
        load_cdx_into_redis(source, name, source.cdx_key)

    return CDXServer([source])
|
||||
|
||||
|
||||
def redis_cdx(cdx_server, url, **params):
    """
    Query ``cdx_server`` for ``url`` and write each returned cdx entry to
    stdout, exactly as returned (no separator is added).

    :param cdx_server: object exposing ``load_cdx(url=..., **params)``
    :param url: url to look up
    :param params: extra keyword arguments forwarded to ``load_cdx``
    """
    for cdx in cdx_server.load_cdx(url=url, **params):
        sys.stdout.write(cdx)
|
||||
|
||||
# One server per storage layout: the first from init_redis_server(), the
# second from a source whose redis url carries a key segment.
redis_cdx_server = init_redis_server()
redis_cdx_server_key = init_redis_server_key_file()
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user