mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
cdx: refactor to create separate CDXQuery object for wrapping
params passed to load_cdx()
This commit is contained in:
parent
af9cabdc72
commit
355fa32600
@ -7,6 +7,7 @@ from pywb.utils.dsrules import BaseRule, RuleSet
|
||||
|
||||
from pywb.utils.canonicalize import unsurt, UrlCanonicalizer
|
||||
|
||||
from cdxobject import CDXQuery
|
||||
|
||||
#=================================================================
|
||||
def load_domain_specific_cdx_rules(ds_rules_file, surt_ordered):
|
||||
@ -70,13 +71,13 @@ class FuzzyQuery:
|
||||
def __init__(self, rules):
|
||||
self.rules = rules
|
||||
|
||||
def __call__(self, params):
|
||||
def __call__(self, query):
|
||||
matched_rule = None
|
||||
|
||||
urlkey = params['key']
|
||||
url = params['url']
|
||||
filter_ = params.get('filter', [])
|
||||
output = params.get('output')
|
||||
urlkey = query.key
|
||||
url = query.url
|
||||
filter_ = query.filters
|
||||
output = query.output
|
||||
|
||||
for rule in self.rules.iter_matching(urlkey):
|
||||
m = rule.regex.search(urlkey)
|
||||
@ -102,7 +103,7 @@ class FuzzyQuery:
|
||||
'filter': filter_,
|
||||
'output': output}
|
||||
|
||||
return params
|
||||
return CDXQuery(**params)
|
||||
|
||||
|
||||
#=================================================================
|
||||
|
@ -1,6 +1,9 @@
|
||||
from collections import OrderedDict
|
||||
import itertools
|
||||
|
||||
from urllib import urlencode
|
||||
from urlparse import parse_qs
|
||||
|
||||
|
||||
#=================================================================
|
||||
class CDXException(Exception):
|
||||
@ -20,6 +23,126 @@ class AccessException(CDXException):
|
||||
return '403 Access Denied'
|
||||
|
||||
|
||||
#=================================================================
|
||||
class CDXQuery(object):
    """
    Wrapper around the dictionary of parameters for a single cdx query.

    The raw parameters are kept in ``self.params``; the properties below
    expose them with defaults applied and with values converted to the
    type used by the cdx processing code.
    """
    def __init__(self, **kwargs):
        self.params = kwargs

    @property
    def key(self):
        """ Start key of the search range (raises KeyError if not set) """
        return self.params['key']

    @property
    def end_key(self):
        """ End key of the search range (raises KeyError if not set) """
        return self.params['end_key']

    def set_key(self, key, end_key):
        """ Store the start and end keys of the search range """
        self.params['key'] = key
        self.params['end_key'] = end_key

    @property
    def url(self):
        """ The queried url

        :raises CDXException: if no url param was supplied
        """
        try:
            return self.params['url']
        except KeyError:
            msg = 'A url= param must be specified to query the cdx server'
            raise CDXException(msg)

    @property
    def match_type(self):
        """ Value of the matchType param, 'exact' if not set """
        return self.params.get('matchType', 'exact')

    @property
    def is_exact(self):
        """ True if match_type is 'exact' """
        return self.match_type == 'exact'

    @property
    def allow_fuzzy(self):
        return self._get_bool('allowFuzzy')

    @property
    def output(self):
        """ Value of the output param, 'text' if not set """
        return self.params.get('output', 'text')

    @property
    def limit(self):
        """ Value of the limit param as an int, 100000 if not set """
        return int(self.params.get('limit', 100000))

    @property
    def collapse_time(self):
        return self.params.get('collapseTime')

    @property
    def resolve_revisits(self):
        return self._get_bool('resolveRevisits')

    @property
    def filters(self):
        """ Value of the filter param, empty list if not set """
        return self.params.get('filter', [])

    @property
    def fields(self):
        """ List of names from the comma-separated fields param,
        or None if the param is missing or empty
        """
        v = self.params.get('fields')
        return v.split(',') if v else None

    @property
    def closest(self):
        # sort=closest is not required
        return self.params.get('closest')

    @property
    def reverse(self):
        # sort=reverse overrides reverse=0
        return (self._get_bool('reverse') or
                self.params.get('sort') == 'reverse')

    @property
    def secondary_index_only(self):
        return self._get_bool('showPagedIndex')

    @property
    def process(self):
        """ Value of the processOps flag, True if not set """
        return self._get_bool('processOps', True)

    def set_process(self, process):
        """ Set the processOps flag read back by the process property """
        self.params['processOps'] = process

    def _get_bool(self, name, def_val=False):
        """ Read param ``name`` as a bool.

        Numeric values ('0', '1', 0, 1, True, False) are converted
        through int(); any other string is true only if it equals 'true'
        (case-insensitive). ``def_val`` is used only when the param is
        missing or is the empty string.
        """
        v = self.params.get(name)

        # Only a missing/empty value means "not specified". An explicit
        # False or 0 (eg. from set_process(False)) must not be replaced
        # by a default of True.
        if v is None or v == '':
            return bool(def_val)

        try:
            return bool(int(v))
        except ValueError:
            return v.lower() == 'true'
        except TypeError:
            # neither a number nor a string: fall back to truthiness
            return bool(v)

    def urlencode(self):
        """ Encode all params as a query string (sequence values are
        expanded into repeated params)
        """
        return urlencode(self.params, True)

    @staticmethod
    def from_wsgi_env(env):
        """ utility function to extract params and create a CDXQuery
        from a WSGI environment dictionary
        """
        # QUERY_STRING may be absent from a WSGI environ
        params = parse_qs(env.get('QUERY_STRING', ''))

        # parse_qs produces arrays for single values
        # cdx processing expects singleton params for all params,
        # except filters, so convert here
        # use first value of the list
        # (iterate over a snapshot of the keys, as values are reassigned)
        for name in list(params):
            if name != 'filter':
                params[name] = params[name][0]

        # apply the default only after flattening, otherwise the
        # string default would itself be reduced to its first character
        params.setdefault('output', 'text')

        return CDXQuery(**params)
|
||||
|
||||
|
||||
#=================================================================
|
||||
class CDXObject(OrderedDict):
|
||||
CDX_FORMATS = [
|
||||
|
@ -1,4 +1,4 @@
|
||||
from cdxobject import CDXObject, IDXObject, AccessException
|
||||
from cdxobject import CDXObject, IDXObject, AccessException, CDXQuery
|
||||
from pywb.utils.timeutils import timestamp_to_sec
|
||||
|
||||
import bisect
|
||||
@ -10,7 +10,7 @@ from collections import deque
|
||||
|
||||
|
||||
#=================================================================
|
||||
def cdx_load(sources, params, perms_checker=None, filter=True):
|
||||
def cdx_load(sources, query, perms_checker=None, process=True):
|
||||
"""
|
||||
merge text CDX lines from sources, return an iterator for
|
||||
filtered and access-checked sequence of CDX objects.
|
||||
@ -19,25 +19,30 @@ def cdx_load(sources, params, perms_checker=None, filter=True):
|
||||
:param perms_checker: access check filter object implementing
|
||||
allow_url_lookup(key, url), allow_capture(cdxobj) and
|
||||
filter_fields(cdxobj) methods.
|
||||
:param process: bool, perform processing sorting/filtering/grouping ops
|
||||
"""
|
||||
cdx_iter = load_cdx_streams(sources, params)
|
||||
cdx_iter = make_obj_iter(cdx_iter, params)
|
||||
cdx_iter = filter_cdx(cdx_iter, params)
|
||||
cdx_iter = load_cdx_streams(sources, query)
|
||||
cdx_iter = make_obj_iter(cdx_iter, query)
|
||||
|
||||
if process and query.process:
|
||||
cdx_iter = process_cdx(cdx_iter, query)
|
||||
|
||||
if perms_checker:
|
||||
cdx_iter = restrict_cdx(cdx_iter, params, perms_checker)
|
||||
cdx_iter = restrict_cdx(cdx_iter, query, perms_checker)
|
||||
|
||||
return cdx_iter
|
||||
|
||||
#=================================================================
|
||||
def restrict_cdx(cdx_iter, params, perms_checker):
|
||||
def restrict_cdx(cdx_iter, query, perms_checker):
|
||||
"""
|
||||
filter out those cdx records that user doesn't have access to,
|
||||
by consulting :param perms_checker:.
|
||||
:param cdx_iter: cdx record source iterable
|
||||
:param params: request parameters (dict)
|
||||
:param query: request parameters (CDXQuery)
|
||||
:param perms_checker: object implementing permission checker
|
||||
"""
|
||||
if not perms_checker.allow_url_lookup(params['key'], params['url']):
|
||||
if params.get('matchType', 'exact') == 'exact':
|
||||
if not perms_checker.allow_url_lookup(query.key, query.url):
|
||||
if query.is_exact:
|
||||
raise AccessException('Excluded')
|
||||
|
||||
for cdx in cdx_iter:
|
||||
@ -51,31 +56,26 @@ def restrict_cdx(cdx_iter, params, perms_checker):
|
||||
yield cdx
|
||||
|
||||
#=================================================================
|
||||
def filter_cdx(cdx_iter, params):
|
||||
if params.get('proxyAll'):
|
||||
return cdx_iter
|
||||
|
||||
resolve_revisits = params.get('resolveRevisits', False)
|
||||
if resolve_revisits:
|
||||
def process_cdx(cdx_iter, query):
|
||||
if query.resolve_revisits:
|
||||
cdx_iter = cdx_resolve_revisits(cdx_iter)
|
||||
|
||||
filters = params.get('filter', None)
|
||||
filters = query.filters
|
||||
if filters:
|
||||
cdx_iter = cdx_filter(cdx_iter, filters)
|
||||
|
||||
collapse_time = params.get('collapseTime', None)
|
||||
collapse_time = query.collapse_time
|
||||
if collapse_time:
|
||||
cdx_iter = cdx_collapse_time_status(cdx_iter, collapse_time)
|
||||
|
||||
limit = int(params.get('limit', 1000000))
|
||||
limit = query.limit
|
||||
|
||||
reverse = params.get('reverse', False) or params.get('sort') == 'reverse'
|
||||
if reverse:
|
||||
if query.reverse:
|
||||
cdx_iter = cdx_reverse(cdx_iter, limit)
|
||||
|
||||
closest_to = params.get('closest', None)
|
||||
if closest_to:
|
||||
cdx_iter = cdx_sort_closest(closest_to, cdx_iter, limit)
|
||||
closest = query.closest
|
||||
if closest:
|
||||
cdx_iter = cdx_sort_closest(closest, cdx_iter, limit)
|
||||
|
||||
if limit:
|
||||
cdx_iter = cdx_limit(cdx_iter, limit)
|
||||
@ -85,21 +85,21 @@ def filter_cdx(cdx_iter, params):
|
||||
|
||||
#=================================================================
|
||||
# load and source merge cdx streams
|
||||
def load_cdx_streams(sources, params):
|
||||
def load_cdx_streams(sources, query):
|
||||
# Optimize: no need to merge if just one input
|
||||
if len(sources) == 1:
|
||||
return sources[0].load_cdx(params)
|
||||
return sources[0].load_cdx(query)
|
||||
|
||||
source_iters = map(lambda src: src.load_cdx(params), sources)
|
||||
source_iters = map(lambda src: src.load_cdx(query), sources)
|
||||
merged_stream = merge(*(source_iters))
|
||||
return merged_stream
|
||||
|
||||
|
||||
#=================================================================
|
||||
# convert text cdx stream to CDXObject/IDXObject
|
||||
def make_obj_iter(text_iter, params):
|
||||
def make_obj_iter(text_iter, query):
|
||||
# already converted
|
||||
if params.get('showPagedIndex'):
|
||||
if query.secondary_index_only:
|
||||
cls = IDXObject
|
||||
else:
|
||||
cls = CDXObject
|
||||
|
@ -3,7 +3,7 @@ from pywb.utils.canonicalize import UrlCanonicalizer, calc_search_range
|
||||
from cdxops import cdx_load
|
||||
from cdxsource import CDXSource, CDXFile, RemoteCDXSource, RedisCDXSource
|
||||
from zipnum import ZipNumCluster
|
||||
from cdxobject import CDXObject, CaptureNotFoundException, CDXException
|
||||
from cdxobject import CDXObject, CaptureNotFoundException, CDXException, CDXQuery
|
||||
from cdxdomainspecific import load_domain_specific_cdx_rules
|
||||
|
||||
from pywb.utils.loaders import is_http
|
||||
@ -36,7 +36,7 @@ class BaseCDXServer(object):
|
||||
# set perms checker, if any
|
||||
self.perms_checker = kwargs.get('perms_checker')
|
||||
|
||||
def _check_cdx_iter(self, cdx_iter, params):
|
||||
def _check_cdx_iter(self, cdx_iter, query):
|
||||
""" Check cdx iter semantics
|
||||
If iter is empty (no matches), check if fuzzy matching
|
||||
is allowed, and try it -- otherwise,
|
||||
@ -48,21 +48,23 @@ class BaseCDXServer(object):
|
||||
if cdx_iter:
|
||||
return cdx_iter
|
||||
|
||||
url = params['url']
|
||||
|
||||
# check if fuzzy is allowed and ensure that its an
|
||||
# exact match
|
||||
if (self.fuzzy_query and params.get('allowFuzzy') and
|
||||
params.get('matchType', 'exact') == 'exact'):
|
||||
if (self.fuzzy_query and
|
||||
query.allow_fuzzy and
|
||||
query.is_exact):
|
||||
|
||||
fuzzy_params = self.fuzzy_query(params)
|
||||
if fuzzy_params:
|
||||
return self.load_cdx(**fuzzy_params)
|
||||
fuzzy_query_params = self.fuzzy_query(query)
|
||||
if fuzzy_query_params:
|
||||
return self.load_cdx_query(fuzzy_query_params)
|
||||
|
||||
msg = 'No Captures found for: ' + url
|
||||
msg = 'No Captures found for: ' + query.url
|
||||
raise CaptureNotFoundException(msg)
|
||||
|
||||
def load_cdx(self, **params):
|
||||
return self.load_cdx_query(CDXQuery(**params))
|
||||
|
||||
def load_cdx_query(self, query):
|
||||
raise NotImplementedError('Implement in subclass')
|
||||
|
||||
@staticmethod
|
||||
@ -89,26 +91,18 @@ class CDXServer(BaseCDXServer):
|
||||
# config argument.
|
||||
self._create_cdx_sources(paths, kwargs.get('config'))
|
||||
|
||||
def load_cdx(self, **params):
|
||||
# if key not set, assume 'url' is set and needs canonicalization
|
||||
if not params.get('key'):
|
||||
try:
|
||||
url = params['url']
|
||||
except KeyError:
|
||||
msg = 'A url= param must be specified to query the cdx server'
|
||||
raise CDXException(msg)
|
||||
def load_cdx_query(self, query):
|
||||
url = query.url
|
||||
key, end_key = calc_search_range(url=url,
|
||||
match_type=query.match_type,
|
||||
url_canon=self.url_canon)
|
||||
query.set_key(key, end_key)
|
||||
|
||||
match_type = params.get('matchType', 'exact')
|
||||
|
||||
key, end_key = calc_search_range(url=url,
|
||||
match_type=match_type,
|
||||
url_canon=self.url_canon)
|
||||
params['key'] = key
|
||||
params['end_key'] = end_key
|
||||
|
||||
cdx_iter = cdx_load(self.sources, params,
|
||||
cdx_iter = cdx_load(self.sources,
|
||||
query,
|
||||
perms_checker=self.perms_checker)
|
||||
return self._check_cdx_iter(cdx_iter, params)
|
||||
|
||||
return self._check_cdx_iter(cdx_iter, query)
|
||||
|
||||
def _create_cdx_sources(self, paths, config):
|
||||
"""
|
||||
@ -186,9 +180,9 @@ class RemoteCDXServer(BaseCDXServer):
|
||||
else:
|
||||
raise Exception('Invalid remote cdx source: ' + str(source))
|
||||
|
||||
def load_cdx(self, **params):
|
||||
remote_iter = cdx_load((self.sources,), params, filter=False)
|
||||
return self._check_cdx_iter(remote_iter, params)
|
||||
def load_cdx_query(self, query):
|
||||
remote_iter = cdx_load(self.sources, query, process=False)
|
||||
return self._check_cdx_iter(remote_iter, query)
|
||||
|
||||
def __str__(self):
|
||||
return 'Remote CDX server serving from ' + str(self.sources[0])
|
||||
@ -220,23 +214,4 @@ def create_cdx_server(config, ds_rules_file=None):
|
||||
ds_rules_file=ds_rules_file,
|
||||
perms_checker=perms_checker)
|
||||
|
||||
#=================================================================
|
||||
def extract_params_from_wsgi_env(env):
|
||||
""" utility function to extract params from the query
|
||||
string of a WSGI environment dictionary
|
||||
"""
|
||||
# use url= param to get actual url
|
||||
params = urlparse.parse_qs(env['QUERY_STRING'])
|
||||
|
||||
if not 'output' in params:
|
||||
params['output'] = 'text'
|
||||
|
||||
# parse_qs produces arrays for single values
|
||||
# cdx processing expects singleton params for all params,
|
||||
# except filters, so convert here
|
||||
# use first value of the list
|
||||
for name, val in params.iteritems():
|
||||
if name != 'filter':
|
||||
params[name] = val[0]
|
||||
|
||||
return params
|
||||
|
@ -12,7 +12,7 @@ class CDXSource(object):
|
||||
"""
|
||||
Represents any cdx index source
|
||||
"""
|
||||
def load_cdx(self, params):
|
||||
def load_cdx(self, query):
|
||||
raise NotImplementedError('Implement in subclass')
|
||||
|
||||
|
||||
@ -24,9 +24,9 @@ class CDXFile(CDXSource):
|
||||
def __init__(self, filename):
|
||||
self.filename = filename
|
||||
|
||||
def load_cdx(self, params):
|
||||
def load_cdx(self, query):
|
||||
source = SeekableTextFileReader(self.filename)
|
||||
return iter_range(source, params.get('key'), params.get('end_key'))
|
||||
return iter_range(source, query.key, query.end_key)
|
||||
|
||||
def __str__(self):
|
||||
return 'CDX File - ' + self.filename
|
||||
@ -45,20 +45,16 @@ class RemoteCDXSource(CDXSource):
|
||||
self.cookie = cookie
|
||||
self.proxy_all = proxy_all
|
||||
|
||||
def load_cdx(self, proxy_params):
|
||||
def load_cdx(self, query):
|
||||
if self.proxy_all:
|
||||
params = proxy_params
|
||||
params['proxyAll'] = True
|
||||
query.set_process(False)
|
||||
remote_query = query
|
||||
else:
|
||||
# Only send url and matchType params to remote
|
||||
params = {}
|
||||
params['url'] = proxy_params['url']
|
||||
match_type = proxy_params.get('matchType')
|
||||
remote_query = CDXQuery(url=query.url,
|
||||
match_type=query.matchType)
|
||||
|
||||
if match_type:
|
||||
proxy_params['matchType'] = match_type
|
||||
|
||||
urlparams = urllib.urlencode(params, True)
|
||||
urlparams = remote_query.urlencode()
|
||||
|
||||
try:
|
||||
request = urllib2.Request(self.remote_url, urlparams)
|
||||
@ -97,14 +93,14 @@ class RedisCDXSource(CDXSource):
|
||||
self.key_prefix = config.get('redis_key_prefix', self.key_prefix)
|
||||
|
||||
|
||||
def load_cdx(self, params):
|
||||
def load_cdx(self, query):
|
||||
"""
|
||||
Load cdx from redis cache, from an ordered list
|
||||
|
||||
Currently, there is no support for range queries
|
||||
Only 'exact' matchType is supported
|
||||
"""
|
||||
key = params['key']
|
||||
key = query.key
|
||||
|
||||
# ensure only url/surt is part of key
|
||||
key = key.split(' ')[0]
|
||||
|
@ -1,6 +1,7 @@
|
||||
from werkzeug.wrappers import BaseRequest, BaseResponse
|
||||
from werkzeug.wrappers import BaseResponse
|
||||
from cdxserver import create_cdx_server
|
||||
from pywb import get_test_dir
|
||||
from cdxobject import CDXQuery
|
||||
|
||||
import logging
|
||||
import os
|
||||
@ -18,37 +19,10 @@ DEFAULT_PORT = 8080
|
||||
|
||||
#=================================================================
|
||||
|
||||
class CDXQueryRequest(BaseRequest):
|
||||
class CDXQueryRequest(object):
|
||||
def __init__(self, environ):
|
||||
super(CDXQueryRequest, self).__init__(environ)
|
||||
self.query = CDXQuery.from_wsgi_env(environ)
|
||||
|
||||
def _get_bool(self, name):
|
||||
v = self.args.get(name)
|
||||
if v:
|
||||
try:
|
||||
v = int(s)
|
||||
except ValueError as ex:
|
||||
v = (s.lower() == 'true')
|
||||
return bool(v)
|
||||
@property
|
||||
def output(self):
|
||||
return self.args.get('output', 'text')
|
||||
@property
|
||||
def filter(self):
|
||||
return self.args.getlist('filter', [])
|
||||
@property
|
||||
def fields(self):
|
||||
v = self.args.get('fields')
|
||||
return v.split(',') if v else None
|
||||
@property
|
||||
def reverse(self):
|
||||
# sort=reverse overrides reverse=0
|
||||
return (self._get_bool('reverse') or
|
||||
self.args.get('sort') == 'reverse')
|
||||
@property
|
||||
def params(self):
|
||||
return dict(t if t[0] == 'filter' else (t[0], t[1][0])
|
||||
for t in self.args.iterlists())
|
||||
|
||||
class WSGICDXServer(object):
|
||||
def __init__(self, config, rules_file):
|
||||
@ -57,11 +31,11 @@ class WSGICDXServer(object):
|
||||
def __call__(self, environ, start_response):
|
||||
request = CDXQueryRequest(environ)
|
||||
try:
|
||||
logging.debug('request.args=%s', request.params)
|
||||
result = self.cdxserver.load_cdx(**request.params)
|
||||
logging.debug('request.args=%s', request.query)
|
||||
result = self.cdxserver.load_cdx_query(request.query)
|
||||
|
||||
# TODO: select response type by "output" parameter
|
||||
response = PlainTextResponse(result, request.fields)
|
||||
response = PlainTextResponse(result, request.query.fields)
|
||||
return response(environ, start_response)
|
||||
except Exception as exc:
|
||||
logging.error('load_cdx failed', exc_info=1)
|
||||
@ -74,7 +48,7 @@ def cdx_text_out(cdx, fields):
|
||||
if not fields:
|
||||
return str(cdx) + '\n'
|
||||
else:
|
||||
logging.info('cdx fields=%s', cdx.keys())
|
||||
logging.info('cdx fields=%s', cdx.keys)
|
||||
# TODO: this will results in an exception if fields contain
|
||||
# non-existent field name.
|
||||
return ' '.join(cdx[x] for x in fields) + '\n'
|
||||
|
@ -110,21 +110,21 @@ class ZipNumCluster(CDXSource):
|
||||
def lookup_loc(self, part):
|
||||
return self.loc_map[part]
|
||||
|
||||
def load_cdx(self, params):
|
||||
def load_cdx(self, query):
|
||||
self.reload_loc()
|
||||
|
||||
reader = SeekableTextFileReader(self.summary)
|
||||
|
||||
idx_iter = iter_range(reader,
|
||||
params['key'],
|
||||
params['end_key'],
|
||||
query.key,
|
||||
query.end_key,
|
||||
prev_size=1)
|
||||
|
||||
if params.get('showPagedIndex'):
|
||||
params['proxyAll'] = True
|
||||
if query.secondary_index_only:
|
||||
query.set_process(False)
|
||||
return idx_iter
|
||||
else:
|
||||
blocks = self.idx_to_cdx(idx_iter, params)
|
||||
blocks = self.idx_to_cdx(idx_iter, query)
|
||||
|
||||
def gen_cdx():
|
||||
for blk in blocks:
|
||||
@ -133,7 +133,7 @@ class ZipNumCluster(CDXSource):
|
||||
|
||||
return gen_cdx()
|
||||
|
||||
def idx_to_cdx(self, idx_iter, params):
|
||||
def idx_to_cdx(self, idx_iter, query):
|
||||
blocks = None
|
||||
ranges = []
|
||||
|
||||
@ -150,7 +150,7 @@ class ZipNumCluster(CDXSource):
|
||||
|
||||
else:
|
||||
if blocks:
|
||||
yield self.block_to_cdx_iter(blocks, ranges, params)
|
||||
yield self.block_to_cdx_iter(blocks, ranges, query)
|
||||
|
||||
blocks = ZipBlocks(idx['part'],
|
||||
idx['offset'],
|
||||
@ -160,15 +160,15 @@ class ZipNumCluster(CDXSource):
|
||||
ranges = [blocks.length]
|
||||
|
||||
if blocks:
|
||||
yield self.block_to_cdx_iter(blocks, ranges, params)
|
||||
yield self.block_to_cdx_iter(blocks, ranges, query)
|
||||
|
||||
def block_to_cdx_iter(self, blocks, ranges, params):
|
||||
def block_to_cdx_iter(self, blocks, ranges, query):
|
||||
last_exc = None
|
||||
last_traceback = None
|
||||
|
||||
for location in self.lookup_loc(blocks.part):
|
||||
try:
|
||||
return self.load_blocks(location, blocks, ranges, params)
|
||||
return self.load_blocks(location, blocks, ranges, query)
|
||||
except Exception as exc:
|
||||
last_exc = exc
|
||||
import sys
|
||||
@ -179,7 +179,7 @@ class ZipNumCluster(CDXSource):
|
||||
else:
|
||||
raise Exception('No Locations Found for: ' + block.part)
|
||||
|
||||
def load_blocks(self, location, blocks, ranges, params):
|
||||
def load_blocks(self, location, blocks, ranges, query):
|
||||
|
||||
if (logging.getLogger().getEffectiveLevel() <= logging.DEBUG):
|
||||
msg = 'Loading {b.count} blocks from {loc}:{b.offset}+{b.length}'
|
||||
@ -195,9 +195,9 @@ class ZipNumCluster(CDXSource):
|
||||
iter_ = itertools.chain(*itertools.imap(decompress_block, ranges))
|
||||
|
||||
# start bound
|
||||
iter_ = linearsearch(iter_, params['key'])
|
||||
iter_ = linearsearch(iter_, query.key)
|
||||
|
||||
# end bound
|
||||
end = params['end_key']
|
||||
end = query.end_key
|
||||
iter_ = itertools.takewhile(lambda line: line < end, iter_)
|
||||
return iter_
|
||||
|
@ -4,7 +4,7 @@ import mimetypes
|
||||
import time
|
||||
|
||||
from pywb.rewrite.wburl import WbUrl
|
||||
from pywb.cdx.cdxserver import extract_params_from_wsgi_env
|
||||
from pywb.cdx.cdxobject import CDXQuery
|
||||
from wbrequestresponse import WbResponse
|
||||
from wbexceptions import WbException, NotFoundException
|
||||
from views import TextCapturesView
|
||||
@ -79,8 +79,8 @@ class CDXHandler(BaseHandler):
|
||||
self.view = view if view else TextCapturesView()
|
||||
|
||||
def __call__(self, wbrequest):
|
||||
params = extract_params_from_wsgi_env(wbrequest.env)
|
||||
cdx_lines = self.index_reader.load_cdx(**params)
|
||||
query = CDXQuery.from_wsgi_env(wbrequest.env)
|
||||
cdx_lines = self.index_reader.load_cdx_query(query)
|
||||
|
||||
return self.view.render_response(wbrequest, cdx_lines)
|
||||
|
||||
|
@ -34,6 +34,9 @@ class IndexReader(object):
|
||||
|
||||
return cdxlines
|
||||
|
||||
def load_cdx_query(self, query):
|
||||
return self.cdx_server.load_cdx_query(query)
|
||||
|
||||
def load_cdx(self, **params):
|
||||
return self.cdx_server.load_cdx(**params)
|
||||
|
||||
|
2
setup.py
2
setup.py
@ -26,7 +26,6 @@ setup(
|
||||
('sample_archive/text_content/', glob.glob('sample_archive/text_content/*')),
|
||||
],
|
||||
install_requires=[
|
||||
'uwsgi',
|
||||
'rfc3987',
|
||||
'chardet',
|
||||
'redis',
|
||||
@ -36,7 +35,6 @@ setup(
|
||||
'WebTest',
|
||||
'pytest',
|
||||
'werkzeug>=0.9.4',
|
||||
'setuptools',
|
||||
],
|
||||
# tests_require=['WebTest', 'pytest'],
|
||||
zip_safe=False
|
||||
|
Loading…
x
Reference in New Issue
Block a user