1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 06:59:52 +01:00

cdx: refactor to create separate CDXQuery object for wrapping

params passed to load_cdx()
This commit is contained in:
Ilya Kreymer 2014-03-01 08:41:24 -08:00
parent af9cabdc72
commit 355fa32600
10 changed files with 223 additions and 153 deletions

View File

@ -7,6 +7,7 @@ from pywb.utils.dsrules import BaseRule, RuleSet
from pywb.utils.canonicalize import unsurt, UrlCanonicalizer from pywb.utils.canonicalize import unsurt, UrlCanonicalizer
from cdxobject import CDXQuery
#================================================================= #=================================================================
def load_domain_specific_cdx_rules(ds_rules_file, surt_ordered): def load_domain_specific_cdx_rules(ds_rules_file, surt_ordered):
@ -70,13 +71,13 @@ class FuzzyQuery:
def __init__(self, rules): def __init__(self, rules):
self.rules = rules self.rules = rules
def __call__(self, params): def __call__(self, query):
matched_rule = None matched_rule = None
urlkey = params['key'] urlkey = query.key
url = params['url'] url = query.url
filter_ = params.get('filter', []) filter_ = query.filters
output = params.get('output') output = query.output
for rule in self.rules.iter_matching(urlkey): for rule in self.rules.iter_matching(urlkey):
m = rule.regex.search(urlkey) m = rule.regex.search(urlkey)
@ -102,7 +103,7 @@ class FuzzyQuery:
'filter': filter_, 'filter': filter_,
'output': output} 'output': output}
return params return CDXQuery(**params)
#================================================================= #=================================================================

View File

@ -1,6 +1,9 @@
from collections import OrderedDict from collections import OrderedDict
import itertools import itertools
from urllib import urlencode
from urlparse import parse_qs
#================================================================= #=================================================================
class CDXException(Exception): class CDXException(Exception):
@ -20,6 +23,126 @@ class AccessException(CDXException):
return '403 Access Denied' return '403 Access Denied'
#=================================================================
class CDXQuery(object):
    """ Wrapper around the raw params passed to load_cdx(), exposing
    each supported query param as a read-only property (with its default)
    """
    def __init__(self, **kwargs):
        # params are kept under their original query-string names
        # (eg. 'matchType', 'allowFuzzy'), not the property names
        self.params = kwargs

    def __repr__(self):
        return 'CDXQuery(%r)' % (self.params,)

    @property
    def key(self):
        """ start key of the search range, raises KeyError if not yet set
        """
        return self.params['key']

    @property
    def end_key(self):
        """ end key of the search range, raises KeyError if not yet set
        """
        return self.params['end_key']

    def set_key(self, key, end_key):
        """ store the search range for this query
        """
        self.params['key'] = key
        self.params['end_key'] = end_key

    @property
    def url(self):
        """ the url= param, raises CDXException if it was not specified
        """
        try:
            return self.params['url']
        except KeyError:
            msg = 'A url= param must be specified to query the cdx server'
            raise CDXException(msg)

    @property
    def match_type(self):
        return self.params.get('matchType', 'exact')

    @property
    def is_exact(self):
        return self.match_type == 'exact'

    @property
    def allow_fuzzy(self):
        return self._get_bool('allowFuzzy')

    @property
    def output(self):
        return self.params.get('output', 'text')

    @property
    def limit(self):
        return int(self.params.get('limit', 100000))

    @property
    def collapse_time(self):
        return self.params.get('collapseTime')

    @property
    def resolve_revisits(self):
        return self._get_bool('resolveRevisits')

    @property
    def filters(self):
        return self.params.get('filter', [])

    @property
    def fields(self):
        """ list of field names from the comma-separated fields= param,
        or None if not specified
        """
        v = self.params.get('fields')
        return v.split(',') if v else None

    @property
    def closest(self):
        # sort=closest is not required
        return self.params.get('closest')

    @property
    def reverse(self):
        # sort=reverse overrides reverse=0
        return (self._get_bool('reverse') or
                self.params.get('sort') == 'reverse')

    @property
    def secondary_index_only(self):
        return self._get_bool('showPagedIndex')

    @property
    def process(self):
        """ processOps flag, defaults to True when not specified
        """
        return self._get_bool('processOps', True)

    def set_process(self, process):
        self.params['processOps'] = process

    def _get_bool(self, name, def_val=False):
        """ interpret param as a bool: numeric values by their int value,
        other strings are true only if equal to 'true' (case-insensitive)
        def_val is used only when the param is missing or empty
        """
        v = self.params.get(name)

        # only fall back to the default when nothing was specified:
        # an explicit False / 0 (eg. from set_process(False)) must not
        # be replaced by a True default
        if v is None or v == '':
            return bool(def_val)

        try:
            return bool(int(v))
        except ValueError:
            return v.lower() == 'true'

    def urlencode(self):
        """ encode all params as a query string
        (list values, eg. filter, are expanded into repeated params)
        """
        return urlencode(self.params, True)

    @staticmethod
    def from_wsgi_env(env):
        """ utility function to extract params and create a CDXQuery
        from a WSGI environment dictionary
        """
        # QUERY_STRING may be absent from the environ, treat as empty
        parsed = parse_qs(env.get('QUERY_STRING', ''))

        # parse_qs produces arrays for single values
        # cdx processing expects singleton params for all params,
        # except filters, so convert here
        # use first value of the list
        params = {name: (vals if name == 'filter' else vals[0])
                  for name, vals in parsed.items()}

        # apply the default only after the list -> singleton conversion,
        # otherwise 'text' would itself be truncated to 't'
        params.setdefault('output', 'text')

        return CDXQuery(**params)
#================================================================= #=================================================================
class CDXObject(OrderedDict): class CDXObject(OrderedDict):
CDX_FORMATS = [ CDX_FORMATS = [

View File

@ -1,4 +1,4 @@
from cdxobject import CDXObject, IDXObject, AccessException from cdxobject import CDXObject, IDXObject, AccessException, CDXQuery
from pywb.utils.timeutils import timestamp_to_sec from pywb.utils.timeutils import timestamp_to_sec
import bisect import bisect
@ -10,7 +10,7 @@ from collections import deque
#================================================================= #=================================================================
def cdx_load(sources, params, perms_checker=None, filter=True): def cdx_load(sources, query, perms_checker=None, process=True):
""" """
merge text CDX lines from sources, return an iterator for merge text CDX lines from sources, return an iterator for
filtered and access-checked sequence of CDX objects. filtered and access-checked sequence of CDX objects.
@ -19,25 +19,30 @@ def cdx_load(sources, params, perms_checker=None, filter=True):
:param perms_checker: access check filter object implementing :param perms_checker: access check filter object implementing
allow_url_lookup(key, url), allow_capture(cdxobj) and allow_url_lookup(key, url), allow_capture(cdxobj) and
filter_fields(cdxobj) methods. filter_fields(cdxobj) methods.
:param process: bool, perform processing sorting/filtering/grouping ops
""" """
cdx_iter = load_cdx_streams(sources, params) cdx_iter = load_cdx_streams(sources, query)
cdx_iter = make_obj_iter(cdx_iter, params) cdx_iter = make_obj_iter(cdx_iter, query)
cdx_iter = filter_cdx(cdx_iter, params)
if process and query.process:
cdx_iter = process_cdx(cdx_iter, query)
if perms_checker: if perms_checker:
cdx_iter = restrict_cdx(cdx_iter, params, perms_checker) cdx_iter = restrict_cdx(cdx_iter, query, perms_checker)
return cdx_iter return cdx_iter
#================================================================= #=================================================================
def restrict_cdx(cdx_iter, params, perms_checker): def restrict_cdx(cdx_iter, query, perms_checker):
""" """
filter out those cdx records that user doesn't have access to, filter out those cdx records that user doesn't have access to,
by consulting :param perms_checker:. by consulting :param perms_checker:.
:param cdx_iter: cdx record source iterable :param cdx_iter: cdx record source iterable
:param params: request parameters (dict) :param query: request parameters (CDXQuery)
:param perms_checker: object implementing permission checker :param perms_checker: object implementing permission checker
""" """
if not perms_checker.allow_url_lookup(params['key'], params['url']): if not perms_checker.allow_url_lookup(query.key, query.url):
if params.get('matchType', 'exact') == 'exact': if query.is_exact:
raise AccessException('Excluded') raise AccessException('Excluded')
for cdx in cdx_iter: for cdx in cdx_iter:
@ -51,31 +56,26 @@ def restrict_cdx(cdx_iter, params, perms_checker):
yield cdx yield cdx
#================================================================= #=================================================================
def filter_cdx(cdx_iter, params): def process_cdx(cdx_iter, query):
if params.get('proxyAll'): if query.resolve_revisits:
return cdx_iter
resolve_revisits = params.get('resolveRevisits', False)
if resolve_revisits:
cdx_iter = cdx_resolve_revisits(cdx_iter) cdx_iter = cdx_resolve_revisits(cdx_iter)
filters = params.get('filter', None) filters = query.filters
if filters: if filters:
cdx_iter = cdx_filter(cdx_iter, filters) cdx_iter = cdx_filter(cdx_iter, filters)
collapse_time = params.get('collapseTime', None) collapse_time = query.collapse_time
if collapse_time: if collapse_time:
cdx_iter = cdx_collapse_time_status(cdx_iter, collapse_time) cdx_iter = cdx_collapse_time_status(cdx_iter, collapse_time)
limit = int(params.get('limit', 1000000)) limit = query.limit
reverse = params.get('reverse', False) or params.get('sort') == 'reverse' if query.reverse:
if reverse:
cdx_iter = cdx_reverse(cdx_iter, limit) cdx_iter = cdx_reverse(cdx_iter, limit)
closest_to = params.get('closest', None) closest = query.closest
if closest_to: if closest:
cdx_iter = cdx_sort_closest(closest_to, cdx_iter, limit) cdx_iter = cdx_sort_closest(closest, cdx_iter, limit)
if limit: if limit:
cdx_iter = cdx_limit(cdx_iter, limit) cdx_iter = cdx_limit(cdx_iter, limit)
@ -85,21 +85,21 @@ def filter_cdx(cdx_iter, params):
#================================================================= #=================================================================
# load and source merge cdx streams # load and source merge cdx streams
def load_cdx_streams(sources, params): def load_cdx_streams(sources, query):
# Optimize: no need to merge if just one input # Optimize: no need to merge if just one input
if len(sources) == 1: if len(sources) == 1:
return sources[0].load_cdx(params) return sources[0].load_cdx(query)
source_iters = map(lambda src: src.load_cdx(params), sources) source_iters = map(lambda src: src.load_cdx(query), sources)
merged_stream = merge(*(source_iters)) merged_stream = merge(*(source_iters))
return merged_stream return merged_stream
#================================================================= #=================================================================
# convert text cdx stream to CDXObject/IDXObject # convert text cdx stream to CDXObject/IDXObject
def make_obj_iter(text_iter, params): def make_obj_iter(text_iter, query):
# already converted # already converted
if params.get('showPagedIndex'): if query.secondary_index_only:
cls = IDXObject cls = IDXObject
else: else:
cls = CDXObject cls = CDXObject

View File

@ -3,7 +3,7 @@ from pywb.utils.canonicalize import UrlCanonicalizer, calc_search_range
from cdxops import cdx_load from cdxops import cdx_load
from cdxsource import CDXSource, CDXFile, RemoteCDXSource, RedisCDXSource from cdxsource import CDXSource, CDXFile, RemoteCDXSource, RedisCDXSource
from zipnum import ZipNumCluster from zipnum import ZipNumCluster
from cdxobject import CDXObject, CaptureNotFoundException, CDXException from cdxobject import CDXObject, CaptureNotFoundException, CDXException, CDXQuery
from cdxdomainspecific import load_domain_specific_cdx_rules from cdxdomainspecific import load_domain_specific_cdx_rules
from pywb.utils.loaders import is_http from pywb.utils.loaders import is_http
@ -36,7 +36,7 @@ class BaseCDXServer(object):
# set perms checker, if any # set perms checker, if any
self.perms_checker = kwargs.get('perms_checker') self.perms_checker = kwargs.get('perms_checker')
def _check_cdx_iter(self, cdx_iter, params): def _check_cdx_iter(self, cdx_iter, query):
""" Check cdx iter semantics """ Check cdx iter semantics
If iter is empty (no matches), check if fuzzy matching If iter is empty (no matches), check if fuzzy matching
is allowed, and try it -- otherwise, is allowed, and try it -- otherwise,
@ -48,21 +48,23 @@ class BaseCDXServer(object):
if cdx_iter: if cdx_iter:
return cdx_iter return cdx_iter
url = params['url']
# check if fuzzy is allowed and ensure that its an # check if fuzzy is allowed and ensure that its an
# exact match # exact match
if (self.fuzzy_query and params.get('allowFuzzy') and if (self.fuzzy_query and
params.get('matchType', 'exact') == 'exact'): query.allow_fuzzy and
query.is_exact):
fuzzy_params = self.fuzzy_query(params) fuzzy_query_params = self.fuzzy_query(query)
if fuzzy_params: if fuzzy_query_params:
return self.load_cdx(**fuzzy_params) return self.load_cdx_query(fuzzy_query_params)
msg = 'No Captures found for: ' + url msg = 'No Captures found for: ' + query.url
raise CaptureNotFoundException(msg) raise CaptureNotFoundException(msg)
def load_cdx(self, **params): def load_cdx(self, **params):
return self.load_cdx_query(CDXQuery(**params))
def load_cdx_query(self, query):
raise NotImplementedError('Implement in subclass') raise NotImplementedError('Implement in subclass')
@staticmethod @staticmethod
@ -89,26 +91,18 @@ class CDXServer(BaseCDXServer):
# config argument. # config argument.
self._create_cdx_sources(paths, kwargs.get('config')) self._create_cdx_sources(paths, kwargs.get('config'))
def load_cdx(self, **params): def load_cdx_query(self, query):
# if key not set, assume 'url' is set and needs canonicalization url = query.url
if not params.get('key'): key, end_key = calc_search_range(url=url,
try: match_type=query.match_type,
url = params['url'] url_canon=self.url_canon)
except KeyError: query.set_key(key, end_key)
msg = 'A url= param must be specified to query the cdx server'
raise CDXException(msg)
match_type = params.get('matchType', 'exact') cdx_iter = cdx_load(self.sources,
query,
key, end_key = calc_search_range(url=url,
match_type=match_type,
url_canon=self.url_canon)
params['key'] = key
params['end_key'] = end_key
cdx_iter = cdx_load(self.sources, params,
perms_checker=self.perms_checker) perms_checker=self.perms_checker)
return self._check_cdx_iter(cdx_iter, params)
return self._check_cdx_iter(cdx_iter, query)
def _create_cdx_sources(self, paths, config): def _create_cdx_sources(self, paths, config):
""" """
@ -186,9 +180,9 @@ class RemoteCDXServer(BaseCDXServer):
else: else:
raise Exception('Invalid remote cdx source: ' + str(source)) raise Exception('Invalid remote cdx source: ' + str(source))
def load_cdx(self, **params): def load_cdx_query(self, query):
remote_iter = cdx_load((self.sources,), params, filter=False) remote_iter = cdx_load(self.sources, query, process=False)
return self._check_cdx_iter(remote_iter, params) return self._check_cdx_iter(remote_iter, query)
def __str__(self): def __str__(self):
return 'Remote CDX server serving from ' + str(self.sources[0]) return 'Remote CDX server serving from ' + str(self.sources[0])
@ -220,23 +214,4 @@ def create_cdx_server(config, ds_rules_file=None):
ds_rules_file=ds_rules_file, ds_rules_file=ds_rules_file,
perms_checker=perms_checker) perms_checker=perms_checker)
#=================================================================
def extract_params_from_wsgi_env(env):
""" utility function to extract params from the query
string of a WSGI environment dictionary
"""
# use url= param to get actual url
params = urlparse.parse_qs(env['QUERY_STRING'])
if not 'output' in params:
params['output'] = 'text'
# parse_qs produces arrays for single values
# cdx processing expects singleton params for all params,
# except filters, so convert here
# use first value of the list
for name, val in params.iteritems():
if name != 'filter':
params[name] = val[0]
return params

View File

@ -12,7 +12,7 @@ class CDXSource(object):
""" """
Represents any cdx index source Represents any cdx index source
""" """
def load_cdx(self, params): def load_cdx(self, query):
raise NotImplementedError('Implement in subclass') raise NotImplementedError('Implement in subclass')
@ -24,9 +24,9 @@ class CDXFile(CDXSource):
def __init__(self, filename): def __init__(self, filename):
self.filename = filename self.filename = filename
def load_cdx(self, params): def load_cdx(self, query):
source = SeekableTextFileReader(self.filename) source = SeekableTextFileReader(self.filename)
return iter_range(source, params.get('key'), params.get('end_key')) return iter_range(source, query.key, query.end_key)
def __str__(self): def __str__(self):
return 'CDX File - ' + self.filename return 'CDX File - ' + self.filename
@ -45,20 +45,16 @@ class RemoteCDXSource(CDXSource):
self.cookie = cookie self.cookie = cookie
self.proxy_all = proxy_all self.proxy_all = proxy_all
def load_cdx(self, proxy_params): def load_cdx(self, query):
if self.proxy_all: if self.proxy_all:
params = proxy_params query.set_process(False)
params['proxyAll'] = True remote_query = query
else: else:
# Only send url and matchType params to remote # Only send url and matchType params to remote
params = {} remote_query = CDXQuery(url=query.url,
params['url'] = proxy_params['url'] match_type=query.matchType)
match_type = proxy_params.get('matchType')
if match_type: urlparams = remote_query.urlencode()
proxy_params['matchType'] = match_type
urlparams = urllib.urlencode(params, True)
try: try:
request = urllib2.Request(self.remote_url, urlparams) request = urllib2.Request(self.remote_url, urlparams)
@ -97,14 +93,14 @@ class RedisCDXSource(CDXSource):
self.key_prefix = config.get('redis_key_prefix', self.key_prefix) self.key_prefix = config.get('redis_key_prefix', self.key_prefix)
def load_cdx(self, params): def load_cdx(self, query):
""" """
Load cdx from redis cache, from an ordered list Load cdx from redis cache, from an ordered list
Currently, there is no support for range queries Currently, there is no support for range queries
Only 'exact' matchType is supported Only 'exact' matchType is supported
""" """
key = params['key'] key = query.key
# ensure only url/surt is part of key # ensure only url/surt is part of key
key = key.split(' ')[0] key = key.split(' ')[0]

View File

@ -1,6 +1,7 @@
from werkzeug.wrappers import BaseRequest, BaseResponse from werkzeug.wrappers import BaseResponse
from cdxserver import create_cdx_server from cdxserver import create_cdx_server
from pywb import get_test_dir from pywb import get_test_dir
from cdxobject import CDXQuery
import logging import logging
import os import os
@ -18,37 +19,10 @@ DEFAULT_PORT = 8080
#================================================================= #=================================================================
class CDXQueryRequest(BaseRequest): class CDXQueryRequest(object):
def __init__(self, environ): def __init__(self, environ):
super(CDXQueryRequest, self).__init__(environ) self.query = CDXQuery.from_wsgi_env(environ)
def _get_bool(self, name):
v = self.args.get(name)
if v:
try:
v = int(s)
except ValueError as ex:
v = (s.lower() == 'true')
return bool(v)
@property
def output(self):
return self.args.get('output', 'text')
@property
def filter(self):
return self.args.getlist('filter', [])
@property
def fields(self):
v = self.args.get('fields')
return v.split(',') if v else None
@property
def reverse(self):
# sort=reverse overrides reverse=0
return (self._get_bool('reverse') or
self.args.get('sort') == 'reverse')
@property
def params(self):
return dict(t if t[0] == 'filter' else (t[0], t[1][0])
for t in self.args.iterlists())
class WSGICDXServer(object): class WSGICDXServer(object):
def __init__(self, config, rules_file): def __init__(self, config, rules_file):
@ -57,11 +31,11 @@ class WSGICDXServer(object):
def __call__(self, environ, start_response): def __call__(self, environ, start_response):
request = CDXQueryRequest(environ) request = CDXQueryRequest(environ)
try: try:
logging.debug('request.args=%s', request.params) logging.debug('request.args=%s', request.query)
result = self.cdxserver.load_cdx(**request.params) result = self.cdxserver.load_cdx_query(request.query)
# TODO: select response type by "output" parameter # TODO: select response type by "output" parameter
response = PlainTextResponse(result, request.fields) response = PlainTextResponse(result, request.query.fields)
return response(environ, start_response) return response(environ, start_response)
except Exception as exc: except Exception as exc:
logging.error('load_cdx failed', exc_info=1) logging.error('load_cdx failed', exc_info=1)
@ -74,7 +48,7 @@ def cdx_text_out(cdx, fields):
if not fields: if not fields:
return str(cdx) + '\n' return str(cdx) + '\n'
else: else:
logging.info('cdx fields=%s', cdx.keys()) logging.info('cdx fields=%s', cdx.keys)
# TODO: this will results in an exception if fields contain # TODO: this will results in an exception if fields contain
# non-existent field name. # non-existent field name.
return ' '.join(cdx[x] for x in fields) + '\n' return ' '.join(cdx[x] for x in fields) + '\n'

View File

@ -110,21 +110,21 @@ class ZipNumCluster(CDXSource):
def lookup_loc(self, part): def lookup_loc(self, part):
return self.loc_map[part] return self.loc_map[part]
def load_cdx(self, params): def load_cdx(self, query):
self.reload_loc() self.reload_loc()
reader = SeekableTextFileReader(self.summary) reader = SeekableTextFileReader(self.summary)
idx_iter = iter_range(reader, idx_iter = iter_range(reader,
params['key'], query.key,
params['end_key'], query.end_key,
prev_size=1) prev_size=1)
if params.get('showPagedIndex'): if query.secondary_index_only:
params['proxyAll'] = True query.set_process(False)
return idx_iter return idx_iter
else: else:
blocks = self.idx_to_cdx(idx_iter, params) blocks = self.idx_to_cdx(idx_iter, query)
def gen_cdx(): def gen_cdx():
for blk in blocks: for blk in blocks:
@ -133,7 +133,7 @@ class ZipNumCluster(CDXSource):
return gen_cdx() return gen_cdx()
def idx_to_cdx(self, idx_iter, params): def idx_to_cdx(self, idx_iter, query):
blocks = None blocks = None
ranges = [] ranges = []
@ -150,7 +150,7 @@ class ZipNumCluster(CDXSource):
else: else:
if blocks: if blocks:
yield self.block_to_cdx_iter(blocks, ranges, params) yield self.block_to_cdx_iter(blocks, ranges, query)
blocks = ZipBlocks(idx['part'], blocks = ZipBlocks(idx['part'],
idx['offset'], idx['offset'],
@ -160,15 +160,15 @@ class ZipNumCluster(CDXSource):
ranges = [blocks.length] ranges = [blocks.length]
if blocks: if blocks:
yield self.block_to_cdx_iter(blocks, ranges, params) yield self.block_to_cdx_iter(blocks, ranges, query)
def block_to_cdx_iter(self, blocks, ranges, params): def block_to_cdx_iter(self, blocks, ranges, query):
last_exc = None last_exc = None
last_traceback = None last_traceback = None
for location in self.lookup_loc(blocks.part): for location in self.lookup_loc(blocks.part):
try: try:
return self.load_blocks(location, blocks, ranges, params) return self.load_blocks(location, blocks, ranges, query)
except Exception as exc: except Exception as exc:
last_exc = exc last_exc = exc
import sys import sys
@ -179,7 +179,7 @@ class ZipNumCluster(CDXSource):
else: else:
raise Exception('No Locations Found for: ' + block.part) raise Exception('No Locations Found for: ' + block.part)
def load_blocks(self, location, blocks, ranges, params): def load_blocks(self, location, blocks, ranges, query):
if (logging.getLogger().getEffectiveLevel() <= logging.DEBUG): if (logging.getLogger().getEffectiveLevel() <= logging.DEBUG):
msg = 'Loading {b.count} blocks from {loc}:{b.offset}+{b.length}' msg = 'Loading {b.count} blocks from {loc}:{b.offset}+{b.length}'
@ -195,9 +195,9 @@ class ZipNumCluster(CDXSource):
iter_ = itertools.chain(*itertools.imap(decompress_block, ranges)) iter_ = itertools.chain(*itertools.imap(decompress_block, ranges))
# start bound # start bound
iter_ = linearsearch(iter_, params['key']) iter_ = linearsearch(iter_, query.key)
# end bound # end bound
end = params['end_key'] end = query.end_key
iter_ = itertools.takewhile(lambda line: line < end, iter_) iter_ = itertools.takewhile(lambda line: line < end, iter_)
return iter_ return iter_

View File

@ -4,7 +4,7 @@ import mimetypes
import time import time
from pywb.rewrite.wburl import WbUrl from pywb.rewrite.wburl import WbUrl
from pywb.cdx.cdxserver import extract_params_from_wsgi_env from pywb.cdx.cdxobject import CDXQuery
from wbrequestresponse import WbResponse from wbrequestresponse import WbResponse
from wbexceptions import WbException, NotFoundException from wbexceptions import WbException, NotFoundException
from views import TextCapturesView from views import TextCapturesView
@ -79,8 +79,8 @@ class CDXHandler(BaseHandler):
self.view = view if view else TextCapturesView() self.view = view if view else TextCapturesView()
def __call__(self, wbrequest): def __call__(self, wbrequest):
params = extract_params_from_wsgi_env(wbrequest.env) query = CDXQuery.from_wsgi_env(wbrequest.env)
cdx_lines = self.index_reader.load_cdx(**params) cdx_lines = self.index_reader.load_cdx_query(query)
return self.view.render_response(wbrequest, cdx_lines) return self.view.render_response(wbrequest, cdx_lines)

View File

@ -34,6 +34,9 @@ class IndexReader(object):
return cdxlines return cdxlines
def load_cdx_query(self, query):
return self.cdx_server.load_cdx_query(query)
def load_cdx(self, **params): def load_cdx(self, **params):
return self.cdx_server.load_cdx(**params) return self.cdx_server.load_cdx(**params)

View File

@ -26,7 +26,6 @@ setup(
('sample_archive/text_content/', glob.glob('sample_archive/text_content/*')), ('sample_archive/text_content/', glob.glob('sample_archive/text_content/*')),
], ],
install_requires=[ install_requires=[
'uwsgi',
'rfc3987', 'rfc3987',
'chardet', 'chardet',
'redis', 'redis',
@ -36,7 +35,6 @@ setup(
'WebTest', 'WebTest',
'pytest', 'pytest',
'werkzeug>=0.9.4', 'werkzeug>=0.9.4',
'setuptools',
], ],
# tests_require=['WebTest', 'pytest'], # tests_require=['WebTest', 'pytest'],
zip_safe=False zip_safe=False