mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
sample perms addition to cdx ops
This commit is contained in:
parent
531464902f
commit
be284859be
@ -10,44 +10,25 @@ from collections import deque
|
||||
|
||||
|
||||
#=================================================================
|
||||
def cdx_text_out(cdx, fields):
|
||||
if not fields:
|
||||
return str(cdx)
|
||||
else:
|
||||
return ' '.join(map(lambda x: cdx[x], fields.split(',')))
|
||||
class AllowAllPerms:
|
||||
"""
|
||||
Sample Perm Checker which allows all
|
||||
"""
|
||||
def allow_url(self, url):
|
||||
return True
|
||||
|
||||
def allow_url_timestamp(self, url, timestamp):
|
||||
return True
|
||||
|
||||
def filter_fields(self, cdx):
|
||||
return cdx
|
||||
|
||||
|
||||
#=================================================================
|
||||
def cdx_load(sources, params):
|
||||
cdx_iter = load_cdx_streams(sources, params)
|
||||
def cdx_load(source, params, perms_checker = AllowAllPerms()):
|
||||
|
||||
cdx_iter = make_cdx_iter(cdx_iter)
|
||||
|
||||
if not params.get('proxy_all'):
|
||||
resolve_revisits = params.get('resolve_revisits', False)
|
||||
if resolve_revisits:
|
||||
cdx_iter = cdx_resolve_revisits(cdx_iter)
|
||||
|
||||
filters = params.get('filter', None)
|
||||
if filters:
|
||||
cdx_iter = cdx_filter(cdx_iter, filters)
|
||||
|
||||
collapse_time = params.get('collapse_time', None)
|
||||
if collapse_time:
|
||||
cdx_iter = cdx_collapse_time_status(cdx_iter, collapse_time)
|
||||
|
||||
limit = int(params.get('limit', 1000000))
|
||||
|
||||
reverse = params.get('reverse', False)
|
||||
if reverse:
|
||||
cdx_iter = cdx_reverse(cdx_iter, limit)
|
||||
|
||||
closest_to = params.get('closest', None)
|
||||
if closest_to:
|
||||
cdx_iter = cdx_sort_closest(closest_to, cdx_iter, limit)
|
||||
|
||||
if limit:
|
||||
cdx_iter = cdx_limit(cdx_iter, limit)
|
||||
#cdx_iter = cdx_load_all(source, params)
|
||||
cdx_iter = cdx_load_with_perms(source, params, perms_checker)
|
||||
|
||||
# output raw cdx objects
|
||||
if params.get('output') == 'raw':
|
||||
@ -60,6 +41,69 @@ def cdx_load(sources, params):
|
||||
return write_cdx(params.get('fields'))
|
||||
|
||||
|
||||
#=================================================================
|
||||
def cdx_load_with_perms(source, params, perms_checker):
|
||||
if not perms_checker.allow_url(params['url']):
|
||||
if params.get('matchType', 'exact') == 'exact':
|
||||
yield
|
||||
|
||||
cdx_iter = cdx_load_all(source, params)
|
||||
|
||||
for cdx in cdx_iter:
|
||||
if not perms_checker.allow_url_timestamp(cdx['original'],
|
||||
cdx['timestamp']):
|
||||
continue
|
||||
|
||||
cdx = perms_checker.filter_fields(cdx)
|
||||
|
||||
yield cdx
|
||||
|
||||
|
||||
#=================================================================
|
||||
def cdx_text_out(cdx, fields):
|
||||
if not fields:
|
||||
return str(cdx)
|
||||
else:
|
||||
return ' '.join(map(lambda x: cdx[x], fields.split(',')))
|
||||
|
||||
|
||||
#=================================================================
|
||||
def cdx_load_all(sources, params):
|
||||
cdx_iter = load_cdx_streams(sources, params)
|
||||
|
||||
cdx_iter = make_cdx_iter(cdx_iter)
|
||||
|
||||
if params.get('proxy_all'):
|
||||
return cdx_iter
|
||||
|
||||
resolve_revisits = params.get('resolve_revisits', False)
|
||||
if resolve_revisits:
|
||||
cdx_iter = cdx_resolve_revisits(cdx_iter)
|
||||
|
||||
filters = params.get('filter', None)
|
||||
if filters:
|
||||
cdx_iter = cdx_filter(cdx_iter, filters)
|
||||
|
||||
collapse_time = params.get('collapse_time', None)
|
||||
if collapse_time:
|
||||
cdx_iter = cdx_collapse_time_status(cdx_iter, collapse_time)
|
||||
|
||||
limit = int(params.get('limit', 1000000))
|
||||
|
||||
reverse = params.get('reverse', False)
|
||||
if reverse:
|
||||
cdx_iter = cdx_reverse(cdx_iter, limit)
|
||||
|
||||
closest_to = params.get('closest', None)
|
||||
if closest_to:
|
||||
cdx_iter = cdx_sort_closest(closest_to, cdx_iter, limit)
|
||||
|
||||
if limit:
|
||||
cdx_iter = cdx_limit(cdx_iter, limit)
|
||||
|
||||
return cdx_iter
|
||||
|
||||
|
||||
#=================================================================
|
||||
# load and source merge cdx streams
|
||||
def load_cdx_streams(sources, params):
|
||||
|
Loading…
x
Reference in New Issue
Block a user