1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 15:09:54 +01:00

sample perms addition to cdx ops

This commit is contained in:
Ilya Kreymer 2014-02-19 17:52:13 -08:00
parent 531464902f
commit be284859be

View File

@ -9,6 +9,56 @@ from heapq import merge
from collections import deque
#=================================================================
class AllowAllPerms:
    """
    Sample perms checker that places no restrictions: every check
    passes and cdx records are handed back untouched.
    """

    def allow_url(self, url):
        """Allow every url."""
        return True

    def allow_url_timestamp(self, url, timestamp):
        """Allow every url/timestamp pair."""
        return True

    def filter_fields(self, cdx):
        """Return the given cdx unchanged."""
        return cdx
#=================================================================
def cdx_load(source, params, perms_checker = AllowAllPerms()):
    """
    Run a cdx query against source, with records passed through
    cdx_load_with_perms using the given perms_checker.

    If params['output'] is 'raw', the iterator of cdx objects is returned
    directly. Otherwise an iterator of text lines is returned: each cdx
    formatted by cdx_text_out with params['fields'], plus a trailing
    newline.
    """
    records = cdx_load_with_perms(source, params, perms_checker)

    # raw output: hand back the cdx objects themselves
    if params.get('output') == 'raw':
        return records

    fields = params.get('fields')
    return (cdx_text_out(record, fields) + '\n' for record in records)
#=================================================================
def cdx_load_with_perms(source, params, perms_checker):
    """
    Load cdx records from source, applying perms_checker to the query
    url and to each resulting record.

    :param source: cdx source(s), passed through to cdx_load_all
    :param params: query params dict; reads 'url' (required) and
                   'matchType' (treated as 'exact' when absent)
    :param perms_checker: object providing allow_url(url),
                          allow_url_timestamp(url, timestamp) and
                          filter_fields(cdx)
    :return: generator of the cdx records accepted by
             allow_url_timestamp, each passed through filter_fields
    """
    if (not perms_checker.allow_url(params['url']) and
            params.get('matchType', 'exact') == 'exact'):
        # The queried url is not allowed and this is an exact-match
        # query: end the generator with no results and without loading
        # anything. (A bare `yield` here used to emit a single None
        # record and then fall through to return the records anyway.)
        return

    # For other match types the query url being disallowed does not stop
    # the lookup; every record is checked individually below.
    for cdx in cdx_load_all(source, params):
        if not perms_checker.allow_url_timestamp(cdx['original'],
                                                 cdx['timestamp']):
            continue

        yield perms_checker.filter_fields(cdx)
#=================================================================
def cdx_text_out(cdx, fields):
if not fields:
@ -18,12 +68,14 @@ def cdx_text_out(cdx, fields):
#=================================================================
def cdx_load(sources, params):
def cdx_load_all(sources, params):
cdx_iter = load_cdx_streams(sources, params)
cdx_iter = make_cdx_iter(cdx_iter)
if not params.get('proxy_all'):
if params.get('proxy_all'):
return cdx_iter
resolve_revisits = params.get('resolve_revisits', False)
if resolve_revisits:
cdx_iter = cdx_resolve_revisits(cdx_iter)
@ -49,16 +101,8 @@ def cdx_load(sources, params):
if limit:
cdx_iter = cdx_limit(cdx_iter, limit)
# output raw cdx objects
if params.get('output') == 'raw':
return cdx_iter
def write_cdx(fields):
for cdx in cdx_iter:
yield cdx_text_out(cdx, fields) + '\n'
return write_cdx(params.get('fields'))
#=================================================================
# load and source merge cdx streams