mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
add cdx_server app!
port wsgi cdx server tests to test new app! move base handlers to basehandlers in framework pkg (remove werkzeug dependency)
This commit is contained in:
parent
f0a0976038
commit
0bf651c2e3
30
pywb/apps/cdx_server.py
Normal file
30
pywb/apps/cdx_server.py
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
from pywb.cdx.cdxserver import create_cdx_server
|
||||||
|
|
||||||
|
from pywb.framework.wsgi_wrappers import init_app, start_wsgi_server
|
||||||
|
from pywb.framework.archivalrouter import ArchivalRouter, Route
|
||||||
|
|
||||||
|
from pywb.core.handlers import CDXHandler
|
||||||
|
|
||||||
|
DEFAULT_RULES = 'pywb/rules.yaml'
|
||||||
|
|
||||||
|
# cdx-server only config
|
||||||
|
DEFAULT_CONFIG = 'pywb/cdx/config.yaml'
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
# create simple cdx server under '/cdx' using config file
|
||||||
|
# TODO: support multiple collections like full wayback?
|
||||||
|
|
||||||
|
def create_cdx_server_app(config):
    """
    Create the router for a standalone cdx server app.

    A cdx server is created from ``config`` together with the rules
    file named by DEFAULT_RULES, wrapped in a CDXHandler and exposed
    through a single route named 'cdx'.

    :param config: configuration passed through to create_cdx_server
    :return: an ArchivalRouter whose only route is the 'cdx' route
    """
    handler = CDXHandler(create_cdx_server(config, DEFAULT_RULES))
    cdx_route = Route('cdx', handler)
    return ArchivalRouter([cdx_route])
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
# init pywb app
|
||||||
|
#=================================================================
|
||||||
|
# module-level app object, built once at import time by handing
# init_app the factory above plus the default cdx-server config file
application = init_app(
    create_cdx_server_app,
    load_yaml=True,
    config_file=DEFAULT_CONFIG
)


# when executed as a script, serve the app via start_wsgi_server
if __name__ == "__main__":
    start_wsgi_server(application)
|
@ -63,7 +63,7 @@ class CDXObject(OrderedDict):
|
|||||||
cdxformat = i
|
cdxformat = i
|
||||||
|
|
||||||
if not cdxformat:
|
if not cdxformat:
|
||||||
raise Exception('unknown {0}-field cdx format'.format(len(fields)))
|
raise CDXException('unknown {0}-field cdx format'.format(len(fields)))
|
||||||
|
|
||||||
for header, field in itertools.izip(cdxformat, fields):
|
for header, field in itertools.izip(cdxformat, fields):
|
||||||
self[header] = field
|
self[header] = field
|
||||||
@ -87,8 +87,15 @@ class CDXObject(OrderedDict):
|
|||||||
"""
|
"""
|
||||||
if fields is None:
|
if fields is None:
|
||||||
return str(self) + '\n'
|
return str(self) + '\n'
|
||||||
else:
|
|
||||||
return ' '.join(self[x] for x in fields) + '\n'
|
try:
|
||||||
|
result = ' '.join(self[x] for x in fields) + '\n'
|
||||||
|
except KeyError as ke:
|
||||||
|
msg = 'Invalid field "{0}" found in fields= argument'
|
||||||
|
msg = msg.format(ke.message)
|
||||||
|
raise CDXException(msg)
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
if self.cdxline:
|
if self.cdxline:
|
||||||
@ -111,7 +118,7 @@ class IDXObject(OrderedDict):
|
|||||||
|
|
||||||
if len(fields) < self.NUM_REQ_FIELDS:
|
if len(fields) < self.NUM_REQ_FIELDS:
|
||||||
msg = 'invalid idx format: {0} fields found, {1} required'
|
msg = 'invalid idx format: {0} fields found, {1} required'
|
||||||
raise Exception(msg.format(len(fields), self.NUM_REQ_FIELDS))
|
raise CDXException(msg.format(len(fields), self.NUM_REQ_FIELDS))
|
||||||
|
|
||||||
for header, field in itertools.izip(self.FORMAT, fields):
|
for header, field in itertools.izip(self.FORMAT, fields):
|
||||||
self[header] = field
|
self[header] = field
|
||||||
|
@ -31,9 +31,18 @@ def cdx_load(sources, query, perms_checker=None, process=True):
|
|||||||
if perms_checker:
|
if perms_checker:
|
||||||
cdx_iter = restrict_cdx(cdx_iter, query, perms_checker)
|
cdx_iter = restrict_cdx(cdx_iter, query, perms_checker)
|
||||||
|
|
||||||
|
if query.output == 'text':
|
||||||
|
cdx_iter = cdx_to_text(cdx_iter, query.fields)
|
||||||
|
|
||||||
return cdx_iter
|
return cdx_iter
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
def cdx_to_text(cdx_iter, fields):
    """
    Lazily convert each item of a cdx iterator to text.

    :param cdx_iter: iterable of objects providing ``to_text(fields)``
    :param fields: value passed unchanged to every ``to_text`` call
    :return: generator yielding one ``to_text`` result per input item,
        in input order
    """
    for entry in cdx_iter:
        text_line = entry.to_text(fields)
        yield text_line
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
def restrict_cdx(cdx_iter, query, perms_checker):
|
def restrict_cdx(cdx_iter, query, perms_checker):
|
||||||
"""
|
"""
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
from urllib import urlencode
|
from urllib import urlencode
|
||||||
from urlparse import parse_qs
|
from urlparse import parse_qs
|
||||||
|
from cdxobject import CDXException
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
@ -62,6 +63,9 @@ class CDXQuery(object):
|
|||||||
@property
|
@property
|
||||||
def fields(self):
|
def fields(self):
|
||||||
v = self.params.get('fields')
|
v = self.params.get('fields')
|
||||||
|
# check old param name
|
||||||
|
if not v:
|
||||||
|
v = self.params.get('fl')
|
||||||
return v.split(',') if v else None
|
return v.split(',') if v else None
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@ -105,9 +109,6 @@ class CDXQuery(object):
|
|||||||
"""
|
"""
|
||||||
params = parse_qs(env['QUERY_STRING'])
|
params = parse_qs(env['QUERY_STRING'])
|
||||||
|
|
||||||
if not 'output' in params:
|
|
||||||
params['output'] = 'text'
|
|
||||||
|
|
||||||
# parse_qs produces arrays for single values
|
# parse_qs produces arrays for single values
|
||||||
# cdx processing expects singleton params for all params,
|
# cdx processing expects singleton params for all params,
|
||||||
# except filters, so convert here
|
# except filters, so convert here
|
||||||
@ -116,4 +117,8 @@ class CDXQuery(object):
|
|||||||
if name != 'filter':
|
if name != 'filter':
|
||||||
params[name] = val[0]
|
params[name] = val[0]
|
||||||
|
|
||||||
|
if not 'output' in params:
|
||||||
|
params['output'] = 'text'
|
||||||
|
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
@ -187,6 +187,7 @@ import pytest
|
|||||||
|
|
||||||
def cdx_ops_test(url, sources = [test_cdx_dir + 'iana.cdx'], **kwparams):
|
def cdx_ops_test(url, sources = [test_cdx_dir + 'iana.cdx'], **kwparams):
|
||||||
kwparams['url'] = url
|
kwparams['url'] = url
|
||||||
|
kwparams['output'] = 'cdxobject'
|
||||||
fields = kwparams.get('fields')
|
fields = kwparams.get('fields')
|
||||||
if fields:
|
if fields:
|
||||||
fields = fields.split(',')
|
fields = fields.split(',')
|
||||||
|
@ -1,15 +0,0 @@
|
|||||||
import webtest
|
|
||||||
from pywb.cdx.wsgi_cdxserver import create_app
|
|
||||||
from pywb import get_test_dir
|
|
||||||
|
|
||||||
class TestCdx:
|
|
||||||
def setup(self):
|
|
||||||
self.app = create_app(get_test_dir() + 'cdx/')
|
|
||||||
self.testapp = webtest.TestApp(self.app)
|
|
||||||
|
|
||||||
def test_cdx(self):
|
|
||||||
resp = self.testapp.get('/cdx?url=http://www.iana.org/_css/2013.1/screen.css')
|
|
||||||
assert resp.content_type == 'text/plain'
|
|
||||||
assert resp.content_length > 0
|
|
||||||
|
|
||||||
|
|
@ -1,103 +0,0 @@
|
|||||||
from werkzeug.wrappers import BaseResponse
|
|
||||||
from cdxserver import create_cdx_server
|
|
||||||
from pywb import get_test_dir
|
|
||||||
from query import CDXQuery
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import yaml
|
|
||||||
import pkg_resources
|
|
||||||
|
|
||||||
#=================================================================
|
|
||||||
CONFIG_FILE = 'config.yaml'
|
|
||||||
|
|
||||||
RULES_FILE = 'rules.yaml'
|
|
||||||
|
|
||||||
DEFAULT_PORT = 8080
|
|
||||||
|
|
||||||
#=================================================================
|
|
||||||
|
|
||||||
class CDXQueryRequest(object):
|
|
||||||
def __init__(self, environ):
|
|
||||||
self.query = CDXQuery.from_wsgi_env(environ)
|
|
||||||
|
|
||||||
|
|
||||||
class WSGICDXServer(object):
|
|
||||||
def __init__(self, config, rules_file):
|
|
||||||
self.cdxserver = create_cdx_server(config, rules_file)
|
|
||||||
|
|
||||||
def __call__(self, environ, start_response):
|
|
||||||
request = CDXQueryRequest(environ)
|
|
||||||
try:
|
|
||||||
logging.debug('request.args=%s', request.query)
|
|
||||||
result = self.cdxserver.load_cdx_query(request.query)
|
|
||||||
|
|
||||||
# TODO: select response type by "output" parameter
|
|
||||||
response = PlainTextResponse(result, request.query.fields)
|
|
||||||
return response(environ, start_response)
|
|
||||||
except Exception as exc:
|
|
||||||
logging.error('load_cdx failed', exc_info=1)
|
|
||||||
# TODO: error response should be different for each response
|
|
||||||
# type
|
|
||||||
start_response('400 Error', [('Content-Type', 'text/plain')])
|
|
||||||
return [str(exc)]
|
|
||||||
|
|
||||||
def cdx_text_out(cdx, fields):
|
|
||||||
if not fields:
|
|
||||||
return str(cdx) + '\n'
|
|
||||||
else:
|
|
||||||
logging.info('cdx fields=%s', cdx.keys)
|
|
||||||
# TODO: this will results in an exception if fields contain
|
|
||||||
# non-existent field name.
|
|
||||||
return ' '.join(cdx[x] for x in fields) + '\n'
|
|
||||||
|
|
||||||
class PlainTextResponse(BaseResponse):
|
|
||||||
def __init__(self, cdxitr, fields, status=200, content_type='text/plain'):
|
|
||||||
super(PlainTextResponse, self).__init__(
|
|
||||||
response=(
|
|
||||||
cdx.to_text(fields) for cdx in cdxitr
|
|
||||||
),
|
|
||||||
status=status, content_type=content_type)
|
|
||||||
|
|
||||||
# class JsonResponse(Response):
|
|
||||||
# pass
|
|
||||||
# class MementoResponse(Response):
|
|
||||||
# pass
|
|
||||||
|
|
||||||
def create_app(config=None):
|
|
||||||
logging.basicConfig(format='%(asctime)s: [%(levelname)s]: %(message)s',
|
|
||||||
level=logging.DEBUG)
|
|
||||||
|
|
||||||
if not config:
|
|
||||||
index_paths = get_test_dir() + 'cdx/'
|
|
||||||
config = dict(index_paths=index_paths)
|
|
||||||
|
|
||||||
return WSGICDXServer(config, RULES_FILE)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
from optparse import OptionParser
|
|
||||||
from werkzeug.serving import run_simple
|
|
||||||
|
|
||||||
opt = OptionParser('%prog [OPTIONS]')
|
|
||||||
opt.add_option('-p', '--port', type='int', default=None)
|
|
||||||
|
|
||||||
options, args = opt.parse_args()
|
|
||||||
|
|
||||||
configdata = pkg_resources.resource_string(__name__, CONFIG_FILE)
|
|
||||||
config = yaml.load(configdata)
|
|
||||||
|
|
||||||
port = options.port
|
|
||||||
if port is None:
|
|
||||||
port = (config and config.get('port')) or DEFAULT_PORT
|
|
||||||
|
|
||||||
app = create_app(config)
|
|
||||||
|
|
||||||
logging.debug('Starting CDX Server on port %s', port)
|
|
||||||
try:
|
|
||||||
run_simple('0.0.0.0', port, app, use_reloader=True, use_debugger=True)
|
|
||||||
except KeyboardInterrupt as ex:
|
|
||||||
pass
|
|
||||||
logging.debug('Stopping CDX Server')
|
|
||||||
else:
|
|
||||||
# XXX pass production config
|
|
||||||
application = create_app()
|
|
@ -3,28 +3,13 @@ import pkgutil
|
|||||||
import mimetypes
|
import mimetypes
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from pywb.rewrite.wburl import WbUrl
|
|
||||||
from pywb.cdx.query import CDXQuery
|
from pywb.cdx.query import CDXQuery
|
||||||
|
from pywb.framework.basehandlers import BaseHandler, WbUrlHandler
|
||||||
from pywb.framework.wbrequestresponse import WbResponse
|
from pywb.framework.wbrequestresponse import WbResponse
|
||||||
from wbexceptions import WbException, NotFoundException
|
from pywb.framework.wbexceptions import WbException, NotFoundException
|
||||||
from views import TextCapturesView
|
from views import TextCapturesView
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
|
||||||
class BaseHandler(object):
|
|
||||||
def __call__(self, wbrequest):
|
|
||||||
return wbrequest
|
|
||||||
|
|
||||||
def get_wburl_type(self):
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
|
||||||
class WbUrlHandler(BaseHandler):
|
|
||||||
def get_wburl_type(self):
|
|
||||||
return WbUrl
|
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# Standard WB Handler
|
# Standard WB Handler
|
||||||
#=================================================================
|
#=================================================================
|
||||||
|
@ -29,6 +29,7 @@ class IndexReader(object):
|
|||||||
params.update(wbrequest.custom_params)
|
params.update(wbrequest.custom_params)
|
||||||
|
|
||||||
params['allowFuzzy'] = True
|
params['allowFuzzy'] = True
|
||||||
|
params['output'] = 'cdxobject'
|
||||||
|
|
||||||
cdxlines = self.load_cdx(url=wburl.url, **params)
|
cdxlines = self.load_cdx(url=wburl.url, **params)
|
||||||
|
|
||||||
|
@ -13,7 +13,12 @@ class ArchivalRouter(object):
|
|||||||
home_view=None, error_view=None):
|
home_view=None, error_view=None):
|
||||||
|
|
||||||
self.routes = routes
|
self.routes = routes
|
||||||
self.fallback = ReferRedirect(hostpaths)
|
|
||||||
|
if hostpaths:
|
||||||
|
self.fallback = ReferRedirect(hostpaths)
|
||||||
|
else:
|
||||||
|
self.fallback = None
|
||||||
|
|
||||||
self.abs_path = abs_path
|
self.abs_path = abs_path
|
||||||
|
|
||||||
self.home_view = home_view
|
self.home_view = home_view
|
||||||
|
23
pywb/framework/basehandlers.py
Normal file
23
pywb/framework/basehandlers.py
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
from pywb.rewrite.wburl import WbUrl
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
class BaseHandler(object):
    """
    Base class for request handlers; handles any request.

    The default behavior is a pass-through: calling the handler
    returns the request object it was given, and no wburl type is
    declared.
    """
    def get_wburl_type(self):
        # the base handler declares no wburl type
        return None

    def __call__(self, wbrequest):
        # hand the request back unchanged
        return wbrequest
|
||||||
|
|
||||||
|
|
||||||
|
#=================================================================
|
||||||
|
class WbUrlHandler(BaseHandler):
    """
    Handler for requests that are assumed to contain a WbUrl.

    Reports WbUrl as its wburl type; this is intended to ensure
    that the WbUrl is parsed in the request.
    """
    def get_wburl_type(self):
        # unlike BaseHandler (which returns None), declare WbUrl
        return WbUrl
|
@ -85,7 +85,7 @@ False
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
from pywb.framework.archivalrouter import Route, ReferRedirect
|
from pywb.framework.archivalrouter import Route, ReferRedirect
|
||||||
from pywb.core.handlers import BaseHandler, WbUrlHandler
|
from pywb.framework.basehandlers import BaseHandler, WbUrlHandler
|
||||||
import pprint
|
import pprint
|
||||||
|
|
||||||
def print_req(req):
|
def print_req(req):
|
||||||
|
@ -3,9 +3,9 @@ from loaders import load_yaml_config
|
|||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
|
|
||||||
DEFAULT_RULES_FILE = 'pywb/rules.yaml'
|
DEFAULT_RULES_FILE = 'pywb/rules.yaml'
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class RuleSet(object):
|
class RuleSet(object):
|
||||||
DEFAULT_KEY = ''
|
DEFAULT_KEY = ''
|
||||||
|
@ -58,13 +58,13 @@ class BlockLoader(object):
|
|||||||
afile = open(url, 'rb')
|
afile = open(url, 'rb')
|
||||||
|
|
||||||
except IOError:
|
except IOError:
|
||||||
#if file_only:
|
if file_only:
|
||||||
# raise
|
raise
|
||||||
|
|
||||||
# then, try as package.path/file
|
# then, try as package.path/file
|
||||||
pkg_split = url.split('/', 1)
|
pkg_split = url.split('/', 1)
|
||||||
#if len(pkg_split) == 1:
|
if len(pkg_split) == 1:
|
||||||
# raise
|
raise
|
||||||
|
|
||||||
afile = pkg_resources.resource_stream(pkg_split[0],
|
afile = pkg_resources.resource_stream(pkg_split[0],
|
||||||
pkg_split[1])
|
pkg_split[1])
|
||||||
|
@ -176,6 +176,6 @@ class ResolvingLoader:
|
|||||||
params = {'url': url,
|
params = {'url': url,
|
||||||
'closest': timestamp,
|
'closest': timestamp,
|
||||||
'filter': 'digest:' + digest,
|
'filter': 'digest:' + digest,
|
||||||
'output': 'raw'}
|
'output': 'cdxobject'}
|
||||||
|
|
||||||
return self.cdx_server.load_cdx(**params)
|
return self.cdx_server.load_cdx(**params)
|
||||||
|
3
setup.py
3
setup.py
@ -19,9 +19,8 @@ setup(
|
|||||||
'pywb.cdx',
|
'pywb.cdx',
|
||||||
'pywb.warc',
|
'pywb.warc',
|
||||||
'pywb.rewrite',
|
'pywb.rewrite',
|
||||||
|
'pywb.framework',  # comma required: adjacent string literals would otherwise be concatenated
|
||||||
'pywb.core',
|
'pywb.core',
|
||||||
'pywb.dispatch',
|
|
||||||
'pywb.bootstrap'
|
|
||||||
'pywb.apps'
|
'pywb.apps'
|
||||||
],
|
],
|
||||||
package_data={
|
package_data={
|
||||||
|
@ -1,32 +1,26 @@
|
|||||||
import os
|
|
||||||
import re
|
import re
|
||||||
|
import webtest
|
||||||
|
|
||||||
import pytest
|
|
||||||
from urllib import urlencode
|
from urllib import urlencode
|
||||||
|
|
||||||
from werkzeug.test import Client
|
|
||||||
from werkzeug.wrappers import BaseResponse, Response
|
|
||||||
|
|
||||||
import yaml
|
|
||||||
|
|
||||||
from pywb.cdx.cdxobject import CDXObject
|
from pywb.cdx.cdxobject import CDXObject
|
||||||
from pywb.cdx.wsgi_cdxserver import create_app
|
from pywb.apps.cdx_server import application
|
||||||
|
|
||||||
from tests.fixture import testconfig
|
import pytest
|
||||||
|
|
||||||
|
#================================================================
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def client(testconfig):
|
def client():
|
||||||
app = create_app(testconfig)
|
return webtest.TestApp(application)
|
||||||
return Client(app, Response)
|
|
||||||
|
|
||||||
# ================================================================
|
|
||||||
|
|
||||||
def query(client, url, **params):
|
#================================================================
|
||||||
|
def query(client, url, is_error=False, **params):
|
||||||
params['url'] = url
|
params['url'] = url
|
||||||
return client.get('/cdx?' + urlencode(params, doseq=1))
|
return client.get('/cdx?' + urlencode(params, doseq=1), expect_errors=is_error)
|
||||||
|
|
||||||
# ================================================================
|
|
||||||
|
|
||||||
|
#================================================================
|
||||||
def test_exact_url(client):
|
def test_exact_url(client):
|
||||||
"""
|
"""
|
||||||
basic exact match, no filters, etc.
|
basic exact match, no filters, etc.
|
||||||
@ -34,48 +28,54 @@ def test_exact_url(client):
|
|||||||
resp = query(client, 'http://www.iana.org/')
|
resp = query(client, 'http://www.iana.org/')
|
||||||
|
|
||||||
assert resp.status_code == 200
|
assert resp.status_code == 200
|
||||||
print resp.data
|
print resp.body
|
||||||
|
|
||||||
|
|
||||||
|
#================================================================
|
||||||
def test_prefix_match(client):
|
def test_prefix_match(client):
|
||||||
"""
|
"""
|
||||||
prefix match test
|
prefix match test
|
||||||
"""
|
"""
|
||||||
resp = query(client, 'http://www.iana.org/', matchType='prefix')
|
resp = query(client, 'http://www.iana.org/', matchType='prefix')
|
||||||
|
|
||||||
print resp.data.splitlines()
|
print resp.body.splitlines()
|
||||||
assert resp.status_code == 200
|
assert resp.status_code == 200
|
||||||
|
|
||||||
suburls = 0
|
suburls = 0
|
||||||
for l in resp.data.splitlines():
|
for l in resp.body.splitlines():
|
||||||
fields = l.split(' ')
|
fields = l.split(' ')
|
||||||
if len(fields[0]) > len('org,iana)/'):
|
if len(fields[0]) > len('org,iana)/'):
|
||||||
suburls += 1
|
suburls += 1
|
||||||
assert suburls > 0
|
assert suburls > 0
|
||||||
|
|
||||||
|
|
||||||
|
#================================================================
|
||||||
def test_filters(client):
|
def test_filters(client):
|
||||||
"""
|
"""
|
||||||
filter cdxes by mimetype and filename field, exact match.
|
filter cdxes by mimetype and filename field, exact match.
|
||||||
"""
|
"""
|
||||||
resp = query(client, 'http://www.iana.org/_css/2013.1/screen.css',
|
resp = query(client, 'http://www.iana.org/_css/2013.1/screen.css',
|
||||||
filter=('mimetype:warc/revisit', 'filename:dupes.warc.gz'))
|
filter=('mimetype:warc/revisit', 'filename:dupes.warc.gz'))
|
||||||
|
|
||||||
assert resp.status_code == 200
|
|
||||||
assert resp.mimetype == 'text/plain'
|
|
||||||
|
|
||||||
for l in resp.data.splitlines():
|
assert resp.status_code == 200
|
||||||
|
assert resp.content_type == 'text/plain'
|
||||||
|
|
||||||
|
for l in resp.body.splitlines():
|
||||||
fields = l.split(' ')
|
fields = l.split(' ')
|
||||||
assert fields[0] == 'org,iana)/_css/2013.1/screen.css'
|
assert fields[0] == 'org,iana)/_css/2013.1/screen.css'
|
||||||
assert fields[3] == 'warc/revisit'
|
assert fields[3] == 'warc/revisit'
|
||||||
assert fields[10] == 'dupes.warc.gz'
|
assert fields[10] == 'dupes.warc.gz'
|
||||||
|
|
||||||
|
|
||||||
|
#================================================================
|
||||||
def test_limit(client):
|
def test_limit(client):
|
||||||
resp = query(client, 'http://www.iana.org/_css/2013.1/screen.css',
|
resp = query(client, 'http://www.iana.org/_css/2013.1/screen.css',
|
||||||
limit='1')
|
limit='1')
|
||||||
|
|
||||||
assert resp.status_code == 200
|
assert resp.status_code == 200
|
||||||
assert resp.mimetype == 'text/plain'
|
assert resp.content_type == 'text/plain'
|
||||||
|
|
||||||
cdxes = resp.data.splitlines()
|
cdxes = resp.body.splitlines()
|
||||||
assert len(cdxes) == 1
|
assert len(cdxes) == 1
|
||||||
fields = cdxes[0].split(' ')
|
fields = cdxes[0].split(' ')
|
||||||
assert fields[0] == 'org,iana)/_css/2013.1/screen.css'
|
assert fields[0] == 'org,iana)/_css/2013.1/screen.css'
|
||||||
@ -86,15 +86,17 @@ def test_limit(client):
|
|||||||
limit='1', reverse='1')
|
limit='1', reverse='1')
|
||||||
|
|
||||||
assert resp.status_code == 200
|
assert resp.status_code == 200
|
||||||
assert resp.mimetype == 'text/plain'
|
assert resp.content_type == 'text/plain'
|
||||||
|
|
||||||
cdxes = resp.data.splitlines()
|
cdxes = resp.body.splitlines()
|
||||||
assert len(cdxes) == 1
|
assert len(cdxes) == 1
|
||||||
fields = cdxes[0].split(' ')
|
fields = cdxes[0].split(' ')
|
||||||
assert fields[0] == 'org,iana)/_css/2013.1/screen.css'
|
assert fields[0] == 'org,iana)/_css/2013.1/screen.css'
|
||||||
assert fields[1] == '20140127171239'
|
assert fields[1] == '20140127171239'
|
||||||
assert fields[3] == 'warc/revisit'
|
assert fields[3] == 'warc/revisit'
|
||||||
|
|
||||||
|
|
||||||
|
#================================================================
|
||||||
def test_fields(client):
|
def test_fields(client):
|
||||||
"""
|
"""
|
||||||
retrieve subset of fields with ``fields`` parameter.
|
retrieve subset of fields with ``fields`` parameter.
|
||||||
@ -104,7 +106,7 @@ def test_fields(client):
|
|||||||
|
|
||||||
assert resp.status_code == 200
|
assert resp.status_code == 200
|
||||||
|
|
||||||
cdxes = resp.data.splitlines()
|
cdxes = resp.body.splitlines()
|
||||||
|
|
||||||
for cdx in cdxes:
|
for cdx in cdxes:
|
||||||
fields = cdx.split(' ')
|
fields = cdx.split(' ')
|
||||||
@ -113,16 +115,21 @@ def test_fields(client):
|
|||||||
assert re.match(r'\d{14}$', fields[1])
|
assert re.match(r'\d{14}$', fields[1])
|
||||||
assert re.match(r'\d{3}|-', fields[2])
|
assert re.match(r'\d{3}|-', fields[2])
|
||||||
|
|
||||||
|
|
||||||
|
#================================================================
|
||||||
def test_fields_undefined(client):
    """
    server shall respond with Bad Request and the name of the undefined
    field when ``fields`` parameter contains undefined name(s).
    """
    # is_error=True is forwarded as expect_errors so the test client
    # does not raise on a non-2xx/3xx response
    resp = query(client, 'http://www.iana.org/_css/2013.1/print.css',
                 is_error=True,
                 fields='urlkey,nosuchfield')

    # must be an assert: a bare comparison is evaluated and discarded,
    # so the test could never fail on a wrong status code
    assert resp.status_code == 400
|
||||||
|
|
||||||
|
|
||||||
|
#================================================================
|
||||||
def test_resolveRevisits(client):
|
def test_resolveRevisits(client):
|
||||||
"""
|
"""
|
||||||
with ``resolveRevisits=true``, server adds three fields pointing to
|
with ``resolveRevisits=true``, server adds three fields pointing to
|
||||||
@ -132,9 +139,9 @@ def test_resolveRevisits(client):
|
|||||||
resolveRevisits='true'
|
resolveRevisits='true'
|
||||||
)
|
)
|
||||||
assert resp.status_code == 200
|
assert resp.status_code == 200
|
||||||
assert resp.mimetype == 'text/plain'
|
assert resp.content_type == 'text/plain'
|
||||||
|
|
||||||
cdxes = resp.data.splitlines()
|
cdxes = resp.body.splitlines()
|
||||||
originals = {}
|
originals = {}
|
||||||
for cdx in cdxes:
|
for cdx in cdxes:
|
||||||
fields = cdx.split(' ')
|
fields = cdx.split(' ')
|
||||||
@ -151,6 +158,8 @@ def test_resolveRevisits(client):
|
|||||||
orig = originals.get(sha)
|
orig = originals.get(sha)
|
||||||
assert orig == (int(orig_size), int(orig_offset), orig_fn)
|
assert orig == (int(orig_size), int(orig_offset), orig_fn)
|
||||||
|
|
||||||
|
|
||||||
|
#================================================================
|
||||||
def test_resolveRevisits_orig_fields(client):
|
def test_resolveRevisits_orig_fields(client):
|
||||||
"""
|
"""
|
||||||
when resolveRevisits=true, extra three fields are named
|
when resolveRevisits=true, extra three fields are named
|
||||||
@ -162,9 +171,9 @@ def test_resolveRevisits_orig_fields(client):
|
|||||||
fields='urlkey,orig.length,orig.offset,orig.filename'
|
fields='urlkey,orig.length,orig.offset,orig.filename'
|
||||||
)
|
)
|
||||||
assert resp.status_code == 200
|
assert resp.status_code == 200
|
||||||
assert resp.mimetype == 'text/plain'
|
assert resp.content_type == 'text/plain'
|
||||||
|
|
||||||
cdxes = resp.data.splitlines()
|
cdxes = resp.body.splitlines()
|
||||||
for cdx in cdxes:
|
for cdx in cdxes:
|
||||||
fields = cdx.split(' ')
|
fields = cdx.split(' ')
|
||||||
assert len(fields) == 4
|
assert len(fields) == 4
|
||||||
@ -172,6 +181,8 @@ def test_resolveRevisits_orig_fields(client):
|
|||||||
assert (orig_len == '-' and orig_offset == '-' and orig_fn == '-' or
|
assert (orig_len == '-' and orig_offset == '-' and orig_fn == '-' or
|
||||||
(int(orig_len), int(orig_offset), orig_fn))
|
(int(orig_len), int(orig_offset), orig_fn))
|
||||||
|
|
||||||
|
|
||||||
|
#================================================================
|
||||||
def test_collapseTime_resolveRevisits_reverse(client):
|
def test_collapseTime_resolveRevisits_reverse(client):
|
||||||
resp = query(client, 'http://www.iana.org/_css/2013.1/print.css',
|
resp = query(client, 'http://www.iana.org/_css/2013.1/print.css',
|
||||||
collapseTime='11',
|
collapseTime='11',
|
||||||
@ -179,11 +190,10 @@ def test_collapseTime_resolveRevisits_reverse(client):
|
|||||||
reverse='true'
|
reverse='true'
|
||||||
)
|
)
|
||||||
|
|
||||||
cdxes = [CDXObject(l) for l in resp.data.splitlines()]
|
cdxes = [CDXObject(l) for l in resp.body.splitlines()]
|
||||||
|
|
||||||
assert len(cdxes) == 3
|
assert len(cdxes) == 3
|
||||||
|
|
||||||
# timestamp is in descending order
|
# timestamp is in descending order
|
||||||
for i in range(len(cdxes) - 1):
|
for i in range(len(cdxes) - 1):
|
||||||
assert cdxes[i]['timestamp'] >= cdxes[i + 1]['timestamp']
|
assert cdxes[i]['timestamp'] >= cdxes[i + 1]['timestamp']
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user