mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
refactor: webagg -> warcserver rename
- ResAggApp -> BaseWarcServer
- AutoApp -> WarcServer
- move index related files to warcserver.index package, tests to warcserver.index.test
- move resource loading related files to warcserver.resource package, tests to warcserver.resource.test
- pywb.cdx -> pywb.warcserver.index
- split pywb.warc -> pywb.warcserver.resource or pywb.indexer (for cdx generation)
- bump to 0.51.0 for now!
- tests for pywb.warcserver should be working
This commit is contained in:
parent
4975d75910
commit
ad33dc6728
@ -1,4 +1,4 @@
|
||||
__version__ = '0.50.0'
|
||||
__version__ = '0.51.0'
|
||||
|
||||
DEFAULT_CONFIG = 'pywb/default_config.yaml'
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
from pywb.webagg.inputrequest import DirectWSGIInputRequest, POSTInputRequest
|
||||
from pywb.warcserver.inputrequest import DirectWSGIInputRequest, POSTInputRequest
|
||||
|
||||
from werkzeug.routing import Map, Rule
|
||||
from werkzeug.exceptions import HTTPException
|
||||
|
||||
@ -13,7 +14,7 @@ JSON_CT = 'application/json; charset=utf-8'
|
||||
|
||||
|
||||
#=============================================================================
|
||||
class ResAggApp(object):
|
||||
class BaseWarcServer(object):
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.route_dict = {}
|
||||
self.debug = kwargs.get('debug', False)
|
@ -1,10 +1,11 @@
|
||||
from pywb.webagg.responseloader import WARCPathLoader, LiveWebLoader, VideoLoader
|
||||
from pywb.webagg.utils import MementoUtils
|
||||
from pywb.utils.wbexception import BadRequestException, WbException
|
||||
from pywb.utils.wbexception import NotFoundException
|
||||
|
||||
from warcio.recordloader import ArchiveLoadFailed
|
||||
|
||||
from pywb.webagg.fuzzymatcher import FuzzyMatcher
|
||||
from pywb.warcserver.index.fuzzymatcher import FuzzyMatcher
|
||||
from pywb.warcserver.resource.responseloader import WARCPathLoader, LiveWebLoader, VideoLoader
|
||||
from pywb.warcserver.utils import MementoUtils
|
||||
|
||||
import six
|
||||
|
@ -7,17 +7,16 @@ import os
|
||||
|
||||
from warcio.timeutils import timestamp_now
|
||||
|
||||
from pywb.cdx.cdxops import process_cdx
|
||||
from pywb.cdx.query import CDXQuery
|
||||
|
||||
from heapq import merge
|
||||
from collections import deque
|
||||
from itertools import chain
|
||||
|
||||
from pywb.webagg.indexsource import FileIndexSource, RedisIndexSource
|
||||
from pywb.utils.wbexception import NotFoundException, WbException
|
||||
from pywb.warcserver.utils import ParamFormatter, res_template
|
||||
|
||||
from pywb.webagg.utils import ParamFormatter, res_template
|
||||
from pywb.warcserver.index.indexsource import FileIndexSource, RedisIndexSource
|
||||
from pywb.warcserver.index.cdxops import process_cdx
|
||||
from pywb.warcserver.index.query import CDXQuery
|
||||
|
||||
import six
|
||||
import glob
|
@ -1,8 +1,8 @@
|
||||
from pywb.cdx.cdxobject import CDXObject, IDXObject
|
||||
from pywb.cdx.cdxobject import TIMESTAMP, STATUSCODE, MIMETYPE, DIGEST
|
||||
from pywb.cdx.cdxobject import OFFSET, LENGTH, FILENAME
|
||||
from pywb.warcserver.index.cdxobject import CDXObject, IDXObject
|
||||
from pywb.warcserver.index.cdxobject import TIMESTAMP, STATUSCODE, MIMETYPE, DIGEST
|
||||
from pywb.warcserver.index.cdxobject import OFFSET, LENGTH, FILENAME
|
||||
|
||||
from pywb.cdx.query import CDXQuery
|
||||
from pywb.warcserver.index.query import CDXQuery
|
||||
|
||||
from warcio.timeutils import timestamp_to_sec, pad_timestamp
|
||||
from warcio.timeutils import PAD_14_DOWN, PAD_14_UP
|
@ -1,13 +1,14 @@
|
||||
from pywb.utils.binsearch import iter_range
|
||||
from warcio.timeutils import timestamp_to_http_date, http_date_to_timestamp
|
||||
from warcio.timeutils import timestamp_now
|
||||
from pywb.utils.canonicalize import canonicalize
|
||||
from pywb.utils.wbexception import NotFoundException
|
||||
|
||||
from pywb.cdx.cdxobject import CDXObject
|
||||
from warcio.timeutils import timestamp_to_http_date, http_date_to_timestamp
|
||||
from warcio.timeutils import timestamp_now
|
||||
|
||||
from pywb.webagg.utils import ParamFormatter, res_template
|
||||
from pywb.webagg.utils import MementoUtils
|
||||
from pywb.warcserver.index.cdxobject import CDXObject
|
||||
|
||||
from pywb.warcserver.utils import ParamFormatter, res_template
|
||||
from pywb.warcserver.utils import MementoUtils
|
||||
|
||||
import redis
|
||||
|
@ -1,5 +1,5 @@
|
||||
from six.moves.urllib.parse import urlencode
|
||||
from pywb.cdx.cdxobject import CDXException
|
||||
from pywb.warcserver.index.cdxobject import CDXException
|
||||
from pywb.utils.canonicalize import calc_search_range
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from pywb.cdx.cdxobject import CDXObject, IDXObject, CDXException
|
||||
from pywb.warcserver.index.cdxobject import CDXObject, IDXObject, CDXException
|
||||
from pytest import raises
|
||||
|
||||
def test_empty_cdxobject():
|
@ -159,7 +159,7 @@ org,iana)/domains/root/db 20140126200928 http://www.iana.org/domains/root/db tex
|
||||
"""
|
||||
|
||||
#=================================================================
|
||||
from pywb.webagg.autoapp import init_index_agg
|
||||
from pywb.warcserver.warcserver import init_index_agg
|
||||
|
||||
import os
|
||||
import sys
|
@ -3,15 +3,15 @@ import os
|
||||
import shutil
|
||||
import json
|
||||
|
||||
from .testutils import to_path, to_json_list, TempDirTests, BaseTestClass, TEST_CDX_PATH
|
||||
from pywb.warcserver.test.testutils import to_path, to_json_list, TempDirTests, BaseTestClass, TEST_CDX_PATH
|
||||
|
||||
from mock import patch
|
||||
|
||||
import time
|
||||
|
||||
from pywb.webagg.aggregator import DirectoryIndexSource, CacheDirectoryIndexSource
|
||||
from pywb.webagg.aggregator import SimpleAggregator
|
||||
from pywb.webagg.indexsource import MementoIndexSource
|
||||
from pywb.warcserver.index.aggregator import DirectoryIndexSource, CacheDirectoryIndexSource
|
||||
from pywb.warcserver.index.aggregator import SimpleAggregator
|
||||
from pywb.warcserver.index.indexsource import MementoIndexSource
|
||||
|
||||
|
||||
#=============================================================================
|
||||
@ -114,7 +114,7 @@ class TestDirAgg(TempDirTests, BaseTestClass):
|
||||
assert(to_json_list(res) == exp)
|
||||
assert(errs == {})
|
||||
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header)
|
||||
@patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', mock_link_header)
|
||||
def test_agg_dir_and_memento(self):
|
||||
sources = {'ia': MementoIndexSource.from_timegate_url('http://web.archive.org/web/'),
|
||||
'local': self.dir_loader}
|
@ -1,11 +1,11 @@
|
||||
from pywb.webagg.indexsource import FileIndexSource, RemoteIndexSource, MementoIndexSource, RedisIndexSource
|
||||
from pywb.webagg.indexsource import LiveIndexSource
|
||||
from pywb.warcserver.index.indexsource import FileIndexSource, RemoteIndexSource, MementoIndexSource, RedisIndexSource
|
||||
from pywb.warcserver.index.indexsource import LiveIndexSource
|
||||
|
||||
from pywb.webagg.aggregator import SimpleAggregator
|
||||
from pywb.warcserver.index.aggregator import SimpleAggregator
|
||||
|
||||
from warcio.timeutils import timestamp_now
|
||||
|
||||
from .testutils import key_ts_res, TEST_CDX_PATH
|
||||
from pywb.warcserver.test.testutils import key_ts_res, TEST_CDX_PATH
|
||||
|
||||
import pytest
|
||||
import os
|
@ -1,6 +1,6 @@
|
||||
from pywb.utils.wbexception import AccessException
|
||||
from pywb.cdx.cdxops import cdx_load
|
||||
from pywb.cdx.query import CDXQuery
|
||||
from pywb.warcserver.index.cdxops import cdx_load
|
||||
from pywb.warcserver.index.query import CDXQuery
|
||||
|
||||
from pytest import raises
|
||||
|
@ -1,10 +1,11 @@
|
||||
from gevent import monkey; monkey.patch_all(thread=False)
|
||||
|
||||
from pywb.webagg.aggregator import SimpleAggregator, GeventTimeoutAggregator
|
||||
from pywb.webagg.aggregator import BaseAggregator
|
||||
from pywb.warcserver.index.aggregator import SimpleAggregator, GeventTimeoutAggregator
|
||||
from pywb.warcserver.index.aggregator import BaseAggregator
|
||||
|
||||
from pywb.webagg.indexsource import FileIndexSource, RemoteIndexSource, MementoIndexSource
|
||||
from .testutils import to_json_list, to_path, TEST_CDX_PATH, MementoOverrideTests, BaseTestClass
|
||||
from pywb.warcserver.index.indexsource import FileIndexSource, RemoteIndexSource, MementoIndexSource
|
||||
|
||||
from pywb.warcserver.test.testutils import to_json_list, to_path, TEST_CDX_PATH, MementoOverrideTests, BaseTestClass
|
||||
|
||||
import json
|
||||
import pytest
|
||||
@ -13,7 +14,7 @@ import six
|
||||
|
||||
from mock import patch
|
||||
|
||||
from pywb.webagg.handlers import IndexHandler
|
||||
from pywb.warcserver.handlers import IndexHandler
|
||||
|
||||
|
||||
# Aggregator Mappings
|
||||
@ -40,7 +41,7 @@ agg_nf = {'simple': SimpleAggregator(nf),
|
||||
# ============================================================================
|
||||
class TestMemAgg(MementoOverrideTests, BaseTestClass):
|
||||
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_1'))
|
||||
@patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_1'))
|
||||
def test_mem_agg_index_1(self, agg):
|
||||
url = 'http://iana.org/'
|
||||
res, errs = agg(dict(url=url, closest='20140126000000', limit=5))
|
||||
@ -58,7 +59,7 @@ class TestMemAgg(MementoOverrideTests, BaseTestClass):
|
||||
|
||||
|
||||
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_2'))
|
||||
@patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_2'))
|
||||
def test_mem_agg_index_2(self, agg):
|
||||
url = 'http://example.com/'
|
||||
res, errs = agg(dict(url=url, closest='20100512', limit=6))
|
||||
@ -76,7 +77,7 @@ class TestMemAgg(MementoOverrideTests, BaseTestClass):
|
||||
|
||||
|
||||
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_3'))
|
||||
@patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_3'))
|
||||
def test_mem_agg_index_3(self, agg):
|
||||
url = 'http://vvork.com/'
|
||||
res, errs = agg(dict(url=url, closest='20141001', limit=5))
|
||||
@ -92,7 +93,7 @@ class TestMemAgg(MementoOverrideTests, BaseTestClass):
|
||||
|
||||
|
||||
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_4'))
|
||||
@patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_4'))
|
||||
def test_mem_agg_index_4(self, agg):
|
||||
url = 'http://vvork.com/'
|
||||
res, errs = agg(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait'))
|
@ -1,5 +1,5 @@
|
||||
from pywb.webagg.aggregator import RedisMultiKeyIndexSource
|
||||
from .testutils import to_path, to_json_list, FakeRedisTests, BaseTestClass, TEST_CDX_PATH
|
||||
from pywb.warcserver.index.aggregator import RedisMultiKeyIndexSource
|
||||
from pywb.warcserver.test.testutils import to_path, to_json_list, FakeRedisTests, BaseTestClass, TEST_CDX_PATH
|
||||
|
||||
|
||||
class TestRedisAgg(FakeRedisTests, BaseTestClass):
|
@ -1,11 +1,11 @@
|
||||
from gevent import monkey; monkey.patch_all(thread=False)
|
||||
import time
|
||||
from pywb.webagg.indexsource import FileIndexSource
|
||||
from pywb.warcserver.index.indexsource import FileIndexSource
|
||||
|
||||
from pywb.webagg.aggregator import SimpleAggregator, TimeoutMixin
|
||||
from pywb.webagg.aggregator import GeventTimeoutAggregator, GeventTimeoutAggregator
|
||||
from pywb.warcserver.index.aggregator import SimpleAggregator, TimeoutMixin
|
||||
from pywb.warcserver.index.aggregator import GeventTimeoutAggregator, GeventTimeoutAggregator
|
||||
|
||||
from .testutils import to_json_list, TEST_CDX_PATH
|
||||
from pywb.warcserver.test.testutils import to_json_list, TEST_CDX_PATH
|
||||
|
||||
|
||||
class TimeoutFileSource(FileIndexSource):
|
@ -122,9 +122,9 @@ Exception: No Locations Found for: foo2
|
||||
|
||||
"""
|
||||
|
||||
from pywb.webagg.test.test_cdxops import cdx_ops_test, cdx_ops_test_data
|
||||
from pywb import get_test_dir
|
||||
from pywb.webagg.autoapp import init_index_agg
|
||||
from pywb.warcserver.index.test.test_cdxops import cdx_ops_test, cdx_ops_test_data
|
||||
from pywb.warcserver.warcserver import init_index_agg
|
||||
|
||||
import shutil
|
||||
import tempfile
|
@ -9,13 +9,14 @@ import six
|
||||
|
||||
from six.moves import map
|
||||
|
||||
#from pywb.cdx.cdxsource import CDXSource
|
||||
from pywb.webagg.indexsource import BaseIndexSource
|
||||
from pywb.cdx.cdxobject import IDXObject, CDXException, CDXObject
|
||||
from pywb.cdx.query import CDXQuery
|
||||
from warcio.bufferedreaders import gzip_decompressor
|
||||
|
||||
#from pywb.warcserver.index.cdxsource import CDXSource
|
||||
from pywb.warcserver.index.indexsource import BaseIndexSource
|
||||
from pywb.warcserver.index.cdxobject import IDXObject, CDXException, CDXObject
|
||||
from pywb.warcserver.index.query import CDXQuery
|
||||
|
||||
from pywb.utils.loaders import BlockLoader, read_last_line
|
||||
from warcio.bufferedreaders import gzip_decompressor
|
||||
from pywb.utils.binsearch import iter_range, linearsearch, search
|
||||
|
||||
|
@ -2,7 +2,7 @@ from warcio.bufferedreaders import DecompressingBufferedReader
|
||||
from warcio.recordloader import ArcWarcRecordLoader
|
||||
|
||||
from pywb.utils.loaders import BlockLoader
|
||||
from pywb.webagg.utils import BUFF_SIZE
|
||||
from pywb.warcserver.utils import BUFF_SIZE
|
||||
|
||||
|
||||
#=================================================================
|
@ -3,7 +3,7 @@ import redis
|
||||
from pywb.utils.binsearch import iter_exact
|
||||
from pywb.utils.loaders import to_native_str
|
||||
|
||||
from pywb.webagg.indexsource import RedisIndexSource
|
||||
from pywb.warcserver.index.indexsource import RedisIndexSource
|
||||
|
||||
from six.moves.urllib.request import url2pathname
|
||||
import six
|
@ -1,7 +1,7 @@
|
||||
from warcio.recordloader import ArchiveLoadFailed
|
||||
from warcio.timeutils import iso_date_to_timestamp
|
||||
|
||||
from pywb.webagg.resource.blockrecordloader import BlockArcWarcRecordLoader
|
||||
from pywb.warcserver.resource.blockrecordloader import BlockArcWarcRecordLoader
|
||||
|
||||
from pywb.utils.wbexception import NotFoundException
|
||||
|
@ -1,6 +1,3 @@
|
||||
from pywb.webagg.utils import MementoUtils, StreamIter, compress_gzip_iter
|
||||
from pywb.webagg.utils import ParamFormatter
|
||||
|
||||
from warcio.timeutils import timestamp_to_datetime, datetime_to_timestamp
|
||||
from warcio.timeutils import iso_date_to_datetime, datetime_to_iso_date
|
||||
from warcio.timeutils import http_date_to_datetime, datetime_to_http_date
|
||||
@ -10,8 +7,11 @@ from warcio.statusandheaders import StatusAndHeaders, StatusAndHeadersParser
|
||||
|
||||
from pywb.utils.wbexception import LiveResourceException, WbException
|
||||
|
||||
from pywb.webagg.resource.resolvingloader import ResolvingLoader
|
||||
from pywb.webagg.resource.pathresolvers import DefaultResolverMixin
|
||||
from pywb.warcserver.utils import MementoUtils, StreamIter, compress_gzip_iter
|
||||
from pywb.warcserver.utils import ParamFormatter
|
||||
|
||||
from pywb.warcserver.resource.resolvingloader import ResolvingLoader
|
||||
from pywb.warcserver.resource.pathresolvers import DefaultResolverMixin
|
||||
|
||||
from six.moves.urllib.parse import urlsplit, quote, unquote
|
||||
|
@ -294,11 +294,11 @@ import six
|
||||
|
||||
from warcio.recordloader import ArcWarcRecordLoader, ArchiveLoadFailed
|
||||
|
||||
from pywb.webagg.resource.blockrecordloader import BlockArcWarcRecordLoader
|
||||
from pywb.webagg.resource.resolvingloader import ResolvingLoader
|
||||
from pywb.webagg.resource.pathresolvers import DefaultResolverMixin
|
||||
from pywb.warcserver.resource.blockrecordloader import BlockArcWarcRecordLoader
|
||||
from pywb.warcserver.resource.resolvingloader import ResolvingLoader
|
||||
from pywb.warcserver.resource.pathresolvers import DefaultResolverMixin
|
||||
|
||||
from pywb.cdx.cdxobject import CDXObject
|
||||
from pywb.warcserver.index.cdxobject import CDXObject
|
||||
|
||||
from pywb import get_test_dir
|
||||
from mock import patch
|
@ -1,8 +1,9 @@
|
||||
from pywb import get_test_dir
|
||||
from pywb.webagg.resource.pathresolvers import PrefixResolver, PathIndexResolver, RedisResolver
|
||||
from pywb.webagg.resource.pathresolvers import DefaultResolverMixin
|
||||
from pywb.utils.loaders import to_file_url
|
||||
from pywb.cdx.cdxobject import CDXObject
|
||||
|
||||
from pywb.warcserver.resource.pathresolvers import PrefixResolver, PathIndexResolver, RedisResolver
|
||||
from pywb.warcserver.resource.pathresolvers import DefaultResolverMixin
|
||||
from pywb.warcserver.index.cdxobject import CDXObject
|
||||
|
||||
import os
|
||||
|
0
pywb/warcserver/test/__init__.py
Normal file
0
pywb/warcserver/test/__init__.py
Normal file
@ -1,17 +1,17 @@
|
||||
from .testutils import TempDirTests, BaseTestClass
|
||||
from pywb.webagg.autoapp import AutoConfigApp
|
||||
from pywb.warcserver.warcserver import WarcServer
|
||||
import os
|
||||
|
||||
from pywb.webagg.indexsource import RemoteIndexSource, LiveIndexSource, MementoIndexSource, FileIndexSource
|
||||
from pywb.webagg.handlers import ResourceHandler, HandlerSeq
|
||||
from pywb.webagg.aggregator import BaseSourceListAggregator, DirectoryIndexSource
|
||||
from pywb.warcserver.index.indexsource import RemoteIndexSource, LiveIndexSource, MementoIndexSource, FileIndexSource
|
||||
from pywb.warcserver.index.aggregator import BaseSourceListAggregator, DirectoryIndexSource
|
||||
from pywb.warcserver.handlers import ResourceHandler, HandlerSeq
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class TestAutoConfigApp(TempDirTests, BaseTestClass):
|
||||
class TestWarcServer(TempDirTests, BaseTestClass):
|
||||
@classmethod
|
||||
def setup_class(cls):
|
||||
super(TestAutoConfigApp, cls).setup_class()
|
||||
super(TestWarcServer, cls).setup_class()
|
||||
cls.orig_cwd = os.getcwd()
|
||||
os.chdir(cls.root_dir)
|
||||
os.mkdir('./local')
|
||||
@ -30,12 +30,12 @@ class TestAutoConfigApp(TempDirTests, BaseTestClass):
|
||||
with open(os.path.join('local', 'indexes', 'file.loc'), 'a') as fh:
|
||||
fh.write('foo')
|
||||
|
||||
cls.loader = AutoConfigApp(os.path.join(cls.get_curr_dir(), 'test_autoapp.yaml'))
|
||||
cls.loader = WarcServer(os.path.join(cls.get_curr_dir(), 'test_warcserver_config.yaml'))
|
||||
|
||||
@classmethod
|
||||
def teardown_class(cls):
|
||||
os.chdir(cls.orig_cwd)
|
||||
super(TestAutoConfigApp, cls).teardown_class()
|
||||
super(TestWarcServer, cls).teardown_class()
|
||||
|
||||
@staticmethod
|
||||
def get_curr_dir():
|
@ -1,22 +1,6 @@
|
||||
#from gevent import monkey; monkey.patch_all(thread=False)
|
||||
|
||||
from .testutils import to_path, MementoOverrideTests, FakeRedisTests, BaseTestClass, TEST_CDX_PATH, TEST_WARC_PATH
|
||||
from collections import OrderedDict
|
||||
|
||||
from pywb.webagg.handlers import DefaultResourceHandler, HandlerSeq
|
||||
|
||||
from pywb.webagg.indexsource import MementoIndexSource, FileIndexSource, LiveIndexSource
|
||||
from pywb.webagg.indexsource import RemoteIndexSource
|
||||
|
||||
from pywb.webagg.aggregator import GeventTimeoutAggregator, SimpleAggregator
|
||||
from pywb.webagg.aggregator import DirectoryIndexSource
|
||||
|
||||
from pywb.webagg.app import ResAggApp
|
||||
from pywb.webagg.utils import MementoUtils
|
||||
|
||||
from warcio.recordloader import ArcWarcRecordLoader
|
||||
from warcio.statusandheaders import StatusAndHeadersParser
|
||||
from warcio.bufferedreaders import ChunkedDataReader
|
||||
|
||||
from io import BytesIO
|
||||
from six.moves.urllib.parse import urlencode
|
||||
|
||||
@ -24,10 +8,24 @@ import webtest
|
||||
from fakeredis import FakeStrictRedis
|
||||
from mock import patch
|
||||
|
||||
from .testutils import to_path, MementoOverrideTests, FakeRedisTests, BaseTestClass, TEST_CDX_PATH, TEST_WARC_PATH
|
||||
|
||||
import json
|
||||
|
||||
from warcio.recordloader import ArcWarcRecordLoader
|
||||
from warcio.statusandheaders import StatusAndHeadersParser
|
||||
from warcio.bufferedreaders import ChunkedDataReader
|
||||
|
||||
from pywb.warcserver.handlers import DefaultResourceHandler, HandlerSeq
|
||||
|
||||
from pywb.warcserver.index.indexsource import MementoIndexSource, FileIndexSource, LiveIndexSource
|
||||
from pywb.warcserver.index.indexsource import RemoteIndexSource
|
||||
|
||||
from pywb.warcserver.index.aggregator import GeventTimeoutAggregator, SimpleAggregator
|
||||
from pywb.warcserver.index.aggregator import DirectoryIndexSource
|
||||
|
||||
from pywb.warcserver.basewarcserver import BaseWarcServer
|
||||
from pywb.warcserver.utils import MementoUtils
|
||||
|
||||
|
||||
sources = {
|
||||
'local': DirectoryIndexSource(TEST_CDX_PATH),
|
||||
'ia': MementoIndexSource.from_timegate_url('http://web.archive.org/web/'),
|
||||
@ -43,14 +41,14 @@ ia_cdx = {
|
||||
|
||||
|
||||
|
||||
class TestResAgg(MementoOverrideTests, FakeRedisTests, BaseTestClass):
|
||||
class TestBaseWarcServer(MementoOverrideTests, FakeRedisTests, BaseTestClass):
|
||||
@classmethod
|
||||
def setup_class(cls):
|
||||
super(TestResAgg, cls).setup_class()
|
||||
super(TestBaseWarcServer, cls).setup_class()
|
||||
|
||||
live_source = SimpleAggregator({'live': LiveIndexSource()})
|
||||
live_handler = DefaultResourceHandler(live_source)
|
||||
app = ResAggApp()
|
||||
app = BaseWarcServer()
|
||||
app.add_route('/live', live_handler)
|
||||
|
||||
source1 = GeventTimeoutAggregator(sources)
|
||||
@ -178,7 +176,7 @@ class TestResAgg(MementoOverrideTests, FakeRedisTests, BaseTestClass):
|
||||
|
||||
assert 'ResErrors' not in resp.headers
|
||||
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_mem_1'))
|
||||
@patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_mem_1'))
|
||||
def test_agg_select_mem_1(self):
|
||||
resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=20141001')
|
||||
|
||||
@ -193,7 +191,7 @@ class TestResAgg(MementoOverrideTests, FakeRedisTests, BaseTestClass):
|
||||
|
||||
assert 'ResErrors' not in resp.headers
|
||||
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_mem_2'))
|
||||
@patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_mem_2'))
|
||||
def test_agg_select_mem_2(self):
|
||||
resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=20151231')
|
||||
|
||||
@ -219,7 +217,7 @@ class TestResAgg(MementoOverrideTests, FakeRedisTests, BaseTestClass):
|
||||
assert record.http_headers.get_statuscode() == '302'
|
||||
assert record.http_headers.get_header('Location') == 'https://www.iana.org/'
|
||||
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_live'))
|
||||
@patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_live'))
|
||||
def test_agg_select_live(self):
|
||||
resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=now')
|
||||
|
||||
@ -232,7 +230,7 @@ class TestResAgg(MementoOverrideTests, FakeRedisTests, BaseTestClass):
|
||||
|
||||
assert 'ResErrors' not in resp.headers
|
||||
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local'))
|
||||
@patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local'))
|
||||
def test_agg_select_local(self):
|
||||
resp = self.testapp.get('/many/resource?url=http://iana.org/&closest=20140126200624')
|
||||
|
||||
@ -245,7 +243,7 @@ class TestResAgg(MementoOverrideTests, FakeRedisTests, BaseTestClass):
|
||||
|
||||
assert json.loads(resp.headers['ResErrors']) == {"rhiz": "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"}
|
||||
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local_postreq'))
|
||||
@patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local_postreq'))
|
||||
def test_agg_select_local_postreq(self):
|
||||
req_data = """\
|
||||
GET / HTTP/1.1
|
||||
@ -265,7 +263,7 @@ Host: iana.org
|
||||
|
||||
assert json.loads(resp.headers['ResErrors']) == {"rhiz": "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"}
|
||||
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_live_postreq'))
|
||||
@patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_live_postreq'))
|
||||
def test_agg_live_postreq(self):
|
||||
req_data = """\
|
||||
GET /get?foo=bar HTTP/1.1
|
||||
@ -449,7 +447,7 @@ host: www.youtube.com\
|
||||
|
||||
assert resp.text == resp.headers['ResErrors']
|
||||
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local_revisit'))
|
||||
@patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local_revisit'))
|
||||
def test_agg_local_revisit(self):
|
||||
resp = self.testapp.get('/many/resource?url=http://www.example.com/&closest=20140127171251&sources=local')
|
||||
|
||||
@ -476,7 +474,7 @@ host: www.youtube.com\
|
||||
assert resp.json == {'message': 'output=foobar not supported'}
|
||||
assert resp.text == resp.headers['ResErrors']
|
||||
|
||||
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_not_found'))
|
||||
@patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_not_found'))
|
||||
def test_error_local_not_found(self):
|
||||
resp = self.testapp.get('/many/resource?url=http://not-found.error/&sources=local', status=404)
|
||||
|
@ -1,4 +1,4 @@
|
||||
from pywb.webagg.inputrequest import DirectWSGIInputRequest, POSTInputRequest
|
||||
from pywb.warcserver.inputrequest import DirectWSGIInputRequest, POSTInputRequest
|
||||
from werkzeug.routing import Map, Rule
|
||||
|
||||
import webtest
|
@ -3,21 +3,23 @@ from gevent import monkey; monkey.patch_all(thread=False)
|
||||
import webtest
|
||||
|
||||
from io import BytesIO
|
||||
from pywb.webagg.app import ResAggApp
|
||||
import requests
|
||||
|
||||
from pywb.webagg.handlers import DefaultResourceHandler
|
||||
from pywb.webagg.aggregator import SimpleAggregator
|
||||
from pywb.webagg.upstreamindexsource import UpstreamMementoIndexSource, UpstreamAggIndexSource
|
||||
|
||||
from warcio.recordloader import ArcWarcRecordLoader
|
||||
|
||||
from pywb.warcserver.handlers import DefaultResourceHandler
|
||||
from pywb.warcserver.basewarcserver import BaseWarcServer
|
||||
|
||||
from pywb.warcserver.index.aggregator import SimpleAggregator
|
||||
|
||||
from pywb.warcserver.upstreamindexsource import UpstreamMementoIndexSource, UpstreamAggIndexSource
|
||||
|
||||
from .testutils import LiveServerTests, BaseTestClass
|
||||
|
||||
|
||||
class TestUpstream(LiveServerTests, BaseTestClass):
|
||||
def setup(self):
|
||||
app = ResAggApp()
|
||||
app = BaseWarcServer()
|
||||
|
||||
base_url = 'http://localhost:{0}'.format(self.server.port)
|
||||
app.add_route('/upstream',
|
@ -8,10 +8,11 @@ import time
|
||||
from fakeredis import FakeStrictRedis
|
||||
from mock import patch
|
||||
|
||||
from pywb.webagg.aggregator import SimpleAggregator
|
||||
from pywb.webagg.app import ResAggApp
|
||||
from pywb.webagg.handlers import DefaultResourceHandler
|
||||
from pywb.webagg.indexsource import LiveIndexSource, MementoIndexSource
|
||||
from pywb.warcserver.basewarcserver import BaseWarcServer
|
||||
from pywb.warcserver.handlers import DefaultResourceHandler
|
||||
|
||||
from pywb.warcserver.index.aggregator import SimpleAggregator
|
||||
from pywb.warcserver.index.indexsource import LiveIndexSource, MementoIndexSource
|
||||
|
||||
from pywb.urlrewrite.geventserver import GeventServer
|
||||
|
||||
@ -141,7 +142,7 @@ class LiveServerTests(object):
|
||||
|
||||
@staticmethod
|
||||
def make_live_app():
|
||||
app = ResAggApp()
|
||||
app = BaseWarcServer()
|
||||
app.add_route('/live',
|
||||
DefaultResourceHandler(SimpleAggregator(
|
||||
{'live': LiveIndexSource()})
|
@ -1,10 +1,12 @@
|
||||
from pywb.cdx.cdxobject import CDXObject
|
||||
from pywb.utils.wbexception import NotFoundException
|
||||
from pywb.webagg.indexsource import BaseIndexSource, RemoteIndexSource
|
||||
from pywb.webagg.responseloader import LiveWebLoader
|
||||
from pywb.webagg.utils import ParamFormatter, res_template
|
||||
from warcio.timeutils import timestamp_now
|
||||
|
||||
from pywb.utils.wbexception import NotFoundException
|
||||
|
||||
from pywb.warcserver.index.cdxobject import CDXObject
|
||||
from pywb.warcserver.index.indexsource import BaseIndexSource, RemoteIndexSource
|
||||
from pywb.warcserver.resource.responseloader import LiveWebLoader
|
||||
from pywb.warcserver.utils import ParamFormatter, res_template
|
||||
|
||||
|
||||
#=============================================================================
|
||||
class UpstreamAggIndexSource(RemoteIndexSource):
|
@ -1,16 +1,17 @@
|
||||
from pywb.webagg.app import ResAggApp
|
||||
from pywb.webagg.utils import load_config
|
||||
from pywb.utils.loaders import load_yaml_config
|
||||
|
||||
from pywb.webagg.aggregator import CacheDirectoryIndexSource, RedisMultiKeyIndexSource
|
||||
from pywb.webagg.aggregator import GeventTimeoutAggregator, SimpleAggregator
|
||||
from pywb.warcserver.basewarcserver import BaseWarcServer
|
||||
from pywb.warcserver.utils import load_config
|
||||
|
||||
from pywb.webagg.handlers import DefaultResourceHandler, HandlerSeq
|
||||
from pywb.warcserver.index.aggregator import CacheDirectoryIndexSource, RedisMultiKeyIndexSource
|
||||
from pywb.warcserver.index.aggregator import GeventTimeoutAggregator, SimpleAggregator
|
||||
|
||||
from pywb.webagg.indexsource import FileIndexSource, RemoteIndexSource
|
||||
from pywb.webagg.indexsource import MementoIndexSource, RedisIndexSource
|
||||
from pywb.webagg.indexsource import LiveIndexSource
|
||||
from pywb.webagg.zipnum import ZipNumIndexSource
|
||||
from pywb.warcserver.handlers import DefaultResourceHandler, HandlerSeq
|
||||
|
||||
from pywb.warcserver.index.indexsource import FileIndexSource, RemoteIndexSource
|
||||
from pywb.warcserver.index.indexsource import MementoIndexSource, RedisIndexSource
|
||||
from pywb.warcserver.index.indexsource import LiveIndexSource
|
||||
from pywb.warcserver.index.zipnum import ZipNumIndexSource
|
||||
|
||||
from pywb import DEFAULT_CONFIG
|
||||
|
||||
@ -30,7 +31,7 @@ SOURCE_LIST = [LiveIndexSource,
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class AutoConfigApp(ResAggApp):
|
||||
class WarcServer(BaseWarcServer):
|
||||
AUTO_DIR_INDEX_PATH = '{coll}/indexes/'
|
||||
AUTO_DIR_ARCHIVE_PATH = '{coll}/archive/'
|
||||
|
||||
@ -51,7 +52,7 @@ class AutoConfigApp(ResAggApp):
|
||||
custom_config['collections'].update(config['collections'])
|
||||
config.update(custom_config)
|
||||
|
||||
super(AutoConfigApp, self).__init__(debug=config.get('debug', False))
|
||||
super(WarcServer, self).__init__(debug=config.get('debug', False))
|
||||
self.config = config
|
||||
|
||||
if self.config.get('enable_auto_colls', True):
|
Loading…
x
Reference in New Issue
Block a user