1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 06:59:52 +01:00

refactor: webagg -> warcserver rename

- ResAggApp -> BaseWarcServer
- AutoConfigApp (pywb.webagg.autoapp) -> WarcServer (pywb.warcserver.warcserver)
- move index related files to warcserver.index package, tests to warcserver.index.test
- move resource loading related files to warcserver.resource package, tests to warcserver.resource.test
- pywb.cdx -> pywb.warcserver.index
- split pywb.warc into pywb.warcserver.resource (resource loading) and pywb.indexer (cdx generation)
- bump version to 0.51.0 for now!
- tests for pywb.warcserver should be working
This commit is contained in:
Ilya Kreymer 2017-05-23 09:19:09 -07:00
parent 4975d75910
commit ad33dc6728
49 changed files with 146 additions and 137 deletions

View File

@ -1,4 +1,4 @@
__version__ = '0.50.0' __version__ = '0.51.0'
DEFAULT_CONFIG = 'pywb/default_config.yaml' DEFAULT_CONFIG = 'pywb/default_config.yaml'

View File

@ -1,4 +1,5 @@
from pywb.webagg.inputrequest import DirectWSGIInputRequest, POSTInputRequest from pywb.warcserver.inputrequest import DirectWSGIInputRequest, POSTInputRequest
from werkzeug.routing import Map, Rule from werkzeug.routing import Map, Rule
from werkzeug.exceptions import HTTPException from werkzeug.exceptions import HTTPException
@ -13,7 +14,7 @@ JSON_CT = 'application/json; charset=utf-8'
#============================================================================= #=============================================================================
class ResAggApp(object): class BaseWarcServer(object):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
self.route_dict = {} self.route_dict = {}
self.debug = kwargs.get('debug', False) self.debug = kwargs.get('debug', False)

View File

@ -1,10 +1,11 @@
from pywb.webagg.responseloader import WARCPathLoader, LiveWebLoader, VideoLoader
from pywb.webagg.utils import MementoUtils
from pywb.utils.wbexception import BadRequestException, WbException from pywb.utils.wbexception import BadRequestException, WbException
from pywb.utils.wbexception import NotFoundException from pywb.utils.wbexception import NotFoundException
from warcio.recordloader import ArchiveLoadFailed from warcio.recordloader import ArchiveLoadFailed
from pywb.webagg.fuzzymatcher import FuzzyMatcher from pywb.warcserver.index.fuzzymatcher import FuzzyMatcher
from pywb.warcserver.resource.responseloader import WARCPathLoader, LiveWebLoader, VideoLoader
from pywb.warcserver.utils import MementoUtils
import six import six

View File

@ -7,17 +7,16 @@ import os
from warcio.timeutils import timestamp_now from warcio.timeutils import timestamp_now
from pywb.cdx.cdxops import process_cdx
from pywb.cdx.query import CDXQuery
from heapq import merge from heapq import merge
from collections import deque from collections import deque
from itertools import chain from itertools import chain
from pywb.webagg.indexsource import FileIndexSource, RedisIndexSource
from pywb.utils.wbexception import NotFoundException, WbException from pywb.utils.wbexception import NotFoundException, WbException
from pywb.warcserver.utils import ParamFormatter, res_template
from pywb.webagg.utils import ParamFormatter, res_template from pywb.warcserver.index.indexsource import FileIndexSource, RedisIndexSource
from pywb.warcserver.index.cdxops import process_cdx
from pywb.warcserver.index.query import CDXQuery
import six import six
import glob import glob

View File

@ -1,8 +1,8 @@
from pywb.cdx.cdxobject import CDXObject, IDXObject from pywb.warcserver.index.cdxobject import CDXObject, IDXObject
from pywb.cdx.cdxobject import TIMESTAMP, STATUSCODE, MIMETYPE, DIGEST from pywb.warcserver.index.cdxobject import TIMESTAMP, STATUSCODE, MIMETYPE, DIGEST
from pywb.cdx.cdxobject import OFFSET, LENGTH, FILENAME from pywb.warcserver.index.cdxobject import OFFSET, LENGTH, FILENAME
from pywb.cdx.query import CDXQuery from pywb.warcserver.index.query import CDXQuery
from warcio.timeutils import timestamp_to_sec, pad_timestamp from warcio.timeutils import timestamp_to_sec, pad_timestamp
from warcio.timeutils import PAD_14_DOWN, PAD_14_UP from warcio.timeutils import PAD_14_DOWN, PAD_14_UP

View File

@ -1,13 +1,14 @@
from pywb.utils.binsearch import iter_range from pywb.utils.binsearch import iter_range
from warcio.timeutils import timestamp_to_http_date, http_date_to_timestamp
from warcio.timeutils import timestamp_now
from pywb.utils.canonicalize import canonicalize from pywb.utils.canonicalize import canonicalize
from pywb.utils.wbexception import NotFoundException from pywb.utils.wbexception import NotFoundException
from pywb.cdx.cdxobject import CDXObject from warcio.timeutils import timestamp_to_http_date, http_date_to_timestamp
from warcio.timeutils import timestamp_now
from pywb.webagg.utils import ParamFormatter, res_template from pywb.warcserver.index.cdxobject import CDXObject
from pywb.webagg.utils import MementoUtils
from pywb.warcserver.utils import ParamFormatter, res_template
from pywb.warcserver.utils import MementoUtils
import redis import redis

View File

@ -1,5 +1,5 @@
from six.moves.urllib.parse import urlencode from six.moves.urllib.parse import urlencode
from pywb.cdx.cdxobject import CDXException from pywb.warcserver.index.cdxobject import CDXException
from pywb.utils.canonicalize import calc_search_range from pywb.utils.canonicalize import calc_search_range

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from pywb.cdx.cdxobject import CDXObject, IDXObject, CDXException from pywb.warcserver.index.cdxobject import CDXObject, IDXObject, CDXException
from pytest import raises from pytest import raises
def test_empty_cdxobject(): def test_empty_cdxobject():

View File

@ -159,7 +159,7 @@ org,iana)/domains/root/db 20140126200928 http://www.iana.org/domains/root/db tex
""" """
#================================================================= #=================================================================
from pywb.webagg.autoapp import init_index_agg from pywb.warcserver.warcserver import init_index_agg
import os import os
import sys import sys

View File

@ -3,15 +3,15 @@ import os
import shutil import shutil
import json import json
from .testutils import to_path, to_json_list, TempDirTests, BaseTestClass, TEST_CDX_PATH from pywb.warcserver.test.testutils import to_path, to_json_list, TempDirTests, BaseTestClass, TEST_CDX_PATH
from mock import patch from mock import patch
import time import time
from pywb.webagg.aggregator import DirectoryIndexSource, CacheDirectoryIndexSource from pywb.warcserver.index.aggregator import DirectoryIndexSource, CacheDirectoryIndexSource
from pywb.webagg.aggregator import SimpleAggregator from pywb.warcserver.index.aggregator import SimpleAggregator
from pywb.webagg.indexsource import MementoIndexSource from pywb.warcserver.index.indexsource import MementoIndexSource
#============================================================================= #=============================================================================
@ -114,7 +114,7 @@ class TestDirAgg(TempDirTests, BaseTestClass):
assert(to_json_list(res) == exp) assert(to_json_list(res) == exp)
assert(errs == {}) assert(errs == {})
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', mock_link_header) @patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', mock_link_header)
def test_agg_dir_and_memento(self): def test_agg_dir_and_memento(self):
sources = {'ia': MementoIndexSource.from_timegate_url('http://web.archive.org/web/'), sources = {'ia': MementoIndexSource.from_timegate_url('http://web.archive.org/web/'),
'local': self.dir_loader} 'local': self.dir_loader}

View File

@ -1,11 +1,11 @@
from pywb.webagg.indexsource import FileIndexSource, RemoteIndexSource, MementoIndexSource, RedisIndexSource from pywb.warcserver.index.indexsource import FileIndexSource, RemoteIndexSource, MementoIndexSource, RedisIndexSource
from pywb.webagg.indexsource import LiveIndexSource from pywb.warcserver.index.indexsource import LiveIndexSource
from pywb.webagg.aggregator import SimpleAggregator from pywb.warcserver.index.aggregator import SimpleAggregator
from warcio.timeutils import timestamp_now from warcio.timeutils import timestamp_now
from .testutils import key_ts_res, TEST_CDX_PATH from pywb.warcserver.test.testutils import key_ts_res, TEST_CDX_PATH
import pytest import pytest
import os import os

View File

@ -1,6 +1,6 @@
from pywb.utils.wbexception import AccessException from pywb.utils.wbexception import AccessException
from pywb.cdx.cdxops import cdx_load from pywb.warcserver.index.cdxops import cdx_load
from pywb.cdx.query import CDXQuery from pywb.warcserver.index.query import CDXQuery
from pytest import raises from pytest import raises

View File

@ -1,10 +1,11 @@
from gevent import monkey; monkey.patch_all(thread=False) from gevent import monkey; monkey.patch_all(thread=False)
from pywb.webagg.aggregator import SimpleAggregator, GeventTimeoutAggregator from pywb.warcserver.index.aggregator import SimpleAggregator, GeventTimeoutAggregator
from pywb.webagg.aggregator import BaseAggregator from pywb.warcserver.index.aggregator import BaseAggregator
from pywb.webagg.indexsource import FileIndexSource, RemoteIndexSource, MementoIndexSource from pywb.warcserver.index.indexsource import FileIndexSource, RemoteIndexSource, MementoIndexSource
from .testutils import to_json_list, to_path, TEST_CDX_PATH, MementoOverrideTests, BaseTestClass
from pywb.warcserver.test.testutils import to_json_list, to_path, TEST_CDX_PATH, MementoOverrideTests, BaseTestClass
import json import json
import pytest import pytest
@ -13,7 +14,7 @@ import six
from mock import patch from mock import patch
from pywb.webagg.handlers import IndexHandler from pywb.warcserver.handlers import IndexHandler
# Aggregator Mappings # Aggregator Mappings
@ -40,7 +41,7 @@ agg_nf = {'simple': SimpleAggregator(nf),
# ============================================================================ # ============================================================================
class TestMemAgg(MementoOverrideTests, BaseTestClass): class TestMemAgg(MementoOverrideTests, BaseTestClass):
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys())) @pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_1')) @patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_1'))
def test_mem_agg_index_1(self, agg): def test_mem_agg_index_1(self, agg):
url = 'http://iana.org/' url = 'http://iana.org/'
res, errs = agg(dict(url=url, closest='20140126000000', limit=5)) res, errs = agg(dict(url=url, closest='20140126000000', limit=5))
@ -58,7 +59,7 @@ class TestMemAgg(MementoOverrideTests, BaseTestClass):
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys())) @pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_2')) @patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_2'))
def test_mem_agg_index_2(self, agg): def test_mem_agg_index_2(self, agg):
url = 'http://example.com/' url = 'http://example.com/'
res, errs = agg(dict(url=url, closest='20100512', limit=6)) res, errs = agg(dict(url=url, closest='20100512', limit=6))
@ -76,7 +77,7 @@ class TestMemAgg(MementoOverrideTests, BaseTestClass):
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys())) @pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_3')) @patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_3'))
def test_mem_agg_index_3(self, agg): def test_mem_agg_index_3(self, agg):
url = 'http://vvork.com/' url = 'http://vvork.com/'
res, errs = agg(dict(url=url, closest='20141001', limit=5)) res, errs = agg(dict(url=url, closest='20141001', limit=5))
@ -92,7 +93,7 @@ class TestMemAgg(MementoOverrideTests, BaseTestClass):
@pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys())) @pytest.mark.parametrize("agg", list(aggs.values()), ids=list(aggs.keys()))
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_4')) @patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('agg_test_4'))
def test_mem_agg_index_4(self, agg): def test_mem_agg_index_4(self, agg):
url = 'http://vvork.com/' url = 'http://vvork.com/'
res, errs = agg(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait')) res, errs = agg(dict(url=url, closest='20141001', limit=2, sources='rhiz,ait'))

View File

@ -1,5 +1,5 @@
from pywb.webagg.aggregator import RedisMultiKeyIndexSource from pywb.warcserver.index.aggregator import RedisMultiKeyIndexSource
from .testutils import to_path, to_json_list, FakeRedisTests, BaseTestClass, TEST_CDX_PATH from pywb.warcserver.test.testutils import to_path, to_json_list, FakeRedisTests, BaseTestClass, TEST_CDX_PATH
class TestRedisAgg(FakeRedisTests, BaseTestClass): class TestRedisAgg(FakeRedisTests, BaseTestClass):

View File

@ -1,11 +1,11 @@
from gevent import monkey; monkey.patch_all(thread=False) from gevent import monkey; monkey.patch_all(thread=False)
import time import time
from pywb.webagg.indexsource import FileIndexSource from pywb.warcserver.index.indexsource import FileIndexSource
from pywb.webagg.aggregator import SimpleAggregator, TimeoutMixin from pywb.warcserver.index.aggregator import SimpleAggregator, TimeoutMixin
from pywb.webagg.aggregator import GeventTimeoutAggregator, GeventTimeoutAggregator from pywb.warcserver.index.aggregator import GeventTimeoutAggregator, GeventTimeoutAggregator
from .testutils import to_json_list, TEST_CDX_PATH from pywb.warcserver.test.testutils import to_json_list, TEST_CDX_PATH
class TimeoutFileSource(FileIndexSource): class TimeoutFileSource(FileIndexSource):

View File

@ -122,9 +122,9 @@ Exception: No Locations Found for: foo2
""" """
from pywb.webagg.test.test_cdxops import cdx_ops_test, cdx_ops_test_data
from pywb import get_test_dir from pywb import get_test_dir
from pywb.webagg.autoapp import init_index_agg from pywb.warcserver.index.test.test_cdxops import cdx_ops_test, cdx_ops_test_data
from pywb.warcserver.warcserver import init_index_agg
import shutil import shutil
import tempfile import tempfile

View File

@ -9,13 +9,14 @@ import six
from six.moves import map from six.moves import map
#from pywb.cdx.cdxsource import CDXSource from warcio.bufferedreaders import gzip_decompressor
from pywb.webagg.indexsource import BaseIndexSource
from pywb.cdx.cdxobject import IDXObject, CDXException, CDXObject #from pywb.warcserver.index.cdxsource import CDXSource
from pywb.cdx.query import CDXQuery from pywb.warcserver.index.indexsource import BaseIndexSource
from pywb.warcserver.index.cdxobject import IDXObject, CDXException, CDXObject
from pywb.warcserver.index.query import CDXQuery
from pywb.utils.loaders import BlockLoader, read_last_line from pywb.utils.loaders import BlockLoader, read_last_line
from warcio.bufferedreaders import gzip_decompressor
from pywb.utils.binsearch import iter_range, linearsearch, search from pywb.utils.binsearch import iter_range, linearsearch, search

View File

@ -2,7 +2,7 @@ from warcio.bufferedreaders import DecompressingBufferedReader
from warcio.recordloader import ArcWarcRecordLoader from warcio.recordloader import ArcWarcRecordLoader
from pywb.utils.loaders import BlockLoader from pywb.utils.loaders import BlockLoader
from pywb.webagg.utils import BUFF_SIZE from pywb.warcserver.utils import BUFF_SIZE
#================================================================= #=================================================================

View File

@ -3,7 +3,7 @@ import redis
from pywb.utils.binsearch import iter_exact from pywb.utils.binsearch import iter_exact
from pywb.utils.loaders import to_native_str from pywb.utils.loaders import to_native_str
from pywb.webagg.indexsource import RedisIndexSource from pywb.warcserver.index.indexsource import RedisIndexSource
from six.moves.urllib.request import url2pathname from six.moves.urllib.request import url2pathname
import six import six

View File

@ -1,7 +1,7 @@
from warcio.recordloader import ArchiveLoadFailed from warcio.recordloader import ArchiveLoadFailed
from warcio.timeutils import iso_date_to_timestamp from warcio.timeutils import iso_date_to_timestamp
from pywb.webagg.resource.blockrecordloader import BlockArcWarcRecordLoader from pywb.warcserver.resource.blockrecordloader import BlockArcWarcRecordLoader
from pywb.utils.wbexception import NotFoundException from pywb.utils.wbexception import NotFoundException

View File

@ -1,6 +1,3 @@
from pywb.webagg.utils import MementoUtils, StreamIter, compress_gzip_iter
from pywb.webagg.utils import ParamFormatter
from warcio.timeutils import timestamp_to_datetime, datetime_to_timestamp from warcio.timeutils import timestamp_to_datetime, datetime_to_timestamp
from warcio.timeutils import iso_date_to_datetime, datetime_to_iso_date from warcio.timeutils import iso_date_to_datetime, datetime_to_iso_date
from warcio.timeutils import http_date_to_datetime, datetime_to_http_date from warcio.timeutils import http_date_to_datetime, datetime_to_http_date
@ -10,8 +7,11 @@ from warcio.statusandheaders import StatusAndHeaders, StatusAndHeadersParser
from pywb.utils.wbexception import LiveResourceException, WbException from pywb.utils.wbexception import LiveResourceException, WbException
from pywb.webagg.resource.resolvingloader import ResolvingLoader from pywb.warcserver.utils import MementoUtils, StreamIter, compress_gzip_iter
from pywb.webagg.resource.pathresolvers import DefaultResolverMixin from pywb.warcserver.utils import ParamFormatter
from pywb.warcserver.resource.resolvingloader import ResolvingLoader
from pywb.warcserver.resource.pathresolvers import DefaultResolverMixin
from six.moves.urllib.parse import urlsplit, quote, unquote from six.moves.urllib.parse import urlsplit, quote, unquote

View File

@ -294,11 +294,11 @@ import six
from warcio.recordloader import ArcWarcRecordLoader, ArchiveLoadFailed from warcio.recordloader import ArcWarcRecordLoader, ArchiveLoadFailed
from pywb.webagg.resource.blockrecordloader import BlockArcWarcRecordLoader from pywb.warcserver.resource.blockrecordloader import BlockArcWarcRecordLoader
from pywb.webagg.resource.resolvingloader import ResolvingLoader from pywb.warcserver.resource.resolvingloader import ResolvingLoader
from pywb.webagg.resource.pathresolvers import DefaultResolverMixin from pywb.warcserver.resource.pathresolvers import DefaultResolverMixin
from pywb.cdx.cdxobject import CDXObject from pywb.warcserver.index.cdxobject import CDXObject
from pywb import get_test_dir from pywb import get_test_dir
from mock import patch from mock import patch

View File

@ -1,8 +1,9 @@
from pywb import get_test_dir from pywb import get_test_dir
from pywb.webagg.resource.pathresolvers import PrefixResolver, PathIndexResolver, RedisResolver
from pywb.webagg.resource.pathresolvers import DefaultResolverMixin
from pywb.utils.loaders import to_file_url from pywb.utils.loaders import to_file_url
from pywb.cdx.cdxobject import CDXObject
from pywb.warcserver.resource.pathresolvers import PrefixResolver, PathIndexResolver, RedisResolver
from pywb.warcserver.resource.pathresolvers import DefaultResolverMixin
from pywb.warcserver.index.cdxobject import CDXObject
import os import os

View File

View File

@ -1,17 +1,17 @@
from .testutils import TempDirTests, BaseTestClass from .testutils import TempDirTests, BaseTestClass
from pywb.webagg.autoapp import AutoConfigApp from pywb.warcserver.warcserver import WarcServer
import os import os
from pywb.webagg.indexsource import RemoteIndexSource, LiveIndexSource, MementoIndexSource, FileIndexSource from pywb.warcserver.index.indexsource import RemoteIndexSource, LiveIndexSource, MementoIndexSource, FileIndexSource
from pywb.webagg.handlers import ResourceHandler, HandlerSeq from pywb.warcserver.index.aggregator import BaseSourceListAggregator, DirectoryIndexSource
from pywb.webagg.aggregator import BaseSourceListAggregator, DirectoryIndexSource from pywb.warcserver.handlers import ResourceHandler, HandlerSeq
# ============================================================================ # ============================================================================
class TestAutoConfigApp(TempDirTests, BaseTestClass): class TestWarcServer(TempDirTests, BaseTestClass):
@classmethod @classmethod
def setup_class(cls): def setup_class(cls):
super(TestAutoConfigApp, cls).setup_class() super(TestWarcServer, cls).setup_class()
cls.orig_cwd = os.getcwd() cls.orig_cwd = os.getcwd()
os.chdir(cls.root_dir) os.chdir(cls.root_dir)
os.mkdir('./local') os.mkdir('./local')
@ -30,12 +30,12 @@ class TestAutoConfigApp(TempDirTests, BaseTestClass):
with open(os.path.join('local', 'indexes', 'file.loc'), 'a') as fh: with open(os.path.join('local', 'indexes', 'file.loc'), 'a') as fh:
fh.write('foo') fh.write('foo')
cls.loader = AutoConfigApp(os.path.join(cls.get_curr_dir(), 'test_autoapp.yaml')) cls.loader = WarcServer(os.path.join(cls.get_curr_dir(), 'test_warcserver_config.yaml'))
@classmethod @classmethod
def teardown_class(cls): def teardown_class(cls):
os.chdir(cls.orig_cwd) os.chdir(cls.orig_cwd)
super(TestAutoConfigApp, cls).teardown_class() super(TestWarcServer, cls).teardown_class()
@staticmethod @staticmethod
def get_curr_dir(): def get_curr_dir():

View File

@ -1,22 +1,6 @@
#from gevent import monkey; monkey.patch_all(thread=False) from .testutils import to_path, MementoOverrideTests, FakeRedisTests, BaseTestClass, TEST_CDX_PATH, TEST_WARC_PATH
from collections import OrderedDict from collections import OrderedDict
from pywb.webagg.handlers import DefaultResourceHandler, HandlerSeq
from pywb.webagg.indexsource import MementoIndexSource, FileIndexSource, LiveIndexSource
from pywb.webagg.indexsource import RemoteIndexSource
from pywb.webagg.aggregator import GeventTimeoutAggregator, SimpleAggregator
from pywb.webagg.aggregator import DirectoryIndexSource
from pywb.webagg.app import ResAggApp
from pywb.webagg.utils import MementoUtils
from warcio.recordloader import ArcWarcRecordLoader
from warcio.statusandheaders import StatusAndHeadersParser
from warcio.bufferedreaders import ChunkedDataReader
from io import BytesIO from io import BytesIO
from six.moves.urllib.parse import urlencode from six.moves.urllib.parse import urlencode
@ -24,10 +8,24 @@ import webtest
from fakeredis import FakeStrictRedis from fakeredis import FakeStrictRedis
from mock import patch from mock import patch
from .testutils import to_path, MementoOverrideTests, FakeRedisTests, BaseTestClass, TEST_CDX_PATH, TEST_WARC_PATH
import json import json
from warcio.recordloader import ArcWarcRecordLoader
from warcio.statusandheaders import StatusAndHeadersParser
from warcio.bufferedreaders import ChunkedDataReader
from pywb.warcserver.handlers import DefaultResourceHandler, HandlerSeq
from pywb.warcserver.index.indexsource import MementoIndexSource, FileIndexSource, LiveIndexSource
from pywb.warcserver.index.indexsource import RemoteIndexSource
from pywb.warcserver.index.aggregator import GeventTimeoutAggregator, SimpleAggregator
from pywb.warcserver.index.aggregator import DirectoryIndexSource
from pywb.warcserver.basewarcserver import BaseWarcServer
from pywb.warcserver.utils import MementoUtils
sources = { sources = {
'local': DirectoryIndexSource(TEST_CDX_PATH), 'local': DirectoryIndexSource(TEST_CDX_PATH),
'ia': MementoIndexSource.from_timegate_url('http://web.archive.org/web/'), 'ia': MementoIndexSource.from_timegate_url('http://web.archive.org/web/'),
@ -43,14 +41,14 @@ ia_cdx = {
class TestResAgg(MementoOverrideTests, FakeRedisTests, BaseTestClass): class TestBaseWarcServer(MementoOverrideTests, FakeRedisTests, BaseTestClass):
@classmethod @classmethod
def setup_class(cls): def setup_class(cls):
super(TestResAgg, cls).setup_class() super(TestBaseWarcServer, cls).setup_class()
live_source = SimpleAggregator({'live': LiveIndexSource()}) live_source = SimpleAggregator({'live': LiveIndexSource()})
live_handler = DefaultResourceHandler(live_source) live_handler = DefaultResourceHandler(live_source)
app = ResAggApp() app = BaseWarcServer()
app.add_route('/live', live_handler) app.add_route('/live', live_handler)
source1 = GeventTimeoutAggregator(sources) source1 = GeventTimeoutAggregator(sources)
@ -178,7 +176,7 @@ class TestResAgg(MementoOverrideTests, FakeRedisTests, BaseTestClass):
assert 'ResErrors' not in resp.headers assert 'ResErrors' not in resp.headers
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_mem_1')) @patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_mem_1'))
def test_agg_select_mem_1(self): def test_agg_select_mem_1(self):
resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=20141001') resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=20141001')
@ -193,7 +191,7 @@ class TestResAgg(MementoOverrideTests, FakeRedisTests, BaseTestClass):
assert 'ResErrors' not in resp.headers assert 'ResErrors' not in resp.headers
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_mem_2')) @patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_mem_2'))
def test_agg_select_mem_2(self): def test_agg_select_mem_2(self):
resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=20151231') resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=20151231')
@ -219,7 +217,7 @@ class TestResAgg(MementoOverrideTests, FakeRedisTests, BaseTestClass):
assert record.http_headers.get_statuscode() == '302' assert record.http_headers.get_statuscode() == '302'
assert record.http_headers.get_header('Location') == 'https://www.iana.org/' assert record.http_headers.get_header('Location') == 'https://www.iana.org/'
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_live')) @patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_live'))
def test_agg_select_live(self): def test_agg_select_live(self):
resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=now') resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=now')
@ -232,7 +230,7 @@ class TestResAgg(MementoOverrideTests, FakeRedisTests, BaseTestClass):
assert 'ResErrors' not in resp.headers assert 'ResErrors' not in resp.headers
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local')) @patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local'))
def test_agg_select_local(self): def test_agg_select_local(self):
resp = self.testapp.get('/many/resource?url=http://iana.org/&closest=20140126200624') resp = self.testapp.get('/many/resource?url=http://iana.org/&closest=20140126200624')
@ -245,7 +243,7 @@ class TestResAgg(MementoOverrideTests, FakeRedisTests, BaseTestClass):
assert json.loads(resp.headers['ResErrors']) == {"rhiz": "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"} assert json.loads(resp.headers['ResErrors']) == {"rhiz": "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"}
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local_postreq')) @patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local_postreq'))
def test_agg_select_local_postreq(self): def test_agg_select_local_postreq(self):
req_data = """\ req_data = """\
GET / HTTP/1.1 GET / HTTP/1.1
@ -265,7 +263,7 @@ Host: iana.org
assert json.loads(resp.headers['ResErrors']) == {"rhiz": "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"} assert json.loads(resp.headers['ResErrors']) == {"rhiz": "NotFoundException('http://webenact.rhizome.org/vvork/http://iana.org/',)"}
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_live_postreq')) @patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_live_postreq'))
def test_agg_live_postreq(self): def test_agg_live_postreq(self):
req_data = """\ req_data = """\
GET /get?foo=bar HTTP/1.1 GET /get?foo=bar HTTP/1.1
@ -449,7 +447,7 @@ host: www.youtube.com\
assert resp.text == resp.headers['ResErrors'] assert resp.text == resp.headers['ResErrors']
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local_revisit')) @patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_local_revisit'))
def test_agg_local_revisit(self): def test_agg_local_revisit(self):
resp = self.testapp.get('/many/resource?url=http://www.example.com/&closest=20140127171251&sources=local') resp = self.testapp.get('/many/resource?url=http://www.example.com/&closest=20140127171251&sources=local')
@ -476,7 +474,7 @@ host: www.youtube.com\
assert resp.json == {'message': 'output=foobar not supported'} assert resp.json == {'message': 'output=foobar not supported'}
assert resp.text == resp.headers['ResErrors'] assert resp.text == resp.headers['ResErrors']
@patch('pywb.webagg.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_not_found')) @patch('pywb.warcserver.index.indexsource.MementoIndexSource.get_timegate_links', MementoOverrideTests.mock_link_header('select_not_found'))
def test_error_local_not_found(self): def test_error_local_not_found(self):
resp = self.testapp.get('/many/resource?url=http://not-found.error/&sources=local', status=404) resp = self.testapp.get('/many/resource?url=http://not-found.error/&sources=local', status=404)

View File

@ -1,4 +1,4 @@
from pywb.webagg.inputrequest import DirectWSGIInputRequest, POSTInputRequest from pywb.warcserver.inputrequest import DirectWSGIInputRequest, POSTInputRequest
from werkzeug.routing import Map, Rule from werkzeug.routing import Map, Rule
import webtest import webtest

View File

@ -3,21 +3,23 @@ from gevent import monkey; monkey.patch_all(thread=False)
import webtest import webtest
from io import BytesIO from io import BytesIO
from pywb.webagg.app import ResAggApp
import requests import requests
from pywb.webagg.handlers import DefaultResourceHandler
from pywb.webagg.aggregator import SimpleAggregator
from pywb.webagg.upstreamindexsource import UpstreamMementoIndexSource, UpstreamAggIndexSource
from warcio.recordloader import ArcWarcRecordLoader from warcio.recordloader import ArcWarcRecordLoader
from pywb.warcserver.handlers import DefaultResourceHandler
from pywb.warcserver.basewarcserver import BaseWarcServer
from pywb.warcserver.index.aggregator import SimpleAggregator
from pywb.warcserver.upstreamindexsource import UpstreamMementoIndexSource, UpstreamAggIndexSource
from .testutils import LiveServerTests, BaseTestClass from .testutils import LiveServerTests, BaseTestClass
class TestUpstream(LiveServerTests, BaseTestClass): class TestUpstream(LiveServerTests, BaseTestClass):
def setup(self): def setup(self):
app = ResAggApp() app = BaseWarcServer()
base_url = 'http://localhost:{0}'.format(self.server.port) base_url = 'http://localhost:{0}'.format(self.server.port)
app.add_route('/upstream', app.add_route('/upstream',

View File

@ -8,10 +8,11 @@ import time
from fakeredis import FakeStrictRedis from fakeredis import FakeStrictRedis
from mock import patch from mock import patch
from pywb.webagg.aggregator import SimpleAggregator from pywb.warcserver.basewarcserver import BaseWarcServer
from pywb.webagg.app import ResAggApp from pywb.warcserver.handlers import DefaultResourceHandler
from pywb.webagg.handlers import DefaultResourceHandler
from pywb.webagg.indexsource import LiveIndexSource, MementoIndexSource from pywb.warcserver.index.aggregator import SimpleAggregator
from pywb.warcserver.index.indexsource import LiveIndexSource, MementoIndexSource
from pywb.urlrewrite.geventserver import GeventServer from pywb.urlrewrite.geventserver import GeventServer
@ -141,7 +142,7 @@ class LiveServerTests(object):
@staticmethod @staticmethod
def make_live_app(): def make_live_app():
app = ResAggApp() app = BaseWarcServer()
app.add_route('/live', app.add_route('/live',
DefaultResourceHandler(SimpleAggregator( DefaultResourceHandler(SimpleAggregator(
{'live': LiveIndexSource()}) {'live': LiveIndexSource()})

View File

@ -1,10 +1,12 @@
from pywb.cdx.cdxobject import CDXObject
from pywb.utils.wbexception import NotFoundException
from pywb.webagg.indexsource import BaseIndexSource, RemoteIndexSource
from pywb.webagg.responseloader import LiveWebLoader
from pywb.webagg.utils import ParamFormatter, res_template
from warcio.timeutils import timestamp_now from warcio.timeutils import timestamp_now
from pywb.utils.wbexception import NotFoundException
from pywb.warcserver.index.cdxobject import CDXObject
from pywb.warcserver.index.indexsource import BaseIndexSource, RemoteIndexSource
from pywb.warcserver.resource.responseloader import LiveWebLoader
from pywb.warcserver.utils import ParamFormatter, res_template
#============================================================================= #=============================================================================
class UpstreamAggIndexSource(RemoteIndexSource): class UpstreamAggIndexSource(RemoteIndexSource):

View File

@ -1,16 +1,17 @@
from pywb.webagg.app import ResAggApp
from pywb.webagg.utils import load_config
from pywb.utils.loaders import load_yaml_config from pywb.utils.loaders import load_yaml_config
from pywb.webagg.aggregator import CacheDirectoryIndexSource, RedisMultiKeyIndexSource from pywb.warcserver.basewarcserver import BaseWarcServer
from pywb.webagg.aggregator import GeventTimeoutAggregator, SimpleAggregator from pywb.warcserver.utils import load_config
from pywb.webagg.handlers import DefaultResourceHandler, HandlerSeq from pywb.warcserver.index.aggregator import CacheDirectoryIndexSource, RedisMultiKeyIndexSource
from pywb.warcserver.index.aggregator import GeventTimeoutAggregator, SimpleAggregator
from pywb.webagg.indexsource import FileIndexSource, RemoteIndexSource from pywb.warcserver.handlers import DefaultResourceHandler, HandlerSeq
from pywb.webagg.indexsource import MementoIndexSource, RedisIndexSource
from pywb.webagg.indexsource import LiveIndexSource from pywb.warcserver.index.indexsource import FileIndexSource, RemoteIndexSource
from pywb.webagg.zipnum import ZipNumIndexSource from pywb.warcserver.index.indexsource import MementoIndexSource, RedisIndexSource
from pywb.warcserver.index.indexsource import LiveIndexSource
from pywb.warcserver.index.zipnum import ZipNumIndexSource
from pywb import DEFAULT_CONFIG from pywb import DEFAULT_CONFIG
@ -30,7 +31,7 @@ SOURCE_LIST = [LiveIndexSource,
# ============================================================================ # ============================================================================
class AutoConfigApp(ResAggApp): class WarcServer(BaseWarcServer):
AUTO_DIR_INDEX_PATH = '{coll}/indexes/' AUTO_DIR_INDEX_PATH = '{coll}/indexes/'
AUTO_DIR_ARCHIVE_PATH = '{coll}/archive/' AUTO_DIR_ARCHIVE_PATH = '{coll}/archive/'
@ -51,7 +52,7 @@ class AutoConfigApp(ResAggApp):
custom_config['collections'].update(config['collections']) custom_config['collections'].update(config['collections'])
config.update(custom_config) config.update(custom_config)
super(AutoConfigApp, self).__init__(debug=config.get('debug', False)) super(WarcServer, self).__init__(debug=config.get('debug', False))
self.config = config self.config = config
if self.config.get('enable_auto_colls', True): if self.config.get('enable_auto_colls', True):