mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-31 19:24:11 +02:00
- ResAggApp -> BaseWarcServer
- AutoApp -> WarcServer
- move index related files to warcserver.index package, tests to warcserver.index.test
- move resource loading related files to warcserver.resource package, tests to warcserver.resource.test
- pywb.cdx -> pywb.warcserver.index
- split pywb.warc -> pywb.warcserver.resource or pywb.indexer (for cdx generation)
- bump to 0.51.0 for now!
- tests for pywb.warcserver should be working
57 lines
2.0 KiB
Python
57 lines
2.0 KiB
Python
from warcio.timeutils import timestamp_now
|
|
|
|
from pywb.utils.wbexception import NotFoundException
|
|
|
|
from pywb.warcserver.index.cdxobject import CDXObject
|
|
from pywb.warcserver.index.indexsource import BaseIndexSource, RemoteIndexSource
|
|
from pywb.warcserver.resource.responseloader import LiveWebLoader
|
|
from pywb.warcserver.utils import ParamFormatter, res_template
|
|
|
|
|
|
#=============================================================================
|
|
class UpstreamAggIndexSource(RemoteIndexSource):
    """Remote index source whose endpoints are derived from one base URL.

    The index query goes to ``<base_url>/index`` and the resource URL is
    built from ``<base_url>/resource``; both are handed to
    ``RemoteIndexSource`` as URL templates.
    """

    def __init__(self, base_url):
        """Build the two endpoint templates and initialise the parent.

        :param base_url: prefix to which the ``/index`` and ``/resource``
            paths are appended (plain string concatenation).
        """
        index_endpoint = base_url + '/index?url={url}'
        resource_endpoint = base_url + '/resource?url={url}&closest={timestamp}'

        # NOTE(review): 'filename' is passed as the third positional argument
        # of RemoteIndexSource.__init__ -- presumably the cdx field that
        # receives the resource URL; confirm against the parent class.
        super(UpstreamAggIndexSource, self).__init__(
            index_endpoint,
            resource_endpoint,
            'filename',
        )

    def _set_load_url(self, cdx, params):
        """Run the parent hook, then adjust the cdx entry in place.

        After the parent implementation has run, ``offset`` is forced to
        the string ``'0'`` and any ``load_url`` key is removed.
        """
        super(UpstreamAggIndexSource, self)._set_load_url(cdx, params)

        cdx['offset'] = '0'
        # Drop 'load_url' if present; a missing key is not an error.
        cdx.pop('load_url', None)
|
|
|
|
|
|
#=============================================================================
|
|
class UpstreamMementoIndexSource(BaseIndexSource):
    """Index source that synthesises one cdx entry per request.

    No index is queried: a single ``CDXObject`` is built from the request
    params, the resource is fetched through a ``LiveWebLoader``, and the
    entry is yielded with the loaded result attached.
    """

    def __init__(self, proxy_url='{url}'):
        """Store the URL template and create the loader.

        :param proxy_url: template passed to ``res_template`` together with
            the request params to produce the URL to load.
        """
        self.loader = LiveWebLoader()
        self.proxy_url = proxy_url

    def load_index(self, params):
        """Build the cdx entry for ``params`` and return a cdx iterator.

        ``params`` must provide ``'key'`` (an object with ``.decode``,
        e.g. bytes) and ``'url'``; ``'closest'`` is optional and falls back
        to the current timestamp when missing or empty.

        The returned object is the generator from ``_do_load``, so the
        actual load is performed only once it is iterated.
        """
        entry = CDXObject()
        entry['urlkey'] = params.get('key').decode('utf-8')

        # Use the requested timestamp if one was supplied, otherwise "now".
        entry['timestamp'] = params.get('closest') or timestamp_now()
        entry['url'] = params['url']

        # NOTE(review): res_template presumably fills the template's
        # placeholders from params -- confirm in pywb.warcserver.utils.
        entry['load_url'] = res_template(self.proxy_url, params)
        entry['memento_url'] = entry['load_url']

        return self._do_load(entry, params)

    def _do_load(self, cdx, params):
        """Load the resource for ``cdx`` and yield the entry once.

        This is a generator: the load, and the ``NotFoundException`` raised
        when the loader returns a falsy result, happen on first iteration
        rather than when the method is called.
        """
        loaded = self.loader.load_resource(cdx, params)
        if not loaded:
            raise NotFoundException('Not a memento: ' + cdx['url'])

        # Attach the loaded result to the entry before handing it out.
        cdx['_cached_result'] = loaded
        yield cdx

    def __str__(self):
        """Return the fixed label ``'upstream'``."""
        return 'upstream'

    @staticmethod
    def upstream_resource(base_url):
        """Create a source whose template targets ``<base_url>/resource``.

        :param base_url: prefix to which the ``/resource`` query template
            is appended.
        """
        resource_template = base_url + '/resource?url={url}&closest={closest}'
        return UpstreamMementoIndexSource(resource_template)
|
|
|
|
|