1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

renaming pass:

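- webagg->warcserver: rename the package (pywb.webagg -> pywb.warcserver), the CLI function, and the 'WebAgg-*' HTTP headers (now 'Warcserver-*')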
- setup.py: update the 'provides' package list and the console-script entry points (add 'warcserver')
- templateview param: 'webrec.template_params' -> 'pywb.template_params'
This commit is contained in:
Ilya Kreymer 2017-08-10 10:24:53 -07:00
parent aa0a019567
commit 903fa6c6a2
9 changed files with 41 additions and 47 deletions

View File

@ -5,10 +5,10 @@ import logging
#=============================================================================
def webagg(args=None):
def warcserver(args=None):
WarcServerCli(args=args,
default_port=8070,
desc='pywb Web Aggregator Server').run()
desc='pywb WarcServer').run()
#=============================================================================

View File

@ -235,7 +235,7 @@ class RewriterApp(object):
memento_dt = r.headers.get('Memento-Datetime')
target_uri = r.headers.get('WARC-Target-URI')
cdx = CDXObject(r.headers.get('Webagg-Cdx').encode('utf-8'))
cdx = CDXObject(r.headers.get('Warcserver-Cdx').encode('utf-8'))
#cdx['urlkey'] = urlkey
#cdx['timestamp'] = http_date_to_timestamp(memento_dt)
@ -589,8 +589,6 @@ class RewriterApp(object):
def _add_custom_params(self, cdx, headers, kwargs):
pass
#if resp_headers.get('Webagg-Source-Live') == '1':
# cdx['is_live'] = 'true'
def get_top_frame_params(self, wb_url, kwargs):
return None

View File

@ -96,7 +96,7 @@ class CollectionFilter(SkipDefaultFilter):
if not rx:
rx = self.rx_accept_map.get('*')
if rx and not rx.match(resp_headers.get('WebAgg-Source-Coll', '')):
if rx and not rx.match(resp_headers.get('Warcserver-Source-Coll', '')):
return True
return False

View File

@ -80,7 +80,7 @@ class TestRecorder(LiveServerTests, FakeRedisTests, TempDirTests, BaseTestClass)
if not recorder_app.write_queue.empty():
recorder_app._write_one()
assert resp.headers['WebAgg-Source-Coll'] == 'live'
assert resp.headers['Warcserver-Source-Coll'] == 'live'
if not link_url:
link_url = unquote(url)

View File

@ -131,7 +131,7 @@ class BaseInsertView(object):
if not template:
template = self.jenv.jinja_env.get_template(self.insert_file)
params = env.get('webrec.template_params')
params = env.get('pywb.template_params')
if params:
kwargs.update(params)
kwargs['env'] = env

View File

@ -50,15 +50,15 @@ class BaseLoader(object):
source = self._get_source_id(cdx)
out_headers = {}
out_headers['WebAgg-Type'] = 'warc'
out_headers['Warcserver-Type'] = 'warc'
out_headers['Content-Type'] = 'application/warc-record'
if params.get('recorder_skip'):
out_headers['Recorder-Skip'] = '1'
cdx['recorder_skip'] = '1'
out_headers['WebAgg-Cdx'] = to_native_str(cdx.to_cdxj().rstrip())
out_headers['WebAgg-Source-Coll'] = to_native_str(source)
out_headers['Warcserver-Cdx'] = to_native_str(cdx.to_cdxj().rstrip())
out_headers['Warcserver-Source-Coll'] = to_native_str(source)
if not warc_headers:
if other_headers:
@ -304,9 +304,9 @@ class LiveWebLoader(BaseLoader):
# then its an error
return None
agg_type = upstream_res.headers.get('WebAgg-Type')
agg_type = upstream_res.headers.get('Warcserver-Type')
if agg_type == 'warc':
cdx['source'] = unquote(upstream_res.headers.get('WebAgg-Source-Coll'))
cdx['source'] = unquote(upstream_res.headers.get('Warcserver-Source-Coll'))
return None, upstream_res.headers, upstream_res
if upstream_res.version == 11:

View File

@ -148,7 +148,7 @@ class TestBaseWarcServer(MementoOverrideTests, FakeRedisTests, BaseTestClass):
headers = {'foo': 'bar'}
resp = self.testapp.get('/live/resource?url=http://httpbin.org/get?foo=bar', headers=headers)
assert resp.headers['WebAgg-Source-Coll'] == 'live'
assert resp.headers['Warcserver-Source-Coll'] == 'live'
self._check_uri_date(resp, 'http://httpbin.org/get?foo=bar', True)
@ -164,7 +164,7 @@ class TestBaseWarcServer(MementoOverrideTests, FakeRedisTests, BaseTestClass):
resp = self.testapp.post('/live/resource?url=http://httpbin.org/post',
OrderedDict([('foo', 'bar')]))
assert resp.headers['WebAgg-Source-Coll'] == 'live'
assert resp.headers['Warcserver-Source-Coll'] == 'live'
self._check_uri_date(resp, 'http://httpbin.org/post', True)
@ -180,7 +180,7 @@ class TestBaseWarcServer(MementoOverrideTests, FakeRedisTests, BaseTestClass):
def test_agg_select_mem_1(self):
resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=20141001')
assert resp.headers['WebAgg-Source-Coll'] == 'rhiz'
assert resp.headers['Warcserver-Source-Coll'] == 'rhiz'
self._check_uri_date(resp, 'http://www.vvork.com/', '2014-10-06T18:43:57Z')
@ -195,7 +195,7 @@ class TestBaseWarcServer(MementoOverrideTests, FakeRedisTests, BaseTestClass):
def test_agg_select_mem_2(self):
resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=20151231')
assert resp.headers['WebAgg-Source-Coll'] == 'ia'
assert resp.headers['Warcserver-Source-Coll'] == 'ia'
self._check_uri_date(resp, 'http://vvork.com/', '2016-01-10T13:48:55Z')
@ -209,7 +209,7 @@ class TestBaseWarcServer(MementoOverrideTests, FakeRedisTests, BaseTestClass):
def test_agg_select_mem_unrewrite_headers(self):
resp = self.testapp.get('/cdx_api/resource?closest=20161103124134&url=http://iana.org/')
assert resp.headers['WebAgg-Source-Coll'] == 'ia-cdx'
assert resp.headers['Warcserver-Source-Coll'] == 'ia-cdx'
buff = BytesIO(resp.body)
record = ArcWarcRecordLoader().parse_record_stream(buff, no_record_parse=False)
@ -221,7 +221,7 @@ class TestBaseWarcServer(MementoOverrideTests, FakeRedisTests, BaseTestClass):
def test_agg_select_live(self):
resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=now')
assert resp.headers['WebAgg-Source-Coll'] == 'live'
assert resp.headers['Warcserver-Source-Coll'] == 'live'
self._check_uri_date(resp, 'http://vvork.com/', True)
@ -234,7 +234,7 @@ class TestBaseWarcServer(MementoOverrideTests, FakeRedisTests, BaseTestClass):
def test_agg_select_local(self):
resp = self.testapp.get('/many/resource?url=http://iana.org/&closest=20140126200624')
assert resp.headers['WebAgg-Source-Coll'] == 'local:iana.cdxj'
assert resp.headers['Warcserver-Source-Coll'] == 'local:iana.cdxj'
self._check_uri_date(resp, 'http://www.iana.org/', '2014-01-26T20:06:24Z')
@ -254,7 +254,7 @@ Host: iana.org
resp = self.testapp.post('/many/resource/postreq?url=http://iana.org/&closest=20140126200624', req_data)
assert resp.headers['WebAgg-Source-Coll'] == 'local:iana.cdxj'
assert resp.headers['Warcserver-Source-Coll'] == 'local:iana.cdxj'
self._check_uri_date(resp, 'http://www.iana.org/', '2014-01-26T20:06:24Z')
@ -274,7 +274,7 @@ Host: httpbin.org
resp = self.testapp.post('/many/resource/postreq?url=http://httpbin.org/get?foo=bar&closest=now', req_data)
assert resp.headers['WebAgg-Source-Coll'] == 'live'
assert resp.headers['Warcserver-Source-Coll'] == 'live'
self._check_uri_date(resp, 'http://httpbin.org/get?foo=bar', True)
@ -299,7 +299,7 @@ foo=bar&test=abc"""
resp = self.testapp.post('/posttest/resource/postreq?url=http://httpbin.org/post', req_data)
assert resp.headers['WebAgg-Source-Coll'] == 'post'
assert resp.headers['Warcserver-Source-Coll'] == 'post'
self._check_uri_date(resp, 'http://httpbin.org/post', True)
@ -318,7 +318,7 @@ foo=bar&test=abc"""
resp = self.testapp.post('/fallback/resource?url=http://httpbin.org/post', req_data)
assert resp.headers['WebAgg-Source-Coll'] == 'post'
assert resp.headers['Warcserver-Source-Coll'] == 'post'
self._check_uri_date(resp, 'http://httpbin.org/post', True)
@ -334,7 +334,7 @@ foo=bar&test=abc"""
def test_agg_seq_fallback_1(self):
resp = self.testapp.get('/fallback/resource?url=http://httpbin.org/status/200')
assert resp.headers['WebAgg-Source-Coll'] == 'live'
assert resp.headers['Warcserver-Source-Coll'] == 'live'
self._check_uri_date(resp, 'http://httpbin.org/status/200', True)
@ -347,7 +347,7 @@ foo=bar&test=abc"""
def test_agg_seq_fallback_2(self):
resp = self.testapp.get('/fallback/resource?url=http://www.example.com/')
assert resp.headers['WebAgg-Source-Coll'] == 'example'
assert resp.headers['Warcserver-Source-Coll'] == 'example'
self._check_uri_date(resp, 'http://example.com/', '2016-02-25T04:23:29Z')
@ -364,7 +364,7 @@ foo=bar&test=abc"""
resp = self.testapp.get('/allredis/resource?url=http://www.example.com/')
assert resp.headers['WebAgg-Source-Coll'] == 'example'
assert resp.headers['Warcserver-Source-Coll'] == 'example'
def test_url_agnost(self):
f = FakeStrictRedis.from_url('redis://localhost/2')
@ -375,7 +375,7 @@ foo=bar&test=abc"""
assert resp.status_int == 200
assert resp.headers['Link'] == MementoUtils.make_link('http://test@example.com/', 'original')
assert resp.headers['WebAgg-Source-Coll'] == 'url-agnost'
assert resp.headers['Warcserver-Source-Coll'] == 'url-agnost'
assert resp.headers['Memento-Datetime'] == 'Mon, 29 Jul 2013 19:51:51 GMT'
def test_live_video_loader(self):
@ -385,7 +385,7 @@ foo=bar&test=abc"""
resp = self.testapp.get('/live/resource', params=params)
assert resp.headers['WebAgg-Source-Coll'] == 'live'
assert resp.headers['Warcserver-Source-Coll'] == 'live'
self._check_uri_date(resp, 'metadata://www.youtube.com/v/BfBgWtAIbRc', True)
@ -409,7 +409,7 @@ host: www.youtube.com\
resp = self.testapp.post('/live/resource/postreq?&' + urlencode(params), req_data)
assert resp.headers['WebAgg-Source-Coll'] == 'live'
assert resp.headers['Warcserver-Source-Coll'] == 'live'
self._check_uri_date(resp, 'metadata://www.youtube.com/v/BfBgWtAIbRc', True)
@ -451,7 +451,7 @@ host: www.youtube.com\
def test_agg_local_revisit(self):
resp = self.testapp.get('/many/resource?url=http://www.example.com/&closest=20140127171251&sources=local')
assert resp.headers['WebAgg-Source-Coll'] == 'local:dupes.cdxj'
assert resp.headers['Warcserver-Source-Coll'] == 'local:dupes.cdxj'
buff = BytesIO(resp.body)
status_headers = StatusAndHeadersParser(['WARC/1.0']).parse(buff)

View File

@ -48,7 +48,7 @@ class TestUpstream(LiveServerTests, BaseTestClass):
def test_live_1(self):
resp = requests.get(self.base_url + '/live/resource?url=http://httpbin.org/get', stream=True)
assert resp.headers['WebAgg-Source-Coll'] == 'live'
assert resp.headers['Warcserver-Source-Coll'] == 'live'
record = ArcWarcRecordLoader().parse_record_stream(resp.raw, no_record_parse=False)
assert record.rec_headers.get_header('WARC-Target-URI') == 'http://httpbin.org/get'
@ -56,7 +56,7 @@ class TestUpstream(LiveServerTests, BaseTestClass):
def test_upstream_1(self):
resp = self.testapp.get('/upstream/resource?url=http://httpbin.org/get')
assert resp.headers['WebAgg-Source-Coll'] == 'upstream:live'
assert resp.headers['Warcserver-Source-Coll'] == 'upstream:live'
raw = BytesIO(resp.body)
@ -66,7 +66,7 @@ class TestUpstream(LiveServerTests, BaseTestClass):
def test_upstream_2(self):
resp = self.testapp.get('/upstream_opt/resource?url=http://httpbin.org/get')
assert resp.headers['WebAgg-Source-Coll'] == 'upstream_opt:live', resp.headers
assert resp.headers['Warcserver-Source-Coll'] == 'upstream_opt:live', resp.headers
raw = BytesIO(resp.body)

View File

@ -69,7 +69,7 @@ setup(
url='https://github.com/ikreymer/pywb',
author='Ilya Kreymer',
author_email='ikreymer@gmail.com',
description='Python WayBack for web archive replay and live web proxy',
description='Pywb Webrecorder web archive replay and capture tools',
long_description=long_description,
license='GPL',
packages=find_packages(),
@ -77,17 +77,14 @@ setup(
provides=[
'pywb',
'pywb.utils',
'pywb.cdx',
'pywb.warc',
'pywb.rewrite',
'pywb.framework',
'pywb.manager',
'pywb.perms',
'pywb.webapp',
'pywb.apps',
'pywb.webagg',
'pywb.warcserver',
'pywb.warcserver.index',
'pywb.warcserver.resource',
'pywb.recorder',
'pywb.urlrewrite'
'pywb.rewrite',
'pywb.indexer',
'pywb.manager',
'pywb.apps',
],
package_data={
'pywb': ['static/flowplayer/*', 'static/*.*', 'templates/*', '*.yaml'],
@ -123,8 +120,7 @@ setup(
live-rewrite-server = pywb.apps.cli:live_rewrite_server
cdx-indexer = pywb.indexer.cdxindexer:main
wb-manager = pywb.manager.manager:main_wrap_exc
webagg-server = pywb.apps.cli:webagg
new-wayback = pywb.apps.cli:new_wayback
warcserver = pywb.apps.cli:warcserver
""",
classifiers=[
'Development Status :: 5 - Production/Stable',