diff --git a/pywb/apps/cli.py b/pywb/apps/cli.py index 53564921..12ee7cbd 100644 --- a/pywb/apps/cli.py +++ b/pywb/apps/cli.py @@ -5,10 +5,10 @@ import logging #============================================================================= -def webagg(args=None): +def warcserver(args=None): WarcServerCli(args=args, default_port=8070, - desc='pywb Web Aggregator Server').run() + desc='pywb WarcServer').run() #============================================================================= diff --git a/pywb/apps/rewriterapp.py b/pywb/apps/rewriterapp.py index 25c6e2f7..1b846bcf 100644 --- a/pywb/apps/rewriterapp.py +++ b/pywb/apps/rewriterapp.py @@ -235,7 +235,7 @@ class RewriterApp(object): memento_dt = r.headers.get('Memento-Datetime') target_uri = r.headers.get('WARC-Target-URI') - cdx = CDXObject(r.headers.get('Webagg-Cdx').encode('utf-8')) + cdx = CDXObject(r.headers.get('Warcserver-Cdx').encode('utf-8')) #cdx['urlkey'] = urlkey #cdx['timestamp'] = http_date_to_timestamp(memento_dt) @@ -589,8 +589,6 @@ class RewriterApp(object): def _add_custom_params(self, cdx, headers, kwargs): pass - #if resp_headers.get('Webagg-Source-Live') == '1': - # cdx['is_live'] = 'true' def get_top_frame_params(self, wb_url, kwargs): return None diff --git a/pywb/recorder/filters.py b/pywb/recorder/filters.py index dd8cb45c..ef8c987e 100644 --- a/pywb/recorder/filters.py +++ b/pywb/recorder/filters.py @@ -96,7 +96,7 @@ class CollectionFilter(SkipDefaultFilter): if not rx: rx = self.rx_accept_map.get('*') - if rx and not rx.match(resp_headers.get('WebAgg-Source-Coll', '')): + if rx and not rx.match(resp_headers.get('Warcserver-Source-Coll', '')): return True return False diff --git a/pywb/recorder/test/test_recorder.py b/pywb/recorder/test/test_recorder.py index 1ffc77c7..02268452 100644 --- a/pywb/recorder/test/test_recorder.py +++ b/pywb/recorder/test/test_recorder.py @@ -80,7 +80,7 @@ class TestRecorder(LiveServerTests, FakeRedisTests, TempDirTests, BaseTestClass) if not recorder_app.write_queue.empty(): recorder_app._write_one() - assert resp.headers['WebAgg-Source-Coll'] == 'live' + assert resp.headers['Warcserver-Source-Coll'] == 'live' if not link_url: link_url = unquote(url) diff --git a/pywb/rewrite/templateview.py b/pywb/rewrite/templateview.py index 2d94fe89..4023485b 100644 --- a/pywb/rewrite/templateview.py +++ b/pywb/rewrite/templateview.py @@ -131,7 +131,7 @@ class BaseInsertView(object): if not template: template = self.jenv.jinja_env.get_template(self.insert_file) - params = env.get('webrec.template_params') + params = env.get('pywb.template_params') if params: kwargs.update(params) kwargs['env'] = env diff --git a/pywb/warcserver/resource/responseloader.py b/pywb/warcserver/resource/responseloader.py index 445f680c..51fcae34 100644 --- a/pywb/warcserver/resource/responseloader.py +++ b/pywb/warcserver/resource/responseloader.py @@ -50,15 +50,15 @@ class BaseLoader(object): source = self._get_source_id(cdx) out_headers = {} - out_headers['WebAgg-Type'] = 'warc' + out_headers['Warcserver-Type'] = 'warc' out_headers['Content-Type'] = 'application/warc-record' if params.get('recorder_skip'): out_headers['Recorder-Skip'] = '1' cdx['recorder_skip'] = '1' - out_headers['WebAgg-Cdx'] = to_native_str(cdx.to_cdxj().rstrip()) - out_headers['WebAgg-Source-Coll'] = to_native_str(source) + out_headers['Warcserver-Cdx'] = to_native_str(cdx.to_cdxj().rstrip()) + out_headers['Warcserver-Source-Coll'] = to_native_str(source) if not warc_headers: if other_headers: @@ -304,9 +304,9 @@ class LiveWebLoader(BaseLoader): # then its an error return None - agg_type = upstream_res.headers.get('WebAgg-Type') + agg_type = upstream_res.headers.get('Warcserver-Type') if agg_type == 'warc': - cdx['source'] = unquote(upstream_res.headers.get('WebAgg-Source-Coll')) + cdx['source'] = unquote(upstream_res.headers.get('Warcserver-Source-Coll')) return None, upstream_res.headers, upstream_res if upstream_res.version == 11: diff --git a/pywb/warcserver/test/test_handlers.py b/pywb/warcserver/test/test_handlers.py index 7057c77f..79e2075c 100644 --- a/pywb/warcserver/test/test_handlers.py +++ b/pywb/warcserver/test/test_handlers.py @@ -148,7 +148,7 @@ class TestBaseWarcServer(MementoOverrideTests, FakeRedisTests, BaseTestClass): headers = {'foo': 'bar'} resp = self.testapp.get('/live/resource?url=http://httpbin.org/get?foo=bar', headers=headers) - assert resp.headers['WebAgg-Source-Coll'] == 'live' + assert resp.headers['Warcserver-Source-Coll'] == 'live' self._check_uri_date(resp, 'http://httpbin.org/get?foo=bar', True) @@ -164,7 +164,7 @@ class TestBaseWarcServer(MementoOverrideTests, FakeRedisTests, BaseTestClass): resp = self.testapp.post('/live/resource?url=http://httpbin.org/post', OrderedDict([('foo', 'bar')])) - assert resp.headers['WebAgg-Source-Coll'] == 'live' + assert resp.headers['Warcserver-Source-Coll'] == 'live' self._check_uri_date(resp, 'http://httpbin.org/post', True) @@ -180,7 +180,7 @@ class TestBaseWarcServer(MementoOverrideTests, FakeRedisTests, BaseTestClass): def test_agg_select_mem_1(self): resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=20141001') - assert resp.headers['WebAgg-Source-Coll'] == 'rhiz' + assert resp.headers['Warcserver-Source-Coll'] == 'rhiz' self._check_uri_date(resp, 'http://www.vvork.com/', '2014-10-06T18:43:57Z') @@ -195,7 +195,7 @@ class TestBaseWarcServer(MementoOverrideTests, FakeRedisTests, BaseTestClass): def test_agg_select_mem_2(self): resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=20151231') - assert resp.headers['WebAgg-Source-Coll'] == 'ia' + assert resp.headers['Warcserver-Source-Coll'] == 'ia' self._check_uri_date(resp, 'http://vvork.com/', '2016-01-10T13:48:55Z') @@ -209,7 +209,7 @@ class TestBaseWarcServer(MementoOverrideTests, FakeRedisTests, BaseTestClass): def test_agg_select_mem_unrewrite_headers(self): resp = self.testapp.get('/cdx_api/resource?closest=20161103124134&url=http://iana.org/') - assert resp.headers['WebAgg-Source-Coll'] == 'ia-cdx' + assert resp.headers['Warcserver-Source-Coll'] == 'ia-cdx' buff = BytesIO(resp.body) record = ArcWarcRecordLoader().parse_record_stream(buff, no_record_parse=False) @@ -221,7 +221,7 @@ class TestBaseWarcServer(MementoOverrideTests, FakeRedisTests, BaseTestClass): def test_agg_select_live(self): resp = self.testapp.get('/many/resource?url=http://vvork.com/&closest=now') - assert resp.headers['WebAgg-Source-Coll'] == 'live' + assert resp.headers['Warcserver-Source-Coll'] == 'live' self._check_uri_date(resp, 'http://vvork.com/', True) @@ -234,7 +234,7 @@ class TestBaseWarcServer(MementoOverrideTests, FakeRedisTests, BaseTestClass): def test_agg_select_local(self): resp = self.testapp.get('/many/resource?url=http://iana.org/&closest=20140126200624') - assert resp.headers['WebAgg-Source-Coll'] == 'local:iana.cdxj' + assert resp.headers['Warcserver-Source-Coll'] == 'local:iana.cdxj' self._check_uri_date(resp, 'http://www.iana.org/', '2014-01-26T20:06:24Z') @@ -254,7 +254,7 @@ Host: iana.org resp = self.testapp.post('/many/resource/postreq?url=http://iana.org/&closest=20140126200624', req_data) - assert resp.headers['WebAgg-Source-Coll'] == 'local:iana.cdxj' + assert resp.headers['Warcserver-Source-Coll'] == 'local:iana.cdxj' self._check_uri_date(resp, 'http://www.iana.org/', '2014-01-26T20:06:24Z') @@ -274,7 +274,7 @@ Host: httpbin.org resp = self.testapp.post('/many/resource/postreq?url=http://httpbin.org/get?foo=bar&closest=now', req_data) - assert resp.headers['WebAgg-Source-Coll'] == 'live' + assert resp.headers['Warcserver-Source-Coll'] == 'live' self._check_uri_date(resp, 'http://httpbin.org/get?foo=bar', True) @@ -299,7 +299,7 @@ foo=bar&test=abc""" resp = self.testapp.post('/posttest/resource/postreq?url=http://httpbin.org/post', req_data) - assert resp.headers['WebAgg-Source-Coll'] == 'post' + assert resp.headers['Warcserver-Source-Coll'] == 'post' self._check_uri_date(resp, 'http://httpbin.org/post', True) @@ -318,7 +318,7 @@ foo=bar&test=abc""" resp = self.testapp.post('/fallback/resource?url=http://httpbin.org/post', req_data) - assert resp.headers['WebAgg-Source-Coll'] == 'post' + assert resp.headers['Warcserver-Source-Coll'] == 'post' self._check_uri_date(resp, 'http://httpbin.org/post', True) @@ -334,7 +334,7 @@ foo=bar&test=abc""" def test_agg_seq_fallback_1(self): resp = self.testapp.get('/fallback/resource?url=http://httpbin.org/status/200') - assert resp.headers['WebAgg-Source-Coll'] == 'live' + assert resp.headers['Warcserver-Source-Coll'] == 'live' self._check_uri_date(resp, 'http://httpbin.org/status/200', True) @@ -347,7 +347,7 @@ foo=bar&test=abc""" def test_agg_seq_fallback_2(self): resp = self.testapp.get('/fallback/resource?url=http://www.example.com/') - assert resp.headers['WebAgg-Source-Coll'] == 'example' + assert resp.headers['Warcserver-Source-Coll'] == 'example' self._check_uri_date(resp, 'http://example.com/', '2016-02-25T04:23:29Z') @@ -364,7 +364,7 @@ foo=bar&test=abc""" resp = self.testapp.get('/allredis/resource?url=http://www.example.com/') - assert resp.headers['WebAgg-Source-Coll'] == 'example' + assert resp.headers['Warcserver-Source-Coll'] == 'example' def test_url_agnost(self): f = FakeStrictRedis.from_url('redis://localhost/2') @@ -375,7 +375,7 @@ foo=bar&test=abc""" assert resp.status_int == 200 assert resp.headers['Link'] == MementoUtils.make_link('http://test@example.com/', 'original') - assert resp.headers['WebAgg-Source-Coll'] == 'url-agnost' + assert resp.headers['Warcserver-Source-Coll'] == 'url-agnost' assert resp.headers['Memento-Datetime'] == 'Mon, 29 Jul 2013 19:51:51 GMT' def test_live_video_loader(self): @@ -385,7 +385,7 @@ foo=bar&test=abc""" resp = self.testapp.get('/live/resource', params=params) - assert resp.headers['WebAgg-Source-Coll'] == 'live' + assert resp.headers['Warcserver-Source-Coll'] == 'live' self._check_uri_date(resp, 'metadata://www.youtube.com/v/BfBgWtAIbRc', True) @@ -409,7 +409,7 @@ host: www.youtube.com\ resp = self.testapp.post('/live/resource/postreq?&' + urlencode(params), req_data) - assert resp.headers['WebAgg-Source-Coll'] == 'live' + assert resp.headers['Warcserver-Source-Coll'] == 'live' self._check_uri_date(resp, 'metadata://www.youtube.com/v/BfBgWtAIbRc', True) @@ -451,7 +451,7 @@ host: www.youtube.com\ def test_agg_local_revisit(self): resp = self.testapp.get('/many/resource?url=http://www.example.com/&closest=20140127171251&sources=local') - assert resp.headers['WebAgg-Source-Coll'] == 'local:dupes.cdxj' + assert resp.headers['Warcserver-Source-Coll'] == 'local:dupes.cdxj' buff = BytesIO(resp.body) status_headers = StatusAndHeadersParser(['WARC/1.0']).parse(buff) diff --git a/pywb/warcserver/test/test_upstream.py b/pywb/warcserver/test/test_upstream.py index 6b3b689d..b66d68a6 100644 --- a/pywb/warcserver/test/test_upstream.py +++ b/pywb/warcserver/test/test_upstream.py @@ -48,7 +48,7 @@ class TestUpstream(LiveServerTests, BaseTestClass): def test_live_1(self): resp = requests.get(self.base_url + '/live/resource?url=http://httpbin.org/get', stream=True) - assert resp.headers['WebAgg-Source-Coll'] == 'live' + assert resp.headers['Warcserver-Source-Coll'] == 'live' record = ArcWarcRecordLoader().parse_record_stream(resp.raw, no_record_parse=False) assert record.rec_headers.get_header('WARC-Target-URI') == 'http://httpbin.org/get' @@ -56,7 +56,7 @@ class TestUpstream(LiveServerTests, BaseTestClass): def test_upstream_1(self): resp = self.testapp.get('/upstream/resource?url=http://httpbin.org/get') - assert resp.headers['WebAgg-Source-Coll'] == 'upstream:live' + assert resp.headers['Warcserver-Source-Coll'] == 'upstream:live' raw = BytesIO(resp.body) @@ -66,7 +66,7 @@ class TestUpstream(LiveServerTests, BaseTestClass): def test_upstream_2(self): resp = self.testapp.get('/upstream_opt/resource?url=http://httpbin.org/get') - assert resp.headers['WebAgg-Source-Coll'] == 'upstream_opt:live', resp.headers + assert resp.headers['Warcserver-Source-Coll'] == 'upstream_opt:live', resp.headers raw = BytesIO(resp.body) diff --git a/setup.py b/setup.py index e9a14770..a3b5d3ae 100755 --- a/setup.py +++ b/setup.py @@ -69,7 +69,7 @@ setup( url='https://github.com/ikreymer/pywb', author='Ilya Kreymer', author_email='ikreymer@gmail.com', - description='Python WayBack for web archive replay and live web proxy', + description='Pywb Webrecorder web archive replay and capture tools', long_description=long_description, license='GPL', packages=find_packages(), @@ -77,17 +77,14 @@ setup( provides=[ 'pywb', 'pywb.utils', - 'pywb.cdx', - 'pywb.warc', - 'pywb.rewrite', - 'pywb.framework', - 'pywb.manager', - 'pywb.perms', - 'pywb.webapp', - 'pywb.apps', - 'pywb.webagg', + 'pywb.warcserver', + 'pywb.warcserver.index', + 'pywb.warcserver.resource', 'pywb.recorder', - 'pywb.urlrewrite' + 'pywb.rewrite', + 'pywb.indexer', + 'pywb.manager', + 'pywb.apps', ], package_data={ 'pywb': ['static/flowplayer/*', 'static/*.*', 'templates/*', '*.yaml'], @@ -123,8 +120,7 @@ setup( live-rewrite-server = pywb.apps.cli:live_rewrite_server cdx-indexer = pywb.indexer.cdxindexer:main wb-manager = pywb.manager.manager:main_wrap_exc - webagg-server = pywb.apps.cli:webagg - new-wayback = pywb.apps.cli:new_wayback + warcserver = pywb.apps.cli:warcserver """, classifiers=[ 'Development Status :: 5 - Production/Stable',