from .base_config_test import BaseConfigTest, fmod

from pywb.warcserver.index.cdxobject import CDXObject


# ============================================================================
class TestWbIntegration(BaseConfigTest):
    """Integration tests run against the app set up from ``config_test.yaml``.

    Tests that take ``fmod`` receive it as a pytest fixture argument; the
    value is formatted into request paths and expected URLs as the replay
    modifier (an empty value is handled explicitly by several tests).
    NOTE(review): presumably the fixture is parametrized over framed and
    non-framed modes in ``base_config_test`` -- confirm there.

    Methods named ``_test_*`` do not match pytest's default ``test_*``
    function pattern, so they are currently not collected.
    """

    @classmethod
    def setup_class(cls):
        """Initialize the shared test app from ``config_test.yaml``."""
        super(TestWbIntegration, cls).setup_class('config_test.yaml')

    def test_home(self):
        """The home page is basic HTML and mentions the ``/pywb`` collection."""
        resp = self.testapp.get('/')
        self._assert_basic_html(resp)
        assert '/pywb' in resp.text

    def test_pywb_root(self):
        """The collection root page is basic HTML containing 'Search'."""
        resp = self.testapp.get('/pywb/')
        self._assert_basic_html(resp)
        assert 'Search' in resp.text

    def test_pywb_root_head(self):
        """HEAD on the collection root returns 200 with an HTML content type."""
        resp = self.testapp.head('/pywb/')
        assert resp.content_type == 'text/html'
        assert resp.status_int == 200

    def test_pywb_invalid_path(self):
        """HEAD on an unknown path returns 404 with an HTML content type."""
        resp = self.testapp.head('/blah/', status=404)
        assert resp.content_type == 'text/html'
        assert resp.status_int == 404

    def test_calendar_query(self):
        """The calendar page for iana.org lists the expected number of rows."""
        resp = self.testapp.get('/pywb/*/iana.org')
        self._assert_basic_html(resp)
        # 3 Captures + header
        assert len(resp.html.find_all('tr')) == 4

    def test_calendar_query_2(self):
        """The calendar page for screen.css lists all captures (unfiltered)."""
        # unfiltered collection
        resp = self.testapp.get('/pywb/*/http://www.iana.org/_css/2013.1/screen.css')
        self._assert_basic_html(resp)
        # 17 Captures + header
        assert len(resp.html.find_all('tr')) == 18

        # filtered collection
        #resp = self.testapp.get('/pywb-filt/*/http://www.iana.org/_css/2013.1/screen.css')
        #self._assert_basic_html(resp)
        # 1 Capture (filtered) + header
        #assert len(resp.html.find_all('tr')) == 2

    def test_calendar_query_fuzzy_match(self):
        """A calendar query with an extra ``_=`` param yields the same rows."""
        # fuzzy match removing _= according to standard rules.yaml
        resp = self.testapp.get('/pywb/*/http://www.iana.org/_css/2013.1/screen.css?_=3141592653')
        self._assert_basic_html(resp)
        # 17 Captures + header
        assert len(resp.html.find_all('tr')) == 18

    def test_calendar_query_fuzzy_match_add_slash(self):
        """Same as the fuzzy-match query, with a trailing slash on the path."""
        # fuzzy match removing _= according to standard rules.yaml,
        # here with an extra trailing slash before the query string
        resp = self.testapp.get('/pywb/*/http://www.iana.org/_css/2013.1/screen.css/?_=3141592653')
        self._assert_basic_html(resp)
        # 17 Captures + header
        assert len(resp.html.find_all('tr')) == 18

    def test_calendar_not_found(self):
        """A calendar query with no results shows a message and no table rows."""
        # query with no results
        resp = self.testapp.get('/pywb/*/http://not-exist.example.com')
        self._assert_basic_html(resp)
        assert 'No captures found' in resp.text, resp.text
        assert len(resp.html.find_all('tr')) == 0

    def _test_cdx_query(self):
        """Disabled: the ``cdx_`` query for iana.org returns 3 text cdx lines."""
        resp = self.testapp.get('/pywb/cdx_/*/http://www.iana.org/')
        self._assert_basic_text(resp)

        assert '20140127171238 http://www.iana.org/ warc/revisit - OSSAPWJ23L56IYVRW3GFEAR4MCJMGPTB' in resp
        # check for 3 cdx lines (strip final newline)
        actual_len = len(str(resp.text).rstrip().split('\n'))
        assert actual_len == 3, actual_len

    def test_replay_top_frame(self):
        """A replay URL without a modifier returns the top frame, without CSP."""
        resp = self.testapp.get('/pywb/20140127171238/http://www.iana.org/')

        assert 'new ContentFrame' in resp.text
        assert '"20140127171238"' in resp.text
        assert 'http://www.iana.org/' in resp.text, resp.text

        assert 'Content-Security-Policy' not in resp.headers

    def test_replay_content(self, fmod):
        """Replayed HTML has wombat setup, rewritten links and a CSP header."""
        resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/', fmod)
        self._assert_basic_html(resp)

        assert '"20140127171238"' in resp.text, resp.text
        assert 'wombat.js' in resp.text
        assert 'new _WBWombat' in resp.text, resp.text
        assert '/pywb/20140127171238{0}/http://www.iana.org/time-zones"'.format(fmod) in resp.text

        # is_framed is expected to mirror whether a modifier was supplied
        assert ('wbinfo.is_framed = ' + ('true' if fmod else 'false')) in resp.text

        csp = "default-src 'unsafe-eval' 'unsafe-inline' 'self' data: blob: mediastream: ws: wss: ; form-action 'self'"
        assert resp.headers['Content-Security-Policy'] == csp

    def test_replay_fuzzy_1(self, fmod):
        """Replay with an extra ``_=`` param resolves to an existing capture."""
        resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/?_=123', fmod)
        assert resp.status_int == 200
        assert resp.headers['Content-Location'].endswith('/pywb/20140126200624{0}/http://www.iana.org/'.format(fmod))

    def test_replay_no_fuzzy_match(self, fmod):
        """Replay with an unrelated query param (``foo=bar``) returns 404."""
        resp = self.get('/pywb/20140127171238{0}/http://www.iana.org/?foo=bar', fmod, status=404)
        assert resp.status_int == 404

    def test_no_slash_redir_1(self, fmod):
        """A host-only URL without a trailing slash is redirected (307)."""
        resp = self.get('/pywb/20140103030321{0}/http://example.com', fmod)
        assert resp.status_int == 307
        assert resp.headers['Location'].endswith('/pywb/20140103030321{0}/http://example.com/'.format(fmod))

    def test_no_slash_redir_2(self, fmod):
        """The slash redirect (307) keeps the query string after the slash."""
        resp = self.get('/pywb/20140103030321{0}/http://example.com?example=1', fmod)
        assert resp.status_int == 307
        assert resp.headers['Location'].endswith('/pywb/20140103030321{0}/http://example.com/?example=1'.format(fmod))

    def test_replay_cdxj(self, fmod):
        """Replay from the ``pywb-cdxj`` collection returns rewritten HTML."""
        resp = self.get('/pywb-cdxj/20140103030321{0}/http://example.com/?example=1', fmod)
        self._assert_basic_html(resp)

        assert '"20140103030321"' in resp.text
        assert 'wombat.js' in resp.text
        assert '/pywb-cdxj/20140103030321{0}/http://www.iana.org/domains/example'.format(fmod) in resp.text

    def test_replay_cdxj_revisit(self, fmod):
        """Replay of the 20140103030341 capture from ``pywb-cdxj`` works."""
        resp = self.get('/pywb-cdxj/20140103030341{0}/http://example.com/?example=1', fmod)
        self._assert_basic_html(resp)

        assert '"20140103030341"' in resp.text
        assert 'wombat.js' in resp.text
        assert '/pywb-cdxj/20140103030341{0}/http://www.iana.org/domains/example'.format(fmod) in resp.text

    def test_zero_len_revisit(self, fmod):
        """Replay of the 20140603030341 ``example=2`` capture returns HTML."""
        resp = self.get('/pywb/20140603030341{0}/http://example.com/?example=2', fmod)
        self._assert_basic_html(resp)

        assert '"20140603030341"' in resp.text
        assert 'wombat.js' in resp.text
        assert '/pywb/20140603030341{0}/http://www.iana.org/domains/example'.format(fmod) in resp.text

    def test_replay_url_agnostic_revisit(self, fmod):
        """Replay of the 20130729195151 www.example.com capture returns HTML."""
        resp = self.get('/pywb/20130729195151{0}/http://www.example.com/', fmod)
        self._assert_basic_html(resp)

        assert '"20130729195151"' in resp.text
        assert 'wombat.js' in resp.text
        assert '/pywb/20130729195151{0}/http://www.iana.org/domains/example"'.format(fmod) in resp.text

    def test_video_info_not_found(self):
        """A ``vi_`` request for an unarchived video URL returns 404."""
        # not actually archived, but ensure video info path is tested
        resp = self.testapp.get('/pywb/vi_/https://www.youtube.com/watch?v=DjFZyFWSt1M', status=404)
        assert resp.status_int == 404

    def _test_replay_cdx_mod(self):
        """Disabled: the ``cdx_`` modifier on a replay URL returns cdx text."""
        resp = self.testapp.get('/pywb/20140127171239cdx_/http://www.iana.org/_css/2013.1/print.css')
        self._assert_basic_text(resp)

        lines = resp.text.rstrip().split('\n')
        assert len(lines) == 17
        assert lines[0].startswith('org,iana)/_css/2013.1/print.css 20140127171239')

    def test_replay_banner_only(self):
        """The ``bn_`` modifier references wombat.js but leaves URLs as-is."""
        resp = self.testapp.get('/pywb/20140126201054bn_/http://www.iana.org/domains/reserved')

        # wombat.js header insertion
        assert 'wombat.js' in resp.text

        # but no _WBWombat reference is present
        assert '_WBWombat' not in resp.text

        # url not rewritten
        #assert '"http://www.iana.org/domains/example"' in resp.text
        assert '"/_css/2013.1/screen.css"' in resp.text

    def test_replay_identity_1(self):
        """The ``id_`` modifier returns the original, unrewritten content."""
        resp = self.testapp.get('/pywb/20140127171251id_/http://example.com/')

        # no wb header insertion
        assert 'wombat.js' not in resp.text

        assert resp.content_length == 1270, resp.content_length

        # original unrewritten url present
        assert '"http://www.iana.org/domains/example"' in resp.text

    def _test_replay_range_cache_content(self):
        """Disabled: a Range request on ``id_`` content returns a 206 slice."""
        headers = [('Range', 'bytes=0-200')]
        resp = self.testapp.get('/pywb/20140127171250id_/http://example.com', headers=headers)

        assert resp.status_int == 206
        assert resp.headers['Accept-Ranges'] == 'bytes'
        assert resp.headers['Content-Range'] == 'bytes 0-200/1270', resp.headers['Content-Range']
        assert resp.content_length == 201, resp.content_length

        assert 'wombat.js' not in resp.text

    def _test_replay_content_ignore_range(self):
        """Disabled: ``pywb-norange`` ignores Range and returns the full body."""
        headers = [('Range', 'bytes=0-200')]
        resp = self.testapp.get('/pywb-norange/20140127171251id_/http://example.com', headers=headers)

        # range request ignored
        assert resp.status_int == 200

        # full response
        assert resp.content_length == 1270, resp.content_length

        # identity, no header insertion
        assert 'wombat.js' not in resp.text

    def _test_replay_range_cache_content_bound_end(self):
        """Disabled: a Range end past the body length is clamped to its end."""
        headers = [('Range', 'bytes=10-10000')]
        resp = self.testapp.get('/pywb/20140127171251id_/http://example.com', headers=headers)

        assert resp.status_int == 206
        assert resp.headers['Accept-Ranges'] == 'bytes'
        assert resp.headers['Content-Range'] == 'bytes 10-1269/1270', resp.headers['Content-Range']
        assert resp.content_length == 1260, resp.content_length
        assert len(resp.text) == resp.content_length

        assert 'wombat.js' not in resp.text

    def _test_replay_redir_no_cache(self):
        """Disabled: a Range header on a redirect capture is ignored (302)."""
        headers = [('Range', 'bytes=10-10000')]
        # Range ignored
        resp = self.testapp.get('/pywb/20140126200927/http://www.iana.org/domains/root/db/', headers=headers)
        assert resp.status_int == 302
        assert resp.content_length == 0

    def test_replay_identity_2_arcgz(self):
        """``id_`` replay of the arc.gz test capture is left unrewritten."""
        resp = self.testapp.get('/pywb/20140216050221id_/http://arc.gz.test.example.com/')

        # no wb header insertion
        assert 'wombat.js' not in resp.text

        # original unrewritten url present
        assert '"http://www.iana.org/domains/example"' in resp.text

    def test_replay_identity_2_arc(self):
        """``id_`` replay of the arc test capture is left unrewritten."""
        resp = self.testapp.get('/pywb/20140216050221id_/http://arc.test.example.com/')

        # no wb header insertion
        assert 'wombat.js' not in resp.text

        # original unrewritten url present
        assert '"http://www.iana.org/domains/example"' in resp.text

    def test_replay_content_length_1(self, fmod):
        """Content-Length matches the body length for a rewritten SVG."""
        # test larger file, rewritten file (svg!)
        resp = self.get('/pywb/20140126200654{0}/http://www.iana.org/_img/2013.1/rir-map.svg', fmod)
        assert resp.headers['Content-Length'] == str(len(resp.text))

    def test_replay_css_mod(self):
        """The ``cs_`` modifier serves the stylesheet as ``text/css``."""
        resp = self.testapp.get('/pywb/20140127171239cs_/http://www.iana.org/_css/2013.1/screen.css')
        assert resp.status_int == 200
        assert resp.content_type == 'text/css'

    def test_replay_js_mod_no_obj_proxy(self):
        """``js_`` replay of an empty script with an IE11 UA stays empty."""
        # an empty js file, (ie11 UA no js obj proxy)
        resp = self.testapp.get('/pywb/20140126201054js_/http://www.iana.org/_js/2013.1/iana.js',
                                headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'})

        assert resp.status_int == 200
        assert resp.content_length == 0
        assert resp.content_type == 'application/x-javascript'

    def test_replay_js_obj_proxy(self, fmod):
        """jquery.js replay includes the wombat assign wrapper for Chrome."""
        # test js proxy obj with jquery -- no user agent
        resp = self.get('/pywb/20140126200625{0}/http://www.iana.org/_js/2013.1/jquery.js', fmod)

        assert resp.status_int == 200
        assert resp.content_length != 0
        assert resp.content_type == 'application/x-javascript'

        # test with Chrome user agent
        resp = self.get('/pywb/20140126200625{0}/http://www.iana.org/_js/2013.1/jquery.js', fmod,
                        headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'})
        assert 'let window = _____WB$wombat$assign$function_____(' in resp.text

    def test_replay_js_ie11_no_obj_proxy(self, fmod):
        """jquery.js replay omits the wombat assign wrapper for an IE11 UA."""
        # IE11 user-agent, no proxy
        resp = self.get('/pywb/20140126200625{0}/http://www.iana.org/_js/2013.1/jquery.js', fmod,
                        headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'})

        assert 'let window = _____WB$wombat$assign$function_____(' not in resp.text

    def test_replay_non_exact(self, fmod):
        """A non-exact timestamp is served directly, keeping the request ts."""
        # non-exact mode, don't redirect to exact capture
        resp = self.get('/pywb/20140127171237{0}/http://www.iana.org/', fmod)
        assert resp.status_int == 200

        self._assert_basic_html(resp)
        assert '"20140127171237"' in resp.text
        # actual timestamp set in JS
        assert 'timestamp = "20140127171238"' in resp.text
        assert '/pywb/20140127171237{0}/http://www.iana.org/about/'.format(fmod) in resp.text

    def test_replay_remote_ait(self, fmod):
        """Replay via the ``ait:1068`` collection returns rewritten HTML."""
        resp = self.get('/ait:1068/2011{0}/http://www.iana.org/domains/example/', fmod)
        self._assert_basic_html(resp)

        assert '"20120119230023"' in resp.text, resp.text
        assert '<h1>Example Domains</h1>' in resp.text
        assert 'new _WBWombat' in resp.text, resp.text

    def test_latest_replay(self, fmod):
        """A request without a timestamp replays the 20140127171251 capture."""
        # with no timestamp, a non-empty modifier forms its own path segment
        fmod_slash = fmod + '/' if fmod else ''
        resp = self.get('/pywb/{0}http://example.com/', fmod_slash)
        self._assert_basic_html(resp)

        assert resp.headers['Content-Location'].endswith('/20140127171251{0}/http://example.com'.format(fmod))

        assert '"20140127171251"' in resp.text
        assert '/pywb/{0}http://www.iana.org/domains/example'.format(fmod_slash) in resp.text, resp.text

    def test_replay_non_latest_content_location_ts(self, fmod):
        """Content-Location of a timestamp-less replay carries the capture ts."""
        fmod_slash = fmod + '/' if fmod else ''
        resp = self.get('/pywb/{0}http://example.com/', fmod_slash)
        assert resp.status_int == 200

        assert resp.headers['Content-Location'].endswith('/http://example.com')

        # extract the "<timestamp><modifier>" path segment that precedes the
        # target url in Content-Location; it must match the fixed capture
        # timestamp asserted below
        ts = resp.headers['Content-Location'].rsplit('/http://')[0].rsplit('/', 1)[-1]
        assert ts == '20140127171251{0}'.format(fmod)

        if fmod:
            ts = ts.replace(fmod, '')

        # ensure the bare timestamp is present in the page
        assert '"{0}"'.format(ts) in resp.text
        assert '/pywb/{0}http://www.iana.org/domains/example'.format(fmod_slash) in resp.text

        # ensure ts is current ts
        #assert timestamp_now() >= ts, ts

    def test_refer_redirect(self, fmod):
        """A bare path with a replay Referer is redirected into that replay."""
        # webtest uses Host: localhost:80 by default
        target = 'http://localhost:80/pywb/2014{0}/http://iana.org/_css/2013.1/screen.css'.format(fmod)

        resp = self.get('/_css/2013.1/screen.css', fmod, headers=[('Referer', 'http://localhost:80/pywb/2014{0}/http://iana.org/'.format(fmod))])
        assert resp.status_int == 307
        assert resp.headers['Location'] == target, resp.headers['Location']

        resp = resp.follow()
        assert resp.status_int == 200
        assert resp.headers['Content-Location'].endswith('/pywb/20140127171239{0}/http://www.iana.org/_css/2013.1/screen.css'.format(fmod))
        assert resp.content_type == 'text/css'

    def test_non_exact_replay_skip_self_redir(self, fmod):
        """Replay at 20140126200927 is served from the 20140126200928 capture."""
        uri = '/pywb/20140126200927{0}/http://www.iana.org/domains/root/db'
        resp = self.get(uri, fmod)
        assert resp.status_int == 200
        assert resp.headers['Content-Location'].endswith('/pywb/20140126200928{0}/http://www.iana.org/domains/root/db'.format(fmod))

    def test_non_exact_replay_skip_self_redir_slash(self, fmod):
        """Same as above, with a trailing slash on the requested URL."""
        uri = '/pywb/20140126200927{0}/http://www.iana.org/domains/root/db/'
        resp = self.get(uri, fmod)
        assert resp.status_int == 200
        assert resp.headers['Content-Location'].endswith('/pywb/20140126200928{0}/http://www.iana.org/domains/root/db'.format(fmod))

    def test_not_existant_warc_other_capture(self, fmod):
        """Replay at 20140703030321 is served from the 20140603030341 capture."""
        resp = self.get('/pywb/20140703030321{0}/http://example.com/?example=2', fmod)
        assert resp.status_int == 200
        assert resp.headers['Content-Location'].endswith('/pywb/20140603030341{0}/http://example.com?example=2'.format(fmod))

    def test_missing_revisit_other_capture(self, fmod):
        """Replay at 20140603030351 is served from the 20140603030341 capture."""
        resp = self.get('/pywb/20140603030351{0}/http://example.com/?example=2', fmod)
        assert resp.status_int == 200
        assert resp.headers['Content-Location'].endswith('/pywb/20140603030341{0}/http://example.com?example=2'.format(fmod))

    def test_not_existant_warc_no_other(self, fmod):
        """Replay of ``example=3`` at 20140703030321 returns 503."""
        resp = self.get('/pywb/20140703030321{0}/http://example.com/?example=3', fmod, status=503)
        assert resp.status_int == 503

    def test_missing_revisit_no_other(self, fmod):
        """Replay of ``example=3`` at 20140603030351 returns 503."""
        resp = self.get('/pywb/20140603030351{0}/http://example.com/?example=3', fmod, status=503)
        assert resp.status_int == 503

    def test_live_frame(self):
        """A request to the ``live`` collection returns 200."""
        resp = self.testapp.get('/live/http://example.com/?test=test')
        assert resp.status_int == 200

    def _test_live_redir_1(self):
        """Disabled: a ``*`` query on ``live`` redirects to the plain live URL."""
        resp = self.testapp.get('/live/*/http://example.com/?test=test')
        assert resp.status_int == 302
        assert resp.headers['Location'].endswith('/live/http://example.com/?test=test')

    def _test_live_redir_2(self):
        """Disabled: a date-range query on ``live`` redirects to the live URL."""
        resp = self.testapp.get('/live/2010-2011/http://example.com/?test=test')
        assert resp.status_int == 302
        assert resp.headers['Location'].endswith('/live/http://example.com/?test=test')

    def test_live_fallback(self, fmod):
        """A request to the ``pywb-fallback`` collection returns 200."""
        fmod_slash = fmod + '/' if fmod else ''
        resp = self.get('/pywb-fallback/{0}http://example.com/?test=test', fmod_slash)
        assert resp.status_int == 200

    def test_post_1(self, fmod):
        """A form POST without a timestamp echoes the posted fields."""
        fmod_slash = fmod + '/' if fmod else ''
        resp = self.post('/pywb/{0}httpbin.org/post', fmod_slash, {'foo': 'bar', 'test': 'abc'})

        assert resp.status_int == 200
        assert '"foo": "bar"' in resp.text
        assert '"test": "abc"' in resp.text

    def test_post_2(self, fmod):
        """A form POST to a timestamped URL with a query echoes the data."""
        resp = self.post('/pywb/20140610001255{0}/http://httpbin.org/post?foo=bar', fmod, {'data': '^'})
        assert resp.status_int == 200
        assert '"data": "^"' in resp.text

    def test_post_invalid(self, fmod):
        """The same payload sent via ``post_json`` returns 404."""
        # not json
        resp = self.post_json('/pywb/20140610001255{0}/http://httpbin.org/post?foo=bar', fmod, {'data': '^'}, status=404)
        assert resp.status_int == 404

    def test_post_referer_redirect(self, fmod):
        """A bare POST with a replay Referer is redirected (307) into it.

        ``fmod`` has to be requested as a fixture argument: without it the
        name resolves to the module-level object imported from
        ``base_config_test`` rather than to a modifier value.
        """
        # allowing 307 redirects
        resp = self.post('/post', fmod,
                         {'foo': 'bar', 'test': 'abc'},
                         headers=[('Referer', 'http://localhost:80/pywb/2014{0}/http://httpbin.org/foo'.format(fmod))])

        assert resp.status_int == 307
        assert resp.headers['Location'].endswith('/pywb/2014{0}/http://httpbin.org/post'.format(fmod))

    def _test_excluded_content(self, fmod):
        """Disabled: an excluded URL returns 403 with an 'Excluded' message.

        ``fmod`` is taken as a fixture argument so that re-enabling the test
        does not pick up the module-level import of the same name.
        """
        fmod_slash = fmod + '/' if fmod else ''
        resp = self.get('/pywb/{0}http://www.iana.org/_img/bookmark_icon.ico', fmod_slash, status=403)
        assert resp.status_int == 403
        assert 'Excluded' in resp.text

    def test_replay_not_found(self, fmod):
        """HEAD on an unarchived URL returns an HTML 404."""
        fmod_slash = fmod + '/' if fmod else ''
        resp = self.head('/pywb/{0}http://not-exist.example.com/', fmod_slash, status=404)
        assert resp.content_type == 'text/html'
        assert resp.status_int == 404

    def test_static_content(self):
        """The default banner stylesheet is served as non-empty ``text/css``."""
        resp = self.testapp.get('/static/default_banner.css')
        assert resp.status_int == 200
        assert resp.content_type == 'text/css'
        assert resp.content_length > 0

    def test_static_content_filewrapper(self):
        """Static content is also served when ``wsgi.file_wrapper`` is set."""
        from wsgiref.util import FileWrapper
        resp = self.testapp.get('/static/default_banner.css', extra_environ={'wsgi.file_wrapper': FileWrapper})
        assert resp.status_int == 200
        assert resp.content_type == 'text/css'
        assert resp.content_length > 0

    def test_static_not_found(self):
        """A missing static file returns 404."""
        resp = self.testapp.get('/static/notfound.css', status=404)
        assert resp.status_int == 404

    def test_cdx_server_filters(self):
        """Two ``filter`` params on the cdx API narrow the result to one line."""
        resp = self.testapp.get('/pywb/cdx?url=http://www.iana.org/_css/2013.1/screen.css&filter=mime:warc/revisit&filter=filename:dupes.warc.gz')
        assert resp.content_type == 'text/x-cdxj'
        actual_len = len(resp.text.rstrip().split('\n'))
        assert actual_len == 1, actual_len

    def test_cdx_server_advanced(self):
        """The cdx API combines collapseTime, resolveRevisits and reverse."""
        # combine collapsing, reversing and revisit resolving
        resp = self.testapp.get('/pywb/cdx?url=http://www.iana.org/_css/2013.1/print.css&collapseTime=11&resolveRevisits=true&reverse=true')

        # convert back to CDXObject
        cdxs = list(map(CDXObject, resp.body.rstrip().split(b'\n')))
        assert len(cdxs) == 3, len(cdxs)

        # verify timestamps
        timestamps = [cdx['timestamp'] for cdx in cdxs]
        assert timestamps == ['20140127171239', '20140126201054', '20140126200625']

        # verify orig filenames (2 revisits, one non)
        origfilenames = [cdx['orig.filename'] for cdx in cdxs]
        assert origfilenames == ['iana.warc.gz', 'iana.warc.gz', '-']

    # surt() no longer errors on this in 0.3b
    #def test_error(self):
    #    resp = self.testapp.get('/pywb/?abc', status = 400)
    #    assert resp.status_int == 400
    #    assert 'Invalid Url: http://?abc' in resp.text

    def test_coll_info_json(self):
        """``/collinfo.json`` reports 5 fixed and 0 dynamic collections."""
        resp = self.testapp.get('/collinfo.json')
        assert resp.content_type == 'application/json'
        value = resp.json
        assert len(value['fixed']) == 5
        assert len(value['dynamic']) == 0

    #def test_invalid_config(self):
    #    with raises(IOError):
    #        init_app(create_wb_router,
    #                 load_yaml=True,
    #                 config_file='x-invalid-x')