diff --git a/pywb/apps/rewriterapp.py b/pywb/apps/rewriterapp.py index ebb60555..b23e948e 100644 --- a/pywb/apps/rewriterapp.py +++ b/pywb/apps/rewriterapp.py @@ -539,41 +539,19 @@ class RewriterApp(object): content_type=content_type, status=status) - def handle_query(self, environ, wb_url, kwargs, full_prefix): - res = self.do_query(wb_url, kwargs) - + def handle_timemap(self, wb_url, kwargs, full_prefix): output = kwargs.get('output') - if output: - return self.make_timemap(wb_url, res, full_prefix, output) - - def format_cdx(text): - cdx_lines = text.rstrip().split('\n') - for cdx in cdx_lines: - if not cdx: - continue - - cdx = json.loads(cdx) - self.process_query_cdx(cdx, wb_url, kwargs) - yield cdx + res = self.do_query(wb_url, kwargs) + return self.make_timemap(wb_url, res, full_prefix, output) + def handle_query(self, environ, wb_url, kwargs, full_prefix): prefix = self.get_full_prefix(environ) params = dict(url=wb_url.url, - prefix=prefix, - cdx_lines=list(format_cdx(res.text))) - - extra_params = self.get_query_params(wb_url, kwargs) - if extra_params: - params.update(extra_params) + prefix=prefix) return self.query_view.render_to_string(environ, **params) - def process_query_cdx(self, cdx, wb_url, kwargs): - return - - def get_query_params(self, wb_url, kwargs): - return None - def get_host_prefix(self, environ): scheme = environ['wsgi.url_scheme'] + '://' @@ -648,7 +626,10 @@ class RewriterApp(object): return None def handle_custom_response(self, environ, wb_url, full_prefix, host_prefix, kwargs): - if wb_url.is_query() or kwargs.get('output'): + if kwargs.get('output'): + return self.handle_timemap(wb_url, kwargs, full_prefix) + + if wb_url.is_query(): return self.handle_query(environ, wb_url, kwargs, full_prefix) if self.is_framed_replay(wb_url): diff --git a/pywb/static/query.js b/pywb/static/query.js new file mode 100644 index 00000000..9de8d728 --- /dev/null +++ b/pywb/static/query.js @@ -0,0 +1,147 @@ +var Text = { + months: { + '01': "January", + '02': "February", + '03': "March", + '04': "April", + '05': "May", + '06': "June", + '07': "July", + '08': "August", + '09': "September", + '10': "October", + '11': "November", + '12': "December", + } +}; + +function RenderCalendar(prefix, url) { + var years = []; + + function ts_to_date(ts, is_gmt) + { + if (ts.length < 14) { + return ts; + } + + var datestr = (ts.substring(0, 4) + "-" + + ts.substring(4, 6) + "-" + + ts.substring(6, 8) + "T" + + ts.substring(8, 10) + ":" + + ts.substring(10, 12) + ":" + + ts.substring(12, 14) + "-00:00"); + + var date = new Date(datestr); + if (is_gmt) { + return date.toGMTString(); + } else { + return date.toLocaleString(); + } + } + + function getYearTs(ts){ + return ts.substring(0, 4); + } + + function getMonthTs(ts){ + return ts.substring(4, 6); + } + + function getDayTs(ts){ + var day = ts.substring(6, 8); + if (day.charAt(0) == '0') { + day = day.charAt(1); + } + return day; + } + + function getHoursMinutesTs(ts){ + return ts.substring(8, 10) + ':' + ts.substring(10, 12); + } + + + function getMonthName(ts){ + var month = getMonthTs(ts); + return Text.months[month]; + } + + /*Get unique values from array*/ + function uniques(arr) { + var a = []; + for (var i = 0, l = arr.length; i < l; i++) { + if (a.indexOf(arr[i]) === -1 && arr[i] !== '') { + a.push(arr[i]); + } + } + return a; + } + + function init() { + $.ajax(prefix + "cdx", { + data: {"url": url, "output": "json"}, + dataType: "text", + success: function(data) { + processAll(data.trim().split("\n")); + } + }); + } + + function processAll(cdxLines) { + $("#count").text(cdxLines.length); + for (var i = 0; i < cdxLines.length; i++) { + var obj = JSON.parse(cdxLines[i]); + processUrl(prefix, obj.timestamp, obj.url); + } + yearCount(); + handleClicks(); + } + + function processUrl(prefix, ts, url) { + var currentYear = getYearTs(ts); + years.push(currentYear); + + var currentMonth = getMonthName(ts); + var currentDay = getDayTs(ts); + var currentHoursMinutes = getHoursMinutesTs(ts); + + if (! $('#year_' + currentYear).length){ + $("#captureYears").append('

' + getYearTs(ts) + '

'); /*insert year div if it does not exist*/ + } + + if (! $('#month_' + currentYear + '_' + currentMonth).length){ + $('#months_' + currentYear).append('
'+ currentMonth + '
'); /*insert month div if it does not exist*/ + } + + //always insert current capture, assuming no duplicates + $('#days_' + currentYear + '_' + currentMonth).append('
' + currentDay + ' ' + currentMonth + ' at ' + currentHoursMinutes + '
'); /*insert month div if it does not exist*/ + } + + function yearCount() { + //Insert number of versions for each year + years = uniques(years); //get list of years with versions + var numberofVersions; + var versionsString; + + for (var i = 0; i < years.length; i++) { + numberofVersions = $('#year_' + years[i].toString()).parent().next().find(".day").length; + numberofVersionsString = numberofVersions == 1 ? numberofVersions + " version " : numberofVersions + " versions "; + $('#' + years[i] + '_right').prepend(numberofVersionsString); + } + } + + // Init + function handleClicks() { + $(".year").click(function() { + $(this).find(".yearCarret").toggleClass('fa-caret-up fa-caret-down'); + $(this).parent().next().slideToggle( "fast", "linear" ); + }); + $(".month").click(function() { + $(this).find(".monthCarret").toggleClass('fa-caret-up fa-caret-down'); + $(this).parent().next().slideToggle( "fast", "linear" ); + }); + }; + + init(); + +}; + diff --git a/pywb/templates/query.html b/pywb/templates/query.html index 5ead175f..d45081f5 100644 --- a/pywb/templates/query.html +++ b/pywb/templates/query.html @@ -6,134 +6,12 @@ - +

pywb Query Results

- {% if cdx_lines | length > 0 %} -

{{ cdx_lines | length }} captures of {{ url }}

-
- - {% for cdx in cdx_lines %} - - {% endfor %} - -
- {% else %} - No captures found for {{ url }} - {% endif %} - - +

captures of {{ url }}

+
+ diff --git a/tests/test_integration.py b/tests/test_integration.py index 3191dec0..4a9de768 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -33,14 +33,14 @@ class TestWbIntegration(BaseConfigTest): resp = self.testapp.get('/pywb/*/iana.org') self._assert_basic_html(resp) # 3 Captures + header - assert len(resp.html.find_all('tr')) == 4 + #assert len(resp.html.find_all('tr')) == 4 def test_calendar_query_2(self): # unfiltered collection resp = self.testapp.get('/pywb/*/http://www.iana.org/_css/2013.1/screen.css') self._assert_basic_html(resp) # 17 Captures + header - assert len(resp.html.find_all('tr')) == 18 + #assert len(resp.html.find_all('tr')) == 18 # filtered collection #resp = self.testapp.get('/pywb-filt/*/http://www.iana.org/_css/2013.1/screen.css') @@ -48,35 +48,29 @@ class TestWbIntegration(BaseConfigTest): # 1 Capture (filtered) + header #assert len(resp.html.find_all('tr')) == 2 - def test_calendar_query_fuzzy_match(self): + def test_cdxj_query_fuzzy_match(self): # fuzzy match removing _= according to standard rules.yaml - resp = self.testapp.get('/pywb/*/http://www.iana.org/_css/2013.1/screen.css?_=3141592653') - self._assert_basic_html(resp) - # 17 Captures + header - assert len(resp.html.find_all('tr')) == 18 + resp = self.testapp.get('/pywb/cdx?url=http://www.iana.org/_css/2013.1/screen.css%3F_=3141592653') + assert len(resp.text.rstrip().split('\n')) == 17 - def test_calendar_query_fuzzy_match_add_slash(self): + def test_cdxj_query_fuzzy_match_add_slash(self): # fuzzy match removing _= according to standard rules.yaml - resp = self.testapp.get('/pywb/*/http://www.iana.org/_css/2013.1/screen.css/?_=3141592653') - self._assert_basic_html(resp) + resp = self.testapp.get('/pywb/cdx?url=http://www.iana.org/_css/2013.1/screen.css/%3F_=3141592653') # 17 Captures + header - assert len(resp.html.find_all('tr')) == 18 + assert len(resp.text.rstrip().split('\n')) == 17 - def test_calendar_not_found(self): + def test_cdxj_not_found(self): # query with no results - resp = self.testapp.get('/pywb/*/http://not-exist.example.com') - self._assert_basic_html(resp) - assert 'No captures found' in resp.text, resp.text - assert len(resp.html.find_all('tr')) == 0 + resp = self.testapp.get('/pywb/cdx?url=http://not-exist.example.com') + assert resp.text == '' - def _test_cdx_query(self): - resp = self.testapp.get('/pywb/cdx_/*/http://www.iana.org/') - self._assert_basic_text(resp) + def test_cdxj_query(self): + resp = self.testapp.get('/pywb/cdx?url=http://www.iana.org/') + + assert 'org,iana)/ 20140126200624 {"url": "http://www.iana.org/", "mime": "text/html", "status": "200", "digest": "OSSAPWJ23L56IYVRW3GFEAR4MCJMGPTB"' in resp.text - assert '20140127171238 http://www.iana.org/ warc/revisit - OSSAPWJ23L56IYVRW3GFEAR4MCJMGPTB' in resp # check for 3 cdx lines (strip final newline) - actual_len = len(str(resp.text).rstrip().split('\n')) - assert actual_len == 3, actual_len + assert len(resp.text.rstrip().split('\n')) == 3 def test_replay_top_frame(self): resp = self.testapp.get('/pywb/20140127171238/http://www.iana.org/') @@ -166,14 +160,6 @@ class TestWbIntegration(BaseConfigTest): resp = self.testapp.get('/pywb/vi_/https://www.youtube.com/watch?v=DjFZyFWSt1M', status=404) assert resp.status_int == 404 - def _test_replay_cdx_mod(self): - resp = self.testapp.get('/pywb/20140127171239cdx_/http://www.iana.org/_css/2013.1/print.css') - self._assert_basic_text(resp) - - lines = resp.text.rstrip().split('\n') - assert len(lines) == 17 - assert lines[0].startswith('org,iana)/_css/2013.1/print.css 20140127171239') - def test_replay_banner_only(self): resp = self.testapp.get('/pywb/20140126201054bn_/http://www.iana.org/domains/reserved')