From fb07775d380b28ea7338a7830fc62d41d533f297 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 25 Jun 2014 12:32:57 -0700 Subject: [PATCH] tests: add 'bad.cdx' for testing cdx lines with missing original for revisit, missing/non-existent warc --- pywb/cdx/test/test_cdxops.py | 2 +- sample_archive/cdx/bad.cdx | 5 +++++ tests/test_integration.py | 17 +++++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 sample_archive/cdx/bad.cdx diff --git a/pywb/cdx/test/test_cdxops.py b/pywb/cdx/test/test_cdxops.py index 81ab4660..86c2fce8 100644 --- a/pywb/cdx/test/test_cdxops.py +++ b/pywb/cdx/test/test_cdxops.py @@ -54,7 +54,7 @@ com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 20 com,example)/?example=1 20140103030341 http://example.com?example=1 warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 553 1864 example.warc.gz # Filter exact invert ->>> cdx_ops_test(url = 'http://example.com', sources = [test_cdx_dir], matchType = 'prefix', filter = ['!=urlkey:com,example)/?example=1', '!=urlkey:com,example)/?example=2']) +>>> cdx_ops_test(url = 'http://example.com', sources = [test_cdx_dir], matchType = 'prefix', filter = ['!=urlkey:com,example)/?example=1', '!=urlkey:com,example)/?example=2', '!=urlkey:com,example)/?example=3']) com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 example-url-agnostic-revisit.warc.gz com,example)/ 20140127171200 http://example.com text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1046 334 dupes.warc.gz com,example)/ 20140127171251 http://example.com warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 553 11875 dupes.warc.gz diff --git a/sample_archive/cdx/bad.cdx b/sample_archive/cdx/bad.cdx new file mode 100644 index 00000000..deba0e96 --- /dev/null +++ b/sample_archive/cdx/bad.cdx @@ -0,0 +1,5 @@ + CDX N b a m s k r M S V g +com,example)/?example=2 20140703030321 http://example.com?example=2 text/html 200 
B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1987 0 non-existent.warc +com,example)/?example=2 20140603030351 http://example.com?example=2 warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36B - - 504 2701 example-extra.warc +com,example)/?example=3 20140703030321 http://example.com?example=3 text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1987 0 non-existent.warc +com,example)/?example=3 20140603030351 http://example.com?example=3 warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36B - - 504 2701 example-extra.warc diff --git a/tests/test_integration.py b/tests/test_integration.py index 107ae2bb..7e915acd 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -240,6 +240,23 @@ class TestWb: resp = self.testapp.get(uri, headers = [('Referer', referrer), ('Host', host)], status = 302) assert resp.status_int == 302 + def test_not_existant_warc_other_capture(self): + resp = self.testapp.get('/pywb/20140703030321mp_/http://example.com?example=2') + assert resp.status_int == 302 + assert resp.headers['Location'].endswith('/pywb/20140603030341mp_/http://example.com?example=2') + + def test_missing_revisit_other_capture(self): + resp = self.testapp.get('/pywb/20140603030351mp_/http://example.com?example=2') + assert resp.status_int == 302 + assert resp.headers['Location'].endswith('/pywb/20140603030341mp_/http://example.com?example=2') + + def test_not_existant_warc_no_other(self): + resp = self.testapp.get('/pywb/20140703030321mp_/http://example.com?example=3', status = 503) + assert resp.status_int == 503 + + def test_missing_revisit_no_other(self): + resp = self.testapp.get('/pywb/20140603030351mp_/http://example.com?example=3', status = 503) + assert resp.status_int == 503 def test_post_1(self): resp = self.testapp.post('/pywb/mp_/httpbin.org/post', {'foo': 'bar', 'test': 'abc'})