mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
tests: add tests for indexing http custom status/verbs with and without verify #99
This commit is contained in:
parent
08064f3806
commit
5028901a17
@ -134,6 +134,20 @@ org,httpbin)/post?data=^&foo=bar 20140610001255 http://httpbin.org/post?foo=bar
|
||||
org,httpbin)/post?data=^&foo=bar 20140610001255 http://httpbin.org/post?foo=bar application/x-www-form-urlencoded - - - - 475 3118 post-test.warc.gz
|
||||
|
||||
|
||||
# Test with custom verbs/protocol
|
||||
#================================================================
|
||||
# no validation
|
||||
>>> print_cdx_index('example-extra.warc')
|
||||
CDX N b a m s k r M S V g
|
||||
com,example)/?example=2 20140103030321 http://example.com?example=2 text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1987 0 example-extra.warc
|
||||
com,example)/?example=2 20140603030341 http://example.com?example=2 warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 504 2701 example-extra.warc
|
||||
com,example)/?example=2 20140103030321 http://example.com?example=2 text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1987 3207 example-extra.warc
|
||||
com,example)/?example=2 20140603030341 http://example.com?example=2 warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 504 5910 example-extra.warc
|
||||
|
||||
>>> print_cdx_index('example-extra.warc', verify_http=True)
|
||||
Traceback (most recent call last):
|
||||
StatusAndHeadersParserException: Expected Status Line starting with ['HTTP/1.0', 'HTTP/1.1'] - Found: HTTPX/1.1 200 OK
|
||||
|
||||
|
||||
# Test CLI interface -- (check for num lines)
|
||||
#=================================================================
|
||||
@ -142,19 +156,19 @@ org,httpbin)/post?data=^&foo=bar 20140610001255 http://httpbin.org/post?foo=bar
|
||||
>>> cli_lines(['--sort', '-', TEST_WARC_DIR])
|
||||
com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 example-url-agnostic-revisit.warc.gz
|
||||
urn:X-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 example-wpull.warc.gz
|
||||
Total: 208
|
||||
Total: 210
|
||||
|
||||
# test sort, multiple inputs, recursive, from base test dir
|
||||
>>> cli_lines(['--sort', '-r', '-', get_test_dir()])
|
||||
com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 warcs/example-url-agnostic-revisit.warc.gz
|
||||
urn:X-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 warcs/example-wpull.warc.gz
|
||||
Total: 208
|
||||
Total: 210
|
||||
|
||||
# test sort, 9-field, multiple inputs, all records + post query
|
||||
>>> cli_lines(['--sort', '-a', '-p', '-9', TEST_WARC_DIR])
|
||||
com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - 355 example-url-agnostic-revisit.warc.gz
|
||||
urn:X-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - 3181 example-wpull.warc.gz
|
||||
Total: 401
|
||||
Total: 404
|
||||
|
||||
# test writing to stdout
|
||||
>>> cli_lines(['-', TEST_WARC_DIR + 'example.warc.gz'])
|
||||
@ -178,7 +192,7 @@ Total: 4
|
||||
>>> cli_lines(['--sort', '--dir-root', get_test_dir() + 'other/', TEST_WARC_DIR])
|
||||
com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 ../warcs/example-url-agnostic-revisit.warc.gz
|
||||
urn:X-wpull:log 20150330235046 urn:X-wpull:log text/plain - Q32A3PBAN6S7I26HWZDX5CDCB6MN6UN6 - - 557 3181 ../warcs/example-wpull.warc.gz
|
||||
Total: 208
|
||||
Total: 210
|
||||
|
||||
# test writing to temp dir, also use unicode filename
|
||||
>>> cli_lines_with_dir(unicode(TEST_WARC_DIR + 'example.warc.gz'))
|
||||
|
@ -105,3 +105,110 @@ WARC-Refers-To-Date: 2014-01-03T03:03:21Z
|
||||
Content-Length: 0
|
||||
|
||||
|
||||
WARC/1.0
|
||||
WARC-Type: response
|
||||
WARC-Record-ID: <urn:uuid:6d058047-ede2-4a13-be79-90c17c631dd4>
|
||||
WARC-Date: 2014-01-03T03:03:21Z
|
||||
Content-Length: 1610
|
||||
Content-Type: application/http; msgtype=response
|
||||
WARC-Payload-Digest: sha1:B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A
|
||||
WARC-Target-URI: http://example.com?example=2
|
||||
WARC-Warcinfo-ID: <urn:uuid:fbd6cf0a-6160-4550-b343-12188dc05234>
|
||||
|
||||
HTTPX/1.1 200 OK
|
||||
Accept-Ranges: bytes
|
||||
Cache-Control: max-age=604800
|
||||
Content-Type: text/html
|
||||
Date: Fri, 03 Jan 2014 03:03:21 GMT
|
||||
Etag: "359670651"
|
||||
Expires: Fri, 10 Jan 2014 03:03:21 GMT
|
||||
Last-Modified: Fri, 09 Aug 2013 23:54:35 GMT
|
||||
Server: ECS (sjc/4FCE)
|
||||
X-Cache: HIT
|
||||
x-ec-custom-error: 1
|
||||
Content-Length: 1270
|
||||
Connection: close
|
||||
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Example Domain</title>
|
||||
|
||||
<meta charset="utf-8" />
|
||||
<meta http-equiv="Content-type" content="text/html; charset=utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<style type="text/css">
|
||||
body {
|
||||
background-color: #f0f0f2;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
|
||||
|
||||
}
|
||||
div {
|
||||
width: 600px;
|
||||
margin: 5em auto;
|
||||
padding: 50px;
|
||||
background-color: #fff;
|
||||
border-radius: 1em;
|
||||
}
|
||||
a:link, a:visited {
|
||||
color: #38488f;
|
||||
text-decoration: none;
|
||||
}
|
||||
@media (max-width: 700px) {
|
||||
body {
|
||||
background-color: #fff;
|
||||
}
|
||||
div {
|
||||
width: auto;
|
||||
margin: 0 auto;
|
||||
border-radius: 0;
|
||||
padding: 1em;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<div>
|
||||
<h1>Example Domain</h1>
|
||||
<p>This domain is established to be used for illustrative examples in documents. You may use this
|
||||
domain in examples without prior coordination or asking for permission.</p>
|
||||
<p><a href="http://www.iana.org/domains/example">More information...</a></p>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
||||
WARC/1.0
|
||||
WARC-Type: request
|
||||
WARC-Record-ID: <urn:uuid:9a3ffea5-9556-4790-a6bf-c15231fd6b97>
|
||||
WARC-Date: 2014-01-03T03:03:21Z
|
||||
Content-Length: 323
|
||||
Content-Type: application/http; msgtype=request
|
||||
WARC-Concurrent-To: <urn:uuid:6d058047-ede2-4a13-be79-90c17c631dd4>
|
||||
WARC-Target-URI: http://example.com?example=2
|
||||
WARC-Warcinfo-ID: <urn:uuid:fbd6cf0a-6160-4550-b343-12188dc05234>
|
||||
|
||||
GETX /?example=2 HTTP/1.1
|
||||
Connection: close
|
||||
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
|
||||
Accept-Language: en-US,en;q=0.8
|
||||
User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.57 Safari/537.36 (via Wayback Save Page)
|
||||
Host: example.com
|
||||
|
||||
|
||||
WARC/1.0
|
||||
WARC-Type: revisit
|
||||
WARC-Record-ID: <urn:uuid:3619f5b0-d967-44be-8f24-762098d427c4>
|
||||
WARC-Date: 2014-06-03T03:03:41Z
|
||||
WARC-Payload-Digest: sha1:B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A
|
||||
WARC-Target-URI: http://example.com?example=2
|
||||
WARC-Warcinfo-ID: <urn:uuid:fbd6cf0a-6160-4550-b343-12188dc05234>
|
||||
WARC-Profile: http://netpreserve.org/warc/0.18/revisit/identical-payload-digest
|
||||
WARC-Refers-To-Target-URI: http://example.com?example=2
|
||||
WARC-Refers-To-Date: 2014-01-03T03:03:21Z
|
||||
Content-Length: 0
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user