From 531464902fd01392a14982ff8ce8d623177ca7f1 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 19 Feb 2014 00:14:23 -0800 Subject: [PATCH] add uncompressed warc --- sample_archive/warcs/example.warc | 187 ++++++++++++++++++++++++++++++ 1 file changed, 187 insertions(+) create mode 100644 sample_archive/warcs/example.warc diff --git a/sample_archive/warcs/example.warc b/sample_archive/warcs/example.warc new file mode 100644 index 00000000..ecc23fc4 --- /dev/null +++ b/sample_archive/warcs/example.warc @@ -0,0 +1,187 @@ +WARC/1.0 +WARC-Type: warcinfo +WARC-Record-ID: +WARC-Date: 2014-01-03T03:03:22Z +Content-Length: 196 +Content-Type: application/warc-fields +WARC-Filename: live-20140103030321-wwwb-app5.us.archive.org.warc.gz + +software: LiveWeb Warc Writer 1.0 +host: wwwb-app5.us.archive.org +isPartOf: liveweb +format: WARC file version 1.0 +conformsTo: http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Date: 2014-01-03T03:03:21Z +Content-Length: 1610 +Content-Type: application/http; msgtype=response +WARC-Payload-Digest: sha1:B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A +WARC-Target-URI: http://example.com?example=1 +WARC-Warcinfo-ID: + +HTTP/1.1 200 OK +Accept-Ranges: bytes +Cache-Control: max-age=604800 +Content-Type: text/html +Date: Fri, 03 Jan 2014 03:03:21 GMT +Etag: "359670651" +Expires: Fri, 10 Jan 2014 03:03:21 GMT +Last-Modified: Fri, 09 Aug 2013 23:54:35 GMT +Server: ECS (sjc/4FCE) +X-Cache: HIT +x-ec-custom-error: 1 +Content-Length: 1270 +Connection: close + + + + + Example Domain + + + + + + + + +
+

Example Domain

+

This domain is established to be used for illustrative examples in documents. You may use this + domain in examples without prior coordination or asking for permission.

+

More information...

+
+ + + + +WARC/1.0 +WARC-Type: request +WARC-Record-ID: +WARC-Date: 2014-01-03T03:03:21Z +Content-Length: 323 +Content-Type: application/http; msgtype=request +WARC-Concurrent-To: +WARC-Target-URI: http://example.com?example=1 +WARC-Warcinfo-ID: + +GET /?example=1 HTTP/1.1 +Connection: close +Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8 +Accept-Language: en-US,en;q=0.8 +User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.57 Safari/537.36 (via Wayback Save Page) +Host: example.com + + +WARC/1.0 +WARC-Type: revisit +WARC-Record-ID: +WARC-Date: 2014-01-03T03:03:41Z +Content-Length: 340 +Content-Type: application/http; msgtype=response +WARC-Payload-Digest: sha1:B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A +WARC-Target-URI: http://example.com?example=1 +WARC-Warcinfo-ID: +WARC-Profile: http://netpreserve.org/warc/0.18/revisit/identical-payload-digest +WARC-Refers-To-Target-URI: http://example.com?example=1 +WARC-Refers-To-Date: 2014-01-03T03:03:21Z + +HTTP/1.1 200 OK +Accept-Ranges: bytes +Cache-Control: max-age=604800 +Content-Type: text/html +Date: Fri, 03 Jan 2014 03:03:41 GMT +Etag: "359670651" +Expires: Fri, 10 Jan 2014 03:03:41 GMT +Last-Modified: Fri, 09 Aug 2013 23:54:35 GMT +Server: ECS (sjc/4FCE) +X-Cache: HIT +x-ec-custom-error: 1 +Content-Length: 1270 +Connection: close + + + +WARC/1.0 +WARC-Type: request +WARC-Record-ID: +WARC-Date: 2014-01-03T03:03:41Z +Content-Length: 323 +Content-Type: application/http; msgtype=request +WARC-Concurrent-To: +WARC-Target-URI: http://example.com?example=1 +WARC-Warcinfo-ID: + +GET /?example=1 HTTP/1.1 +Connection: close +Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8 +Accept-Language: en-US,en;q=0.8 +User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.57 Safari/537.36 (via Wayback Save Page) +Host: example.com + + +WARC/1.0 +WARC-Type: response +WARC-Record-ID: +WARC-Date: 2014-01-28T05:15:39Z +Content-Length: 471 +Content-Type: application/http; msgtype=response +WARC-Payload-Digest: sha1:JZ622UA23G5ZU6Y3XAKH4LINONUEICEG +WARC-Target-URI: http://www.iana.org/domains/example +WARC-Warcinfo-ID: + +HTTP/1.1 302 Found +Server: Apache +Location: /domains/reserved +Content-Type: text/html; charset=iso-8859-1 +Content-Length: 201 +Accept-Ranges: bytes +Date: Tue, 28 Jan 2014 05:15:39 GMT +X-Varnish: 774901408 774900872 +Age: 80 +Via: 1.1 varnish +Connection: close + + + +302 Found + +

Found

+

The document has moved here.

+ + +