From 9b8b4d83882b126dd061492069695535b789aad3 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Thu, 10 Dec 2015 12:31:58 -0800 Subject: [PATCH] tests/typo fix: add tests for truncated record detection (see: ikreymer/webarchiveplayer#14) fix typo, closes #161 --- pywb/warc/archiveiterator.py | 1 + pywb/warc/test/test_indexing.py | 3 ++- sample_archive/warcs/example.warc | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pywb/warc/archiveiterator.py b/pywb/warc/archiveiterator.py index b6341860..e419c619 100644 --- a/pywb/warc/archiveiterator.py +++ b/pywb/warc/archiveiterator.py @@ -9,6 +9,7 @@ import hashlib import base64 import re +import sys try: # pragma: no cover from collections import OrderedDict diff --git a/pywb/warc/test/test_indexing.py b/pywb/warc/test/test_indexing.py index bb3f110e..1c8f4554 100644 --- a/pywb/warc/test/test_indexing.py +++ b/pywb/warc/test/test_indexing.py @@ -25,12 +25,13 @@ com,example)/?example=1 20140103030341 http://example.com?example=1 warc/revisit org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 854 4771 example.warc # warc all +# note: length of request record set to 1 byte less than record to test truncation handling >>> print_cdx_index('example.warc', include_all=True) CDX N b a m s k r M S V g com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1987 460 example.warc com,example)/?example=1 20140103030321 http://example.com?example=1 - - - - - 706 2451 example.warc com,example)/?example=1 20140103030341 http://example.com?example=1 warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 896 3161 example.warc -com,example)/?example=1 20140103030341 http://example.com?example=1 - - - - - 706 4061 example.warc +com,example)/?example=1 20140103030341 http://example.com?example=1 - - - - - 703 4061 example.warc org,iana)/domains/example 20140128051539 
http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 854 4771 example.warc # arc.gz diff --git a/sample_archive/warcs/example.warc b/sample_archive/warcs/example.warc index ecc23fc4..ccd93b21 100644 --- a/sample_archive/warcs/example.warc +++ b/sample_archive/warcs/example.warc @@ -140,7 +140,7 @@ WARC/1.0 WARC-Type: request WARC-Record-ID: WARC-Date: 2014-01-03T03:03:41Z -Content-Length: 323 +Content-Length: 320 Content-Type: application/http; msgtype=request WARC-Concurrent-To: WARC-Target-URI: http://example.com?example=1