Merge branch 'develop' for 0.6.4

2025-03-15 00:03:28 +01:00 · 2014-11-06 00:34:32 -08:00 · 2014-11-06 00:34:32 -08:00 · 71a8abe9c3
commit 71a8abe9c3
parent a4f9138cb4 f6053a977b
24 changed files with 171 additions and 64 deletions
--- a/CHANGES.rst
+++ b/CHANGES.rst
@ -1,3 +1,19 @@
+pywb 0.6.4 changelist
+~~~~~~~~~~~~~~~~~~~~~
+
+* Ignore bad multiline headers in warc.
+
+* Rewrite fix: Don't parse html entities in HTML rewriter.
+
+* Ensure cdx iterator is closed when reading.
+
+* Rewrite fix: remove pywb prefix from any query params.
+
+* Rewrite fix: better JS rewriting, avoid // comments when matching protocol-relative urls.
+
+* WARC metadata and resource records are included in cdx from cdx-indexer by default.
+
+
 pywb 0.6.3 changelist
 ~~~~~~~~~~~~~~~~~~~~~

--- a/README.rst
+++ b/README.rst
@ -1,10 +1,10 @@
-PyWb 0.6.3
+PyWb 0.6.4
 ==========

 .. image:: https://travis-ci.org/ikreymer/pywb.png?branch=master
      :target: https://travis-ci.org/ikreymer/pywb
-.. image:: https://coveralls.io/repos/ikreymer/pywb/badge.png?branch=master
-      :target: https://coveralls.io/r/ikreymer/pywb?branch=master
+.. image:: https://coveralls.io/repos/ikreymer/pywb/badge.png?branch=develop
+      :target: https://coveralls.io/r/ikreymer/pywb?branch=develop
 .. image:: https://img.shields.io/gratipay/ikreymer.svg
      :target: https://www.gratipay.com/ikreymer/
      
--- a/pywb/cdx/cdxsource.py
+++ b/pywb/cdx/cdxsource.py
@ -28,8 +28,17 @@ class CDXFile(CDXSource):
        self.filename = filename

    def load_cdx(self, query):
-        source = open(self.filename)
-        return iter_range(source, query.key, query.end_key)
+        def do_open():
+            try:
+                source = open(self.filename)
+                gen = iter_range(source, query.key, query.end_key)
+                for line in gen:
+                    yield line
+            finally:
+                source.close()
+
+        return do_open()
+        #return iter_range(do_open(), query.key, query.end_key)

    def __str__(self):
        return 'CDX File - ' + self.filename
--- a/pywb/framework/wbrequestresponse.py
+++ b/pywb/framework/wbrequestresponse.py
@ -78,6 +78,8 @@ class WbRequest(object):
                                                 rel_prefix,
                                                 env.get('SCRIPT_NAME', '/'),
                                                 cookie_scope)
+
+            self.urlrewriter.deprefix_url()
        else:
        # no wb_url, just store blank wb_url
            self.wb_url = None
--- a/pywb/framework/wsgi_wrappers.py
+++ b/pywb/framework/wsgi_wrappers.py
@ -136,9 +136,9 @@ class WSGIApp(object):
            err_details = None

        if error_view:
-            if err_url:
+            if err_url and isinstance(err_url, str):
                err_url = err_url.decode('utf-8', 'ignore')
-            if err_msg:
+            if err_msg and isinstance(err_msg, str):
                err_msg = err_msg.decode('utf-8', 'ignore')

            return error_view.render_response(exc_type=type(exc).__name__,
--- a/pywb/rewrite/html_rewriter.py
+++ b/pywb/rewrite/html_rewriter.py
@ -263,10 +263,20 @@ class HTMLRewriterMixin(object):

 #=================================================================
 class HTMLRewriter(HTMLRewriterMixin, HTMLParser):
+    PARSETAG = re.compile('[<]')
+
    def __init__(self, *args, **kwargs):
        HTMLParser.__init__(self)
        super(HTMLRewriter, self).__init__(*args, **kwargs)

+    def reset(self):
+        HTMLParser.reset(self)
+        self.interesting = self.PARSETAG
+
+    def clear_cdata_mode(self):
+        HTMLParser.clear_cdata_mode(self)
+        self.interesting = self.PARSETAG
+
    def feed(self, string):
        try:
            HTMLParser.feed(self, string)
@ -311,11 +321,12 @@ class HTMLRewriter(HTMLRewriterMixin, HTMLParser):
    def handle_data(self, data):
        self.parse_data(data)

-    def handle_entityref(self, data):
-        self.out.write('&' + data + ';')
-
-    def handle_charref(self, data):
-        self.out.write('&#' + data + ';')
+    # overriding regex so that these are no longer called
+    #def handle_entityref(self, data):
+    #    self.out.write('&' + data + ';')
+    #
+    #def handle_charref(self, data):
+    #    self.out.write('&#' + data + ';')

    def handle_comment(self, data):
        self.out.write('<!--')
--- a/pywb/rewrite/regex_rewriters.py
+++ b/pywb/rewrite/regex_rewriters.py
@ -111,7 +111,8 @@ class JSLinkOnlyRewriter(RegexRewriter):
    JS Rewriter which rewrites absolute http://, https:// and // urls
    at the beginning of a string
    """
-    JS_HTTPX = r'(?<="|\'|;)(?:https?:)?\\{0,4}/\\{0,4}/[A-Za-z0-9:_@.-]+'
+    #JS_HTTPX = r'(?:(?:(?<=["\';])https?:)|(?<=["\']))\\{0,4}/\\{0,4}/[A-Za-z0-9:_@.-]+.*(?=["\s\';&\\])'
+    JS_HTTPX = r'(?<=["\';])(?:https?:)?\\{0,4}/\\{0,4}/[A-Za-z0-9:_@.\-/\\?&#]+(?=["\';&\\])'

    def __init__(self, rewriter, rules=[]):
        rules = rules + [
--- a/pywb/rewrite/test/test_html_rewriter.py
+++ b/pywb/rewrite/test/test_html_rewriter.py
@ -28,8 +28,11 @@ ur"""
 <base href="/web/20131226101010/http://example.com/some/path/static/"/><img src="/web/20131226101010im_/http://example.com/some/path/static/image.gif"/>

 # HTML Entities
->>> parse('<a href="">&rsaquo; &nbsp; &#62;</div>')
-<a href="">&rsaquo; &nbsp; &#62;</div>
+>>> parse('<a href="">&rsaquo; &nbsp; &#62; &#63</div>')
+<a href="">&rsaquo; &nbsp; &#62; &#63</div>
+
+>>> parse('<div>X&Y</div> </div>X&Y;</div>')
+<div>X&Y</div> </div>X&Y;</div>

 # Don't rewrite anchors
 >>> parse('<HTML><A Href="#abc">Text</a></hTmL>')
--- a/pywb/rewrite/test/test_regex_rewriters.py
+++ b/pywb/rewrite/test/test_regex_rewriters.py
@ -61,6 +61,9 @@ r"""
 >>> _test_js('&quot;http:\\/\\/www.example.com\\/some\\/path\\/?query=1&quot;')
 '&quot;/web/20131010/http:\\/\\/www.example.com\\/some\\/path\\/?query=1&quot;'

+>>> _test_js('"http:\/\/sub-site.example.com\/path-dashes\/path_other\/foo_bar.txt"')
+'"/web/20131010/http:\\/\\/sub-site.example.com\\/path-dashes\\/path_other\\/foo_bar.txt"'
+

 #=================================================================
 # XML Rewriting
--- a/pywb/rewrite/test/test_rewrite_live.py
+++ b/pywb/rewrite/test/test_rewrite_live.py
@ -105,10 +105,10 @@ def test_example_1():
    assert '/pywb/20131226101010/http://www.iana.org/domains/example' in buff, buff

 def test_example_2_redirect():
-    status_headers, buff = get_rewritten('http://facebook.com/', urlrewriter)
+    status_headers, buff = get_rewritten('http://httpbin.org/redirect-to?url=http://example.com/', urlrewriter)

    # redirect, no content
-    assert status_headers.get_statuscode() == '301'
+    assert status_headers.get_statuscode() == '302'
    assert len(buff) == 0


--- a/pywb/rewrite/test/test_url_rewriter.py
+++ b/pywb/rewrite/test/test_url_rewriter.py
@ -74,6 +74,18 @@
 >>> UrlRewriter('2013id_/example.com/file/path/blah.html', '/123/').get_new_url(timestamp='20131024')
 '/123/20131024id_/http://example.com/file/path/blah.html'

+# deprefix tests
+>>> do_deprefix('2013id_/http://example.com/file/path/blah.html?param=http://localhost:8080/pywb/20141226/http://example.com/', '/pywb/', 'http://localhost:8080/pywb/')
+'http://example.com/file/path/blah.html?param=http://example.com/'
+
+>>> do_deprefix('2013id_/http://example.com/file/path/blah.html?param=http://localhost:8080/pywb/if_/https://example.com/filename.html', '/pywb/', 'http://localhost:8080/pywb/')
+'http://example.com/file/path/blah.html?param=https://example.com/filename.html'
+
+>>> do_deprefix('2013id_/http://example.com/file/path/blah.html?param=http://localhost:8080/pywb/https://example.com/filename.html', '/pywb/', 'http://localhost:8080/pywb/')
+'http://example.com/file/path/blah.html?param=https://example.com/filename.html'
+
+>>> do_deprefix('http://example.com/file.html?param=http://localhost:8080/pywb/https%3A//example.com/filename.html&other=value&a=b&param2=http://localhost:8080/pywb/http://test.example.com', '/pywb/', 'http://localhost:8080/pywb/')
+'http://example.com/file.html?param=https://example.com/filename.html&other=value&a=b&param2=http://test.example.com'

 # HttpsUrlRewriter tests
 >>> HttpsUrlRewriter('http://example.com/', None).rewrite('https://example.com/abc')
@ -86,13 +98,22 @@


 from pywb.rewrite.url_rewriter import UrlRewriter, HttpsUrlRewriter
-
+import urllib

 def do_rewrite(rel_url, base_url, prefix, mod=None, full_prefix=None):
    rewriter = UrlRewriter(base_url, prefix, full_prefix=full_prefix)
    return rewriter.rewrite(rel_url, mod)


+def do_deprefix(url, rel_prefix, full_prefix):
+    encoded = urllib.quote_plus(full_prefix)
+    url = url.replace(full_prefix, encoded)
+
+    rewriter = UrlRewriter(url, rel_prefix, full_prefix)
+    url = rewriter.deprefix_url()
+    return urllib.unquote_plus(url)
+
+
 if __name__ == "__main__":
    import doctest
    doctest.testmod()
--- a/pywb/rewrite/url_rewriter.py
+++ b/pywb/rewrite/url_rewriter.py
@ -1,4 +1,3 @@
-import copy
 import urlparse

 from wburl import WbUrl
@ -88,6 +87,9 @@ class UrlRewriter(object):
        cls = get_cookie_rewriter(scope)
        return cls(self)

+    def deprefix_url(self):
+        return self.wburl.deprefix_url(self.full_prefix)
+
    def __repr__(self):
        return "UrlRewriter('{0}', '{1}')".format(self.wburl, self.prefix)

@ -150,3 +152,6 @@ class HttpsUrlRewriter(UrlRewriter):

    def get_cookie_rewriter(self, scope=None):
        return None
+
+    def deprefix_url(self):
+        return self.wburl.url
--- a/pywb/rewrite/wburl.py
+++ b/pywb/rewrite/wburl.py
@ -39,7 +39,7 @@ wayback url format.
 """

 import re
-
+import urllib

 #=================================================================
 class BaseWbUrl(object):
@ -149,6 +149,14 @@ class WbUrl(BaseWbUrl):
        self.timestamp = timestamp
        self.type = self.REPLAY

+
+    def deprefix_url(self, prefix):
+        prefix = urllib.quote_plus(prefix)
+        rex_query = '=' + re.escape(prefix) + '([0-9])*([\w]{2}_)?/?'
+        new_url = re.sub(rex_query, '=', self.url)
+        self.url = new_url
+        return self.url
+
    # Str Representation
    # ====================
    def to_str(self, **overrides):
--- a/pywb/rules.yaml
+++ b/pywb/rules.yaml
@ -11,18 +11,18 @@ rules:
    # facebook rules
    #=================================================================
    - url_prefix: 'com,facebook)/ajax/pagelet/generic.php/'
-        
+
      fuzzy_lookup: 'com,facebook\)/.*[?&]data=(.*?(?:[&]|query_type[^,]+))'
-              
+
    - url_prefix: 'com,facebook)/ajax/ufi/'
-      
+
      fuzzy_lookup:
          - ft_ent_identifier
          - lsd

    - url_prefix: 'com,facebook)/ajax/chat/hovercard/sidebar.php'

-      fuzzy_lookup: 
+      fuzzy_lookup:
          - ids[0]

    - url_prefix: 'com,facebook)/login.php'
@ -82,20 +82,21 @@ rules:
    #=================================================================

    - url_prefix: 'com,google,plus)/_/stream/getactivities'
-    
-      fuzzy_lookup: '(egk[^"]+).*(f.sid=[^&]+)'
- 
+
+    #      fuzzy_lookup: '(egk[^"]+)?.*(f.sid=[^&]+)'
+      fuzzy_lookup: 'f.req=.*\]\]\]\,\"([^"]+).*(f.sid=[^&]+)'
+
    - url_prefix: 'com,google,plus)/_/stream/squarestream'
-    
+
      fuzzy_lookup: '(cai[^"]+).*(f.sid=[^&]+)'
- 
+
    - url_prefix: 'com,google,plus)/_/communities/rt/landing'
-    
+
      fuzzy_lookup: 'com,google,plus\)/_/.*?.*\,(\d{13}\])&.*(f.sid=[^&]+).*'
-    
+

    - url_prefix: 'com,google,plus)/_/'
-     
+
      fuzzy_lookup: 'com,google,plus\)/_/.*?.*(f.sid=[^&]+)'


--- a/pywb/static/wombat.js
+++ b/pywb/static/wombat.js
@ -708,11 +708,11 @@ WB_wombat_init = (function() {
    }

    //============================================
-    function wombat_init(replay_prefix, capture_date, orig_scheme, orig_host, timestamp) {
+    function wombat_init(replay_prefix, capture_date, orig_scheme, orig_host, timestamp, mod) {
        wb_replay_prefix = replay_prefix;
        
        if (wb_replay_prefix) {
-            wb_replay_date_prefix = replay_prefix + capture_date + "/";
+            wb_replay_date_prefix = replay_prefix + capture_date + mod + "/";
            
            if (capture_date.length > 0) {
                wb_capture_date_part = "/" + capture_date + "/";
--- a/pywb/ui/head_insert.html
+++ b/pywb/ui/head_insert.html
@ -7,7 +7,8 @@
                 "{{ cdx['timestamp'] if include_ts else ''}}",
                 "{{ urlsplit.scheme }}",
                 "{{ urlsplit.netloc }}",
-                 "{{ cdx.timestamp | format_ts('%s') }}");
+                 "{{ cdx.timestamp | format_ts('%s') }}",
+                 "{{ wbrequest.wb_url.mod }}");
 </script>
 {% endif %}
 <script>
--- a/pywb/utils/statusandheaders.py
+++ b/pywb/utils/statusandheaders.py
@ -169,7 +169,8 @@ class StatusAndHeadersParser(object):

            # append continuation lines, if any
            while next_line and next_line.startswith((' ', '\t')):
-                value += next_line
+                if value is not None:
+                    value += next_line
                next_line, total_read = _strip_count(stream.readline(),
                                                     total_read)

--- a/pywb/utils/test/test_statusandheaders.py
+++ b/pywb/utils/test/test_statusandheaders.py
@ -32,6 +32,10 @@ False
 # empty
 >>> st2 = StatusAndHeadersParser(['HTTP/1.0']).parse(BytesIO(status_headers_2)); x = st2.validate_statusline('204 No Content'); st2
 StatusAndHeaders(protocol = '', statusline = '204 No Content', headers = [])
+
+
+>>> StatusAndHeadersParser(['HTTP/1.0']).parse(BytesIO(status_headers_3))
+StatusAndHeaders(protocol = 'HTTP/1.0', statusline = '204 Empty', headers = [('Content-Type', 'Value'), ('Content-Length', '0')])
 """


@ -54,6 +58,14 @@ status_headers_2 = """

 """

+status_headers_3 = "\
+HTTP/1.0 204 Empty\r\n\
+Content-Type: Value\r\n\
+%Invalid%\r\n\
+\tMultiline\r\n\
+Content-Length: 0\r\n\
+\r\n"
+

 if __name__ == "__main__":
    import doctest
--- a/pywb/warc/archiveiterator.py
+++ b/pywb/warc/archiveiterator.py
@ -32,12 +32,11 @@ class ArchiveIterator(object):

        self.member_info = None

-    def iter_records(self):
+    def iter_records(self, block_size=16384):
        """ iterate over each record
        """

        decomp_type = 'gzip'
-        block_size = 16384

        self.reader = DecompressingBufferedReader(self.fh,
                                                  block_size=block_size)
@ -168,6 +167,8 @@ class ArchiveIndexEntry(object):
        self.status = status_headers.get_statuscode()
        if not self.status:
            self.status = '-'
+        if self.status == '204' and 'Error' in status_headers.statusline:
+            self.status = '-'

    def set_rec_info(self, offset, length, digest):
        self.offset = str(offset)
@ -202,8 +203,9 @@ class ArchiveIndexEntry(object):
 def create_record_iter(arcv_iter, options):
    append_post = options.get('append_post')
    include_all = options.get('include_all')
+    block_size = options.get('block_size', 16384)

-    for record in arcv_iter.iter_records():
+    for record in arcv_iter.iter_records(block_size):
        entry = None

        if not include_all and (record.status_headers.get_statuscode() == '-'):
@ -314,11 +316,11 @@ def parse_warc_record(record):
                           get_header('Content-Type'),
                           def_mime)

-    # status
-    if record.rec_type in ('request', 'revisit'):
-        entry.status = '-'
-    else:
+    # status -- only for response records (by convention):
+    if record.rec_type == 'response':
        entry.extract_status(record.status_headers)
+    else:
+        entry.status = '-'

    # digest
    entry.digest = record.rec_headers.get_header('WARC-Payload-Digest')
--- a/pywb/warc/recordloader.py
+++ b/pywb/warc/recordloader.py
@ -36,7 +36,7 @@ class ArchiveLoadFailed(WbException):
 #=================================================================
 class ArcWarcRecordLoader:
    # Standard ARC v1.0 headers
-    # TODO: support ARV v2.0 also?
+    # TODO: support ARC v2.0 also?
    ARC_HEADERS = ["uri", "ip-address", "archive-date",
                   "content-type", "length"]

@ -128,9 +128,14 @@ class ArcWarcRecordLoader:
        # limit stream to the length for all valid records
        stream = LimitReader.wrap_stream(stream, length)

-        # if empty record (error or otherwise) set status to -
+        # if empty record (error or otherwise) set status to 204
        if length == 0:
-            status_headers = StatusAndHeaders('- None', [])
+            if is_err:
+                msg = '204 Possible Error'
+            else:
+                msg = '204 No Content'
+
+            status_headers = StatusAndHeaders(msg, [])

        # response record or non-empty revisit: parse HTTP status and headers!
        elif (rec_type in ('response', 'revisit') and
@ -144,8 +149,10 @@ class ArcWarcRecordLoader:

        # everything else: create a no-status entry, set content-type
        else:
-            content_type_header = [('Content-Type', content_type)]
-            status_headers = StatusAndHeaders('- OK', content_type_header)
+            content_type_header = [('Content-Type', content_type),
+                                   ('Content-Length', length)]
+
+            status_headers = StatusAndHeaders('200 OK', content_type_header)

        return ArcWarcRecord(the_format, rec_type,
                             rec_headers, stream, status_headers,
--- a/pywb/warc/test/test_indexing.py
+++ b/pywb/warc/test/test_indexing.py
@ -43,12 +43,16 @@ com,example)/ 20140216050221 http://example.com/ text/html 200 B2LTWWPUOYAH7UIPQ
 CDX N b a m s k r M S V g
 com,example)/ 20140216050221 http://example.com/ text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1656 151 example.arc

-# wget warc, just responses
+# wget warc, includes metadata by default
 >>> print_cdx_index('example-wget-1-14.warc.gz')
 CDX N b a m s k r M S V g
 com,example)/ 20140216012908 http://example.com/ text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1151 792 example-wget-1-14.warc.gz
+metadata)/gnu.org/software/wget/warc/manifest.txt 20140216012908 metadata://gnu.org/software/wget/warc/MANIFEST.txt text/plain - SWUF4CK2XMZSOKSA7SDT7M7NUGWH2TRE - - 315 1943 example-wget-1-14.warc.gz
+metadata)/gnu.org/software/wget/warc/wget_arguments.txt 20140216012908 metadata://gnu.org/software/wget/warc/wget_arguments.txt text/plain - UCXDCGORD6K4RJT5NUQGKE2PKEG4ZZD6 - - 340 2258 example-wget-1-14.warc.gz
+metadata)/gnu.org/software/wget/warc/wget.log 20140216012908 metadata://gnu.org/software/wget/warc/wget.log text/plain - 2ULE2LD5UOWDXGACCT624TU5BVKACRQ4 - - 599 2598 example-wget-1-14.warc.gz

-# wget warc include all w/ metadata
+
+# wget warc, includes metadata and request
 >>> print_cdx_index('example-wget-1-14.warc.gz', include_all=True)
 CDX N b a m s k r M S V g
 com,example)/ 20140216012908 http://example.com/ - - - - - 394 398 example-wget-1-14.warc.gz
@ -110,32 +114,32 @@ org,httpbin)/post?data=^&foo=bar 20140610001255 http://httpbin.org/post?foo=bar
 >>> cli_lines(['--sort', '-',  TEST_WARC_DIR])
 com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 591 355 example-url-agnostic-revisit.warc.gz
 org,iana,example)/ 20130702195402 http://example.iana.org/ text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1001 353 example-url-agnostic-orig.warc.gz
-200
+Total: 206

 # test sort, multiple inputs, all records + post query
 >>> cli_lines(['--sort', '-a', '-p', '-9', TEST_WARC_DIR])
 com,example)/ 20130729195151 http://test@example.com/ warc/revisit - B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - 355 example-url-agnostic-revisit.warc.gz
 org,iana,example)/ 20130702195402 http://example.iana.org/ text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - 353 example-url-agnostic-orig.warc.gz
-398
+Total: 398

 # test writing to stdout
 >>> cli_lines(['-', TEST_WARC_DIR + 'example.warc.gz'])
 com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1043 333 example.warc.gz
 org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 example.warc.gz
-4
+Total: 4

 # test writing to stdout ('-' omitted)
 >>> cli_lines([TEST_WARC_DIR + 'example.warc.gz'])
 com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1043 333 example.warc.gz
 org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 example.warc.gz
-4
+Total: 4

 # test writing to temp dir, also use unicode filename
 >>> cli_lines_with_dir(unicode(TEST_WARC_DIR + 'example.warc.gz'))
 example.cdx
 com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1043 333 example.warc.gz
 org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 example.warc.gz
-4
+Total: 4
 """

 from pywb import get_test_dir
@ -191,9 +195,9 @@ def cli_lines(cmds):
    lines = buff.getvalue().rstrip().split('\n')

    # print first, last, num lines
-    print (lines[1])
-    print (lines[-1])
-    print len(lines)
+    print(lines[1])
+    print(lines[-1])
+    print('Total: ' + str(len(lines)))

 def cli_lines_with_dir(input_):
    try:
@ -224,6 +228,6 @@ def cli_lines_with_dir(input_):
    # print first, last, num lines
    print (lines[1])
    print (lines[-1])
-    print len(lines)
+    print('Total: ' + str(len(lines)))


--- a/pywb/webapp/handlers.py
+++ b/pywb/webapp/handlers.py
@ -74,8 +74,8 @@ class SearchPageWbUrlHandler(WbUrlHandler):

        return self.handle_request(wbrequest)

-    def get_top_frame_params(self, wbrequest):
-        embed_url = wbrequest.wb_url.to_str(mod='')
+    def get_top_frame_params(self, wbrequest, mod=''):
+        embed_url = wbrequest.wb_url.to_str(mod=mod)

        if wbrequest.wb_url.timestamp:
            timestamp = wbrequest.wb_url.timestamp
--- a/setup.py
+++ b/setup.py
@ -34,7 +34,7 @@ class PyTest(TestCommand):

 setup(
    name='pywb',
-    version='0.6.3',
+    version='0.6.4',
    url='https://github.com/ikreymer/pywb',
    author='Ilya Kreymer',
    author_email='ikreymer@gmail.com',
--- a/tests/test_live_rewriter.py
+++ b/tests/test_live_rewriter.py
@ -14,8 +14,8 @@ class TestLiveRewriter:
        assert resp.status_int == 200

    def test_live_rewrite_redirect_2(self):
-        resp = self.testapp.get('/rewrite/http://facebook.com/')
-        assert resp.status_int == 301
+        resp = self.testapp.get('/rewrite/http://httpbin.org/redirect-to?url=http://example.com/')
+        assert resp.status_int == 302

    def test_live_rewrite_post(self):
        resp = self.testapp.post('/rewrite/httpbin.org/post', {'foo': 'bar', 'test': 'abc'})