From 5aefa3ad4dca9333ac00ed1cd3f34169535de75f Mon Sep 17 00:00:00 2001 From: Ilya Date: Sun, 10 Jun 2018 17:47:55 +0000 Subject: [PATCH] changes for auto scalar prototype: - add cache headers for proxy - wsgiprox 1.5.0 - decompress brotli for now --- pywb/rewrite/content_rewriter.py | 6 ++++++ pywb/rewrite/header_rewriter.py | 5 ++++- pywb/rules.yaml | 3 +++ requirements.txt | 2 +- 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/pywb/rewrite/content_rewriter.py b/pywb/rewrite/content_rewriter.py index 295e11e4..9ab9334b 100644 --- a/pywb/rewrite/content_rewriter.py +++ b/pywb/rewrite/content_rewriter.py @@ -205,6 +205,12 @@ class BaseContentRewriter(object): if rwinfo.is_chunked: stream = ChunkedDataReader(rwinfo.record.raw_stream, decomp_type=None) + elif rw_http_headers.get_header('Content-Encoding') == 'br': + rw_http_headers.remove_header('Content-Encoding') + rw_http_headers.remove_header('Content-Length') + print('DECOMP') + stream = rwinfo.content_stream + else: stream = rwinfo.record.raw_stream diff --git a/pywb/rewrite/header_rewriter.py b/pywb/rewrite/header_rewriter.py index c2b1060c..366bc20b 100644 --- a/pywb/rewrite/header_rewriter.py +++ b/pywb/rewrite/header_rewriter.py @@ -94,6 +94,9 @@ class DefaultHeaderRewriter(object): else: new_headers_list.append(new_header) + if not self.rwinfo.is_url_rw(): + self._add_cache_headers(new_headers_list, 100000) + return StatusAndHeaders(self.http_headers.statusline, headers=new_headers_list, protocol=self.http_headers.protocol) @@ -159,7 +162,7 @@ class DefaultHeaderRewriter(object): else: dt = datetime.utcnow() dt = dt + timedelta(seconds=age) - new_headers.append(('Cache-Control', 'max-age=' + str(age))) + new_headers.append(('Cache-Control', 'public; max-age=' + str(age))) new_headers.append(('Expires', datetime_to_http_date(dt))) diff --git a/pywb/rules.yaml b/pywb/rules.yaml index 0c46919d..2246ea44 100644 --- a/pywb/rules.yaml +++ b/pywb/rules.yaml @@ -376,6 +376,9 @@ rules: - 
action_load_comments - filter + - url_prefix: 'com,youtube)/embed/' + + fuzzy_lookup: '()' - url_prefix: 'com,googlevideo,' diff --git a/requirements.txt b/requirements.txt index 4702d5df..f07be18b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,4 +12,4 @@ webencodings gevent==1.2.2 webassets==0.12.1 portalocker -wsgiprox>=1.4.1 +wsgiprox>=1.5.0