diff --git a/pywb/rewrite/test/test_url_rewriter.py b/pywb/rewrite/test/test_url_rewriter.py index 3d324069..a78a5529 100644 --- a/pywb/rewrite/test/test_url_rewriter.py +++ b/pywb/rewrite/test/test_url_rewriter.py @@ -103,12 +103,17 @@ 'http://example.com/file.html?param=https://example.com/filename.html&other=value&a=b&param2=http://test.example.com' # HttpsUrlRewriter tests ->>> HttpsUrlRewriter('http://example.com/', None).rewrite('https://example.com/abc') +>>> httpsrewriter = HttpsUrlRewriter('http://example.com/', None) +>>> httpsrewriter.rewrite('https://example.com/abc') 'http://example.com/abc' ->>> HttpsUrlRewriter('http://example.com/', None).rewrite('http://example.com/abc') +>>> httpsrewriter.rewrite('http://example.com/abc') 'http://example.com/abc' +# rebase is identity +>>> httpsrewriter.rebase_rewriter('https://example.com/') == httpsrewriter +True + """ diff --git a/pywb/utils/bufferedreaders.py b/pywb/utils/bufferedreaders.py index 7e461dee..7ca89780 100644 --- a/pywb/utils/bufferedreaders.py +++ b/pywb/utils/bufferedreaders.py @@ -46,9 +46,6 @@ class BufferedReader(object): self.buff_size = 0 def set_decomp(self, decomp_type): - if self.num_read > 0: - raise Exception('Attempting to change decompression mid-stream') - self._init_decomp(decomp_type) def _init_decomp(self, decomp_type): diff --git a/pywb/utils/loaders.py b/pywb/utils/loaders.py index 34eca14b..affae74f 100644 --- a/pywb/utils/loaders.py +++ b/pywb/utils/loaders.py @@ -49,12 +49,12 @@ def extract_post_query(method, mime, length, stream): not mime.lower().startswith('application/x-www-form-urlencoded'))): return None - if not length or length == '0': - return None - try: length = int(length) - except ValueError: + except (ValueError, TypeError): + return None + + if length <= 0: return None #todo: encoding issues? 
diff --git a/pywb/utils/test/test_loaders.py b/pywb/utils/test/test_loaders.py index 312af81b..e43cdc41 100644 --- a/pywb/utils/test/test_loaders.py +++ b/pywb/utils/test/test_loaders.py @@ -56,6 +56,32 @@ True >>> extract_client_cookie(dict(HTTP_COOKIE='x'), 'x') >>> extract_client_cookie({}, 'y') + + +# extract_post_query tests + +# correct POST data +>>> post_data = 'foo=bar&dir=%2Fbaz' +>>> extract_post_query('POST', 'application/x-www-form-urlencoded', len(post_data), BytesIO(post_data)) +'foo=bar&dir=/baz' + +# unsupported method +>>> extract_post_query('PUT', 'application/x-www-form-urlencoded', len(post_data), BytesIO(post_data)) + +# unsupported type +>>> extract_post_query('POST', 'text/plain', len(post_data), BytesIO(post_data)) + +# invalid length +>>> extract_post_query('POST', 'application/x-www-form-urlencoded', 'abc', BytesIO(post_data)) +>>> extract_post_query('POST', 'application/x-www-form-urlencoded', 0, BytesIO(post_data)) + +# length too short +>>> extract_post_query('POST', 'application/x-www-form-urlencoded', len(post_data) - 4, BytesIO(post_data)) +'foo=bar&dir=%2' + +# length too long +>>> extract_post_query('POST', 'application/x-www-form-urlencoded', len(post_data) + 4, BytesIO(post_data)) +'foo=bar&dir=/baz' """ @@ -64,7 +90,7 @@ import re import os from io import BytesIO from pywb.utils.loaders import BlockLoader, HMACCookieMaker, to_file_url -from pywb.utils.loaders import LimitReader, extract_client_cookie +from pywb.utils.loaders import LimitReader, extract_client_cookie, extract_post_query from pywb import get_test_dir