From 71e8ada57d3e1ef884424ae49067bf2b68094f10 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Mon, 4 Aug 2014 20:45:02 -0700 Subject: [PATCH 1/3] rewrite: add test for banner-only mode, rewriting w/o a head using local 'sample_no_head' file. query.html: use client side rewriting for calendar dates rewrite: remove unused decode stuff --- pywb/rewrite/rewrite_content.py | 63 ++----------------- pywb/rewrite/rewrite_live.py | 9 ++- pywb/rewrite/test/test_html_rewriter.py | 1 + pywb/rewrite/test/test_rewrite_live.py | 53 ++++++++++++++++ pywb/rewrite/test/test_url_rewriter.py | 3 + pywb/rewrite/url_rewriter.py | 3 +- pywb/ui/query.html | 30 ++++++++- pywb/utils/loaders.py | 2 +- .../text_content/sample_no_head.html | 8 +++ 9 files changed, 108 insertions(+), 64 deletions(-) create mode 100644 sample_archive/text_content/sample_no_head.html diff --git a/pywb/rewrite/rewrite_content.py b/pywb/rewrite/rewrite_content.py index 3a635d4e..3cbcd362 100644 --- a/pywb/rewrite/rewrite_content.py +++ b/pywb/rewrite/rewrite_content.py @@ -105,16 +105,6 @@ class RewriteContent: else: stream = DecompressingBufferedReader(stream) - #if self.decode_stream: - # if rewritten_headers.charset: - # encoding = rewritten_headers.charset - # else: - # (encoding, first_buff) = self._detect_charset(stream) - - # if encoding not set or chardet thinks its ascii, use utf-8 - # if not encoding or encoding == 'ascii': - # encoding = 'utf-8' - rule = self.ruleset.get_first_match(urlkey) rewriter_class = rule.rewriters[text_type] @@ -145,8 +135,11 @@ class RewriteContent: rewriter = rewriter_class(urlrewriter) # Create rewriting generator - gen = self._rewriting_stream_gen(rewriter, encoding, - stream, first_buff) + gen = self.stream_to_gen(stream, + rewrite_func=rewriter.rewrite, + final_read_func=rewriter.close, + first_buff=first_buff) + return (status_headers, gen, True) @@ -175,32 +168,6 @@ class RewriteContent: for buff in self.stream_to_gen(stream): yield buff - - # Create rewrite stream, may even be chunked by front-end - def _rewriting_stream_gen(self, rewriter, encoding, - stream, first_buff=None): - - def do_rewrite(buff): - if encoding: - buff = self._decode_buff(buff, stream, encoding) - buff = rewriter.rewrite(buff) - if encoding: - buff = buff.encode(encoding) - - return buff - - def do_finish(): - result = rewriter.close() - if encoding: - result = result.encode(encoding) - - return result - - return self.stream_to_gen(stream, - rewrite_func=do_rewrite, - final_read_func=do_finish, - first_buff=first_buff) - @staticmethod def _decode_buff(buff, stream, encoding): # pragma: no coverage try: @@ -219,26 +186,6 @@ class RewriteContent: return buff - def _detect_charset(self, stream): # pragma: no coverage - full_buff = stream.read(8192) - io_buff = BytesIO(full_buff) - - detector = UniversalDetector() - - try: - buff = io_buff.read(256) - while buff: - detector.feed(buff) - if detector.done: - break - - buff = io_buff.read(256) - finally: - detector.close() - - print "chardet result: ", str(detector.result) - return (detector.result['encoding'], full_buff) - # Create a generator reading from a stream, # with optional rewriting and final read call @staticmethod diff --git a/pywb/rewrite/rewrite_live.py b/pywb/rewrite/rewrite_live.py index cbd3f106..5d77ff52 100644 --- a/pywb/rewrite/rewrite_live.py +++ b/pywb/rewrite/rewrite_live.py @@ -9,7 +9,7 @@ import logging from urlparse import urlsplit -from pywb.utils.loaders import is_http, LimitReader +from pywb.utils.loaders import is_http, LimitReader, BlockLoader from pywb.utils.timeutils import datetime_to_timestamp from pywb.utils.statusandheaders import StatusAndHeaders from pywb.utils.canonicalize import canonicalize @@ -30,7 +30,8 @@ class LiveRewriter(object): logging.debug('Live Rewrite Direct (no proxy)') def fetch_local_file(self, uri): - fh = open(uri) + #fh = open(uri) + fh = BlockLoader().load_file_or_resource(uri) content_type, _ = mimetypes.guess_type(uri) @@ -135,12 +136,14 @@ class LiveRewriter(object): ts_err = url.split('///') - if len(ts_err) > 1: + if len(ts_err) > 1 and ts_err[0] != 'file:': url = 'http://' + ts_err[1] if url.startswith('//'): url = 'http:' + url + print 'URL ', url + if is_http(url): (status_headers, stream) = self.fetch_http(url, env, req_headers, follow_redirects, diff --git a/pywb/rewrite/test/test_html_rewriter.py b/pywb/rewrite/test/test_html_rewriter.py index ae9b24e2..9ea8edc0 100644 --- a/pywb/rewrite/test/test_html_rewriter.py +++ b/pywb/rewrite/test/test_html_rewriter.py @@ -99,6 +99,7 @@ ur""" >>> parse('
SomeTest
', head_insert = '')
SomeTest
+# doctype >>> parse('') diff --git a/pywb/rewrite/test/test_rewrite_live.py b/pywb/rewrite/test/test_rewrite_live.py index 24f76da1..af25762b 100644 --- a/pywb/rewrite/test/test_rewrite_live.py +++ b/pywb/rewrite/test/test_rewrite_live.py @@ -1,5 +1,6 @@ from pywb.rewrite.rewrite_live import LiveRewriter from pywb.rewrite.url_rewriter import UrlRewriter +from pywb.rewrite.wburl import WbUrl from pywb import get_test_dir @@ -33,6 +34,58 @@ def test_local_1(): assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff +def test_local_no_head(): + wb_url = WbUrl('file://' + get_test_dir() + 'text_content/sample_no_head.html') + status_headers, buff = get_rewritten(wb_url, + urlrewriter, + head_insert_func, + 'com,example,test)/') + + # wombat insert added + assert '' in buff + + # location rewritten + assert 'window.WB_wombat_location = "/other.html"' in buff + + # link rewritten + assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff + +def test_local_no_head_banner_only(): + wb_url = WbUrl('file://' + get_test_dir() + 'text_content/sample_no_head.html') + wb_url.mod = 'bn_' + + status_headers, buff = get_rewritten(wb_url, + urlrewriter, + head_insert_func, + 'com,example,test)/') + + # wombat insert added + assert '' in buff + + # location NOT rewritten + assert 'window.location = "/other.html"' in buff + + # link NOT rewritten + assert '"another.html"' in buff + +def test_local_banner_only(): + wb_url = WbUrl('file://' + get_test_dir() + 'text_content/sample.html') + wb_url.mod = 'bn_' + + status_headers, buff = get_rewritten(wb_url, + urlrewriter, + head_insert_func, + 'com,example,test)/') + + # wombat insert added + assert '' in buff + + # location NOT rewritten + assert 'window.location = "/other.html"' in buff + + # link NOT rewritten + assert '"another.html"' in buff + def test_local_2_no_js_location_rewrite(): status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html', urlrewriter, diff --git a/pywb/rewrite/test/test_url_rewriter.py b/pywb/rewrite/test/test_url_rewriter.py index a4173d3a..345c4faf 100644 --- a/pywb/rewrite/test/test_url_rewriter.py +++ b/pywb/rewrite/test/test_url_rewriter.py @@ -65,6 +65,9 @@ >>> do_rewrite('mailto:example@example.com', '20131010/http://example.com/path/page.html', 'https://web.archive.org/web/') 'mailto:example@example.com' +>>> do_rewrite('file:///some/path/', '20131010/http://example.com/path/page.html', 'https://web.archive.org/web/') +'file:///some/path/' + >>> UrlRewriter('19960708im_/http://domain.example.com/path.txt', '/abc/').get_abs_url() '/abc/19960708im_/' diff --git a/pywb/rewrite/url_rewriter.py b/pywb/rewrite/url_rewriter.py index d5593a22..d9b42c1b 100644 --- a/pywb/rewrite/url_rewriter.py +++ b/pywb/rewrite/url_rewriter.py @@ -13,7 +13,8 @@ class UrlRewriter(object): instance and an optional full path prefix """ - NO_REWRITE_URI_PREFIX = ['#', 'javascript:', 'data:', 'mailto:', 'about:'] + NO_REWRITE_URI_PREFIX = ['#', 'javascript:', 'data:', + 'mailto:', 'about:', 'file:'] PROTOCOLS = ['http:', 'https:', 'ftp:', 'mms:', 'rtsp:', 'wais:'] diff --git a/pywb/ui/query.html b/pywb/ui/query.html index c78e1b49..2d1f5c86 100644 --- a/pywb/ui/query.html +++ b/pywb/ui/query.html @@ -1,3 +1,28 @@ + + + +

pywb Sample Calendar Results

{{ cdx_lines | length }} captures of {{ url }} @@ -10,7 +35,9 @@ {% for cdx in cdx_lines %} - {{ cdx['timestamp'] | format_ts}} + + + {{ cdx['statuscode'] }} {{ cdx['original'] }} {{ cdx['filename'] }} @@ -21,3 +48,4 @@ * Unique captures are bold. Other captures are duplicates of a previous capture.

+ diff --git a/pywb/utils/loaders.py b/pywb/utils/loaders.py index 6b383493..107379a2 100644 --- a/pywb/utils/loaders.py +++ b/pywb/utils/loaders.py @@ -96,7 +96,7 @@ class BlockLoader(object): else: return self.load_file_or_resource(url, offset, length) - def load_file_or_resource(self, url, offset, length): + def load_file_or_resource(self, url, offset=0, length=-1): """ Load a file-like reader from the local file system """ diff --git a/sample_archive/text_content/sample_no_head.html b/sample_archive/text_content/sample_no_head.html new file mode 100644 index 00000000..ed4bc4f3 --- /dev/null +++ b/sample_archive/text_content/sample_no_head.html @@ -0,0 +1,8 @@ + +Test Content +Some Link From 2792a92ff6484c5cefda495c6e9426d2ac879af9 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Mon, 4 Aug 2014 21:11:46 -0700 Subject: [PATCH 2/3] rewrite: remove extra wb_url param from rewrite_content(), the wb_url will come from the urlrewriter, to get the 'mod' --- pywb/rewrite/rewrite_content.py | 4 +++- pywb/rewrite/rewrite_live.py | 11 ++--------- pywb/rewrite/test/test_rewrite_live.py | 21 +++++++-------------- pywb/rewrite/test/test_url_rewriter.py | 4 ++-- pywb/rewrite/url_rewriter.py | 5 +---- pywb/webapp/live_rewrite_handler.py | 8 ++++++-- pywb/webapp/replay_views.py | 3 +-- 7 files changed, 22 insertions(+), 34 deletions(-) diff --git a/pywb/rewrite/rewrite_content.py b/pywb/rewrite/rewrite_content.py index 3cbcd362..207d879e 100644 --- a/pywb/rewrite/rewrite_content.py +++ b/pywb/rewrite/rewrite_content.py @@ -58,10 +58,12 @@ class RewriteContent: return (rewritten_headers, stream) - def rewrite_content(self, wb_url, urlrewriter, headers, stream, + def rewrite_content(self, urlrewriter, headers, stream, head_insert_func=None, urlkey='', cdx=None): + wb_url = urlrewriter.wburl + if (wb_url.is_identity or (not head_insert_func and wb_url.is_banner_only)): status_headers, stream = self.sanitize_content(headers, stream) diff --git a/pywb/rewrite/rewrite_live.py b/pywb/rewrite/rewrite_live.py index 5d77ff52..41313738 100644 --- a/pywb/rewrite/rewrite_live.py +++ b/pywb/rewrite/rewrite_live.py @@ -119,7 +119,7 @@ class LiveRewriter(object): return (status_headers, stream) - def fetch_request(self, wb_url, urlrewriter, + def fetch_request(self, url, urlrewriter, head_insert_func=None, urlkey=None, env=None, @@ -128,12 +128,6 @@ class LiveRewriter(object): follow_redirects=False, proxies=None): - if isinstance(wb_url, str): - url = wb_url - wb_url = WbUrl(url) - else: - url = wb_url.url - ts_err = url.split('///') if len(ts_err) > 1 and ts_err[0] != 'file:': @@ -167,8 +161,7 @@ class LiveRewriter(object): } result = (self.rewriter. - rewrite_content(wb_url, - urlrewriter, + rewrite_content(urlrewriter, status_headers, stream, head_insert_func=head_insert_func, diff --git a/pywb/rewrite/test/test_rewrite_live.py b/pywb/rewrite/test/test_rewrite_live.py index af25762b..fcb51ea3 100644 --- a/pywb/rewrite/test/test_rewrite_live.py +++ b/pywb/rewrite/test/test_rewrite_live.py @@ -10,6 +10,7 @@ from io import BytesIO # As such, the content may change and the test may break urlrewriter = UrlRewriter('20131226101010/http://example.com/some/path/index.html', '/pywb/') +bn_urlrewriter = UrlRewriter('20131226101010bn_/http://example.com/some/path/index.html', '/pywb/') def head_insert_func(rule, cdx): if rule.js_rewrite_location == True: @@ -35,8 +36,7 @@ def test_local_1(): def test_local_no_head(): - wb_url = WbUrl('file://' + get_test_dir() + 'text_content/sample_no_head.html') - status_headers, buff = get_rewritten(wb_url, + status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample_no_head.html', urlrewriter, head_insert_func, 'com,example,test)/') @@ -51,11 +51,8 @@ def test_local_no_head(): assert '"/pywb/20131226101010/http://example.com/some/path/another.html"' in buff def test_local_no_head_banner_only(): - wb_url = WbUrl('file://' + get_test_dir() + 'text_content/sample_no_head.html') - wb_url.mod = 'bn_' - - status_headers, buff = get_rewritten(wb_url, - urlrewriter, + status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample_no_head.html', + bn_urlrewriter, head_insert_func, 'com,example,test)/') @@ -69,11 +66,8 @@ def test_local_no_head_banner_only(): assert '"another.html"' in buff def test_local_banner_only(): - wb_url = WbUrl('file://' + get_test_dir() + 'text_content/sample.html') - wb_url.mod = 'bn_' - - status_headers, buff = get_rewritten(wb_url, - urlrewriter, + status_headers, buff = get_rewritten(get_test_dir() + 'text_content/sample.html', + bn_urlrewriter, head_insert_func, 'com,example,test)/') @@ -129,8 +123,7 @@ def test_example_4_rewrite_err(): assert status_headers.get_statuscode() == '200' def test_example_domain_specific_3(): - urlrewriter2 = UrlRewriter('20131226101010/http://example.com/some/path/index.html', '/pywb/') - status_headers, buff = get_rewritten('http://facebook.com/digitalpreservation', urlrewriter2, follow_redirects=True) + status_headers, buff = get_rewritten('http://facebook.com/digitalpreservation', urlrewriter, follow_redirects=True) # comment out bootloader assert '/* Bootloader.configurePage' in buff diff --git a/pywb/rewrite/test/test_url_rewriter.py b/pywb/rewrite/test/test_url_rewriter.py index 345c4faf..73340c95 100644 --- a/pywb/rewrite/test/test_url_rewriter.py +++ b/pywb/rewrite/test/test_url_rewriter.py @@ -76,10 +76,10 @@ # HttpsUrlRewriter tests ->>> HttpsUrlRewriter(None, None).rewrite('https://example.com/abc') +>>> HttpsUrlRewriter('http://example.com/', None).rewrite('https://example.com/abc') 'http://example.com/abc' ->>> HttpsUrlRewriter(None, None).rewrite('http://example.com/abc') +>>> HttpsUrlRewriter('http://example.com/', None).rewrite('http://example.com/abc') 'http://example.com/abc' """ diff --git a/pywb/rewrite/url_rewriter.py b/pywb/rewrite/url_rewriter.py index d9b42c1b..5b2f8e7b 100644 --- a/pywb/rewrite/url_rewriter.py +++ b/pywb/rewrite/url_rewriter.py @@ -126,7 +126,7 @@ class UrlRewriter(object): #================================================================= -class HttpsUrlRewriter(object): +class HttpsUrlRewriter(UrlRewriter): """ A url rewriter which urls that start with https:// to http:// Other urls/input is unchanged. @@ -135,9 +135,6 @@ class HttpsUrlRewriter(object): HTTP = 'http://' HTTPS = 'https://' - def __init__(self, wburl, prefix, full_prefix=None): - pass - def rewrite(self, url, mod=None): if url.startswith(self.HTTPS): result = self.HTTP + url[len(self.HTTPS):] diff --git a/pywb/webapp/live_rewrite_handler.py b/pywb/webapp/live_rewrite_handler.py index a1b602d4..cb279beb 100644 --- a/pywb/webapp/live_rewrite_handler.py +++ b/pywb/webapp/live_rewrite_handler.py @@ -38,6 +38,10 @@ class RewriteHandler(SearchPageWbUrlHandler): return self.render_content(wbrequest) except Exception as exc: + import traceback + err_details = traceback.format_exc(exc) + print err_details + url = wbrequest.wb_url.url msg = 'Could not load the url from the live web: ' + url raise LiveResourceException(msg=msg, url=url) @@ -53,8 +57,8 @@ class RewriteHandler(SearchPageWbUrlHandler): if ref_wburl_str: wbrequest.env['REL_REFERER'] = WbUrl(ref_wburl_str).url - wb_url = wbrequest.wb_url - result = self.rewriter.fetch_request(wb_url, wbrequest.urlrewriter, + result = self.rewriter.fetch_request(wbrequest.wb_url.url, + wbrequest.urlrewriter, head_insert_func=head_insert_func, req_headers=req_headers, env=wbrequest.env) diff --git a/pywb/webapp/replay_views.py b/pywb/webapp/replay_views.py index 5002a18d..9f32ad5d 100644 --- a/pywb/webapp/replay_views.py +++ b/pywb/webapp/replay_views.py @@ -130,8 +130,7 @@ class ReplayView(object): create_insert_func(wbrequest)) result = (self.content_rewriter. - rewrite_content(wbrequest.wb_url, - urlrewriter, + rewrite_content(urlrewriter, headers=status_headers, stream=stream, head_insert_func=head_insert_func, From e1e8f679b26ef9e215964a451426fb45d9ab7ebe Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Mon, 4 Aug 2014 21:59:46 -0700 Subject: [PATCH 3/3] rewrite/testing: add additional test for live rewrite post, invalid post htmlrewrite: annotate untestable sections (unimplemented, 2.6 only exceptions) --- pywb/framework/wbrequestresponse.py | 7 ------- pywb/rewrite/html_rewriter.py | 27 ++++++++++++--------------- pywb/rewrite/rewrite_live.py | 4 ++-- pywb/utils/test/test_loaders.py | 8 ++++++-- tests/test_integration.py | 6 +++++- tests/test_live_rewriter.py | 7 +++++++ 6 files changed, 32 insertions(+), 27 deletions(-) diff --git a/pywb/framework/wbrequestresponse.py b/pywb/framework/wbrequestresponse.py index da456474..a6f1908b 100644 --- a/pywb/framework/wbrequestresponse.py +++ b/pywb/framework/wbrequestresponse.py @@ -179,13 +179,6 @@ class WbResponse(object): return WbResponse(StatusAndHeaders(status, redir_headers)) def __call__(self, env, start_response): - - # PERF - perfstats = env.get('X_PERF') - if perfstats: - self.status_headers.headers.append(('X-Archive-Perf-Stats', - str(perfstats))) - start_response(self.status_headers.statusline, self.status_headers.headers) diff --git a/pywb/rewrite/html_rewriter.py b/pywb/rewrite/html_rewriter.py index 08b1e997..5a316016 100644 --- a/pywb/rewrite/html_rewriter.py +++ b/pywb/rewrite/html_rewriter.py @@ -101,12 +101,9 @@ class HTMLRewriterMixin(object): if not m: return meta_refresh - try: - meta_refresh = (meta_refresh[:m.start(1)] + - self._rewrite_url(m.group(1)) + - meta_refresh[m.end(1):]) - except Exception: - pass + meta_refresh = (meta_refresh[:m.start(1)] + + self._rewrite_url(m.group(1)) + + meta_refresh[m.end(1):]) return meta_refresh # =========================== @@ -136,7 +133,7 @@ class HTMLRewriterMixin(object): return value.lower() == attr_value.lower() return False - def _rewrite_tag_attrs(self, tag, tag_attrs, escape=False): + def _rewrite_tag_attrs(self, tag, tag_attrs): # special case: script or style parse context if ((tag in self.STATE_TAGS) and not self._wb_parse_context): self._wb_parse_context = tag @@ -197,7 +194,7 @@ class HTMLRewriterMixin(object): rebase_rewriter(attr_value)) # write the attr! - self._write_attr(attr_name, attr_value, escape=escape) + self._write_attr(attr_name, attr_value) return True @@ -217,12 +214,10 @@ class HTMLRewriterMixin(object): return True - def _write_attr(self, name, value, escape=False): + def _write_attr(self, name, value): # parser doesn't differentiate between 'attr=""' and just 'attr' # 'attr=""' is more common, so use that form if value: - if escape: - value = cgi.escape(value, quote=True) self.out.write(' ' + name + '="' + value + '"') else: self.out.write(' ' + name + '=""') @@ -259,8 +254,8 @@ class HTMLRewriterMixin(object): return result - def _internal_close(self): - pass + def _internal_close(self): # pragma: no cover + raise NotImplementedError('Base method') #================================================================= @@ -272,7 +267,8 @@ class HTMLRewriter(HTMLRewriterMixin, HTMLParser): def feed(self, string): try: HTMLParser.feed(self, string) - except HTMLParseError: + except HTMLParseError: # pragma: no cover + # only raised in 2.6 self.out.write(string) def _internal_close(self): @@ -283,7 +279,8 @@ class HTMLRewriter(HTMLRewriterMixin, HTMLParser): try: HTMLParser.close(self) - except HTMLParseError: + except HTMLParseError: # pragma: no cover + # only raised in 2.6 pass # called to unescape attrs -- do not unescape! diff --git a/pywb/rewrite/rewrite_live.py b/pywb/rewrite/rewrite_live.py index 41313738..be891498 100644 --- a/pywb/rewrite/rewrite_live.py +++ b/pywb/rewrite/rewrite_live.py @@ -130,14 +130,14 @@ class LiveRewriter(object): ts_err = url.split('///') + # fixup for accidental erroneous rewrite which has /// + # (unless file:///) if len(ts_err) > 1 and ts_err[0] != 'file:': url = 'http://' + ts_err[1] if url.startswith('//'): url = 'http:' + url - print 'URL ', url - if is_http(url): (status_headers, stream) = self.fetch_http(url, env, req_headers, follow_redirects, diff --git a/pywb/utils/test/test_loaders.py b/pywb/utils/test/test_loaders.py index b64f2419..322b9169 100644 --- a/pywb/utils/test/test_loaders.py +++ b/pywb/utils/test/test_loaders.py @@ -1,5 +1,5 @@ #================================================================= -""" +r""" # LimitReader Tests >>> LimitReader(BytesIO('abcdefghjiklmnopqrstuvwxyz'), 10).read(26) 'abcdefghji' @@ -32,10 +32,14 @@ True >>> BlockLoader(HMACCookieMaker('test', 'test', 5)).load('http://example.com', 41, 14).read() 'Example Domain' -# fixed cookie +# fixed cookie, range request >>> BlockLoader('some=value').load('http://example.com', 41, 14).read() 'Example Domain' +# range request +>>> BlockLoader().load('http://example.com', 1262).read() +'\n' + # test with extra id, ensure 4 parts of the A-B=C-D form are present >>> len(re.split('[-=]', HMACCookieMaker('test', 'test', 5).make('extra'))) 4 diff --git a/tests/test_integration.py b/tests/test_integration.py index 92c06323..8c9ee900 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -301,6 +301,11 @@ class TestWb: assert resp.status_int == 200 assert '"data": "^"' in resp.body + def test_post_invalid(self): + # not json + resp = self.testapp.post_json('/pywb/20140610001255mp_/http://httpbin.org/post?foo=bar', {'data': '^'}, status=404) + assert resp.status_int == 404 + def test_post_redirect(self): # post handled without redirect (since 307 not allowed) resp = self.testapp.post('/post', {'foo': 'bar', 'test': 'abc'}, headers=[('Referer', 'http://localhost:8080/pywb/2014mp_/http://httpbin.org/post')]) @@ -308,7 +313,6 @@ class TestWb: assert '"foo": "bar"' in resp.body assert '"test": "abc"' in resp.body - def test_excluded_content(self): resp = self.testapp.get('/pywb/mp_/http://www.iana.org/_img/bookmark_icon.ico', status = 403) assert resp.status_int == 403 diff --git a/tests/test_live_rewriter.py b/tests/test_live_rewriter.py index 5ce19414..331eaa69 100644 --- a/tests/test_live_rewriter.py +++ b/tests/test_live_rewriter.py @@ -17,6 +17,13 @@ class TestLiveRewriter: resp = self.testapp.get('/rewrite/mp_/http://facebook.com/') assert resp.status_int == 301 + def test_live_rewrite_post(self): + resp = self.testapp.post('/rewrite/mp_/httpbin.org/post', {'foo': 'bar', 'test': 'abc'}) + assert resp.status_int == 200 + assert '"foo": "bar"' in resp.body + assert '"test": "abc"' in resp.body + assert resp.status_int == 200 + def test_live_rewrite_frame(self): resp = self.testapp.get('/rewrite/http://example.com/') assert resp.status_int == 200