From 4e7f95081f3a2894f47d2b5d7801cea0a75702a7 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Tue, 10 Feb 2015 15:05:15 -0800 Subject: [PATCH] url_rewriter: catch UnicodeDecodeError when encoding the final url to utf-8; the url may not be properly encoded, in which case it is left unchanged and treated as bytes --- pywb/rewrite/url_rewriter.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pywb/rewrite/url_rewriter.py b/pywb/rewrite/url_rewriter.py index 77f0acc4..3af62819 100644 --- a/pywb/rewrite/url_rewriter.py +++ b/pywb/rewrite/url_rewriter.py @@ -75,7 +75,10 @@ class UrlRewriter(object): url=new_url, iri=not ascii_urls_only) if not ascii_urls_only: - final_url = final_url.encode('utf-8') + try: + final_url = final_url.encode('utf-8') + except UnicodeDecodeError: + pass return final_url