From ddcde369825c3cfb66164905007191bf09fef9e3 Mon Sep 17 00:00:00 2001 From: Vangelis Banos Date: Thu, 2 May 2019 07:29:27 +0000 Subject: [PATCH] Increase urllib parse cache size In Python 2 and 3, the URL parsing module (urlparse / urllib.parse) caches URL parsing results in memory to avoid repeating the work for the same URL. The problem is that the default in-memory cache size is just 20. https://github.com/python/cpython/blob/3.7/Lib/urllib/parse.py#L80 Since we do a lot of URL parsing, it makes sense to increase the cache size. --- warcprox/mitmproxy.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/warcprox/mitmproxy.py b/warcprox/mitmproxy.py index 705589e..51b80e9 100644 --- a/warcprox/mitmproxy.py +++ b/warcprox/mitmproxy.py @@ -35,6 +35,13 @@ try: import urllib.parse as urllib_parse except ImportError: import urlparse as urllib_parse +# In python2/3, urllib parse caches in memory URL parsing results to avoid +# repeating the process for the same URL. The problem is that the default +# in memory cache size is just 20. +# https://github.com/python/cpython/blob/3.7/Lib/urllib/parse.py#L80 +# since we do a lot of URL parsing, it makes sense to increase cache size. +urllib_parse.MAX_CACHE_SIZE = 2000 + try: import http.client as http_client # In python3 http.client.parse_headers() enforces http_client._MAXLINE