From 181c18a1b8792575e180afa1193e1379e494b55a Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Tue, 23 Dec 2014 15:14:03 -0800 Subject: [PATCH] pep8 pass: fix spacing, line length, issues also remove references to obsolete cached_replay, hostnames in pywb_init --- pywb/cdx/cdxdomainspecific.py | 5 ++-- pywb/cdx/cdxobject.py | 2 +- pywb/cdx/cdxops.py | 2 +- pywb/framework/certauth.py | 2 +- pywb/framework/proxy.py | 4 +-- pywb/framework/wbrequestresponse.py | 3 ++- pywb/rewrite/cookie_rewriter.py | 2 ++ pywb/rewrite/header_rewriter.py | 3 ++- pywb/rewrite/html_rewriter.py | 4 +-- pywb/rewrite/regex_rewriters.py | 40 ++++++++++++++--------------- pywb/rewrite/rewrite_content.py | 2 +- pywb/rewrite/rewriterules.py | 3 ++- pywb/rewrite/url_rewriter.py | 8 +++--- pywb/rewrite/wburl.py | 5 ++-- pywb/utils/canonicalize.py | 2 +- pywb/warc/archiveiterator.py | 8 +++--- pywb/webapp/cdx_api_handler.py | 2 +- pywb/webapp/handlers.py | 9 +++---- pywb/webapp/live_rewrite_handler.py | 13 +++++++--- pywb/webapp/pywb_init.py | 23 ++++------------- pywb/webapp/query_handler.py | 2 +- pywb/webapp/rangecache.py | 4 +-- pywb/webapp/replay_views.py | 6 ++--- 23 files changed, 75 insertions(+), 79 deletions(-) diff --git a/pywb/cdx/cdxdomainspecific.py b/pywb/cdx/cdxdomainspecific.py index 4609c454..3fb55862 100644 --- a/pywb/cdx/cdxdomainspecific.py +++ b/pywb/cdx/cdxdomainspecific.py @@ -106,7 +106,6 @@ class FuzzyQuery: if inx > 0: url = url[:inx + 1] - if matched_rule.match_type == 'domain': host = urlparse.urlsplit(url).netloc # remove the subdomain @@ -174,8 +173,8 @@ class CDXDomainSpecificRule(BaseRule): @staticmethod def make_query_match_regex(params_list): r""" - >>> CDXDomainSpecificRule.make_query_match_regex(['param1', 'id', 'abc']) - '[?&](abc=[^&]+).*[?&](id=[^&]+).*[?&](param1=[^&]+)' + >>> CDXDomainSpecificRule.make_query_match_regex(['para', 'id', 'abc']) + '[?&](abc=[^&]+).*[?&](id=[^&]+).*[?&](para=[^&]+)' >>> CDXDomainSpecificRule.make_query_match_regex(['id[0]', 'abc()']) '[?&](abc\\(\\)=[^&]+).*[?&](id\\[0\\]=[^&]+)' diff --git a/pywb/cdx/cdxobject.py b/pywb/cdx/cdxobject.py index 7a190be4..89a40be0 100644 --- a/pywb/cdx/cdxobject.py +++ b/pywb/cdx/cdxobject.py @@ -44,7 +44,7 @@ class CDXObject(OrderedDict): ["urlkey", "timestamp", "original", "mimetype", "statuscode", "digest", "redirect", "offset", "filename", "orig.length", "orig.offset", "orig.filename"] - ] + ] def __init__(self, cdxline=''): OrderedDict.__init__(self) diff --git a/pywb/cdx/cdxops.py b/pywb/cdx/cdxops.py index 46aae478..4aa4fc17 100644 --- a/pywb/cdx/cdxops.py +++ b/pywb/cdx/cdxops.py @@ -169,8 +169,8 @@ def cdx_filter(cdx_iter, filter_strings): # no field set, apply filter to entire cdx if len(parts) == 1: self.field = '' - else: # apply filter to cdx[field] + else: self.field = parts[0] string = parts[1] diff --git a/pywb/framework/certauth.py b/pywb/framework/certauth.py index ea5df6eb..b9f70b06 100644 --- a/pywb/framework/certauth.py +++ b/pywb/framework/certauth.py @@ -194,7 +194,7 @@ def main(args=None): help=('use specified root cert (.pem file) ' + 'to create signed cert')) - parser.add_argument('-n', '--name', action='store', default=CERT_NAME, + parser.add_argument('-n', '--name', action='store', default=CERT_NAME, help='name for root certificate') parser.add_argument('-d', '--certs-dir', default=CERTS_DIR) diff --git a/pywb/framework/proxy.py b/pywb/framework/proxy.py index 471794fe..57a081e8 100644 --- a/pywb/framework/proxy.py +++ b/pywb/framework/proxy.py @@ -159,7 +159,7 @@ class ProxyRouter(object): if env['pywb.proxy_host'] == self.magic_name: env['REL_REQUEST_URI'] = env['pywb.proxy_req_uri'] - # special case for proxy install + # special case for proxy install response = self.handle_cert_install(env) if response: return response @@ -307,7 +307,7 @@ class ProxyRouter(object): name = name.replace('-', '_').upper() - if not name in ('CONTENT_LENGTH', 'CONTENT_TYPE'): + if name not in ('CONTENT_LENGTH', 'CONTENT_TYPE'): name = 'HTTP_' + name env[name] = value diff --git a/pywb/framework/wbrequestresponse.py b/pywb/framework/wbrequestresponse.py index d4990217..12182362 100644 --- a/pywb/framework/wbrequestresponse.py +++ b/pywb/framework/wbrequestresponse.py @@ -83,8 +83,8 @@ class WbRequest(object): rewrite_opts) self.urlrewriter.deprefix_url() - else: # no wb_url, just store blank wb_url + else: self.wb_url = None self.urlrewriter = None @@ -113,6 +113,7 @@ class WbRequest(object): return False RANGE_ARG_RX = re.compile('.*.googlevideo.com/videoplayback.*([&?]range=(\d+)-(\d+))') + RANGE_HEADER = re.compile('bytes=(\d+)-(\d+)?') def extract_range(self): diff --git a/pywb/rewrite/cookie_rewriter.py b/pywb/rewrite/cookie_rewriter.py index e9dd80ac..63db9a93 100644 --- a/pywb/rewrite/cookie_rewriter.py +++ b/pywb/rewrite/cookie_rewriter.py @@ -73,6 +73,8 @@ class ExactPathCookieRewriter(WbUrlBaseCookieRewriter): self._remove_age_opts(morsel) return morsel + + #================================================================= class RootScopeCookieRewriter(WbUrlBaseCookieRewriter): """ diff --git a/pywb/rewrite/header_rewriter.py b/pywb/rewrite/header_rewriter.py index a9b78b66..c609fba4 100644 --- a/pywb/rewrite/header_rewriter.py +++ b/pywb/rewrite/header_rewriter.py @@ -33,7 +33,8 @@ class HeaderRewriter: 'xml': ['/xml', '+xml', '.xml', '.rss'], } - PROXY_HEADERS = ['content-type', 'content-disposition', 'content-range', 'accept-ranges'] + PROXY_HEADERS = ['content-type', 'content-disposition', 'content-range', + 'accept-ranges'] URL_REWRITE_HEADERS = ['location', 'content-location', 'content-base'] diff --git a/pywb/rewrite/html_rewriter.py b/pywb/rewrite/html_rewriter.py index ab923891..878b0dcc 100644 --- a/pywb/rewrite/html_rewriter.py +++ b/pywb/rewrite/html_rewriter.py @@ -172,7 +172,7 @@ class HTMLRewriterMixin(object): # special case: inline JS/event handler if ((attr_value and attr_value.startswith('javascript:')) - or attr_name.startswith('on')): + or attr_name.startswith('on')): attr_value = self._rewrite_script(attr_value) # special case: inline CSS/style attribute @@ -193,7 +193,7 @@ class HTMLRewriterMixin(object): # don't rewrite rel=canonical elif tag == 'link' and attr_name == 'href': if (self.opts.get('rewrite_rel_canon', True) or - not self.has_attr(tag_attrs, ('rel', 'canonical'))): + not self.has_attr(tag_attrs, ('rel', 'canonical'))): rw_mod = handler.get(attr_name) attr_value = self._rewrite_url(attr_value, rw_mod) diff --git a/pywb/rewrite/regex_rewriters.py b/pywb/rewrite/regex_rewriters.py index 5d680068..2a194976 100644 --- a/pywb/rewrite/regex_rewriters.py +++ b/pywb/rewrite/regex_rewriters.py @@ -123,7 +123,6 @@ class JSLinkRewriterMixin(object): #================================================================= class JSLocationRewriterMixin(object): -#class JSLinkAndLocationRewriter(JSLinkOnlyRewriter): """ JS Rewriter mixin which rewrites location and domain to the specified prefix (default: 'WB_wombat_') @@ -131,23 +130,23 @@ class JSLocationRewriterMixin(object): def __init__(self, rewriter, rules=[], prefix='WB_wombat_'): rules = rules + [ - (r'(?