1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 15:09:54 +01:00

PEP 8 pass: fix spacing, line length, and other style issues

Also remove references to the obsolete cached_replay module and to hostnames in pywb_init.
This commit is contained in:
Ilya Kreymer 2014-12-23 15:14:03 -08:00
parent 51919ed1e7
commit 181c18a1b8
23 changed files with 75 additions and 79 deletions

View File

@ -106,7 +106,6 @@ class FuzzyQuery:
if inx > 0:
url = url[:inx + 1]
if matched_rule.match_type == 'domain':
host = urlparse.urlsplit(url).netloc
# remove the subdomain
@ -174,8 +173,8 @@ class CDXDomainSpecificRule(BaseRule):
@staticmethod
def make_query_match_regex(params_list):
r"""
>>> CDXDomainSpecificRule.make_query_match_regex(['param1', 'id', 'abc'])
'[?&](abc=[^&]+).*[?&](id=[^&]+).*[?&](param1=[^&]+)'
>>> CDXDomainSpecificRule.make_query_match_regex(['para', 'id', 'abc'])
'[?&](abc=[^&]+).*[?&](id=[^&]+).*[?&](para=[^&]+)'
>>> CDXDomainSpecificRule.make_query_match_regex(['id[0]', 'abc()'])
'[?&](abc\\(\\)=[^&]+).*[?&](id\\[0\\]=[^&]+)'

View File

@ -169,8 +169,8 @@ def cdx_filter(cdx_iter, filter_strings):
# no field set, apply filter to entire cdx
if len(parts) == 1:
self.field = ''
else:
# apply filter to cdx[field]
else:
self.field = parts[0]
string = parts[1]

View File

@ -307,7 +307,7 @@ class ProxyRouter(object):
name = name.replace('-', '_').upper()
if not name in ('CONTENT_LENGTH', 'CONTENT_TYPE'):
if name not in ('CONTENT_LENGTH', 'CONTENT_TYPE'):
name = 'HTTP_' + name
env[name] = value

View File

@ -83,8 +83,8 @@ class WbRequest(object):
rewrite_opts)
self.urlrewriter.deprefix_url()
else:
# no wb_url, just store blank wb_url
else:
self.wb_url = None
self.urlrewriter = None
@ -113,6 +113,7 @@ class WbRequest(object):
return False
RANGE_ARG_RX = re.compile('.*.googlevideo.com/videoplayback.*([&?]range=(\d+)-(\d+))')
RANGE_HEADER = re.compile('bytes=(\d+)-(\d+)?')
def extract_range(self):

View File

@ -73,6 +73,8 @@ class ExactPathCookieRewriter(WbUrlBaseCookieRewriter):
self._remove_age_opts(morsel)
return morsel
#=================================================================
class RootScopeCookieRewriter(WbUrlBaseCookieRewriter):
"""

View File

@ -33,7 +33,8 @@ class HeaderRewriter:
'xml': ['/xml', '+xml', '.xml', '.rss'],
}
PROXY_HEADERS = ['content-type', 'content-disposition', 'content-range', 'accept-ranges']
PROXY_HEADERS = ['content-type', 'content-disposition', 'content-range',
'accept-ranges']
URL_REWRITE_HEADERS = ['location', 'content-location', 'content-base']

View File

@ -123,7 +123,6 @@ class JSLinkRewriterMixin(object):
#=================================================================
class JSLocationRewriterMixin(object):
#class JSLinkAndLocationRewriter(JSLinkOnlyRewriter):
"""
JS Rewriter mixin which rewrites location and domain to the
specified prefix (default: 'WB_wombat_')
@ -161,6 +160,7 @@ class JSLocationOnlyRewriter(JSLocationRewriterMixin, RegexRewriter):
class JSLinkOnlyRewriter(JSLinkRewriterMixin, RegexRewriter):
pass
#=================================================================
class JSLinkAndLocationRewriter(JSLocationRewriterMixin,
JSLinkRewriterMixin,

View File

@ -1,7 +1,8 @@
from pywb.utils.dsrules import BaseRule
from regex_rewriters import RegexRewriter, CSSRewriter, XMLRewriter
from regex_rewriters import JSLinkAndLocationRewriter, JSLinkOnlyRewriter, JSLocationOnlyRewriter
from regex_rewriters import JSLinkAndLocationRewriter, JSLinkOnlyRewriter
from regex_rewriters import JSLocationOnlyRewriter
from header_rewriter import HeaderRewriter
from html_rewriter import HTMLRewriter

View File

@ -41,6 +41,7 @@ wayback url format.
import re
import urllib
#=================================================================
class BaseWbUrl(object):
QUERY = 'query'
@ -107,7 +108,8 @@ class WbUrl(BaseWbUrl):
m = self.PARTIAL_ENC_RX.match(self.url)
if m:
len_ = len(m.group(0))
self.url = urllib.unquote_plus(self.url[:len_]) + self.url[len_:]
self.url = (urllib.unquote_plus(self.url[:len_]) +
self.url[len_:])
inx = self.url.find(':/')
if inx < 0:
@ -160,7 +162,6 @@ class WbUrl(BaseWbUrl):
self.timestamp = timestamp
self.type = self.REPLAY
def deprefix_url(self, prefix):
prefix = urllib.quote_plus(prefix)
rex_query = '=' + re.escape(prefix) + '([0-9])*([\w]{2}_)?/?'

View File

@ -173,7 +173,7 @@ def calc_search_range(url, match_type, surt_ordered=True, url_canon=None):
# if tld, use com, as start_key
# otherwise, stick with com,example)/
if not ',' in host:
if ',' not in host:
start_key = host + ','
else:
start_key = host + ')/'

View File

@ -42,7 +42,7 @@ class CDXAPIHandler(BaseHandler):
if name != 'filter':
params[name] = val[0]
if not 'output' in params:
if 'output' not in params:
params['output'] = 'text'
elif params['output'] not in ('text'):
params['output'] = 'text'

View File

@ -16,7 +16,6 @@ from pywb.warc.resolvingloader import ResolvingLoader
from views import J2TemplateView
from replay_views import ReplayView
from cached_replay import CachedReplayView
from pywb.framework.memento import MementoResponse
from pywb.utils.timeutils import datetime_to_timestamp

View File

@ -89,7 +89,8 @@ class RewriteHandler(SearchPageWbUrlHandler):
if rangeres:
url, start, end, use_206 = rangeres
# if bytes=0- Range request, simply remove the range and still proxy
# if bytes=0- Range request,
# simply remove the range and still proxy
if start == 0 and not end and use_206:
wbrequest.wb_url.url = url
del wbrequest.env['HTTP_RANGE']
@ -111,10 +112,12 @@ class RewriteHandler(SearchPageWbUrlHandler):
wbresponse = self._make_response(wbrequest, *result)
if readd_range:
content_length = wbresponse.status_headers.get_header('Content-Length')
content_length = (wbresponse.status_headers.
get_header('Content-Length'))
try:
content_length = int(content_length)
wbresponse.status_headers.add_range(0, content_length, content_length)
wbresponse.status_headers.add_range(0, content_length,
content_length)
except (ValueError, TypeError):
pass
@ -165,7 +168,8 @@ class RewriteHandler(SearchPageWbUrlHandler):
verify=False,
stream=True)
# don't actually read whole response, proxy response for writing it
# don't actually read whole response,
# proxy response for writing it
resp.close()
except:
del self._cache[key]
@ -176,6 +180,7 @@ class RewriteHandler(SearchPageWbUrlHandler):
resp = self.get_video_info(wbrequest,
info_url=referrer,
video_url=url)
def wrap_buff_gen(gen):
for x in gen:
yield x

View File

@ -24,7 +24,6 @@ import logging
#=================================================================
DEFAULTS = {
'hostpaths': ['http://localhost:8080'],
'collections': {'pywb': './sample_archive/cdx/'},
'archive_paths': './sample_archive/warcs/',
@ -153,13 +152,6 @@ def create_wb_router(passed_config={}):
routes = []
# TODO: examine this more
hostname = os.environ.get('PYWB_HOST_NAME')
if hostname:
hostpaths = [hostname]
else:
hostpaths = config.get('hostpaths')
port = config.get('port')
# collections based on cdx source
@ -241,7 +233,7 @@ def create_wb_router(passed_config={}):
config.get('proxy_select_html'),
'Proxy Coll Selector')
if not 'proxy_options' in passed_config:
if 'proxy_options' not in passed_config:
passed_config['proxy_options'] = {}
if view:
@ -257,11 +249,6 @@ def create_wb_router(passed_config={}):
# Finally, create wb router
return router(
routes,
# Specify hostnames that pywb will be running on
# This will help catch occasionally missed rewrites that
# fall-through to the host
# (See archivalrouter.ReferRedirect)
hostpaths=hostpaths,
port=port,
abs_path=config.get('absolute_paths', True),

View File

@ -28,7 +28,7 @@ class RangeCache(object):
url, start, end, use_206):
key = digest
if not key in self.cache:
if key not in self.cache:
wbrequest.custom_params['noredir'] = True
response = wbresponse_func()

View File

@ -230,7 +230,7 @@ class ReplayView(object):
if wbrequest.method == 'POST':
# FF shows a confirm dialog, so can't use 307 effectively
# statusline = '307 Same-Method Internal Redirect'
# was: statusline = '307 Same-Method Internal Redirect'
return None
else:
statusline = '302 Internal Redirect'