mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 15:09:54 +01:00
PEP 8 pass: fix spacing, line length, and other style issues
Also remove references to the obsolete cached_replay module and the hostname handling in pywb_init
This commit is contained in:
parent
51919ed1e7
commit
181c18a1b8
@ -106,7 +106,6 @@ class FuzzyQuery:
|
||||
if inx > 0:
|
||||
url = url[:inx + 1]
|
||||
|
||||
|
||||
if matched_rule.match_type == 'domain':
|
||||
host = urlparse.urlsplit(url).netloc
|
||||
# remove the subdomain
|
||||
@ -174,8 +173,8 @@ class CDXDomainSpecificRule(BaseRule):
|
||||
@staticmethod
|
||||
def make_query_match_regex(params_list):
|
||||
r"""
|
||||
>>> CDXDomainSpecificRule.make_query_match_regex(['param1', 'id', 'abc'])
|
||||
'[?&](abc=[^&]+).*[?&](id=[^&]+).*[?&](param1=[^&]+)'
|
||||
>>> CDXDomainSpecificRule.make_query_match_regex(['para', 'id', 'abc'])
|
||||
'[?&](abc=[^&]+).*[?&](id=[^&]+).*[?&](para=[^&]+)'
|
||||
|
||||
>>> CDXDomainSpecificRule.make_query_match_regex(['id[0]', 'abc()'])
|
||||
'[?&](abc\\(\\)=[^&]+).*[?&](id\\[0\\]=[^&]+)'
|
||||
|
@ -169,8 +169,8 @@ def cdx_filter(cdx_iter, filter_strings):
|
||||
# no field set, apply filter to entire cdx
|
||||
if len(parts) == 1:
|
||||
self.field = ''
|
||||
else:
|
||||
# apply filter to cdx[field]
|
||||
else:
|
||||
self.field = parts[0]
|
||||
string = parts[1]
|
||||
|
||||
|
@ -307,7 +307,7 @@ class ProxyRouter(object):
|
||||
|
||||
name = name.replace('-', '_').upper()
|
||||
|
||||
if not name in ('CONTENT_LENGTH', 'CONTENT_TYPE'):
|
||||
if name not in ('CONTENT_LENGTH', 'CONTENT_TYPE'):
|
||||
name = 'HTTP_' + name
|
||||
|
||||
env[name] = value
|
||||
|
@ -83,8 +83,8 @@ class WbRequest(object):
|
||||
rewrite_opts)
|
||||
|
||||
self.urlrewriter.deprefix_url()
|
||||
else:
|
||||
# no wb_url, just store blank wb_url
|
||||
else:
|
||||
self.wb_url = None
|
||||
self.urlrewriter = None
|
||||
|
||||
@ -113,6 +113,7 @@ class WbRequest(object):
|
||||
return False
|
||||
|
||||
RANGE_ARG_RX = re.compile('.*.googlevideo.com/videoplayback.*([&?]range=(\d+)-(\d+))')
|
||||
|
||||
RANGE_HEADER = re.compile('bytes=(\d+)-(\d+)?')
|
||||
|
||||
def extract_range(self):
|
||||
|
@ -73,6 +73,8 @@ class ExactPathCookieRewriter(WbUrlBaseCookieRewriter):
|
||||
|
||||
self._remove_age_opts(morsel)
|
||||
return morsel
|
||||
|
||||
|
||||
#=================================================================
|
||||
class RootScopeCookieRewriter(WbUrlBaseCookieRewriter):
|
||||
"""
|
||||
|
@ -33,7 +33,8 @@ class HeaderRewriter:
|
||||
'xml': ['/xml', '+xml', '.xml', '.rss'],
|
||||
}
|
||||
|
||||
PROXY_HEADERS = ['content-type', 'content-disposition', 'content-range', 'accept-ranges']
|
||||
PROXY_HEADERS = ['content-type', 'content-disposition', 'content-range',
|
||||
'accept-ranges']
|
||||
|
||||
URL_REWRITE_HEADERS = ['location', 'content-location', 'content-base']
|
||||
|
||||
|
@ -123,7 +123,6 @@ class JSLinkRewriterMixin(object):
|
||||
|
||||
#=================================================================
|
||||
class JSLocationRewriterMixin(object):
|
||||
#class JSLinkAndLocationRewriter(JSLinkOnlyRewriter):
|
||||
"""
|
||||
JS Rewriter mixin which rewrites location and domain to the
|
||||
specified prefix (default: 'WB_wombat_')
|
||||
@ -161,6 +160,7 @@ class JSLocationOnlyRewriter(JSLocationRewriterMixin, RegexRewriter):
|
||||
class JSLinkOnlyRewriter(JSLinkRewriterMixin, RegexRewriter):
|
||||
pass
|
||||
|
||||
|
||||
#=================================================================
|
||||
class JSLinkAndLocationRewriter(JSLocationRewriterMixin,
|
||||
JSLinkRewriterMixin,
|
||||
|
@ -1,7 +1,8 @@
|
||||
from pywb.utils.dsrules import BaseRule
|
||||
|
||||
from regex_rewriters import RegexRewriter, CSSRewriter, XMLRewriter
|
||||
from regex_rewriters import JSLinkAndLocationRewriter, JSLinkOnlyRewriter, JSLocationOnlyRewriter
|
||||
from regex_rewriters import JSLinkAndLocationRewriter, JSLinkOnlyRewriter
|
||||
from regex_rewriters import JSLocationOnlyRewriter
|
||||
|
||||
from header_rewriter import HeaderRewriter
|
||||
from html_rewriter import HTMLRewriter
|
||||
|
@ -41,6 +41,7 @@ wayback url format.
|
||||
import re
|
||||
import urllib
|
||||
|
||||
|
||||
#=================================================================
|
||||
class BaseWbUrl(object):
|
||||
QUERY = 'query'
|
||||
@ -107,7 +108,8 @@ class WbUrl(BaseWbUrl):
|
||||
m = self.PARTIAL_ENC_RX.match(self.url)
|
||||
if m:
|
||||
len_ = len(m.group(0))
|
||||
self.url = urllib.unquote_plus(self.url[:len_]) + self.url[len_:]
|
||||
self.url = (urllib.unquote_plus(self.url[:len_]) +
|
||||
self.url[len_:])
|
||||
inx = self.url.find(':/')
|
||||
|
||||
if inx < 0:
|
||||
@ -160,7 +162,6 @@ class WbUrl(BaseWbUrl):
|
||||
self.timestamp = timestamp
|
||||
self.type = self.REPLAY
|
||||
|
||||
|
||||
def deprefix_url(self, prefix):
|
||||
prefix = urllib.quote_plus(prefix)
|
||||
rex_query = '=' + re.escape(prefix) + '([0-9])*([\w]{2}_)?/?'
|
||||
|
@ -173,7 +173,7 @@ def calc_search_range(url, match_type, surt_ordered=True, url_canon=None):
|
||||
|
||||
# if tld, use com, as start_key
|
||||
# otherwise, stick with com,example)/
|
||||
if not ',' in host:
|
||||
if ',' not in host:
|
||||
start_key = host + ','
|
||||
else:
|
||||
start_key = host + ')/'
|
||||
|
@ -42,7 +42,7 @@ class CDXAPIHandler(BaseHandler):
|
||||
if name != 'filter':
|
||||
params[name] = val[0]
|
||||
|
||||
if not 'output' in params:
|
||||
if 'output' not in params:
|
||||
params['output'] = 'text'
|
||||
elif params['output'] not in ('text'):
|
||||
params['output'] = 'text'
|
||||
|
@ -16,7 +16,6 @@ from pywb.warc.resolvingloader import ResolvingLoader
|
||||
|
||||
from views import J2TemplateView
|
||||
from replay_views import ReplayView
|
||||
from cached_replay import CachedReplayView
|
||||
from pywb.framework.memento import MementoResponse
|
||||
from pywb.utils.timeutils import datetime_to_timestamp
|
||||
|
||||
|
@ -89,7 +89,8 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
||||
if rangeres:
|
||||
url, start, end, use_206 = rangeres
|
||||
|
||||
# if bytes=0- Range request, simply remove the range and still proxy
|
||||
# if bytes=0- Range request,
|
||||
# simply remove the range and still proxy
|
||||
if start == 0 and not end and use_206:
|
||||
wbrequest.wb_url.url = url
|
||||
del wbrequest.env['HTTP_RANGE']
|
||||
@ -111,10 +112,12 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
||||
wbresponse = self._make_response(wbrequest, *result)
|
||||
|
||||
if readd_range:
|
||||
content_length = wbresponse.status_headers.get_header('Content-Length')
|
||||
content_length = (wbresponse.status_headers.
|
||||
get_header('Content-Length'))
|
||||
try:
|
||||
content_length = int(content_length)
|
||||
wbresponse.status_headers.add_range(0, content_length, content_length)
|
||||
wbresponse.status_headers.add_range(0, content_length,
|
||||
content_length)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
@ -165,7 +168,8 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
||||
verify=False,
|
||||
stream=True)
|
||||
|
||||
# don't actually read whole response, proxy response for writing it
|
||||
# don't actually read whole response,
|
||||
# proxy response for writing it
|
||||
resp.close()
|
||||
except:
|
||||
del self._cache[key]
|
||||
@ -176,6 +180,7 @@ class RewriteHandler(SearchPageWbUrlHandler):
|
||||
resp = self.get_video_info(wbrequest,
|
||||
info_url=referrer,
|
||||
video_url=url)
|
||||
|
||||
def wrap_buff_gen(gen):
|
||||
for x in gen:
|
||||
yield x
|
||||
|
@ -24,7 +24,6 @@ import logging
|
||||
|
||||
#=================================================================
|
||||
DEFAULTS = {
|
||||
'hostpaths': ['http://localhost:8080'],
|
||||
'collections': {'pywb': './sample_archive/cdx/'},
|
||||
'archive_paths': './sample_archive/warcs/',
|
||||
|
||||
@ -153,13 +152,6 @@ def create_wb_router(passed_config={}):
|
||||
|
||||
routes = []
|
||||
|
||||
# TODO: examine this more
|
||||
hostname = os.environ.get('PYWB_HOST_NAME')
|
||||
if hostname:
|
||||
hostpaths = [hostname]
|
||||
else:
|
||||
hostpaths = config.get('hostpaths')
|
||||
|
||||
port = config.get('port')
|
||||
|
||||
# collections based on cdx source
|
||||
@ -241,7 +233,7 @@ def create_wb_router(passed_config={}):
|
||||
config.get('proxy_select_html'),
|
||||
'Proxy Coll Selector')
|
||||
|
||||
if not 'proxy_options' in passed_config:
|
||||
if 'proxy_options' not in passed_config:
|
||||
passed_config['proxy_options'] = {}
|
||||
|
||||
if view:
|
||||
@ -257,11 +249,6 @@ def create_wb_router(passed_config={}):
|
||||
# Finally, create wb router
|
||||
return router(
|
||||
routes,
|
||||
# Specify hostnames that pywb will be running on
|
||||
# This will help catch occasionally missed rewrites that
|
||||
# fall-through to the host
|
||||
# (See archivalrouter.ReferRedirect)
|
||||
hostpaths=hostpaths,
|
||||
port=port,
|
||||
|
||||
abs_path=config.get('absolute_paths', True),
|
||||
|
@ -28,7 +28,7 @@ class RangeCache(object):
|
||||
url, start, end, use_206):
|
||||
|
||||
key = digest
|
||||
if not key in self.cache:
|
||||
if key not in self.cache:
|
||||
wbrequest.custom_params['noredir'] = True
|
||||
response = wbresponse_func()
|
||||
|
||||
|
@ -230,7 +230,7 @@ class ReplayView(object):
|
||||
|
||||
if wbrequest.method == 'POST':
|
||||
# FF shows a confirm dialog, so can't use 307 effectively
|
||||
# statusline = '307 Same-Method Internal Redirect'
|
||||
# was: statusline = '307 Same-Method Internal Redirect'
|
||||
return None
|
||||
else:
|
||||
statusline = '302 Internal Redirect'
|
||||
|
Loading…
x
Reference in New Issue
Block a user