From 14a12f95b2219312a91ad211593eb6960f1aae63 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Fri, 14 Mar 2014 11:02:03 -0700 Subject: [PATCH] pep8 fixes, improve docs for proxy move CaptureException into replay_views --- pywb/core/replay_views.py | 14 ++++++++++++-- pywb/framework/proxy.py | 28 +++++++++++++++++++--------- pywb/framework/wbexceptions.py | 8 -------- pywb/framework/wbrequestresponse.py | 1 + pywb/perms/perms_filter.py | 1 + pywb/perms/perms_handler.py | 1 + pywb/utils/canonicalize.py | 1 + pywb/utils/loaders.py | 1 + 8 files changed, 36 insertions(+), 19 deletions(-) delete mode 100644 pywb/framework/wbexceptions.py diff --git a/pywb/core/replay_views.py b/pywb/core/replay_views.py index 4a6ab4e4..18feb45c 100644 --- a/pywb/core/replay_views.py +++ b/pywb/core/replay_views.py @@ -3,14 +3,24 @@ from io import BytesIO from pywb.utils.bufferedreaders import ChunkedDataReader from pywb.utils.statusandheaders import StatusAndHeaders +from pywb.utils.wbexception import WbException +from pywb.utils.loaders import LimitReader from pywb.framework.wbrequestresponse import WbResponse from pywb.framework.memento import MementoResponse -from pywb.framework.wbexceptions import CaptureException from pywb.warc.recordloader import ArchiveLoadFailed -from pywb.utils.loaders import LimitReader + +#================================================================= +class CaptureException(WbException): + """ + raised to indicate an issue with a specific capture + and will be caught and result in a retry, if possible + if not, will result in a 502 + """ + def status(self): + return '502 Internal Server Error' #================================================================= diff --git a/pywb/framework/proxy.py b/pywb/framework/proxy.py index 4b53b8b4..2ab0c9bc 100644 --- a/pywb/framework/proxy.py +++ b/pywb/framework/proxy.py @@ -4,12 +4,15 @@ import urlparse from pywb.rewrite.url_rewriter import HttpsUrlRewriter 
-#================================================================= -# An experimental router which combines both archival and proxy modes -# http proxy mode support is very simple so far: -# only latest capture is available currently + #================================================================= class ProxyArchivalRouter(ArchivalRouter): + """ + A router which combines support for both archival and proxy modes. + First, the request is treated as a proxy request using ProxyRouter. + Second, if not handled by the proxy router, it is treated as a regular + archival mode request. + """ def __init__(self, routes, **kwargs): super(ProxyArchivalRouter, self).__init__(routes, **kwargs) request_class = routes[0].request_class @@ -28,11 +31,18 @@ class ProxyArchivalRouter(ArchivalRouter): #================================================================= -# Simple router which routes http proxy requests -# Handles requests of the form: GET http://example.com -# Only supports latest capture replay at the moment -#================================================================= -class ProxyRouter: +class ProxyRouter(object): + """ + A router which supports http proxy mode requests. + Handles requests of the form: GET http://example.com + + The router returns the latest capture by default. + However, if Memento protocol support is enabled, + the memento Accept-Datetime header can be used + to select a specific capture. + See: http://www.mementoweb.org/guide/rfc/#Pattern1.3 + for more details.
+ """ def __init__(self, handler, **kwargs): self.handler = handler self.hostpaths = kwargs.get('hostpaths') diff --git a/pywb/framework/wbexceptions.py b/pywb/framework/wbexceptions.py deleted file mode 100644 index dc7621c6..00000000 --- a/pywb/framework/wbexceptions.py +++ /dev/null @@ -1,8 +0,0 @@ -from pywb.utils.wbexception import WbException - - -# Exceptions that effect a specific capture and result in a retry -class CaptureException(WbException): - def status(self): - return '502 Internal Server Error' - diff --git a/pywb/framework/wbrequestresponse.py b/pywb/framework/wbrequestresponse.py index 34912e5d..a234c76e 100644 --- a/pywb/framework/wbrequestresponse.py +++ b/pywb/framework/wbrequestresponse.py @@ -2,6 +2,7 @@ from pywb.utils.statusandheaders import StatusAndHeaders import pprint + #================================================================= class WbRequest(object): """ diff --git a/pywb/perms/perms_filter.py b/pywb/perms/perms_filter.py index 4d6c1200..26996e39 100644 --- a/pywb/perms/perms_filter.py +++ b/pywb/perms/perms_filter.py @@ -13,6 +13,7 @@ def make_perms_cdx_filter(perms_policy, wbrequest): return _create_cdx_perms_filter(perms_checker) + #================================================================= def _create_cdx_perms_filter(perms_checker): """ diff --git a/pywb/perms/perms_handler.py b/pywb/perms/perms_handler.py index 58f4a21b..4ebd79a6 100644 --- a/pywb/perms/perms_handler.py +++ b/pywb/perms/perms_handler.py @@ -11,6 +11,7 @@ RESPONSE_TYPE = 'application/json' NOT_FOUND = 'Please specify a url to check for access' + #================================================================= class PermsHandler(WbUrlHandler): diff --git a/pywb/utils/canonicalize.py b/pywb/utils/canonicalize.py index 79aed38f..f40fcf5d 100644 --- a/pywb/utils/canonicalize.py +++ b/pywb/utils/canonicalize.py @@ -20,6 +20,7 @@ class UrlCanonicalizer(object): class UrlCanonicalizeException(BadRequestException): pass + 
#================================================================= def canonicalize(url, surt_ordered=True): """ diff --git a/pywb/utils/loaders.py b/pywb/utils/loaders.py index ffb15c55..f2d358f5 100644 --- a/pywb/utils/loaders.py +++ b/pywb/utils/loaders.py @@ -10,6 +10,7 @@ import time import pkg_resources from io import open + #================================================================= def is_http(filename): return filename.startswith(('http://', 'https://'))