diff --git a/.dockerignore b/.dockerignore index d8391b70..0af42f51 100755 --- a/.dockerignore +++ b/.dockerignore @@ -1,8 +1,6 @@ build/ dist/ karma-tests/ -sample_archive/ -tests/ tests_disabled/ venv/ collections/ diff --git a/pywb/apps/frontendapp.py b/pywb/apps/frontendapp.py index 80469e5c..cba2dff6 100644 --- a/pywb/apps/frontendapp.py +++ b/pywb/apps/frontendapp.py @@ -17,7 +17,7 @@ from pywb.recorder.recorderapp import RecorderApp from pywb.utils.loaders import load_yaml_config from pywb.utils.geventserver import GeventServer from pywb.utils.io import StreamIter -from pywb.utils.wbexception import NotFoundException, WbException +from pywb.utils.wbexception import NotFoundException, WbException, AppPageNotFound from pywb.warcserver.warcserver import WarcServer @@ -646,13 +646,6 @@ class FrontEndApp(object): return response -# ============================================================================ -class AppPageNotFound(WbException): - @property - def status_code(self): - return 404 - - # ============================================================================ class MetadataCache(object): """This class holds the collection medata template string and diff --git a/pywb/apps/rewriterapp.py b/pywb/apps/rewriterapp.py index 27b9a874..8cc6730c 100644 --- a/pywb/apps/rewriterapp.py +++ b/pywb/apps/rewriterapp.py @@ -17,8 +17,8 @@ from pywb.rewrite.url_rewriter import IdentityUrlRewriter, UrlRewriter from pywb.rewrite.wburl import WbUrl from pywb.rewrite.url_rewriter import UrlRewriter, IdentityUrlRewriter -from pywb.utils.wbexception import WbException, NotFoundException from pywb.rewrite.cookies import CookieTracker +from pywb.utils.wbexception import WbException, NotFoundException, UpstreamException from pywb.utils.canonicalize import canonicalize from pywb.utils.io import BUFF_SIZE, OffsetLimitReader, no_except_close from pywb.utils.memento import MementoUtils diff --git a/pywb/utils/loaders.py b/pywb/utils/loaders.py index f2bac05d..6ca5b613 100644 --- a/pywb/utils/loaders.py +++ b/pywb/utils/loaders.py @@ -16,7 +16,6 @@ from six.moves.urllib.parse import unquote_plus, urlsplit, urlencode import time import pkgutil import base64 -import yaml import cgi import re diff --git a/pywb/utils/test/test_loaders.py b/pywb/utils/test/test_loaders.py index 9372f822..c830253d 100644 --- a/pywb/utils/test/test_loaders.py +++ b/pywb/utils/test/test_loaders.py @@ -81,6 +81,7 @@ from six import StringIO from io import BytesIO import requests import yaml +from yaml import Loader from pywb.utils.loaders import BlockLoader, HMACCookieMaker, to_file_url from pywb.utils.loaders import extract_client_cookie @@ -182,7 +183,7 @@ collection: other: ${PYWB_NOT}/archive/${PYWB_FOO} """ - config_data = yaml.load(config) + config_data = yaml.load(config, Loader=Loader) assert config_data['collection']['coll']['index'] == './test/index' assert config_data['collection']['coll']['archive'] == './test/archive/bar' diff --git a/pywb/utils/wbexception.py b/pywb/utils/wbexception.py index 40648bbf..228df9f2 100644 --- a/pywb/utils/wbexception.py +++ b/pywb/utils/wbexception.py @@ -50,3 +50,21 @@ class LiveResourceException(WbException): def status_code(self): return 400 + +# ============================================================================ +class UpstreamException(WbException): + def __init__(self, status_code, url, details): + super(UpstreamException, self).__init__(url=url, msg=details) + self._status_code = status_code + + @property + def status_code(self): + return self._status_code + + +# ============================================================================ +class AppPageNotFound(WbException): + @property + def status_code(self): + return 404 + diff --git a/pywb/warcserver/index/indexsource.py b/pywb/warcserver/index/indexsource.py index c7b91d04..40b8e750 100644 --- a/pywb/warcserver/index/indexsource.py +++ b/pywb/warcserver/index/indexsource.py @@ -246,7 +246,7 @@ class XmlQueryIndexSource(BaseIndexSource): try: #OpenSearch API requires double-escaping #TODO: add option to not double escape if needed - query_url = self.query_api_url + '?q' + quote_plus(query + quote_plus(url)) + query_url = self.query_api_url + '?q=' + quote_plus(query + quote_plus(url)) self.logger.debug("Running query: %s" % query_url) response = self.session.get(query_url) response.raise_for_status() diff --git a/pywb/warcserver/index/test/test_xmlquery_indexsource.py b/pywb/warcserver/index/test/test_xmlquery_indexsource.py index 3a3da4bb..77a05823 100644 --- a/pywb/warcserver/index/test/test_xmlquery_indexsource.py +++ b/pywb/warcserver/index/test/test_xmlquery_indexsource.py @@ -3,6 +3,8 @@ from pywb.warcserver.test.testutils import BaseTestClass, key_ts_res from pywb.warcserver.index.indexsource import XmlQueryIndexSource from pywb.warcserver.index.aggregator import SimpleAggregator +from six.moves.urllib.parse import quote_plus + from mock import patch import pytest @@ -10,14 +12,14 @@ import pytest # ============================================================================ def mock_get(self, url): string = '' - if 'type%3Aurlquery' in url: - if 'http%253A%252F%252Fexample.com%252Fsome%252Fpath' in url: + if quote_plus(XmlQueryIndexSource.EXACT_QUERY) in url: + if quote_plus(quote_plus('http://example.com/some/path')) in url: string = URL_RESPONSE_2 - elif 'http%253A%252F%252Fexample.com%252F' in url: + elif quote_plus(quote_plus('http://example.com/')) in url: string = URL_RESPONSE_1 - elif 'type%3Aprefixquery' in url: + elif quote_plus(XmlQueryIndexSource.PREFIX_QUERY) in url: string = PREFIX_QUERY class MockResponse(object):