mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
refactor: split warcserver.utils into utils package:
- utils.io for stream/compression-related utils
- utils.format for string formatting
- utils.memento for Memento utilities
- load_config -> utils.loaders.load_overlay_config
- also: use warcio.utils.to_native_str instead of utils.loaders.to_native_str
This commit is contained in:
parent
3bd682e3d3
commit
d12f715d81
@ -7,7 +7,9 @@ from werkzeug.wsgi import pop_path_info
|
|||||||
from six.moves.urllib.parse import urljoin
|
from six.moves.urllib.parse import urljoin
|
||||||
from six import iteritems
|
from six import iteritems
|
||||||
|
|
||||||
from pywb.utils.loaders import load_yaml_config, to_native_str
|
from warcio.utils import to_native_str
|
||||||
|
|
||||||
|
from pywb.utils.loaders import load_yaml_config
|
||||||
from pywb.utils.geventserver import GeventServer
|
from pywb.utils.geventserver import GeventServer
|
||||||
|
|
||||||
from pywb.warcserver.warcserver import WarcServer
|
from pywb.warcserver.warcserver import WarcServer
|
||||||
|
@ -14,6 +14,8 @@ from pywb.rewrite.url_rewriter import UrlRewriter, SchemeOnlyUrlRewriter
|
|||||||
from pywb.utils.wbexception import WbException
|
from pywb.utils.wbexception import WbException
|
||||||
from pywb.utils.canonicalize import canonicalize
|
from pywb.utils.canonicalize import canonicalize
|
||||||
from pywb.utils.loaders import extract_client_cookie
|
from pywb.utils.loaders import extract_client_cookie
|
||||||
|
from pywb.utils.io import BUFF_SIZE
|
||||||
|
from pywb.utils.memento import MementoUtils
|
||||||
|
|
||||||
from warcio.timeutils import http_date_to_timestamp
|
from warcio.timeutils import http_date_to_timestamp
|
||||||
from warcio.bufferedreaders import BufferedReader
|
from warcio.bufferedreaders import BufferedReader
|
||||||
@ -22,9 +24,6 @@ from warcio.recordloader import ArcWarcRecordLoader
|
|||||||
from pywb.warcserver.index.cdxobject import CDXObject
|
from pywb.warcserver.index.cdxobject import CDXObject
|
||||||
from pywb.apps.wbrequestresponse import WbResponse
|
from pywb.apps.wbrequestresponse import WbResponse
|
||||||
|
|
||||||
from pywb.warcserver.utils import BUFF_SIZE
|
|
||||||
from pywb.warcserver.utils import MementoUtils
|
|
||||||
|
|
||||||
from pywb.rewrite.rewriteinputreq import RewriteInputRequest
|
from pywb.rewrite.rewriteinputreq import RewriteInputRequest
|
||||||
from pywb.rewrite.templateview import JinjaEnv, HeadInsertView, TopFrameView, BaseInsertView
|
from pywb.rewrite.templateview import JinjaEnv, HeadInsertView, TopFrameView, BaseInsertView
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
from pywb.utils.canonicalize import canonicalize
|
from pywb.utils.canonicalize import canonicalize
|
||||||
|
|
||||||
from pywb.warcserver.inputrequest import PostQueryExtractor
|
from pywb.warcserver.inputrequest import PostQueryExtractor
|
||||||
from pywb.warcserver.utils import BUFF_SIZE
|
from pywb.utils.io import BUFF_SIZE
|
||||||
|
|
||||||
from warcio.timeutils import iso_date_to_timestamp
|
from warcio.timeutils import iso_date_to_timestamp
|
||||||
from warcio.archiveiterator import ArchiveIterator
|
from warcio.archiveiterator import ArchiveIterator
|
||||||
|
@ -10,7 +10,7 @@ import portalocker
|
|||||||
from warcio.timeutils import timestamp20_now
|
from warcio.timeutils import timestamp20_now
|
||||||
from warcio.warcwriter import BaseWARCWriter
|
from warcio.warcwriter import BaseWARCWriter
|
||||||
|
|
||||||
from pywb.warcserver.utils import res_template
|
from pywb.utils.format import res_template
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
from pywb.warcserver.utils import StreamIter, BUFF_SIZE
|
from pywb.utils.io import StreamIter, BUFF_SIZE
|
||||||
from pywb.warcserver.utils import ParamFormatter, res_template
|
from pywb.utils.format import ParamFormatter, res_template
|
||||||
from pywb.warcserver.inputrequest import DirectWSGIInputRequest
|
from pywb.warcserver.inputrequest import DirectWSGIInputRequest
|
||||||
|
|
||||||
from warcio.recordloader import ArcWarcRecordLoader
|
from warcio.recordloader import ArcWarcRecordLoader
|
||||||
|
@ -4,12 +4,13 @@ from io import BytesIO
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
from pywb.utils.canonicalize import calc_search_range
|
from pywb.utils.canonicalize import calc_search_range
|
||||||
|
from pywb.utils.format import res_template
|
||||||
|
|
||||||
from pywb.indexer.cdxindexer import write_cdx_index
|
from pywb.indexer.cdxindexer import write_cdx_index
|
||||||
|
|
||||||
from pywb.warcserver.index.cdxobject import CDXObject
|
from pywb.warcserver.index.cdxobject import CDXObject
|
||||||
from pywb.warcserver.index.indexsource import RedisIndexSource
|
from pywb.warcserver.index.indexsource import RedisIndexSource
|
||||||
from pywb.warcserver.index.aggregator import SimpleAggregator
|
from pywb.warcserver.index.aggregator import SimpleAggregator
|
||||||
from pywb.warcserver.utils import res_template
|
|
||||||
|
|
||||||
from pywb.recorder.filters import WriteRevisitDupePolicy
|
from pywb.recorder.filters import WriteRevisitDupePolicy
|
||||||
|
|
||||||
|
@ -16,7 +16,7 @@ from pywb.recorder.multifilewarcwriter import PerRecordWARCWriter, MultiFileWARC
|
|||||||
from pywb.recorder.filters import ExcludeSpecificHeaders, ExcludeHttpOnlyCookieHeaders
|
from pywb.recorder.filters import ExcludeSpecificHeaders, ExcludeHttpOnlyCookieHeaders
|
||||||
from pywb.recorder.filters import SkipDupePolicy, WriteDupePolicy, WriteRevisitDupePolicy
|
from pywb.recorder.filters import SkipDupePolicy, WriteDupePolicy, WriteRevisitDupePolicy
|
||||||
|
|
||||||
from pywb.warcserver.utils import MementoUtils
|
from pywb.utils.memento import MementoUtils
|
||||||
|
|
||||||
from pywb.warcserver.index.cdxobject import CDXObject
|
from pywb.warcserver.index.cdxobject import CDXObject
|
||||||
|
|
||||||
|
@ -9,7 +9,7 @@ import re
|
|||||||
import webencodings
|
import webencodings
|
||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
from pywb.warcserver.utils import StreamIter, BUFF_SIZE
|
from pywb.utils.io import StreamIter, BUFF_SIZE
|
||||||
|
|
||||||
from pywb.utils.loaders import load_yaml_config
|
from pywb.utils.loaders import load_yaml_config
|
||||||
|
|
||||||
|
@ -245,7 +245,7 @@ Exception: ('Invalid WbUrl: ', '')
|
|||||||
from pywb.rewrite.wburl import WbUrl
|
from pywb.rewrite.wburl import WbUrl
|
||||||
from six.moves.urllib.parse import quote_plus, unquote_plus
|
from six.moves.urllib.parse import quote_plus, unquote_plus
|
||||||
|
|
||||||
from pywb.utils.loaders import to_native_str
|
from warcio.utils import to_native_str
|
||||||
|
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
|
|
||||||
|
@ -44,7 +44,7 @@ import six
|
|||||||
from six.moves.urllib.parse import urlsplit, urlunsplit
|
from six.moves.urllib.parse import urlsplit, urlunsplit
|
||||||
from six.moves.urllib.parse import quote_plus, quote, unquote_plus
|
from six.moves.urllib.parse import quote_plus, quote, unquote_plus
|
||||||
|
|
||||||
from pywb.utils.loaders import to_native_str
|
from warcio.utils import to_native_str
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
|
@ -6,6 +6,7 @@ local and remote access
|
|||||||
import os
|
import os
|
||||||
import hmac
|
import hmac
|
||||||
import requests
|
import requests
|
||||||
|
import yaml
|
||||||
|
|
||||||
import six
|
import six
|
||||||
from six.moves.urllib.request import pathname2url, url2pathname
|
from six.moves.urllib.request import pathname2url, url2pathname
|
||||||
@ -18,7 +19,6 @@ import cgi
|
|||||||
|
|
||||||
from io import open, BytesIO
|
from io import open, BytesIO
|
||||||
from warcio.limitreader import LimitReader
|
from warcio.limitreader import LimitReader
|
||||||
from warcio.utils import to_native_str
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from boto import connect_s3
|
from boto import connect_s3
|
||||||
@ -46,9 +46,8 @@ def load(filename):
|
|||||||
return BlockLoader().load(filename)
|
return BlockLoader().load(filename)
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
# =============================================================================
|
||||||
def load_yaml_config(config_file):
|
def load_yaml_config(config_file):
|
||||||
import yaml
|
|
||||||
config = None
|
config = None
|
||||||
configdata = None
|
configdata = None
|
||||||
try:
|
try:
|
||||||
@ -61,6 +60,29 @@ def load_yaml_config(config_file):
|
|||||||
return config
|
return config
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
def load_overlay_config(main_env_var, main_default_file='',
                        overlay_env_var='', overlay_file=''):
    """Load a main YAML config and merge an optional overlay config over it.

    :param main_env_var: name of the environment variable that may hold the
        path of the main config file
    :param main_default_file: path used when ``main_env_var`` is not set
    :param overlay_env_var: name of the environment variable that may hold
        the path of the overlay config file
    :param overlay_file: path used when ``overlay_env_var`` is not set
    :return: a dict with the main config, updated in place with the top-level
        keys of the overlay config; an empty dict when neither file is
        configured
    """
    config = {}

    configfile = os.environ.get(main_env_var, main_default_file)
    if configfile:
        # paths may themselves reference environment variables
        config = load_yaml_config(os.path.expandvars(configfile)) or {}

    overlay_configfile = os.environ.get(overlay_env_var, overlay_file)
    if overlay_configfile:
        overlay = load_yaml_config(os.path.expandvars(overlay_configfile))
        # The loader can produce a falsy result (the main config is guarded
        # against this as well); dict.update(None) would raise TypeError.
        if overlay:
            config.update(overlay)

    return config
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
def extract_client_cookie(env, cookie_name):
|
def extract_client_cookie(env, cookie_name):
|
||||||
cookie_header = env.get('HTTP_COOKIE')
|
cookie_header = env.get('HTTP_COOKIE')
|
||||||
|
@ -1,11 +1,11 @@
|
|||||||
from pywb.utils.wbexception import BadRequestException, WbException
|
from pywb.utils.wbexception import BadRequestException, WbException
|
||||||
from pywb.utils.wbexception import NotFoundException
|
from pywb.utils.wbexception import NotFoundException
|
||||||
|
from pywb.utils.memento import MementoUtils
|
||||||
|
|
||||||
from warcio.recordloader import ArchiveLoadFailed
|
from warcio.recordloader import ArchiveLoadFailed
|
||||||
|
|
||||||
from pywb.warcserver.index.fuzzymatcher import FuzzyMatcher
|
from pywb.warcserver.index.fuzzymatcher import FuzzyMatcher
|
||||||
from pywb.warcserver.resource.responseloader import WARCPathLoader, LiveWebLoader, VideoLoader
|
from pywb.warcserver.resource.responseloader import WARCPathLoader, LiveWebLoader, VideoLoader
|
||||||
from pywb.warcserver.utils import MementoUtils
|
|
||||||
|
|
||||||
import six
|
import six
|
||||||
|
|
||||||
|
@ -12,7 +12,7 @@ from collections import deque
|
|||||||
from itertools import chain
|
from itertools import chain
|
||||||
|
|
||||||
from pywb.utils.wbexception import NotFoundException, WbException
|
from pywb.utils.wbexception import NotFoundException, WbException
|
||||||
from pywb.warcserver.utils import ParamFormatter, res_template
|
from pywb.utils.format import ParamFormatter, res_template
|
||||||
|
|
||||||
from pywb.warcserver.index.indexsource import FileIndexSource, RedisIndexSource
|
from pywb.warcserver.index.indexsource import FileIndexSource, RedisIndexSource
|
||||||
from pywb.warcserver.index.cdxops import process_cdx
|
from pywb.warcserver.index.cdxops import process_cdx
|
||||||
|
@ -10,7 +10,7 @@ from six.moves.urllib.parse import urlencode, quote
|
|||||||
from six.moves.urllib.parse import parse_qs
|
from six.moves.urllib.parse import parse_qs
|
||||||
|
|
||||||
from pywb.utils.wbexception import WbException
|
from pywb.utils.wbexception import WbException
|
||||||
from pywb.utils.loaders import to_native_str
|
from warcio.utils import to_native_str
|
||||||
|
|
||||||
from json import loads as json_decode
|
from json import loads as json_decode
|
||||||
from json import dumps as json_encode
|
from json import dumps as json_encode
|
||||||
|
@ -7,8 +7,8 @@ from warcio.timeutils import timestamp_now, pad_timestamp, PAD_14_DOWN
|
|||||||
|
|
||||||
from pywb.warcserver.index.cdxobject import CDXObject
|
from pywb.warcserver.index.cdxobject import CDXObject
|
||||||
|
|
||||||
from pywb.warcserver.utils import ParamFormatter, res_template
|
from pywb.utils.format import ParamFormatter, res_template
|
||||||
from pywb.warcserver.utils import MementoUtils
|
from pywb.utils.memento import MementoUtils
|
||||||
|
|
||||||
import redis
|
import redis
|
||||||
|
|
||||||
|
@ -2,7 +2,7 @@ from warcio.bufferedreaders import DecompressingBufferedReader
|
|||||||
from warcio.recordloader import ArcWarcRecordLoader
|
from warcio.recordloader import ArcWarcRecordLoader
|
||||||
|
|
||||||
from pywb.utils.loaders import BlockLoader
|
from pywb.utils.loaders import BlockLoader
|
||||||
from pywb.warcserver.utils import BUFF_SIZE
|
from pywb.utils.io import BUFF_SIZE
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
import redis
|
import redis
|
||||||
|
|
||||||
|
from warcio.utils import to_native_str
|
||||||
from pywb.utils.binsearch import iter_exact
|
from pywb.utils.binsearch import iter_exact
|
||||||
from pywb.utils.loaders import to_native_str
|
|
||||||
|
|
||||||
from pywb.warcserver.index.indexsource import RedisIndexSource
|
from pywb.warcserver.index.indexsource import RedisIndexSource
|
||||||
|
|
||||||
|
@ -7,8 +7,9 @@ from warcio.statusandheaders import StatusAndHeaders, StatusAndHeadersParser
|
|||||||
|
|
||||||
from pywb.utils.wbexception import LiveResourceException, WbException
|
from pywb.utils.wbexception import LiveResourceException, WbException
|
||||||
|
|
||||||
from pywb.warcserver.utils import MementoUtils, StreamIter, compress_gzip_iter
|
from pywb.utils.memento import MementoUtils
|
||||||
from pywb.warcserver.utils import ParamFormatter
|
from pywb.utils.io import StreamIter, compress_gzip_iter
|
||||||
|
from pywb.utils.format import ParamFormatter
|
||||||
|
|
||||||
from pywb.warcserver.resource.resolvingloader import ResolvingLoader
|
from pywb.warcserver.resource.resolvingloader import ResolvingLoader
|
||||||
from pywb.warcserver.resource.pathresolvers import DefaultResolverMixin
|
from pywb.warcserver.resource.pathresolvers import DefaultResolverMixin
|
||||||
|
@ -23,7 +23,7 @@ from pywb.warcserver.index.aggregator import GeventTimeoutAggregator, SimpleAggr
|
|||||||
from pywb.warcserver.index.aggregator import DirectoryIndexSource
|
from pywb.warcserver.index.aggregator import DirectoryIndexSource
|
||||||
|
|
||||||
from pywb.warcserver.basewarcserver import BaseWarcServer
|
from pywb.warcserver.basewarcserver import BaseWarcServer
|
||||||
from pywb.warcserver.utils import MementoUtils
|
from pywb.utils.memento import MementoUtils
|
||||||
|
|
||||||
|
|
||||||
sources = {
|
sources = {
|
||||||
|
@ -5,7 +5,7 @@ from pywb.utils.wbexception import NotFoundException
|
|||||||
from pywb.warcserver.index.cdxobject import CDXObject
|
from pywb.warcserver.index.cdxobject import CDXObject
|
||||||
from pywb.warcserver.index.indexsource import BaseIndexSource, RemoteIndexSource
|
from pywb.warcserver.index.indexsource import BaseIndexSource, RemoteIndexSource
|
||||||
from pywb.warcserver.resource.responseloader import LiveWebLoader
|
from pywb.warcserver.resource.responseloader import LiveWebLoader
|
||||||
from pywb.warcserver.utils import ParamFormatter, res_template
|
from pywb.utils.format import ParamFormatter, res_template
|
||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
#=============================================================================
|
||||||
|
@ -1,250 +0,0 @@
|
|||||||
import re
|
|
||||||
import six
|
|
||||||
import string
|
|
||||||
import yaml
|
|
||||||
import os
|
|
||||||
import zlib
|
|
||||||
|
|
||||||
from contextlib import closing
|
|
||||||
|
|
||||||
from warcio.timeutils import timestamp_to_http_date
|
|
||||||
from warcio.utils import BUFF_SIZE
|
|
||||||
|
|
||||||
from pywb.utils.wbexception import BadRequestException
|
|
||||||
from pywb.utils.loaders import load_yaml_config
|
|
||||||
|
|
||||||
from six.moves.urllib.parse import quote
|
|
||||||
from tempfile import SpooledTemporaryFile
|
|
||||||
|
|
||||||
|
|
||||||
# Patterns used to take apart a ``Link`` header value.
# Raw strings are used so that ``\s`` / ``\w`` reach the regex engine
# untouched instead of being (invalid) string escape sequences.

# Separator between links: a comma plus optional whitespace, but only when
# the next character starts a new ``<url>`` (commas inside values are kept).
LINK_SPLIT = re.compile(r',\s*(?=[<])')

# Separator between the segments of a single link.
LINK_SEG_SPLIT = re.compile(r';\s*')

# The ``<url>`` segment; group 1 is the text between the angle brackets.
LINK_URL = re.compile(r'<(.*)>')

# A ``name="value"`` segment; group 1 is the name, group 2 the value.
LINK_PROP = re.compile(r'([\w]+)="([^"]+)')
|
|
||||||
|
|
||||||
#=============================================================================
|
|
||||||
class MementoException(BadRequestException):
    """Bad-request error raised when a ``Link`` header cannot be parsed."""
|
|
||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
|
||||||
class MementoUtils(object):
    """Static helpers for parsing and generating ``Link`` header values."""

    @staticmethod
    def parse_links(link_header, def_name='timemap'):
        """Parse a ``Link`` header string into a dict of link descriptions.

        :param link_header: the raw header value, links separated by commas
        :param def_name: key under which a ``rel="self"`` link is stored
        :return: dict mapping a key to ``{'url': ..., <other props>}``. The
            key is the ``rel`` value, or ``def_name`` for ``rel="self"``.
            Links whose ``rel`` contains ``memento`` are instead collected,
            in order, in the list stored under ``'mementos'``.
        :raises MementoException: if a link has no ``<url>`` segment or a
            property is not of the form ``name="value"``
        """
        results = {}
        mementos = []

        for link in LINK_SPLIT.split(link_header):
            props = LINK_SEG_SPLIT.split(link)

            m = LINK_URL.match(props[0])
            if not m:
                raise MementoException('Invalid Link Url: ' + props[0])

            result = dict(url=m.group(1))
            key = ''
            is_mem = False

            for prop in props[1:]:
                m = LINK_PROP.match(prop)
                if not m:
                    raise MementoException('Invalid prop ' + prop)

                name = m.group(1)
                value = m.group(2)

                if name == 'rel':
                    if 'memento' in value:
                        # memento links keep their rel and go to the list
                        is_mem = True
                        result[name] = value
                    elif value == 'self':
                        key = def_name
                    else:
                        key = value
                else:
                    result[name] = value

            if key:
                results[key] = result
            elif is_mem:
                mementos.append(result)

        results['mementos'] = mementos
        return results

    @staticmethod
    def make_timemap_memento_link(cdx, datetime=None, rel='memento', end=',\n'):
        """Format one timemap entry for a cdx record.

        :param cdx: mapping for one capture; ``load_url`` is used as the link
            target when present, otherwise a ``file://filename:offset:length``
            url is built from the record
        :param datetime: value for the ``datetime`` attribute; derived from
            ``cdx['timestamp']`` when not given
        :param rel: value for the ``rel`` attribute
        :param end: text appended after the link (separator / terminator)
        :return: the formatted link string
        """
        url = cdx.get('load_url')
        if not url:
            url = 'file://{0}:{1}:{2}'.format(cdx.get('filename'),
                                              cdx.get('offset'),
                                              cdx.get('length'))

        if not datetime:
            datetime = timestamp_to_http_date(cdx['timestamp'])

        memento = '<{0}>; rel="{1}"; datetime="{2}"; src="{3}"'

        # ``end`` is appended after formatting so it is never interpreted as
        # part of the format template.
        return memento.format(url, rel, datetime, cdx.get('source', '')) + end

    @staticmethod
    def make_timemap(cdx_iter):
        """Yield timemap link lines for each cdx record in ``cdx_iter``.

        Every link but the last is terminated with ``',\\n'``; the last one
        with ``'\\n'``. This also holds when there is exactly one record, so
        the output never ends with a dangling comma. Nothing is yielded for
        an empty input.

        :param cdx_iter: iterable of cdx mappings
        """
        # One-item lookahead: a record is only emitted once it is known
        # whether another one follows, which decides its terminator.
        prev_cdx = None

        for cdx in cdx_iter:
            if prev_cdx is not None:
                yield MementoUtils.make_timemap_memento_link(prev_cdx)

            prev_cdx = cdx

        # last memento link, if any
        if prev_cdx is not None:
            yield MementoUtils.make_timemap_memento_link(prev_cdx, end='\n')

    @staticmethod
    def make_link(url, type):
        """Return ``<url>; rel="type"``."""
        return '<{0}>; rel="{1}"'.format(url, type)

    @staticmethod
    def make_memento_link(url, type, dt):
        """Return ``<url>; rel="type"; datetime="dt"``."""
        return '<{0}>; rel="{1}"; datetime="{2}"'.format(url, type, dt)
|
|
||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
|
||||||
class ParamFormatter(string.Formatter):
    """Formatter that resolves named fields from a params mapping.

    For a field ``{key}`` the value is looked up, in order, as:

    1. ``params[prefix + name + '.' + key]`` (only when ``name`` is set)
    2. ``params[prefix + key]``
    3. the keyword argument ``key`` passed to ``format()``
    4. ``params[key]``, defaulting to the empty string

    A lookup whose value is ``None`` counts as missing and falls through to
    the next step. Positional fields (``{0}``, ``{}``) use the standard
    ``string.Formatter`` handling.
    """

    def __init__(self, params, name='', prefix='param.'):
        """
        :param params: mapping of parameter names to values
        :param name: optional name used for the most specific lookup
        :param prefix: prefix put in front of prefixed lookups
        """
        self.params = params
        self.prefix = prefix
        self.name = name

    def get_value(self, key, args, kwargs):
        """Resolve one field; see the class docstring for the lookup order."""
        # Positional fields arrive as integers and cannot be joined with the
        # string prefix; let the base class index into ``args`` for those.
        if isinstance(key, int):
            return super(ParamFormatter, self).get_value(key, args, kwargs)

        # First, try the named param 'param.{name}.{key}'
        if self.name:
            named_key = self.prefix + self.name + '.' + key
            value = self.params.get(named_key)
            if value is not None:
                return value

        # Then, try 'param.{key}'
        named_key = self.prefix + key
        value = self.params.get(named_key)
        if value is not None:
            return value

        # try in extra params as just {key}
        value = kwargs.get(key)
        if value is not None:
            return value

        # try in params as just '{key}'
        return self.params.get(key, '')
|
|
||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
|
||||||
def res_template(template, params, **extra_params):
    """Fill in ``template`` from ``params`` and any extra keyword values.

    The formatter stored under ``params['_formatter']`` is used when it is
    set; otherwise a ``ParamFormatter`` over ``params`` is created. The
    ``url`` value comes from ``params`` (empty string when absent) and is
    percent-quoted when ``{url}`` appears after the first ``?`` of the
    template.

    :return: the formatted string
    """
    fmt = params.get('_formatter') or ParamFormatter(params)

    target_url = params.get('url', '')

    query_start = template.find('?')
    url_in_query = 0 <= query_start < template.find('{url}')
    if url_in_query:
        target_url = quote(target_url)

    return fmt.format(template, url=target_url, **extra_params)
|
|
||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
|
||||||
def StreamIter(stream, header1=None, header2=None, size=BUFF_SIZE):
    """Yield the optional headers, then the stream contents in chunks.

    :param stream: object with ``read(size)`` and ``close()``
    :param header1: yielded first when truthy
    :param header2: yielded next when truthy
    :param size: number passed to each ``stream.read()`` call

    The stream is closed when the generator finishes or is closed.
    """
    with closing(stream):
        for header in (header1, header2):
            if header:
                yield header

        # read until the stream returns an empty/falsy chunk
        buff = stream.read(size)
        while buff:
            yield buff
            buff = stream.read(size)
|
|
||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
|
||||||
def chunk_encode_iter(orig_iter):
    """Wrap each non-empty chunk of ``orig_iter`` in a chunked-encoding frame.

    For every chunk three items are yielded: the chunk length in uppercase
    hex followed by CRLF, the chunk itself, and a closing CRLF. Empty chunks
    are skipped. After the input is exhausted the terminating
    ``0\\r\\n\\r\\n`` frame is yielded.
    """
    for chunk in orig_iter:
        size = len(chunk)
        if size == 0:
            # a zero-length frame would be read as the end marker
            continue

        yield b'%X\r\n' % size
        yield chunk
        yield b'\r\n'

    yield b'0\r\n\r\n'
|
|
||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
|
||||||
def buffer_iter(status_headers, iterator, buff_size=BUFF_SIZE * 4):
    """Buffer ``iterator`` fully and set ``Content-Length`` to its total size.

    :param status_headers: object whose ``replace_header()`` is called with
        ``'Content-Length'`` and the total number of bytes as a string
    :param iterator: iterable of chunks to buffer
    :param buff_size: ``max_size`` passed to ``SpooledTemporaryFile``
    :return: a ``StreamIter`` over the buffered data; it closes the buffer
        once consumed
    """
    out = SpooledTemporaryFile(buff_size)
    size = 0

    try:
        for buff in iterator:
            size += len(buff)
            out.write(buff)

        # replace any existing content length with the measured size
        status_headers.replace_header('Content-Length', str(size))

        out.seek(0)
    except BaseException:
        # Ownership has not yet passed to StreamIter, so nothing else
        # would close the temporary file on failure.
        out.close()
        raise

    return StreamIter(out)
|
|
||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
|
||||||
def compress_gzip_iter(orig_iter):
    """Yield the chunks of ``orig_iter`` compressed as one gzip stream.

    Compressor output that is empty for a given input chunk is not yielded.
    The final item is always the result of flushing the compressor.
    """
    # MAX_WBITS + 16 selects the gzip container for the zlib compressor
    gzip_wbits = zlib.MAX_WBITS + 16
    compressor = zlib.compressobj(9, zlib.DEFLATED, gzip_wbits)

    for chunk in orig_iter:
        compressed = compressor.compress(chunk)
        if len(compressed) > 0:
            yield compressed

    yield compressor.flush()
|
|
||||||
|
|
||||||
|
|
||||||
#=============================================================================
|
|
||||||
def load_config(main_env_var, main_default_file='',
                overlay_env_var='', overlay_file=''):
    """Load a main YAML config and merge an optional overlay config over it.

    :param main_env_var: name of the environment variable that may hold the
        path of the main config file
    :param main_default_file: path used when ``main_env_var`` is not set
    :param overlay_env_var: name of the environment variable that may hold
        the path of the overlay config file
    :param overlay_file: path used when ``overlay_env_var`` is not set
    :return: a dict with the main config, updated in place with the top-level
        keys of the overlay config; an empty dict when neither file is
        configured
    """
    config = {}

    configfile = os.environ.get(main_env_var, main_default_file)
    if configfile:
        # paths may themselves reference environment variables
        config = load_yaml_config(os.path.expandvars(configfile)) or {}

    overlay_configfile = os.environ.get(overlay_env_var, overlay_file)
    if overlay_configfile:
        overlay = load_yaml_config(os.path.expandvars(overlay_configfile))
        # The main config is guarded against a falsy loader result; apply
        # the same guard here, since dict.update(None) raises TypeError.
        if overlay:
            config.update(overlay)

    return config
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
|||||||
from pywb.utils.loaders import load_yaml_config
|
from pywb.utils.loaders import load_yaml_config, load_overlay_config
|
||||||
|
|
||||||
from pywb.warcserver.basewarcserver import BaseWarcServer
|
from pywb.warcserver.basewarcserver import BaseWarcServer
|
||||||
from pywb.warcserver.utils import load_config
|
|
||||||
|
|
||||||
from pywb.warcserver.index.aggregator import CacheDirectoryIndexSource, RedisMultiKeyIndexSource
|
from pywb.warcserver.index.aggregator import CacheDirectoryIndexSource, RedisMultiKeyIndexSource
|
||||||
from pywb.warcserver.index.aggregator import GeventTimeoutAggregator, SimpleAggregator
|
from pywb.warcserver.index.aggregator import GeventTimeoutAggregator, SimpleAggregator
|
||||||
@ -40,7 +39,7 @@ class WarcServer(BaseWarcServer):
|
|||||||
|
|
||||||
if config_file:
|
if config_file:
|
||||||
try:
|
try:
|
||||||
file_config = load_config('PYWB_CONFIG_FILE', config_file)
|
file_config = load_overlay_config('PYWB_CONFIG_FILE', config_file)
|
||||||
config.update(file_config)
|
config.update(file_config)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if not custom_config:
|
if not custom_config:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user