1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-14 15:53:28 +01:00
pywb/pywb/utils/loaders.py
Ilya Kreymer 5345459298 pywb 0.2!
move to distinct packages: pywb.utils, pywb.cdx, pywb.warc, pywb.util, pywb.rewrite!
each package will have its own README and tests
shared sample_data and install
2014-02-17 10:01:09 -08:00

153 lines
4.1 KiB
Python

"""
This module provides loaders for reading files from the local file
system and over http, covering both local and remote access
"""
import hashlib
import hmac
import os
import time
import urllib2
#=================================================================
# load a reader from http
#=================================================================
class HttpLoader(object):
    """
    Open a file-like reader over http for a byte range of a url.

    Every request carries a Range header; when a cookie_maker was
    supplied, the value of its make() call is sent as the Cookie header.
    """

    def __init__(self, cookie_maker=None):
        """
        Remember the optional cookie_maker (any object with a make()
        method); a falsy value means no Cookie header is sent.
        """
        self.cookie_maker = cookie_maker

    def load(self, url, offset, length):
        """
        Request the bytes of url starting at offset and return whatever
        urllib2.urlopen() returns for that request.

        A positive length asks for exactly that many bytes (inclusive
        range offset .. offset + length - 1); any other length leaves
        the range open-ended, i.e. from offset to the end.
        """
        # An empty end position yields the open-ended 'bytes=N-' form
        last_byte = offset + length - 1 if length > 0 else ''

        headers = {'Range': 'bytes={0}-{1}'.format(offset, last_byte)}

        if self.cookie_maker:
            headers['Cookie'] = self.cookie_maker.make()

        return urllib2.urlopen(urllib2.Request(url, headers=headers))
#=================================================================
# Signed Cookie-Maker
#=================================================================
class HMACCookieMaker(object):
    """
    Utility class to produce signed HMAC digest cookies
    to be used with each http request

    The cookie has the form ``name=expire-hexdigest`` or, when an
    extra_id is given, ``name-extra_id=expire-hexdigest``. The digest
    is an HMAC-MD5 over ``expire`` (or ``extra_id-expire``) using key.
    """

    def __init__(self, key, name, duration=10):
        """
        key      -- secret used to sign the cookie
        name     -- cookie name
        duration -- lifetime in seconds, added to the current time
                    to form the expiry timestamp
        """
        self.key = key
        self.name = name
        # duration in seconds
        self.duration = duration

    @staticmethod
    def _as_bytes(value):
        """Return value unchanged if it is a byte string, else utf-8 encode it."""
        if isinstance(value, bytes):
            return value
        return value.encode('utf-8')

    def make(self, extra_id=''):
        """
        Build and return the signed cookie string, expiring
        self.duration seconds from now.
        """
        # int() yields the same digits as long() and exists on every
        # interpreter version
        expire = str(int(time.time() + self.duration))

        msg = extra_id + '-' + expire if extra_id else expire

        # MD5 was the implicit default of hmac.new(); naming it keeps
        # the digest identical while not depending on that default
        hexdigest = hmac.new(self._as_bytes(self.key),
                             self._as_bytes(msg),
                             hashlib.md5).hexdigest()

        if extra_id:
            return '{0}-{1}={2}-{3}'.format(self.name, extra_id,
                                            expire, hexdigest)

        return '{0}={1}-{2}'.format(self.name, expire, hexdigest)
#=================================================================
# load a reader from local filesystem
#=================================================================
class FileLoader(object):
    """
    Load a file-like reader from the local file system
    """

    def load(self, url, offset, length):
        """
        Open the local file named by url (an optional 'file://' prefix
        is stripped), seek to offset and return a reader.

        With a positive length the reader is a LimitReader capped at
        length bytes; otherwise the opened file itself is returned,
        positioned at offset, so the caller reads to the end. This
        matches HttpLoader, which sends an open-ended range when
        length is not positive.
        """
        if url.startswith('file://'):
            url = url[len('file://'):]

        afile = open(url, 'rb')

        try:
            afile.seek(offset)
        except Exception:
            # don't leak the handle when the offset is rejected
            afile.close()
            raise

        if length > 0:
            return LimitReader(afile, length)

        # previously fell through and returned None, leaking afile
        return afile
#=================================================================
# Limit Reader
#=================================================================
class LimitReader(object):
    """
    A reader which will not read more than specified limit

    Wraps a stream and tracks how many bytes may still be read;
    each read()/readline() lowers the remaining limit by the number
    of bytes actually returned.
    """

    def __init__(self, stream, limit):
        """
        stream -- underlying object providing read/readline/close
        limit  -- maximum number of bytes to hand out in total
        """
        self.stream = stream
        self.limit = limit

        # NOTE(review): a falsy limit is bumped to 1; the reason is
        # not visible here, behavior kept as-is
        if not self.limit:
            self.limit = 1

    def _clamp(self, length):
        """
        Return how many bytes may be requested from the stream.

        A missing, zero or negative length means 'as much as allowed',
        i.e. the remaining limit. A negative length must not be passed
        through to the stream, where it would mean 'read everything'
        and bypass the limit.
        """
        if not length or length < 0:
            return self.limit

        return min(length, self.limit)

    def read(self, length=None):
        """Read up to length bytes, never exceeding the remaining limit."""
        buff = self.stream.read(self._clamp(length))
        self.limit -= len(buff)
        return buff

    def readline(self, length=None):
        """Read one line of up to length bytes, within the remaining limit."""
        buff = self.stream.readline(self._clamp(length))
        self.limit -= len(buff)
        return buff

    def close(self):
        """Close the underlying stream."""
        self.stream.close()
#=================================================================
# Local text file with known size -- used for binsearch
#=================================================================
class SeekableTextFileReader(object):
    """
    A minimal file-like wrapper around a local file which records
    the file's total size when opened and reports it via getsize().
    Supports the seek() operation.
    Assumed to be a text file. Used for binsearch.
    """

    def __init__(self, filename):
        """Open filename in binary mode and record its size in bytes."""
        self.filename = filename
        self.fh = open(filename, 'rb')
        self.size = os.stat(filename).st_size

    def getsize(self):
        """Return the size recorded when the file was opened."""
        return self.size

    def read(self):
        """Return everything from the current position to the end."""
        return self.fh.read()

    def readline(self):
        """Return the next line from the current position."""
        return self.fh.readline()

    def seek(self, offset):
        """Move to absolute position offset in the file."""
        return self.fh.seek(offset)

    def close(self):
        """Close the underlying file handle."""
        return self.fh.close()