From a84ec2abc79142b909eca5f573698d0c9a679b86 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sat, 28 Dec 2013 17:39:43 -0800 Subject: [PATCH] first iteration of archival mode working w/ banner insertion!! --- pywb/archiveloader.py | 64 ++++++++--- pywb/indexreader.py | 3 + pywb/regexmatch.py | 8 +- pywb/replay.py | 228 +++++++++++++++++++++++++++++++++----- pywb/run.sh | 8 -- pywb/utils.py | 10 ++ pywb/wbapp.py | 16 ++- pywb/wbexceptions.py | 11 ++ pywb/wbhtml.py | 45 ++++++-- pywb/wbrequestresponse.py | 5 +- pywb/wburlrewriter.py | 13 ++- run.sh | 11 ++ static/wb.css | 13 +++ static/wb.js | 42 +++++++ 14 files changed, 410 insertions(+), 67 deletions(-) delete mode 100755 pywb/run.sh create mode 100755 run.sh create mode 100644 static/wb.css create mode 100644 static/wb.js diff --git a/pywb/archiveloader.py b/pywb/archiveloader.py index 9b1fe739..51ae4498 100644 --- a/pywb/archiveloader.py +++ b/pywb/archiveloader.py @@ -15,8 +15,8 @@ class HttpStreamLoader: self.hmacDuration = hmacDuration def load(self, url, offset, length): - if length: - rangeHeader = 'bytes={0}-{1}'.format(offset, int(offset) + int(length) - 1) + if length > 0: + rangeHeader = 'bytes={0}-{1}'.format(offset, offset + length - 1) else: rangeHeader = 'bytes={0}-'.format(offset) @@ -31,7 +31,20 @@ class HttpStreamLoader: #================================================================= -WBArchiveRecord = collections.namedtuple('WBArchiveRecord', 'parsed, stream, statusline, httpHeaders') +# Untested, but for completeness +class FileStreamLoader: + def load(self, url, offset, length): + if url.startswith('file://'): + url = url[len('file://'):] + + afile = open(url, 'rb') + afile.seek(offset) + return afile + + + +#================================================================= +WBArchiveRecord = collections.namedtuple('WBArchiveRecord', 'type, record, stream, statusline, httpHeaders') #================================================================= class ArchiveLoader: @@ -46,14 +59,17 @@ class ArchiveLoader: '.arc': (hanzo.warctools.ArcRecord, 'arc', False), } - HTTP_STATUS_REGEX = re.compile('^HTTP/[\d.]+ ((\d+).*)$') + HTTP_STATUS_REGEX = re.compile('^HTTP/[\d.]+ (\d+.*)$') @staticmethod def createDefaultLoaders(): http = HttpStreamLoader() + file = FileStreamLoader() return { 'http': http, 'https': http, + 'file': file, + '': file } @@ -86,10 +102,15 @@ class ArchiveLoader: else: decomp = None + try: + length = int(length) + except: + length = -1 - raw = loader.load(url, offset, length) - reader = LineReader(raw, self.chunkSize, decomp) + raw = loader.load(url, long(offset), length) + + reader = LineReader(raw, length, self.chunkSize, decomp) parser = loaderCls.make_parser() @@ -104,27 +125,33 @@ class ArchiveLoader: if aFormat == 'arc': - recType = 'arc-response' + recType = 'response' empty = (utils.get_header(parsed.headers, 'length') == 0) else: recType = utils.get_header(parsed.headers, 'WARC-Type') empty = (utils.get_header(parsed.headers, 'Content-Length') == '0') - parsed.recType = recType - parsed.aFormat = aFormat - + # special case: empty w/arc record (hopefully a revisit) if empty: - return WBArchiveRecord(parsed, reader, '400', []) + statusline = '204 No Content' + headers = [] + # special case: warc records that are not expected to have http headers + # attempt to add 200 status and content-type elif recType == 'metadata' or recType == 'resource': + statusline = '200 OK' headers = [('Content-Type', utils.get_header(parsed.headers, 'Content-Type'))] - return WBArchiveRecord(parsed, reader, '200 OK', headers) + # special case: http 0.9 response, no status or headers + elif recType == 'response' and (';version=0.9' in utils.get_header(parsed.headers, 'Content-Type')): + statusline = '200 OK' + headers = [] + # response record: parse HTTP status and headers! else: (statusline, headers) = self.parseHttpHeaders(reader) - return WBArchiveRecord(parsed, reader, statusline, headers) + return WBArchiveRecord((aFormat, recType), parsed, reader, statusline, headers) def parseHttpHeaders(self, stream): @@ -153,20 +180,23 @@ class ArchiveLoader: #================================================================= class LineReader: - def __init__(self, stream, chunkSize = 1024, decomp = None): + def __init__(self, stream, maxLen = 0, chunkSize = 1024, decomp = None): self.stream = stream self.chunkSize = chunkSize self.decomp = decomp self.buff = None - self.numread = 0 + self.numRead = 0 + self.maxLen = maxLen def _fillbuff(self, chunkSize = None): if not chunkSize: chunkSize = self.chunkSize if not self.buff or self.buff.pos >= self.buff.len: - data = self.stream.read(chunkSize) - self.numread += len(data) + toRead = min(self.maxLen - self.numRead, self.chunkSize) if (self.maxLen > 0) else self.chunkSize + data = self.stream.read(toRead) + self.numRead += len(data) + if self.decomp: data = self.decomp.decompress(data) diff --git a/pywb/indexreader.py b/pywb/indexreader.py index 8cc80395..a757788a 100644 --- a/pywb/indexreader.py +++ b/pywb/indexreader.py @@ -80,6 +80,9 @@ class RemoteCDXServer: ArchivalUrl.REPLAY: {'sort': 'closest', 'filter': '!statuscode:(500|502|504)', 'limit': replayClosest, 'closest': wburl.timestamp, 'resolveRevisits': True}, + # BUG: resolveRevisits currently doesn't work for this type of query + # This is not an issue in archival mode, as there is a redirect to the actual timestamp query + # but may be an issue in proxy mode ArchivalUrl.LATEST_REPLAY: {'sort': 'reverse', 'filter': 'statuscode:[23]..', 'limit': '1', 'resolveRevisits': True} diff --git a/pywb/regexmatch.py b/pywb/regexmatch.py index 9f3d4242..c2b61bbc 100644 --- a/pywb/regexmatch.py +++ b/pywb/regexmatch.py @@ -91,8 +91,8 @@ class JSRewriter(RegexRewriter): """ - def __init__(self, httpPrefix, extra = []): - rules = self._createRules(httpPrefix) + def __init__(self, rewriter, extra = []): + rules = self._createRules(rewriter.getAbsUrl()) rules.extend(extra) RegexRewriter.__init__(self, rules) @@ -167,12 +167,10 @@ class CSSRewriter(RegexRewriter): if __name__ == "__main__": import doctest - rwPrefix = '/web/20131010im_/' - arcrw = ArchivalUrlRewriter('/20131010im_/http://example.com/', '/web/') def test_js(string, extra = []): - return JSRewriter(rwPrefix, extra).replaceAll(string) + return JSRewriter(arcrw, extra).replaceAll(string) def test_css(string): return CSSRewriter(arcrw).replaceAll(string) diff --git a/pywb/replay.py b/pywb/replay.py index 1c62a5d6..a992a5e4 100644 --- a/pywb/replay.py +++ b/pywb/replay.py @@ -1,8 +1,32 @@ +import StringIO + import indexreader from wbrequestresponse import WbResponse +from wbarchivalurl import ArchivalUrl import utils +from wburlrewriter import ArchivalUrlRewriter -class ReplayHandler: +import wbhtml +import regexmatch +import wbexceptions + +#================================================================= +class FullHandler: + def __init__(self, query, replay): + self.query = query + self.replay = replay + + def __call__(self, wbrequest, _): + query_response = self.query(wbrequest, None) + + if (wbrequest.wb_url.type == ArchivalUrl.QUERY) or (wbrequest.wb_url.type == ArchivalUrl.URL_QUERY): + return query_response + + return self.replay(wbrequest, query_response) + + +#================================================================= +class ReplayHandler(object): def __init__(self, resolvers, archiveloader): self.resolvers = resolvers self.archiveloader = archiveloader @@ -11,38 +35,45 @@ class ReplayHandler: cdxlist = query_response.body last_e = None first = True + for cdx in cdxlist: try: cdx = indexreader.CDXCaptureResult(cdx) - # First time through, check if do redirect before warc load - if first and (cdx['timestamp'] != wbrequest.wb_url.timestamp): - return WbResponse.better_timestamp_response(wbrequest, cdx['timestamp']) + # ability to intercept and redirect + if first: + self._checkRedir(wbrequest, cdx) + first = False response = self.doReplay(cdx, wbrequest) if response: - # if a fallback, redirect to exact timestamp! - if not first and (cdx['timestamp'] != wbrequest.wb_url.timestamp): - response.close() - return WbResponse.better_timestamp_response(wbrequest, cdx['timestamp']) - + response.cdx = cdx return response - first = False + #except wbexceptions.InternalRedirect as ir: + # raise ir - except Exception, e: + except wbexceptions.CaptureException as ce: import traceback traceback.print_exc() - last_e = e + last_e = ce pass if last_e: raise last_e + else: + raise wbexceptions.ArchiveLoadFailed() + + def _checkRedir(self, wbrequest, cdx): + return None def _load(self, cdx, revisit = False): - prefix = '' if not revisit else 'orig.' - return self.archiveloader.load(self.resolveFull(cdx[prefix + 'filename']), cdx[prefix + 'offset'], cdx[prefix + 'length']) + if revisit: + return self.archiveloader.load(self.resolveFull(cdx['orig.filename']), cdx['orig.offset'], cdx['orig.length']) + else: + return self.archiveloader.load(self.resolveFull(cdx['filename']), cdx['offset'], cdx['length']) + def doReplay(self, cdx, wbrequest): hasCurr = (cdx['filename'] != '-') @@ -75,19 +106,8 @@ class ReplayHandler: else: raise wbexceptions.CaptureException('Invalid CDX' + cdx) - # Check for self redirect - if headersRecord.statusline.startswith('3'): - if self.isSelfRedirect(wbrequest, headersRecord): - raise wbexception.CaptureException('Self Redirect: ' + cdx) - return WbResponse.stream_response(headersRecord.statusline, headersRecord.httpHeaders, payloadRecord.stream) - def isSelfRedirect(self, wbrequest, record): - requestUrl = wbrequest.wb_url.url.lower() - locationUrl = utils.get_header(record.httpHeaders, 'Location').lower() - return requestUrl == locationUrl - #ArchivalUrlRewriter.stripProtocol(requestUrl) == ArchivalUrlRewriter.stripProtocol(locationUrl) - def resolveFull(self, filename): # Attempt to resolve cdx file to full path @@ -100,6 +120,164 @@ class ReplayHandler: raise exceptions.UnresolvedArchiveFileException('Archive File Not Found: ' + cdx.filename) +#================================================================= +class RewritingReplayHandler(ReplayHandler): + + + REWRITE_TYPES = { + 'html': ('text/html', 'application/xhtml'), + 'css': ('text/css'), + 'js': ('text/javascript', 'application/javascript', 'application/x-javascript'), + 'xml': ('/xml', '+xml', '.xml', '.rss'), + } + + + PROXY_HEADERS = ('content-type', 'content-disposition') + + URL_REWRITE_HEADERS = ('location', 'content-location', 'content-base') + + ENCODING_HEADERS = ('content-encoding', 'transfer-encoding') + + + def __init__(self, resolvers, archiveloader, headerPrefix = 'X-Archive-Orig-', headInsert = None): + ReplayHandler.__init__(self, resolvers, archiveloader) + self.headerPrefix = headerPrefix + self.headInsert = headInsert + + + def _canonContentType(self, contentType): + for type, mimelist in self.REWRITE_TYPES.iteritems(): + for mime in mimelist: + if mime in contentType: + return type + + return None + + + def __call__(self, wbrequest, query_response): + urlrewriter = ArchivalUrlRewriter(wbrequest.wb_url, wbrequest.wb_prefix) + wbrequest.urlrewriter = urlrewriter + + response = ReplayHandler.__call__(self, wbrequest, query_response) + + if response and response.cdx: + self._checkRedir(wbrequest, response.cdx) + + # Transparent! + if wbrequest.wb_url.mod == 'id_': + return response + + contentType = utils.get_header(response.headersList, 'Content-Type') + + canonType = self._canonContentType(contentType) + + (newHeaders, remHeaders) = self._rewriteHeaders(response.headersList, (canonType is not None)) + + # binary type, just send through + if canonType is None: + response.headersList = newHeaders + return response + + # Handle text rewriting + # TODO: better way to pass this + stream = response._stream + + # special case -- need to ungzip the body + if (utils.contains_header(remHeaders, ('Content-Encoding', 'gzip'))): + stream = archiveloader.LineStream(stream, decomp = zlib.decompressobj(16 + zlib.MAX_WBITS)) + + return self._rewriteContent(canonType, urlrewriter, stream, newHeaders, response) + + # TODO: first non-streaming attempt, probably want to stream + def _rewriteContent(self, canonType, urlrewriter, stream, newHeaders, origResponse): + if canonType == 'html': + out = StringIO.StringIO() + htmlrewriter = wbhtml.WBHtml(urlrewriter, out, self.headInsert) + + try: + buff = stream.read() + while buff: + htmlrewriter.feed(buff) + buff = stream.read() + + htmlrewriter.close() + + #except Exception as e: + # print e + + finally: + value = [out.getvalue()] + out.close() + + else: + if canonType == 'css': + rewriter = regexmatch.CSSRewriter(urlrewriter) + elif canonType == 'js': + rewriter = regexmatch.JSRewriter(urlrewriter) + + def gen(): + try: + buff = stream.read() + while buff: + yield rewriter.replaceAll(buff) + buff = stream.read() + + finally: + stream.close() + + value = gen() + + return WbResponse(status = origResponse.status, headersList = newHeaders, value = value) + + + + def _rewriteHeaders(self, headers, stripEncoding = False): + newHeaders = [] + removedHeaders = [] + + for (name, value) in headers: + lowername = name.lower() + if lowername in self.PROXY_HEADERS: + newHeaders.append((name, value)) + elif lowername in self.URL_REWRITE_HEADERS: + newHeaders.append((name, urlrewriter.rewrite(value))) + elif lowername in self.ENCODING_HEADERS: + if stripEncoding: + removedHeaders.append((name, value)) + else: + newHeaders.append((name, value)) + else: + newHeaders.append((self.headerPrefix + name, value)) + + return (newHeaders, removedHeaders) + + + def _checkRedir(self, wbrequest, cdx): + if cdx and (cdx['timestamp'] != wbrequest.wb_url.timestamp): + newUrl = wbrequest.urlrewriter.getTimestampUrl(cdx['timestamp'], cdx['original']) + raise wbexceptions.InternalRedirect(newUrl) + #return WbResponse.better_timestamp_response(wbrequest, cdx['timestamp']) + + return None + + + def doReplay(self, cdx, wbrequest): + wbresponse = ReplayHandler.doReplay(self, cdx, wbrequest) + + # Check for self redirect + if wbresponse.status.startswith('3'): + if self.isSelfRedirect(wbrequest, wbresponse.headersList): + raise wbexceptions.CaptureException('Self Redirect: ' + str(cdx)) + + return wbresponse + + def isSelfRedirect(self, wbrequest, httpHeaders): + requestUrl = wbrequest.wb_url.url.lower() + locationUrl = utils.get_header(httpHeaders, 'Location').lower() + #return requestUrl == locationUrl + return (ArchivalUrlRewriter.stripProtocol(requestUrl) == ArchivalUrlRewriter.stripProtocol(locationUrl)) + + #====================================== # PrefixResolver - convert cdx file entry to url with prefix if url contains specified string #====================================== diff --git a/pywb/run.sh b/pywb/run.sh deleted file mode 100755 index c8a1198d..00000000 --- a/pywb/run.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh - -app=$1 -if [ -z "$app" ]; then - app=wbapp.py -fi - -uwsgi --http :9090 --wsgi-file $app diff --git a/pywb/utils.py b/pywb/utils.py index 89dc7459..ba427e55 100644 --- a/pywb/utils.py +++ b/pywb/utils.py @@ -17,6 +17,16 @@ def get_header(headersList, name): if (value[0].lower() == nameLower): return value[1] + return None + +def contains_header(headersList, seekHeader): + header = get_header(headersList, seekHeader[0]) + if not header: + return False + + # see if found header matches value! + return (header == seekHeader[1]) + class HMACCookieMaker: def __init__(self, key, name): self.key = key diff --git a/pywb/wbapp.py b/pywb/wbapp.py index 755550de..5a70c42b 100644 --- a/pywb/wbapp.py +++ b/pywb/wbapp.py @@ -1,4 +1,5 @@ from query import QueryHandler +from replay import FullHandler import wbexceptions from wbrequestresponse import WbResponse @@ -19,7 +20,16 @@ class WBHandler: query = QueryHandler() import testwb -replay = testwb.createReplay() + +headInsert = """ + + + + + +""" + +replay = testwb.createReplay(headInsert) ## =========== parser = ArchivalRequestRouter( @@ -28,6 +38,7 @@ parser = ArchivalRequestRouter( 't1' : [WBHandler()], 't2' : [query], 't3' : [query, replay], + 'web': FullHandler(query, replay) }, hostpaths = ['http://localhost:9090/']) ## =========== @@ -42,6 +53,9 @@ def application(env, start_response): if not response: raise wbexceptions.NotFoundException(env['REQUEST_URI'] + ' was not found') + except wbexceptions.InternalRedirect as ir: + response = WbResponse(status = ir.status, headersList = ir.httpHeaders) + except Exception as e: last_exc = e import traceback diff --git a/pywb/wbexceptions.py b/pywb/wbexceptions.py index ac882b4f..036f309b 100644 --- a/pywb/wbexceptions.py +++ b/pywb/wbexceptions.py @@ -38,4 +38,15 @@ class InvalidArchiveRecordException(CaptureException): super(InvalidArchiveRecordException, self).__init__(msg) self.errList = errList +class ArchiveLoadFailed(CaptureException): + pass + +class InternalRedirect(Exception): + def __init__(self, location, status = '302 Internal Redirect'): + Exception.__init__(self, 'Redirecting -> ' + location) + self.status = status + self.httpHeaders = [('Location', location)] + + def status(_): + return self.status diff --git a/pywb/wbhtml.py b/pywb/wbhtml.py index 946715da..05e81e40 100644 --- a/pywb/wbhtml.py +++ b/pywb/wbhtml.py @@ -22,6 +22,9 @@ class WBHtml(HTMLParser): >>> parse('') + >>> parse('') + + >>> parse('') @@ -41,7 +44,18 @@ class WBHtml(HTMLParser): # Unterminated style tag auto-terminate >>> parse(' - """ + + # Head Insertion + >>> parse('Test', headInsert = '') + Test + + >>> parse('
SomeTest
', headInsert = '/* Insert */') + /* Insert */
SomeTest
+ + >>> parse('
SomeTest
', headInsert = '') +
SomeTest
+ + """ REWRITE_TAGS = { 'a': {'href': ''}, @@ -53,6 +67,7 @@ class WBHtml(HTMLParser): 'body': {'background': 'im_'}, 'del': {'cite': ''}, 'embed': {'src': 'oe_'}, + 'head': {'': ''}, # for head rewriting 'iframe': {'src': 'if_'}, 'img': {'src': 'im_'}, 'ins': {'cite': ''}, @@ -64,6 +79,7 @@ class WBHtml(HTMLParser): 'object': {'codebase': 'oe_', 'data': 'oe_'}, 'q': {'cite': ''}, + 'ref': {'href': 'oe_'}, 'script': {'src': 'js_'}, 'div': {'data-src' : '', 'data-uri' : ''}, @@ -73,17 +89,21 @@ class WBHtml(HTMLParser): STATE_TAGS = ['script', 'style'] + HEAD_TAGS = ['html', 'head', 'base', 'link', 'meta', 'title', 'style', 'script', 'object', 'bgsound'] - def __init__(self, rewriter, outstream = None): + + def __init__(self, rewriter, outstream = None, headInsert = None): HTMLParser.__init__(self) self.rewriter = rewriter self._wbParseContext = None self.out = outstream if outstream else sys.stdout - self.jsRewriter = JSRewriter(rewriter.getAbsUrl()) + self.jsRewriter = JSRewriter(rewriter) self.cssRewriter = CSSRewriter(rewriter) + self.headInsert = headInsert + def close(self): if (self._wbParseContext): @@ -137,6 +157,11 @@ class WBHtml(HTMLParser): elif (tag in WBHtml.STATE_TAGS) and (self._wbParseContext == None): self._wbParseContext = tag + # special case: head insertion, non-head tags + elif (self.headInsert and (self._wbParseContext == None) and (tag not in WBHtml.HEAD_TAGS)): + self.out.write(self.headInsert) + self.headInsert = None + # attr rewriting handler = WBHtml.REWRITE_TAGS.get(tag) if not handler: @@ -159,8 +184,9 @@ class WBHtml(HTMLParser): attrValue = self._rewriteCSS(attrValue) # special case: meta tag - elif (tag == 'meta') and (attrName == 'content') and self.hasAttr(tagAttrs, ('http-equiv', 'refresh')): - attrValue = self._rewriteMetaRefresh(attrValue) + elif (tag == 'meta') and (attrName == 'content'): + if self.hasAttr(tagAttrs, ('http-equiv', 'refresh')): + attrValue = self._rewriteMetaRefresh(attrValue) else: rwMod = handler.get(attrName) @@ -171,6 +197,11 @@ class WBHtml(HTMLParser): self.out.write('/>' if isStartEnd else '>') + # special case: head tag + if (self.headInsert) and (self._wbParseContext == None) and (tag == "head"): + self.out.write(self.headInsert) + self.headInsert = None + return True def handle_starttag(self, tag, attrs): @@ -233,8 +264,8 @@ if __name__ == "__main__": rewriter = ArchivalUrlRewriter('/20131226101010/http://example.com/some/path/index.html', '/web/') - def parse(data): - parser = WBHtml(rewriter) + def parse(data, headInsert = None): + parser = WBHtml(rewriter, headInsert = headInsert) parser.feed(data) parser.close() diff --git a/pywb/wbrequestresponse.py b/pywb/wbrequestresponse.py index 46d7208e..402a6b13 100644 --- a/pywb/wbrequestresponse.py +++ b/pywb/wbrequestresponse.py @@ -118,7 +118,9 @@ class WbResponse: finally: stream.close() - return WbResponse(statusline, headersList = headers, value = streamGen()) + response = WbResponse(statusline, headersList = headers, value = streamGen()) + response._stream = stream + return response @staticmethod def better_timestamp_response(wbrequest, newTimestamp): @@ -139,7 +141,6 @@ class WbResponse: if env['REQUEST_METHOD'] == 'HEAD': if hasattr(self.body, 'close'): self.body.close() - return self.body return [] if hasattr(self.body, '__iter__'): diff --git a/pywb/wburlrewriter.py b/pywb/wburlrewriter.py index 3f059415..d27142ff 100644 --- a/pywb/wburlrewriter.py +++ b/pywb/wburlrewriter.py @@ -38,6 +38,9 @@ class ArchivalUrlRewriter: >>> ArchivalUrlRewriter('/19960708im_/http://domain.example.com/path.txt', '/abc/').getAbsUrl() '/abc/19960708im_/' + >>> ArchivalUrlRewriter('/2013id_/example.com/file/path/blah.html', '/123/').getTimestampUrl('20131024') + '/123/20131024id_/http://example.com/file/path/blah.html' + >>> ArchivalUrlRewriter.stripProtocol('https://example.com') == ArchivalUrlRewriter.stripProtocol('http://example.com') True """ @@ -46,8 +49,8 @@ class ArchivalUrlRewriter: PROTOCOLS = ['http://', 'https://', '//', 'ftp://', 'mms://', 'rtsp://', 'wais://'] - def __init__(self, wburl_str, prefix): - self.wburl = ArchivalUrl(wburl_str) + def __init__(self, wburl, prefix): + self.wburl = wburl if isinstance(wburl, ArchivalUrl) else ArchivalUrl(wburl) self.prefix = prefix if self.prefix.endswith('/'): @@ -84,6 +87,12 @@ class ArchivalUrlRewriter: def getAbsUrl(self, url = ''): return self.prefix + ArchivalUrl.to_str(self.wburl.type, self.wburl.mod, self.wburl.timestamp, url) + def getTimestampUrl(self, timestamp, url = None): + if not url: + url = self.wburl.url + + return self.prefix + ArchivalUrl.to_str(self.wburl.type, self.wburl.mod, timestamp, url) + def setBaseUrl(self, newUrl): self.wburl.url = newUrl diff --git a/run.sh b/run.sh new file mode 100755 index 00000000..e0ee2f35 --- /dev/null +++ b/run.sh @@ -0,0 +1,11 @@ +#!/bin/sh + +mypath=$(cd `dirname $0` && pwd) + +app=$1 +cd $mypath/pywb +if [ -z "$app" ]; then + app=wbapp.py +fi + +uwsgi --static-map /static=$mypath/static --http :9090 --wsgi-file $app diff --git a/static/wb.css b/static/wb.css new file mode 100644 index 00000000..33e55701 --- /dev/null +++ b/static/wb.css @@ -0,0 +1,13 @@ + +#_wayback_banner +{ + display: block; + position: absolute; + top: 0px; + width: 100%; + border: 1px solid; + background-color: lightYellow; + text-align: center; + z-index: 2147483643; +} + diff --git a/static/wb.js b/static/wb.js new file mode 100644 index 00000000..84299af4 --- /dev/null +++ b/static/wb.js @@ -0,0 +1,42 @@ + + +// Rewritten location and domain obj setup +window.WB_wombat_location = window.location + +if (window.top != window) { + window.top.WB_wombat_location = window.top.location +} + +if (window.opener) { + window.opener.WB_wombat_location = window.opener.location +} + +document.WB_wombat_domain = document.domain + +function initBanner() +{ + var BANNER_ID = "_wayback_banner"; + + var banner = document.getElementById(BANNER_ID); + + if (!banner) { + banner = document.createElement("wb_div"); + banner.setAttribute("id", BANNER_ID); + banner.style.cssText = "display: block; width: 100%; border: 1px solid; background-color: lightYellow; text-align: center"; + + //banner.innerHTML = ""; + banner.innerHTML = "PyWb Banner!" + document.body.insertBefore(banner, document.body.firstChild); + } +} + +var readyStateCheckInterval = setInterval(function() { + if (document.readyState === "interactive" || document.readyState === "complete") { + initBanner(); + clearInterval(readyStateCheckInterval); + } +}, 10); + + + +