1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-24 06:59:52 +01:00
pywb/pywb/static/queryWorker.js
John Berlin 69f7f02006
static files:
- re-formatted: default_banner.js, queryWorker.js, search.js, wb_frame.js
2019-09-04 14:59:50 -04:00

173 lines
5.4 KiB
JavaScript

// byte value of the line-feed character (\n); record boundaries are located by searching for it
var newLine = 10;
// separator placed between the hour, minute and second parts of a formatted time
var colon = ':';
// running total of the cdx records that have been reported so far
var recordCount = 0;
// TextDecoder instance used to turn the bytes of a potential cdx record into an UTF-8 string
var decoder = new TextDecoder('utf-8');
// options passed along with every decoder.decode call (streaming mode)
var decoderOptions = { stream: true };
/**
 * The trailing bytes of a response body chunk that were not consumed because they were
 * not terminated by a newline yet. They are pre-pended to the next chunk, or processed
 * on their own once the response body has ended. null when nothing is buffered.
 * @type {?Uint8Array}
 */
var bufferedPreviousChunk = null;
/**
 * Handler for the messages posted to this worker. Only messages whose type is 'query'
 * are acted on: the URL they carry is fetched and the response is handed to
 * consumeResponseBodyAsStream. A failure of the fetch is reported through
 * defaultErrorCatcher. Every other message type is ignored.
 * @param {MessageEvent} event
 */
self.onmessage = function(event) {
  var message = event.data;
  if (message.type !== 'query') {
    return;
  }
  var pendingResponse = fetch(message.queryURL);
  pendingResponse.then(consumeResponseBodyAsStream).catch(defaultErrorCatcher);
};
/**
 * Last-resort error handler: logs the error and then posts a 'finished' message carrying
 * the number of records counted so far, both as a number and as a locale formatted string.
 * @param {*} error - whatever was thrown or used as the rejection reason
 */
function defaultErrorCatcher(error) {
  console.error('A fatal error occurred', error);
  var total = recordCount;
  var finishedMessage = {
    type: 'finished',
    recordCount: total,
    recordCountFormatted: total.toLocaleString()
  };
  self.postMessage(finishedMessage);
}
/**
 * Consumes the entirety of the response body, one chunk at a time.
 *
 * Every chunk read from the body is passed to transformChunk. Once the body has ended,
 * any bytes that are still buffered are processed as the final record and a 'finished'
 * message carrying the record count is posted.
 *
 * If reading or processing fails, the reader is cancelled so the underlying stream is
 * released, and the error is reported through defaultErrorCatcher.
 * @param {Response} response
 * @returns {Promise<void>} settles once the body was fully consumed or the failure was reported
 */
function consumeResponseBodyAsStream(response) {
  var reader = response.body.getReader();

  // the body has ended: flush what is still buffered and announce completion
  function finishStream() {
    if (bufferedPreviousChunk) {
      var lastChunk = bufferedPreviousChunk;
      bufferedPreviousChunk = null;
      consumeChunk(lastChunk, true);
    }
    self.postMessage({
      type: 'finished',
      recordCount: recordCount,
      recordCountFormatted: recordCount.toLocaleString()
    });
  }

  function consumeStream(result) {
    if (result.done) {
      finishStream();
      return undefined;
    }
    transformChunk(result.value);
    // returning the next read keeps a single flat chain, so one catch below covers every step
    return reader.read().then(consumeStream);
  }

  return reader
    .read()
    .then(consumeStream)
    .catch(function(error) {
      // stop the transfer and release the stream; a failure to cancel is deliberately
      // ignored because the original error is the one that gets reported
      reader.cancel().catch(function() {});
      defaultErrorCatcher(error);
    });
}
/**
 * @desc Prepares a chunk returned from the reader of the query response body for record
 * extraction. When bytes of an earlier chunk are still buffered, a new array holding the
 * buffered bytes followed by the current chunk is built and the buffer is cleared;
 * otherwise the current chunk is used as is. The result is handed to consumeChunk as a
 * non-final chunk.
 * @param {Uint8Array} currentChunk - The current chunk of the response body to be transformed
 */
function transformChunk(currentChunk) {
  var pending = bufferedPreviousChunk;
  if (!pending) {
    // nothing left over from an earlier chunk
    consumeChunk(currentChunk, false);
    return;
  }
  // left-over bytes first, followed by the bytes that just arrived
  var merged = new Uint8Array(pending.length + currentChunk.length);
  merged.set(pending, 0);
  merged.set(currentChunk, pending.length);
  bufferedPreviousChunk = null;
  consumeChunk(merged, false);
}
/**
 * @desc Extracts the individual cdx records from the current chunk by performing binary
 * splitting on the \n character. Every newline terminated span (newline included) is passed
 * to handleCDXRecord.
 *
 * Bytes after the final newline are an incomplete record:
 *  - when last is false they are stored in bufferedPreviousChunk for the next chunk
 *  - when last is true they are handed to handleCDXRecord right away and nothing stays
 *    buffered, so no stale bytes can leak into a later response
 * @param {Uint8Array} chunk - The current chunk of the response body to be processed
 * @param {boolean} last - indicates that this is the last chunk to be processed
 */
function consumeChunk(chunk, last) {
  var chunkLen = chunk.length;
  // index of the first byte of the record currently being looked for
  var recordStart = 0;
  var idx;
  while (recordStart < chunkLen) {
    idx = chunk.indexOf(newLine, recordStart);
    if (idx === -1) {
      break;
    }
    // a view is enough here, the record is handed over immediately, so no copy is made
    handleCDXRecord(chunk.subarray(recordStart, idx + 1));
    recordStart = idx + 1;
  }
  // the remainder is copied so that keeping it around does not retain the whole chunk
  var remainder = recordStart < chunkLen ? chunk.slice(recordStart) : null;
  if (last) {
    // cleared before handling so the state is clean even if the handler throws
    bufferedPreviousChunk = null;
    if (remainder) {
      handleCDXRecord(remainder);
    }
  } else {
    bufferedPreviousChunk = remainder;
  }
}
/**
 * Converts the Uint8Array representation of the potential cdx object into an UTF-8 string and
 * then attempts to convert it into a JSON object. If the conversion was successful and the
 * result carries a string timestamp, the record count is incremented and a 'cdxRecord'
 * message containing the record, time information derived from the timestamp and the
 * current record count is posted.
 *
 * Input that is empty after trimming is ignored silently. Input that cannot be decoded, is
 * not valid JSON, or does not yield an object with a string timestamp is logged and skipped:
 * it is neither counted nor posted, and it does not interrupt the processing of the
 * remaining records.
 * @param {Uint8Array} binaryCDXRecord - The potential cdx object to be converted
 */
function handleCDXRecord(binaryCDXRecord) {
  var decodedCDXRecord;
  var cdxRecord;
  try {
    decodedCDXRecord = decoder.decode(binaryCDXRecord, decoderOptions).trim();
  } catch (e) {
    console.error('failed to decode the potential cdx record', e);
    return;
  }
  if (!decodedCDXRecord) return; // we had an empty string or a string with only whitespace and or \n
  try {
    cdxRecord = JSON.parse(decodedCDXRecord);
  } catch (e) {
    console.error('bad JSON in the potential cdx record', e);
    return;
  }
  // valid JSON is not necessarily a cdx object (e.g. null or a number); without a string
  // timestamp the substring calls below would throw and abort the whole response
  var ts = cdxRecord ? cdxRecord.timestamp : undefined;
  if (typeof ts !== 'string') {
    console.error('the potential cdx record has no usable timestamp', cdxRecord);
    return;
  }
  // only count records that are actually reported
  recordCount += 1;
  // the timestamp is sliced by position: 4 digit year followed by 2 digits each for
  // month, day, hours, minutes and seconds
  var day = ts.substring(6, 8);
  self.postMessage({
    type: 'cdxRecord',
    record: cdxRecord,
    timeInfo: {
      year: ts.substring(0, 4),
      month: ts.substring(4, 6),
      // a leading zero is dropped from the day (the month keeps its zero)
      day: day.charAt(0) === '0' ? day.charAt(1) : day,
      time:
        ts.substring(8, 10) +
        colon +
        ts.substring(10, 12) +
        colon +
        ts.substring(12, 14)
    },
    wasError: false,
    recordCount: recordCount,
    recordCountFormatted: recordCount.toLocaleString()
  });
}