1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 08:04:49 +01:00

content rewriter: guess the text type from the start of the content when no Content-Type header is provided

This commit is contained in:
Ilya Kreymer 2017-08-10 10:25:32 -07:00 committed by Ilya Kreymer
parent 9fdff8388e
commit 2115817792

View File

@ -283,6 +283,7 @@ class RewriteInfo(object):
def _fill_text_type_and_charset(self):
content_type = self.record.http_headers.get_header('Content-Type')
if not content_type:
self.text_type = 'html-guess'
return
parts = content_type.split(';', 1)
@ -304,17 +305,27 @@ class RewriteInfo(object):
self.text_type = 'css'
# only attempt to resolve between html and other text types
if self.text_type != 'html':
return
if self.text_type == 'html':
if mod != 'js_' and mod != 'cs_':
return
if mod != 'js_' and mod != 'cs_':
elif self.text_type != 'html-guess':
return
buff = self.read_and_keep(128)
# check if doesn't start with a tag, then likely not html
if not self.TAG_REGEX.match(buff):
is_html = self.TAG_REGEX.match(buff)
if not is_html:
if self.text_type == 'html-guess' and mod not in ('js_', 'cs_'):
self.text_type = None
return
self.text_type = 'js' if mod == 'js_' else 'css'
else:
if self.text_type == 'html-guess':
self.text_type = 'html'
@property
def content_stream(self):