mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
chardet optimization: using chardet feed() approach to avoid passing in entire buffer
This commit is contained in:
parent
d1ad9b5e69
commit
a6b4ae4c47
@ -1,6 +1,8 @@
|
|||||||
import chardet
|
#import chardet
|
||||||
import pkgutil
|
import pkgutil
|
||||||
import yaml
|
import yaml
|
||||||
|
from chardet.universaldetector import UniversalDetector
|
||||||
|
from io import BytesIO
|
||||||
|
|
||||||
from header_rewriter import RewrittenStatusAndHeaders
|
from header_rewriter import RewrittenStatusAndHeaders
|
||||||
|
|
||||||
@ -151,11 +153,31 @@ class RewriteContent:
|
|||||||
|
|
||||||
return buff
|
return buff
|
||||||
|
|
||||||
|
#def _detect_charset(self, stream):
|
||||||
|
# buff = stream.read(8192)
|
||||||
|
# result = chardet.detect(buff)
|
||||||
|
# print "chardet result: " + str(result)
|
||||||
|
# return (result['encoding'], buff)
|
||||||
|
|
||||||
def _detect_charset(self, stream):
|
def _detect_charset(self, stream):
|
||||||
buff = stream.read(8192)
|
full_buff = stream.read(8192)
|
||||||
result = chardet.detect(buff)
|
io_buff = BytesIO(full_buff)
|
||||||
print "chardet result: " + str(result)
|
|
||||||
return (result['encoding'], buff)
|
detector = UniversalDetector()
|
||||||
|
|
||||||
|
try:
|
||||||
|
buff = io_buff.read(256)
|
||||||
|
while buff:
|
||||||
|
detector.feed(buff)
|
||||||
|
if detector.done:
|
||||||
|
break
|
||||||
|
|
||||||
|
buff = io_buff.read(256)
|
||||||
|
finally:
|
||||||
|
detector.close()
|
||||||
|
|
||||||
|
print "chardet result: " + str(detector.result)
|
||||||
|
return (detector.result['encoding'], full_buff)
|
||||||
|
|
||||||
# Create a generator reading from a stream,
|
# Create a generator reading from a stream,
|
||||||
# with optional rewriting and final read call
|
# with optional rewriting and final read call
|
||||||
|
Loading…
x
Reference in New Issue
Block a user