mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
Merge branch 'develop' into video
This commit is contained in:
commit
fea48fd27a
@ -28,8 +28,17 @@ class CDXFile(CDXSource):
|
|||||||
self.filename = filename
|
self.filename = filename
|
||||||
|
|
||||||
def load_cdx(self, query):
|
def load_cdx(self, query):
|
||||||
source = open(self.filename)
|
def do_open():
|
||||||
return iter_range(source, query.key, query.end_key)
|
try:
|
||||||
|
source = open(self.filename)
|
||||||
|
gen = iter_range(source, query.key, query.end_key)
|
||||||
|
for line in gen:
|
||||||
|
yield line
|
||||||
|
finally:
|
||||||
|
source.close()
|
||||||
|
|
||||||
|
return do_open()
|
||||||
|
#return iter_range(do_open(), query.key, query.end_key)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return 'CDX File - ' + self.filename
|
return 'CDX File - ' + self.filename
|
||||||
|
@ -274,10 +274,20 @@ class HTMLRewriterMixin(object):
|
|||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
class HTMLRewriter(HTMLRewriterMixin, HTMLParser):
|
class HTMLRewriter(HTMLRewriterMixin, HTMLParser):
|
||||||
|
PARSETAG = re.compile('[<]')
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
HTMLParser.__init__(self)
|
HTMLParser.__init__(self)
|
||||||
super(HTMLRewriter, self).__init__(*args, **kwargs)
|
super(HTMLRewriter, self).__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
def reset(self):
|
||||||
|
HTMLParser.reset(self)
|
||||||
|
self.interesting = self.PARSETAG
|
||||||
|
|
||||||
|
def clear_cdata_mode(self):
|
||||||
|
HTMLParser.clear_cdata_mode(self)
|
||||||
|
self.interesting = self.PARSETAG
|
||||||
|
|
||||||
def feed(self, string):
|
def feed(self, string):
|
||||||
try:
|
try:
|
||||||
HTMLParser.feed(self, string)
|
HTMLParser.feed(self, string)
|
||||||
@ -322,11 +332,12 @@ class HTMLRewriter(HTMLRewriterMixin, HTMLParser):
|
|||||||
def handle_data(self, data):
|
def handle_data(self, data):
|
||||||
self.parse_data(data)
|
self.parse_data(data)
|
||||||
|
|
||||||
def handle_entityref(self, data):
|
# overriding regex so that these are no longer called
|
||||||
self.out.write('&' + data + ';')
|
#def handle_entityref(self, data):
|
||||||
|
# self.out.write('&' + data + ';')
|
||||||
def handle_charref(self, data):
|
#
|
||||||
self.out.write('&#' + data + ';')
|
#def handle_charref(self, data):
|
||||||
|
# self.out.write('&#' + data + ';')
|
||||||
|
|
||||||
def handle_comment(self, data):
|
def handle_comment(self, data):
|
||||||
self.out.write('<!--')
|
self.out.write('<!--')
|
||||||
|
@ -28,8 +28,11 @@ ur"""
|
|||||||
<base href="/web/20131226101010/http://example.com/some/path/static/"/><img src="/web/20131226101010im_/http://example.com/some/path/static/image.gif"/>
|
<base href="/web/20131226101010/http://example.com/some/path/static/"/><img src="/web/20131226101010im_/http://example.com/some/path/static/image.gif"/>
|
||||||
|
|
||||||
# HTML Entities
|
# HTML Entities
|
||||||
>>> parse('<a href="">› ></div>')
|
>>> parse('<a href="">› > ?</div>')
|
||||||
<a href="">› ></div>
|
<a href="">› > ?</div>
|
||||||
|
|
||||||
|
>>> parse('<div>X&Y</div> </div>X&Y;</div>')
|
||||||
|
<div>X&Y</div> </div>X&Y;</div>
|
||||||
|
|
||||||
# Don't rewrite anchors
|
# Don't rewrite anchors
|
||||||
>>> parse('<HTML><A Href="#abc">Text</a></hTmL>')
|
>>> parse('<HTML><A Href="#abc">Text</a></hTmL>')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user