mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
lxml query parsing fix (addresses part of ukwa/ukwa-pywb#38):
- ensure lxml-enabled parsing in XmlQueryIndexSource works by passing the raw bytestring, instead of unicode text, to the parser
- tests: add lxml and non-lxml parsing tests to test_xmlquery_indexsource.py; add lxml to the test install
- misc fixes: fix typo in banner.html; update gevent API usage to support the latest gevent
This commit is contained in:
parent
8bf2f9debb
commit
b8124e3931
@ -248,7 +248,7 @@ class XmlQueryIndexSource(BaseIndexSource):
|
||||
response = self.session.get(query_url)
|
||||
response.raise_for_status()
|
||||
|
||||
results = etree.fromstring(response.text)
|
||||
results = etree.fromstring(response.content)
|
||||
|
||||
items = results.find('results')
|
||||
|
||||
@ -259,7 +259,7 @@ class XmlQueryIndexSource(BaseIndexSource):
|
||||
|
||||
raise NotFoundException('url {0} not found'.format(url))
|
||||
|
||||
if not items:
|
||||
if len(items) == 0:
|
||||
raise NotFoundException('url {0} not found'.format(url))
|
||||
|
||||
items = items.findall('result')
|
||||
|
@ -4,6 +4,7 @@ from pywb.warcserver.index.indexsource import XmlQueryIndexSource
|
||||
from pywb.warcserver.index.aggregator import SimpleAggregator
|
||||
|
||||
from mock import patch
|
||||
import pytest
|
||||
|
||||
|
||||
# ============================================================================
|
||||
@ -27,6 +28,10 @@ def mock_get(self, url):
|
||||
def text(self):
|
||||
return self.string
|
||||
|
||||
# Bytes counterpart of the `text` accessor: returns the same stored string,
# encoded as UTF-8, for code that reads `response.content` rather than
# `response.text`.
@property
|
||||
def content(self):
|
||||
return self.string.encode('utf-8')
|
||||
|
||||
# Deliberate no-op: calling raise_for_status() on this object never raises.
def raise_for_status(self):
|
||||
pass
|
||||
|
||||
@ -40,6 +45,19 @@ class TestXmlQueryIndexSource(BaseTestClass):
|
||||
def setup_class(cls):
|
||||
super(TestXmlQueryIndexSource, cls).setup_class()
|
||||
|
||||
cls.xmlpatch = patch('pywb.warcserver.index.indexsource.etree', cls._get_etree())
|
||||
cls.xmlpatch.start()
|
||||
|
||||
# Hook returning the etree module that setup_class patches into
# pywb.warcserver.index.indexsource. This version returns the standard
# library's xml.etree.ElementTree; subclasses can override it to test
# against a different implementation.
@classmethod
|
||||
def _get_etree(cls):
|
||||
import xml.etree.ElementTree as etree
|
||||
return etree
|
||||
|
||||
# Undo the etree patch started in setup_class, then run the parent
# class's teardown.
@classmethod
|
||||
def teardown_class(cls):
|
||||
cls.xmlpatch.stop()
|
||||
super(TestXmlQueryIndexSource, cls).teardown_class()
|
||||
|
||||
# Test helper: wrap a single XmlQueryIndexSource (pointed at
# http://localhost:8080/path) in a SimpleAggregator under the key 'source',
# call the aggregator with `params`, and return whatever it returns.
def do_query(self, params):
|
||||
return SimpleAggregator({'source': XmlQueryIndexSource('http://localhost:8080/path')})(params)
|
||||
|
||||
@ -75,6 +93,15 @@ com,example)/some/path 20180216200300 example.warc.gz"""
|
||||
assert(errs == {})
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Subclass of TestXmlQueryIndexSource that only overrides _get_etree so that
# lxml.etree is returned instead of the base class's choice.
class TestXmlQueryIndexSourceLXML(TestXmlQueryIndexSource):
|
||||
@classmethod
|
||||
def _get_etree(cls):
|
||||
# importorskip raises pytest's skip outcome when lxml.etree cannot be
# imported, instead of letting the import below fail with an error.
pytest.importorskip('lxml.etree')
|
||||
import lxml.etree
|
||||
return lxml.etree
|
||||
|
||||
|
||||
# ============================================================================
|
||||
URL_RESPONSE_1 = """
|
||||
<wayback>
|
||||
|
Loading…
x
Reference in New Issue
Block a user