mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
lxml query parsing fix: (addressing part of ukwa/ukwa-pywb#38)
- ensure lxml-enabled parsing in XmlQueryIndexSource works by passing the raw bytestring instead of unicode text to the parser
- tests: add lxml and non-lxml parsing tests to test_xmlquery_indexsource.py, add lxml to test install
- misc fixes: fix typo in banner.html, update gevent api to support latest gevent
This commit is contained in:
parent
8bf2f9debb
commit
b8124e3931
@ -248,7 +248,7 @@ class XmlQueryIndexSource(BaseIndexSource):
|
|||||||
response = self.session.get(query_url)
|
response = self.session.get(query_url)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|
||||||
results = etree.fromstring(response.text)
|
results = etree.fromstring(response.content)
|
||||||
|
|
||||||
items = results.find('results')
|
items = results.find('results')
|
||||||
|
|
||||||
@ -259,7 +259,7 @@ class XmlQueryIndexSource(BaseIndexSource):
|
|||||||
|
|
||||||
raise NotFoundException('url {0} not found'.format(url))
|
raise NotFoundException('url {0} not found'.format(url))
|
||||||
|
|
||||||
if not items:
|
if len(items) == 0:
|
||||||
raise NotFoundException('url {0} not found'.format(url))
|
raise NotFoundException('url {0} not found'.format(url))
|
||||||
|
|
||||||
items = items.findall('result')
|
items = items.findall('result')
|
||||||
|
@ -4,6 +4,7 @@ from pywb.warcserver.index.indexsource import XmlQueryIndexSource
|
|||||||
from pywb.warcserver.index.aggregator import SimpleAggregator
|
from pywb.warcserver.index.aggregator import SimpleAggregator
|
||||||
|
|
||||||
from mock import patch
|
from mock import patch
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
@ -27,6 +28,10 @@ def mock_get(self, url):
|
|||||||
def text(self):
|
def text(self):
|
||||||
return self.string
|
return self.string
|
||||||
|
|
||||||
|
@property
|
||||||
|
def content(self):
|
||||||
|
return self.string.encode('utf-8')
|
||||||
|
|
||||||
def raise_for_status(self):
|
def raise_for_status(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -40,6 +45,19 @@ class TestXmlQueryIndexSource(BaseTestClass):
|
|||||||
def setup_class(cls):
|
def setup_class(cls):
|
||||||
super(TestXmlQueryIndexSource, cls).setup_class()
|
super(TestXmlQueryIndexSource, cls).setup_class()
|
||||||
|
|
||||||
|
cls.xmlpatch = patch('pywb.warcserver.index.indexsource.etree', cls._get_etree())
|
||||||
|
cls.xmlpatch.start()
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _get_etree(cls):
|
||||||
|
import xml.etree.ElementTree as etree
|
||||||
|
return etree
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def teardown_class(cls):
|
||||||
|
cls.xmlpatch.stop()
|
||||||
|
super(TestXmlQueryIndexSource, cls).teardown_class()
|
||||||
|
|
||||||
def do_query(self, params):
|
def do_query(self, params):
|
||||||
return SimpleAggregator({'source': XmlQueryIndexSource('http://localhost:8080/path')})(params)
|
return SimpleAggregator({'source': XmlQueryIndexSource('http://localhost:8080/path')})(params)
|
||||||
|
|
||||||
@ -75,6 +93,15 @@ com,example)/some/path 20180216200300 example.warc.gz"""
|
|||||||
assert(errs == {})
|
assert(errs == {})
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
class TestXmlQueryIndexSourceLXML(TestXmlQueryIndexSource):
|
||||||
|
@classmethod
|
||||||
|
def _get_etree(cls):
|
||||||
|
pytest.importorskip('lxml.etree')
|
||||||
|
import lxml.etree
|
||||||
|
return lxml.etree
|
||||||
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
URL_RESPONSE_1 = """
|
URL_RESPONSE_1 = """
|
||||||
<wayback>
|
<wayback>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user