mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 08:04:49 +01:00
Dedup Policy Tests (#613)
* dedup tests: add basic tests for dedup system, continuing from #611 - ensure config merge works correctly
This commit is contained in:
parent
aee458b7f5
commit
78a9888b46
@ -62,9 +62,15 @@ class WarcServer(BaseWarcServer):
|
||||
if 'proxy' in custom_config and 'proxy' in config:
|
||||
custom_config['proxy'].update(config['proxy'])
|
||||
if 'recorder' in custom_config and 'recorder' in config:
|
||||
if isinstance(custom_config['recorder'], str):
|
||||
custom_config['recorder'] = {'source_coll': custom_config['recorder']}
|
||||
|
||||
if isinstance(config['recorder'], str):
|
||||
config['recorder'] = {'source_coll': config['recorder']}
|
||||
|
||||
config['recorder'].update(custom_config['recorder'])
|
||||
custom_config['recorder'] = config['recorder']
|
||||
|
||||
config.update(custom_config)
|
||||
|
||||
super(WarcServer, self).__init__(debug=config.get('debug', False))
|
||||
|
12
tests/config_test_record_dedup.yaml
Normal file
12
tests/config_test_record_dedup.yaml
Normal file
@ -0,0 +1,12 @@
|
||||
# Configuration used by tests/test_record_dedup.py.
debug: true

# Collections are created under this directory; the test checks for
# <root_dir>/_test_colls/test-dedup/archive after `init`.
collections_root: _test_colls

recorder:
  # Collection that recording fetches from (defined under `collections`).
  source_coll: live
  # The accompanying test requests the same URL twice and expects a single
  # response/request pair in the WARC and a single index entry.
  # NOTE(review): presumably `skip` means "do not write a duplicate capture";
  # confirm against the recorder documentation.
  dedup_policy: skip

collections:
  'live': '$live'
|
||||
|
||||
|
52
tests/test_record_dedup.py
Normal file
52
tests/test_record_dedup.py
Normal file
@ -0,0 +1,52 @@
|
||||
from .base_config_test import BaseConfigTest, CollsDirMixin, BaseTestClass
|
||||
from pywb.manager.manager import main as manager
|
||||
from pywb.warcserver.test.testutils import to_path, HttpBinLiveTests, FakeRedisTests
|
||||
|
||||
from fakeredis import FakeStrictRedis
|
||||
|
||||
from warcio import ArchiveIterator
|
||||
|
||||
import os
|
||||
import time
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ============================================================================
|
||||
class TestRecordDedup(HttpBinLiveTests, CollsDirMixin, BaseConfigTest, FakeRedisTests, BaseTestClass):
    """Record the same URL twice and check that only one capture is stored.

    The tests share state and rely on running in definition order: the
    collection is created first, then recorded into, and finally the index
    and the WARC written by the recording step are inspected.
    """

    @classmethod
    def setup_class(cls):
        """Start the app from the dedup test config and attach a redis client.

        ``custom_config`` passes ``recorder`` as a plain string while the YAML
        config defines it as a mapping, so both forms are supplied together.
        """
        super(TestRecordDedup, cls).setup_class(
            'config_test_record_dedup.yaml',
            custom_config={'recorder': 'live'},
        )
        cls.redis = FakeStrictRedis.from_url("redis://localhost/0")

    def test_init_coll(self):
        """Create the ``test-dedup`` collection and verify its archive dir."""
        manager(['init', 'test-dedup'])

        archive_dir = os.path.join(self.root_dir, '_test_colls', 'test-dedup', 'archive')
        assert os.path.isdir(archive_dir)

    def test_record_1(self):
        """Request the same URL through the record endpoint twice."""
        for attempt in range(2):
            if attempt:
                # NOTE(review): presumably spaces the captures so they do not
                # share a timestamp -- confirm the reason for 1.2 seconds.
                time.sleep(1.2)

            res = self.testapp.get('/test-dedup/record/mp_/http://httpbin.org/get?A=B',
                                   headers={"Referer": "http://httpbin.org/"})
            assert '"A": "B"' in res.text

    def test_single_redis_entry(self):
        """The collection's redis sorted set holds exactly one entry."""
        entries = self.redis.zrange("pywb:test-dedup:cdxj", 0, -1)
        assert len(entries) == 1

    def test_single_warc_record(self):
        """Exactly one WARC file exists and it holds one response/request pair."""
        archive_dir = os.path.join(self.root_dir, '_test_colls', 'test-dedup', 'archive')
        warc_names = os.listdir(archive_dir)
        assert len(warc_names) == 1

        with open(os.path.join(archive_dir, warc_names[0]), 'rb') as fh:
            rec_types = [record.rec_type for record in ArchiveIterator(fh)]

        # ensure only one response/request pair written
        assert rec_types == ['response', 'request']
|
Loading…
x
Reference in New Issue
Block a user