1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 08:04:49 +01:00

Dedup Policy Tests ()

* dedup tests: add basic tests for the dedup system, continuing from earlier dedup work
- ensure config merge works correctly
This commit is contained in:
Ilya Kreymer 2021-01-26 22:39:52 -08:00 committed by GitHub
parent aee458b7f5
commit 78a9888b46
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 70 additions and 0 deletions

@ -62,9 +62,15 @@ class WarcServer(BaseWarcServer):
if 'proxy' in custom_config and 'proxy' in config:
custom_config['proxy'].update(config['proxy'])
if 'recorder' in custom_config and 'recorder' in config:
if isinstance(custom_config['recorder'], str):
custom_config['recorder'] = {'source_coll': custom_config['recorder']}
if isinstance(config['recorder'], str):
config['recorder'] = {'source_coll': config['recorder']}
config['recorder'].update(custom_config['recorder'])
custom_config['recorder'] = config['recorder']
config.update(custom_config)
super(WarcServer, self).__init__(debug=config.get('debug', False))

@ -0,0 +1,12 @@
# Test configuration for the record/dedup tests.
debug: true
collections_root: _test_colls

# Recorder settings. The mapping form is used here; the server code also
# accepts a plain string, which it expands to {source_coll: <string>}.
recorder:
  # Collection that recorded content is fetched from (defined below).
  source_coll: live
  # NOTE(review): presumably 'skip' means duplicate captures are not
  # written again -- confirm against the pywb recorder documentation.
  dedup_policy: skip

collections:
  'live': '$live'

@ -0,0 +1,52 @@
from .base_config_test import BaseConfigTest, CollsDirMixin, BaseTestClass
from pywb.manager.manager import main as manager
from pywb.warcserver.test.testutils import to_path, HttpBinLiveTests, FakeRedisTests
from fakeredis import FakeStrictRedis
from warcio import ArchiveIterator
import os
import time
import json
import pytest
# ============================================================================
class TestRecordDedup(HttpBinLiveTests, CollsDirMixin, BaseConfigTest, FakeRedisTests, BaseTestClass):
    """Record the same URL twice and check that only one capture is kept.

    The tests share state (the collection on disk and the fake Redis index)
    and are written to run in definition order: the collection is created,
    the URL is recorded twice, then the index and the WARC are inspected.
    """

    @classmethod
    def setup_class(cls):
        """Start the app with the dedup test config and open a fake Redis client."""
        super(TestRecordDedup, cls).setup_class(
            'config_test_record_dedup.yaml',
            custom_config={'recorder': 'live'},
        )
        cls.redis = FakeStrictRedis.from_url("redis://localhost/0")

    def test_init_coll(self):
        """Create the ``test-dedup`` collection and verify its archive dir exists."""
        manager(['init', 'test-dedup'])

        archive_dir = os.path.join(self.root_dir, '_test_colls', 'test-dedup', 'archive')
        assert os.path.isdir(archive_dir)

    def test_record_1(self):
        """Request the same URL through the record endpoint twice."""

        def record_once():
            # Issue one recording request and check the echoed query argument.
            response = self.testapp.get(
                '/test-dedup/record/mp_/http://httpbin.org/get?A=B',
                headers={"Referer": "http://httpbin.org/"},
            )
            assert '"A": "B"' in response.text

        record_once()
        # NOTE(review): presumably the pause ensures the second capture would
        # get a different timestamp than the first -- confirm.
        time.sleep(1.2)
        record_once()

    def test_single_redis_entry(self):
        """The collection's CDXJ sorted set must hold exactly one entry."""
        entries = self.redis.zrange("pywb:test-dedup:cdxj", 0, -1)
        assert len(entries) == 1

    def test_single_warc_record(self):
        """Exactly one WARC file exists and it holds a single response/request pair."""
        archive_dir = os.path.join(self.root_dir, '_test_colls', 'test-dedup', 'archive')
        warc_names = os.listdir(archive_dir)
        assert len(warc_names) == 1

        warc_path = os.path.join(archive_dir, warc_names[0])
        with open(warc_path, 'rb') as stream:
            rec_types = [rec.rec_type for rec in ArchiveIterator(stream)]

        # ensure only one response/request pair written
        assert rec_types == ['response', 'request']