mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Rules Work (vimeo) and live_only flag (#264)
* rules work:
  - apply 'js_regexs' to JSON content as well, using the 'js-proxy' rewriter
  - add rules for vimeo that disable hls/dash
  - add a 'live_only' flag to the 'rewrite' block, so the rewrite is applied only when 'is_live' is set
  - tests: add a test for the new vimeo rules, covering 'live_only'
* cli: add a '--record' option to enable quick recording from the live collection
This commit is contained in:
parent
93b3b95664
commit
db3ba5a067
@ -35,6 +35,7 @@ class BaseCli(object):
|
||||
parser.add_argument('--profile', action='store_true')
|
||||
|
||||
parser.add_argument('--live', action='store_true', help='Add live-web handler at /live')
|
||||
parser.add_argument('--record', action='store_true')
|
||||
|
||||
parser.add_argument('--proxy', help='Enable HTTP/S Proxy on specified collection')
|
||||
parser.add_argument('--proxy-record', action='store_true', help='Enable Proxy Recording into specified collection')
|
||||
@ -71,6 +72,9 @@ class BaseCli(object):
|
||||
if self.r.debug:
|
||||
self.extra_config['debug'] = True
|
||||
|
||||
if self.r.record:
|
||||
self.extra_config['recorder'] = 'live'
|
||||
|
||||
def run(self):
|
||||
self.run_gevent()
|
||||
return self
|
||||
|
@ -70,7 +70,19 @@ class BaseContentRewriter(object):
|
||||
|
||||
return {}
|
||||
|
||||
def has_custom_rules(self, rule, cdx):
|
||||
if 'js_regex_func' not in rule:
|
||||
return False
|
||||
|
||||
if rule.get('live_only') and not cdx.get('is_live'):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def get_rw_class(self, rule, text_type, rwinfo):
|
||||
if text_type == 'json' and 'js_regex_func' in rule:
|
||||
text_type = 'js-proxy'
|
||||
|
||||
if text_type == 'js' and not rwinfo.is_url_rw():
|
||||
text_type = 'js-proxy'
|
||||
|
||||
@ -89,7 +101,7 @@ class BaseContentRewriter(object):
|
||||
|
||||
if rw_type in ('js', 'js-proxy'):
|
||||
extra_rules = []
|
||||
if 'js_regex_func' in rule:
|
||||
if self.has_custom_rules(rule, cdx):
|
||||
extra_rules = rule['js_regex_func'](rwinfo.url_rewriter)
|
||||
|
||||
# if js-proxy and no rules, default to none
|
||||
|
@ -13,11 +13,13 @@ from pywb.rewrite.url_rewriter import UrlRewriter
|
||||
from pywb.rewrite.default_rewriter import DefaultRewriter
|
||||
|
||||
from pywb import get_test_dir
|
||||
|
||||
import os
|
||||
import json
|
||||
import pytest
|
||||
|
||||
|
||||
# ============================================================================
|
||||
@pytest.fixture(params=[{'Content-Type': 'text/html'},
|
||||
{'Content-Type': 'application/xhtml+xml'},
|
||||
{'Content-Type': 'application/octet-stream'},
|
||||
@ -51,7 +53,7 @@ class TestContentRewriter(object):
|
||||
|
||||
def rewrite_record(self, headers, content, ts, url='http://example.com/',
|
||||
prefix='http://localhost:8080/prefix/', warc_headers=None,
|
||||
request_url=None):
|
||||
request_url=None, is_live=None):
|
||||
|
||||
record = self._create_response_record(url, headers, content, warc_headers)
|
||||
|
||||
@ -64,6 +66,7 @@ class TestContentRewriter(object):
|
||||
cdx['urlkey'] = canonicalize(url)
|
||||
if request_url != url:
|
||||
cdx['is_fuzzy'] = '1'
|
||||
cdx['is_live'] = is_live
|
||||
|
||||
return self.content_rewriter(record, url_rewriter, None, cdx=cdx)
|
||||
|
||||
@ -272,6 +275,24 @@ class TestContentRewriter(object):
|
||||
|
||||
assert b''.join(gen).decode('utf-8') == '{"ssid":"5678"}'
|
||||
|
||||
def test_custom_live_only(self):
|
||||
headers = {'Content-Type': 'application/json'}
|
||||
content = '{"foo":"bar", "dash": {"on": "true"}, "some": ["list"]'
|
||||
|
||||
# is_live
|
||||
rw_headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701mp_',
|
||||
url='https://player.vimeo.com/video/123445/config/config?A=B',
|
||||
is_live='1')
|
||||
|
||||
# rewritten
|
||||
assert b''.join(gen).decode('utf-8') == '{"foo":"bar", "__dash": {"on": "true"}, "some": ["list"]'
|
||||
|
||||
# not is_live
|
||||
rw_headers, gen, is_rw = self.rewrite_record(headers, content, ts='201701mp_',
|
||||
url='https://player.vimeo.com/video/123445/config/config?A=B')
|
||||
|
||||
assert b''.join(gen).decode('utf-8') == content
|
||||
|
||||
def test_hls_default_max(self):
|
||||
headers = {'Content-Type': 'application/vnd.apple.mpegurl'}
|
||||
with open(os.path.join(get_test_dir(), 'text_content', 'sample_hls.m3u8'), 'rt') as fh:
|
||||
|
@ -261,6 +261,21 @@ rules:
|
||||
# only use non query part of url, ignore query
|
||||
fuzzy_lookup: '()'
|
||||
|
||||
- url_prefix: 'com,vimeo,player)/video/'
|
||||
|
||||
fuzzy_lookup:
|
||||
match:
|
||||
regex: 'com,vimeo.player\)/video/[\d]+/config?.*'
|
||||
|
||||
rewrite:
|
||||
live_only: true
|
||||
js_regexs:
|
||||
- match: '"dash":'
|
||||
replace: '"__dash":'
|
||||
|
||||
- match: '"hls":'
|
||||
replace: '"__hls":'
|
||||
|
||||
- url_prefix: 'com,vimeocdn,'
|
||||
|
||||
fuzzy_lookup: '()'
|
||||
@ -279,7 +294,9 @@ rules:
|
||||
- videoFileId
|
||||
- signature
|
||||
|
||||
|
||||
# vine
|
||||
#=================================================================
|
||||
- url_prefix: 'co,vine,cdn,'
|
||||
|
||||
fuzzy_lookup:
|
||||
|
Loading…
x
Reference in New Issue
Block a user