mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-24 06:59:52 +01:00
Rules Work (vimeo) and live_only flag (#264)
* rules work:
  - apply 'js_regexs' to JSON content as well, using the 'js-proxy' rewriter
  - rules for vimeo: disable hls/dash
  - add 'live_only' flag to 'rewrite' rules, to enable rewriting only when 'is_live' is set
  - tests: add test for new vimeo rules, testing live_only
* cli: add '--record' cli option to enable quick recording from the live collection
This commit is contained in:
parent
93b3b95664
commit
db3ba5a067
@ -35,6 +35,7 @@ class BaseCli(object):
|
|||||||
parser.add_argument('--profile', action='store_true')
|
parser.add_argument('--profile', action='store_true')
|
||||||
|
|
||||||
parser.add_argument('--live', action='store_true', help='Add live-web handler at /live')
|
parser.add_argument('--live', action='store_true', help='Add live-web handler at /live')
|
||||||
|
parser.add_argument('--record', action='store_true')
|
||||||
|
|
||||||
parser.add_argument('--proxy', help='Enable HTTP/S Proxy on specified collection')
|
parser.add_argument('--proxy', help='Enable HTTP/S Proxy on specified collection')
|
||||||
parser.add_argument('--proxy-record', action='store_true', help='Enable Proxy Recording into specified collection')
|
parser.add_argument('--proxy-record', action='store_true', help='Enable Proxy Recording into specified collection')
|
||||||
@ -71,6 +72,9 @@ class BaseCli(object):
|
|||||||
if self.r.debug:
|
if self.r.debug:
|
||||||
self.extra_config['debug'] = True
|
self.extra_config['debug'] = True
|
||||||
|
|
||||||
|
if self.r.record:
|
||||||
|
self.extra_config['recorder'] = 'live'
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
self.run_gevent()
|
self.run_gevent()
|
||||||
return self
|
return self
|
||||||
|
@ -70,7 +70,19 @@ class BaseContentRewriter(object):
|
|||||||
|
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
def has_custom_rules(self, rule, cdx):
    """Return True if the rule's custom JS regex rules apply to this capture.

    :param dict rule: rewrite rule; custom rules exist only when it
        contains a 'js_regex_func' entry. A truthy 'live_only' entry
        restricts them to captures whose cdx has a truthy 'is_live'.
    :param cdx: dict-like index entry for the capture, or None.
    :return: True when the custom rules should be used, False otherwise.
    :rtype: bool
    """
    if 'js_regex_func' not in rule:
        return False

    if not rule.get('live_only'):
        return True

    # live_only rules need a truthy 'is_live'; a missing cdx counts as
    # "not live" instead of raising AttributeError on None
    return bool(cdx is not None and cdx.get('is_live'))
|
||||||
|
|
||||||
def get_rw_class(self, rule, text_type, rwinfo):
|
def get_rw_class(self, rule, text_type, rwinfo):
|
||||||
|
if text_type == 'json' and 'js_regex_func' in rule:
|
||||||
|
text_type = 'js-proxy'
|
||||||
|
|
||||||
if text_type == 'js' and not rwinfo.is_url_rw():
|
if text_type == 'js' and not rwinfo.is_url_rw():
|
||||||
text_type = 'js-proxy'
|
text_type = 'js-proxy'
|
||||||
|
|
||||||
@ -89,7 +101,7 @@ class BaseContentRewriter(object):
|
|||||||
|
|
||||||
if rw_type in ('js', 'js-proxy'):
|
if rw_type in ('js', 'js-proxy'):
|
||||||
extra_rules = []
|
extra_rules = []
|
||||||
if 'js_regex_func' in rule:
|
if self.has_custom_rules(rule, cdx):
|
||||||
extra_rules = rule['js_regex_func'](rwinfo.url_rewriter)
|
extra_rules = rule['js_regex_func'](rwinfo.url_rewriter)
|
||||||
|
|
||||||
# if js-proxy and no rules, default to none
|
# if js-proxy and no rules, default to none
|
||||||
|
@ -13,11 +13,13 @@ from pywb.rewrite.url_rewriter import UrlRewriter
|
|||||||
from pywb.rewrite.default_rewriter import DefaultRewriter
|
from pywb.rewrite.default_rewriter import DefaultRewriter
|
||||||
|
|
||||||
from pywb import get_test_dir
|
from pywb import get_test_dir
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
@pytest.fixture(params=[{'Content-Type': 'text/html'},
|
@pytest.fixture(params=[{'Content-Type': 'text/html'},
|
||||||
{'Content-Type': 'application/xhtml+xml'},
|
{'Content-Type': 'application/xhtml+xml'},
|
||||||
{'Content-Type': 'application/octet-stream'},
|
{'Content-Type': 'application/octet-stream'},
|
||||||
@ -51,7 +53,7 @@ class TestContentRewriter(object):
|
|||||||
|
|
||||||
def rewrite_record(self, headers, content, ts, url='http://example.com/',
|
def rewrite_record(self, headers, content, ts, url='http://example.com/',
|
||||||
prefix='http://localhost:8080/prefix/', warc_headers=None,
|
prefix='http://localhost:8080/prefix/', warc_headers=None,
|
||||||
request_url=None):
|
request_url=None, is_live=None):
|
||||||
|
|
||||||
record = self._create_response_record(url, headers, content, warc_headers)
|
record = self._create_response_record(url, headers, content, warc_headers)
|
||||||
|
|
||||||
@ -64,6 +66,7 @@ class TestContentRewriter(object):
|
|||||||
cdx['urlkey'] = canonicalize(url)
|
cdx['urlkey'] = canonicalize(url)
|
||||||
if request_url != url:
|
if request_url != url:
|
||||||
cdx['is_fuzzy'] = '1'
|
cdx['is_fuzzy'] = '1'
|
||||||
|
cdx['is_live'] = is_live
|
||||||
|
|
||||||
return self.content_rewriter(record, url_rewriter, None, cdx=cdx)
|
return self.content_rewriter(record, url_rewriter, None, cdx=cdx)
|
||||||
|
|
||||||
@ -272,6 +275,24 @@ class TestContentRewriter(object):
|
|||||||
|
|
||||||
assert b''.join(gen).decode('utf-8') == '{"ssid":"5678"}'
|
assert b''.join(gen).decode('utf-8') == '{"ssid":"5678"}'
|
||||||
|
|
||||||
|
def test_custom_live_only(self):
    """Check that the vimeo 'live_only' rewrite rule applies only when is_live is set."""
    headers = {'Content-Type': 'application/json'}
    # well-formed JSON body, so the fixture matches its declared content type
    content = '{"foo":"bar", "dash": {"on": "true"}, "some": ["list"]}'
    url = 'https://player.vimeo.com/video/123445/config/config?A=B'

    # is_live set: the "dash" key is expected to be rewritten to "__dash"
    _, gen, _ = self.rewrite_record(headers, content, ts='201701mp_',
                                    url=url, is_live='1')

    assert b''.join(gen).decode('utf-8') == '{"foo":"bar", "__dash": {"on": "true"}, "some": ["list"]}'

    # is_live not set: the content is expected to pass through unchanged
    _, gen, _ = self.rewrite_record(headers, content, ts='201701mp_', url=url)

    assert b''.join(gen).decode('utf-8') == content
|
||||||
|
|
||||||
def test_hls_default_max(self):
|
def test_hls_default_max(self):
|
||||||
headers = {'Content-Type': 'application/vnd.apple.mpegurl'}
|
headers = {'Content-Type': 'application/vnd.apple.mpegurl'}
|
||||||
with open(os.path.join(get_test_dir(), 'text_content', 'sample_hls.m3u8'), 'rt') as fh:
|
with open(os.path.join(get_test_dir(), 'text_content', 'sample_hls.m3u8'), 'rt') as fh:
|
||||||
|
@ -261,6 +261,21 @@ rules:
|
|||||||
# only use non query part of url, ignore query
|
# only use non query part of url, ignore query
|
||||||
fuzzy_lookup: '()'
|
fuzzy_lookup: '()'
|
||||||
|
|
||||||
|
- url_prefix: 'com,vimeo,player)/video/'
|
||||||
|
|
||||||
|
fuzzy_lookup:
|
||||||
|
match:
|
||||||
|
regex: 'com,vimeo.player\)/video/[\d]+/config?.*'
|
||||||
|
|
||||||
|
rewrite:
|
||||||
|
live_only: true
|
||||||
|
js_regexs:
|
||||||
|
- match: '"dash":'
|
||||||
|
replace: '"__dash":'
|
||||||
|
|
||||||
|
- match: '"hls":'
|
||||||
|
replace: '"__hls":'
|
||||||
|
|
||||||
- url_prefix: 'com,vimeocdn,'
|
- url_prefix: 'com,vimeocdn,'
|
||||||
|
|
||||||
fuzzy_lookup: '()'
|
fuzzy_lookup: '()'
|
||||||
@ -279,7 +294,9 @@ rules:
|
|||||||
- videoFileId
|
- videoFileId
|
||||||
- signature
|
- signature
|
||||||
|
|
||||||
|
|
||||||
# vine
|
# vine
|
||||||
|
#=================================================================
|
||||||
- url_prefix: 'co,vine,cdn,'
|
- url_prefix: 'co,vine,cdn,'
|
||||||
|
|
||||||
fuzzy_lookup:
|
fuzzy_lookup:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user