1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

Rules Work (vimeo) and live_only flag (#264)

* rules work:
- apply 'js_regexs' on json content also, using 'js-proxy' rewriter
- rules for vimeo, disable hls/dash
- add 'live_only' flag under 'rewrite' to enable rewriting only when 'is_live' is set
- tests: add test for new vimeo rules, testing live_only
* cli: add '--record' option to enable quick recording from the live collection
This commit is contained in:
Ilya Kreymer 2017-11-02 19:43:48 -07:00 committed by GitHub
parent 93b3b95664
commit db3ba5a067
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 56 additions and 2 deletions

View File

@ -35,6 +35,7 @@ class BaseCli(object):
parser.add_argument('--profile', action='store_true')
parser.add_argument('--live', action='store_true', help='Add live-web handler at /live')
parser.add_argument('--record', action='store_true')
parser.add_argument('--proxy', help='Enable HTTP/S Proxy on specified collection')
parser.add_argument('--proxy-record', action='store_true', help='Enable Proxy Recording into specified collection')
@ -71,6 +72,9 @@ class BaseCli(object):
if self.r.debug:
self.extra_config['debug'] = True
if self.r.record:
self.extra_config['recorder'] = 'live'
def run(self):
self.run_gevent()
return self

View File

@ -70,7 +70,19 @@ class BaseContentRewriter(object):
return {}
def has_custom_rules(self, rule, cdx):
    """Tell whether the custom JS regex rules of ``rule`` apply to ``cdx``.

    :param rule: mapping describing the matched rewrite rule; consulted
        for the ``js_regex_func`` and ``live_only`` entries
    :param cdx: mapping for the capture being rewritten; consulted for
        the ``is_live`` entry
    :return: ``False`` when the rule has no ``js_regex_func``, or when the
        rule is flagged ``live_only`` and the capture is not marked
        ``is_live``; ``True`` otherwise
    """
    if 'js_regex_func' in rule:
        live_required = rule.get('live_only')
        # a live_only rule is suppressed for captures lacking a truthy 'is_live'
        return not (live_required and not cdx.get('is_live'))

    return False
def get_rw_class(self, rule, text_type, rwinfo):
if text_type == 'json' and 'js_regex_func' in rule:
text_type = 'js-proxy'
if text_type == 'js' and not rwinfo.is_url_rw():
text_type = 'js-proxy'
@ -89,7 +101,7 @@ class BaseContentRewriter(object):
if rw_type in ('js', 'js-proxy'):
extra_rules = []
if 'js_regex_func' in rule:
if self.has_custom_rules(rule, cdx):
extra_rules = rule['js_regex_func'](rwinfo.url_rewriter)
# if js-proxy and no rules, default to none

View File

@ -13,11 +13,13 @@ from pywb.rewrite.url_rewriter import UrlRewriter
from pywb.rewrite.default_rewriter import DefaultRewriter
from pywb import get_test_dir
import os
import json
import pytest
# ============================================================================
@pytest.fixture(params=[{'Content-Type': 'text/html'},
{'Content-Type': 'application/xhtml+xml'},
{'Content-Type': 'application/octet-stream'},
@ -51,7 +53,7 @@ class TestContentRewriter(object):
def rewrite_record(self, headers, content, ts, url='http://example.com/',
prefix='http://localhost:8080/prefix/', warc_headers=None,
request_url=None):
request_url=None, is_live=None):
record = self._create_response_record(url, headers, content, warc_headers)
@ -64,6 +66,7 @@ class TestContentRewriter(object):
cdx['urlkey'] = canonicalize(url)
if request_url != url:
cdx['is_fuzzy'] = '1'
cdx['is_live'] = is_live
return self.content_rewriter(record, url_rewriter, None, cdx=cdx)
@ -272,6 +275,24 @@ class TestContentRewriter(object):
assert b''.join(gen).decode('utf-8') == '{"ssid":"5678"}'
def test_custom_live_only(self):
    """Check that the vimeo config rewrite is applied only when 'is_live' is set.

    The same JSON payload is run through ``rewrite_record`` twice for the
    same player config url: once with ``is_live='1'`` (the "dash" key is
    expected to come back as "__dash") and once without it (the payload is
    expected to come back unchanged).
    """
    json_headers = {'Content-Type': 'application/json'}
    content = '{"foo":"bar", "dash": {"on": "true"}, "some": ["list"]'
    config_url = 'https://player.vimeo.com/video/123445/config/config?A=B'

    # live request: the custom rule should kick in and rename "dash"
    _, live_gen, _ = self.rewrite_record(json_headers, content, ts='201701mp_',
                                         url=config_url,
                                         is_live='1')

    live_body = b''.join(live_gen).decode('utf-8')
    assert live_body == '{"foo":"bar", "__dash": {"on": "true"}, "some": ["list"]'

    # no is_live flag: content must pass through untouched
    _, replay_gen, _ = self.rewrite_record(json_headers, content, ts='201701mp_',
                                           url=config_url)

    replay_body = b''.join(replay_gen).decode('utf-8')
    assert replay_body == content
def test_hls_default_max(self):
headers = {'Content-Type': 'application/vnd.apple.mpegurl'}
with open(os.path.join(get_test_dir(), 'text_content', 'sample_hls.m3u8'), 'rt') as fh:

View File

@ -261,6 +261,21 @@ rules:
# only use non query part of url, ignore query
fuzzy_lookup: '()'
- url_prefix: 'com,vimeo,player)/video/'
fuzzy_lookup:
match:
regex: 'com,vimeo.player\)/video/[\d]+/config?.*'
rewrite:
live_only: true
js_regexs:
- match: '"dash":'
replace: '"__dash":'
- match: '"hls":'
replace: '"__hls":'
- url_prefix: 'com,vimeocdn,'
fuzzy_lookup: '()'
@ -279,7 +294,9 @@ rules:
- videoFileId
- signature
# vine
#=================================================================
- url_prefix: 'co,vine,cdn,'
fuzzy_lookup: