2017-08-21 11:01:31 -07:00
|
|
|
# Default Filters
|
|
|
|
default_filters:
  # exts that should *not* be treated as files (ignore all query args)
  not_exts: [asp, aspx, jsp, php, pl, exe, dll]

  # ignore query args for the following mime types
  mimes:
    # flash
    - 'application/x-shockwave-flash'
    # dash
    - 'application/dash+xml'
    # hls
    - 'application/x-mpegURL'
    - 'application/vnd.apple.mpegurl'

  # apply the following url normalization rules on both the match url
  # and the request url to find a match
  # (not limited to query argument removal)
  url_normalize:
    # remove known cache busting args (_, cb, uncache) with all-digit values
    - match: '[?&](_|cb|uncache)=([\d]+)(?=&|$)'
      replace: ''

    # GA cache busting params (utm_*)
    - match: '[?&]utm_[^=]+=[^&]+(?=&|$)'
      replace: ''

    # remove jquery callback dynamic timestamp: only the captured
    # "<name>=jquery" part is kept
    - match: '[?&]((?:\w+)=jquery)[\d]+_[\d]+'
      replace: '\1'

    # remove more generic cache-busting params:
    # name contains 'bust' or 'ts', value appears to be a timestamp
    # (a leading 1 followed by 12 to 15 more digits)
    - match: '[?&](\w*(bust|ts)\w*=1[\d]{12,15})(?=&|$)'
      replace: ''
|
2014-02-26 18:02:01 -08:00
|
|
|
|
2017-08-21 11:01:31 -07:00
|
|
|
rules:
|
2015-07-23 22:44:10 -07:00
|
|
|
|
2014-02-26 18:02:01 -08:00
|
|
|
# twitter rules
|
|
|
|
#=================================================================
|
|
|
|
# twitter profile "with_replies" requests: the fuzzy regex captures
# the max_id query argument
- url_prefix: 'com,twitter)/i/profiles/show/'
  fuzzy_lookup: '/profiles/show/.*with_replies\?.*(max_id=[^&]+)'
|
|
|
|
|
2016-04-26 17:02:54 -07:00
|
|
|
# twitter timeline: fuzzy lookup on the listed query arguments
- url_prefix: 'com,twitter)/i/timeline'
  fuzzy_lookup: [max_position, include_entities]
|
|
|
|
|
2016-05-03 17:33:13 -07:00
|
|
|
# twitter video tweets: empty capture group, i.e. match on the
# non-query part of the url only
- url_prefix: 'com,twitter)/i/videos/tweet'
  fuzzy_lookup: '()'
|
|
|
|
|
2014-02-26 18:02:01 -08:00
|
|
|
|
|
|
|
# facebook rules
|
|
|
|
#=================================================================
|
2014-06-18 16:45:54 -07:00
|
|
|
# facebook generic pagelet: the fuzzy regex captures the `data=` argument
# up to the next '&', or up to a query_type / fbid field inside it
- url_prefix: 'com,facebook)/ajax/pagelet/generic.php/'
  # previous pattern (query_type only), kept for reference:
  #fuzzy_lookup: 'com,facebook\)/.*[?&]data=(.*?(?:[&]|query_type[^,]+))'
  fuzzy_lookup: 'com,facebook\)/.*[?&]data=(.*?(?:[&]|(?:query_type|fbid)[^,]+))'
|
2014-10-30 00:10:39 -07:00
|
|
|
|
2014-06-18 16:45:54 -07:00
|
|
|
# facebook ufi ajax: fuzzy lookup on the listed query arguments
- url_prefix: 'com,facebook)/ajax/ufi/'
  fuzzy_lookup: [ft_ent_identifier, lsd]
|
2014-02-26 18:02:01 -08:00
|
|
|
|
2014-09-21 13:23:19 -07:00
|
|
|
# facebook chat hovercard: fuzzy lookup on the `ids[0]` query argument
# (quoted because '[' and ']' are flow-sequence indicators)
- url_prefix: 'com,facebook)/ajax/chat/hovercard/sidebar.php'
  fuzzy_lookup: ['ids[0]']
|
2014-09-21 13:23:19 -07:00
|
|
|
|
|
|
|
# facebook login: fuzzy lookup on the listed query arguments
- url_prefix: 'com,facebook)/login.php'
  fuzzy_lookup: [email, lgnrnd, lsd]
|
2014-09-21 13:23:19 -07:00
|
|
|
|
2015-04-27 00:44:24 -07:00
|
|
|
# facebook timezone update: fuzzy lookup on the `__user` query argument
- url_prefix: 'com,facebook)/ajax/timezone/update.php'
  fuzzy_lookup: [__user]
|
|
|
|
|
2014-09-21 19:48:14 -07:00
|
|
|
# fallback for all /ajax/ urls not handled by a more specific prefix:
# the regex matches runs of query arguments whose name does not start
# with '_'
- url_prefix: 'com,facebook)/ajax/'
  fuzzy_lookup: '([?&][^_]\w+=[^&]+)+'
|
2014-02-26 18:02:01 -08:00
|
|
|
|
|
|
|
# facebook pages: custom JS rewrite rules
- url_prefix: 'com,facebook)/'
  rewrite:
    js_regexs:
      # the replacement wraps {0} in a JS block comment
      - match: 'Bootloader\.configurePage.*?;'
        replace: '/* {0} */'

      # dash manifest: capture group 1 is presumably handed to the
      # named function -- confirm against the rewriter
      - match: 'dash_manifest:"(.*",dash_prefetched_representation_ids:.*?])'
        group: 1
        function: 'pywb.rewrite.rewrite_dash:rewrite_fb_dash'

    # NOTE(review): nesting under `rewrite` was reconstructed from a
    # source that lost its indentation -- confirm
    parse_comments: true
|
|
|
|
|
2014-10-06 10:14:45 -07:00
|
|
|
# any facebook host: cookie scope setting for rewritten responses
- url_prefix: 'com,facebook'
  rewrite:
    cookie_scope: root
|
|
|
|
|
2014-02-26 18:02:01 -08:00
|
|
|
|
2015-10-16 09:43:22 -07:00
|
|
|
# fastly
|
|
|
|
#=================================================================
|
|
|
|
# fastly hosts: empty capture group, i.e. match on the non-query part
# of the url only
- url_prefix:
    - 'net,fastly,'
  fuzzy_lookup: '()'
|
|
|
|
|
|
|
|
|
2014-09-30 12:42:11 -07:00
|
|
|
# instagram rules
|
|
|
|
#=================================================================
|
2015-08-09 00:14:26 -07:00
|
|
|
# cloudfront hosts: JS rule targeting scheme-relative url(//...) references;
# capture group 1 is the url inside the parentheses
- url_prefix: 'net,cloudfront,'
  rewrite:
    js_regexs:
      - match: '\burl\((//[^)]+)\)'
        rewrite: true
        group: 1
|
2014-09-30 12:42:11 -07:00
|
|
|
|
2016-11-13 22:47:24 -08:00
|
|
|
# instagram: empty capture group, i.e. match on the non-query part of
# the url only
- url_prefix: 'com,instagram)/'
  fuzzy_lookup: '()'
|
2014-10-17 08:27:56 -07:00
|
|
|
|
|
|
|
|
2015-10-06 00:05:20 -07:00
|
|
|
# blogspot rules
|
|
|
|
#=================================================================
|
|
|
|
# blogspot hosts: fuzzy lookup on the listed query arguments, with a
# custom `replace` marker
- url_prefix:
    - 'com,blogspot,'
  fuzzy_lookup:
    match: [path, action, widgettype]
    replace: '.com/'
|
|
|
|
|
|
|
|
|
2015-02-03 11:14:06 -08:00
|
|
|
# flickr rules
|
2014-02-26 18:02:01 -08:00
|
|
|
#=================================================================
|
2014-04-20 21:40:27 -07:00
|
|
|
# yahoo/yimg combo loaders: the fuzzy regex captures a path segment
# that ends in .css or .js
- url_prefix:
    - 'com,yimg,l)/g/combo'
    - 'com,yimg,s)/pw/combo'
    - 'com,yahooapis,yui)/combo'
  fuzzy_lookup: '([^/]+(?:\.css|\.js))'
|
|
|
|
|
|
|
|
|
2014-03-08 15:53:52 -08:00
|
|
|
# staticflickr images: the fuzzy regex captures the "<digits>_<id>"
# part of a .jpg file name
- url_prefix: 'com,staticflickr,'
  fuzzy_lookup:
    # the dot before "jpg" is escaped so that only a literal ".jpg"
    # suffix matches (an unescaped '.' matches any character)
    match: '([0-9]+_[a-z0-9]+).*?\.jpg'
    replace: '/'
|
|
|
|
|
|
|
|
|
2014-06-10 19:21:46 -07:00
|
|
|
# google plus rules
|
|
|
|
#=================================================================
|
|
|
|
|
|
|
|
# google plus getactivities: the fuzzy regex captures a quoted token
# from f.req plus the f.sid argument
- url_prefix: 'com,google,plus)/_/stream/getactivities'
  # previous pattern, kept for reference:
  # fuzzy_lookup: '(egk[^"]+)?.*(f.sid=[^&]+)'
  fuzzy_lookup: 'f.req=.*\]\]\]\,\"([^"]+).*(f.sid=[^&]+)'
|
|
|
|
|
2014-06-10 19:21:46 -07:00
|
|
|
# google plus squarestream: the fuzzy regex captures the first quoted
# token from f.req plus the f.sid argument
- url_prefix: 'com,google,plus)/_/stream/squarestream'
  # previous pattern, kept for reference:
  #fuzzy_lookup: '(cai[^"]+).*(f.sid=[^&]+)'
  fuzzy_lookup: 'f.req=.*?\"([^"]+).*(f.sid=[^&]+)'
|
2014-10-30 00:10:39 -07:00
|
|
|
|
2014-06-10 19:21:46 -07:00
|
|
|
# google plus communities landing: the fuzzy regex captures a 13-digit
# value followed by ']' plus the f.sid argument
- url_prefix: 'com,google,plus)/_/communities/rt/landing'
  fuzzy_lookup: 'com,google,plus\)/_/.*?.*\,(\d{13}\])&.*(f.sid=[^&]+).*'
|
2014-10-30 00:10:39 -07:00
|
|
|
|
2014-06-10 19:21:46 -07:00
|
|
|
|
|
|
|
# remaining google plus /_/ urls: the fuzzy regex captures the f.sid
# argument only
- url_prefix: 'com,google,plus)/_/'
  fuzzy_lookup: 'com,google,plus\)/_/.*?.*(f.sid=[^&]+)'
|
2014-10-17 08:27:56 -07:00
|
|
|
|
2016-09-12 20:07:14 -07:00
|
|
|
# periscope
|
|
|
|
#=================================================================
|
|
|
|
|
|
|
|
# periscope asset JS: replace the quoted string "location" with
# "WB_wombat_location"
- url_prefix: 'tv,periscope,assets)/js/'
  rewrite:
    js_regexs:
      - match: '"location"'
        replace: '"WB_wombat_location"'
|
2014-10-17 08:27:56 -07:00
|
|
|
|
2016-11-13 22:47:24 -08:00
|
|
|
|
|
|
|
# medium
|
|
|
|
#=================================================================
|
|
|
|
|
|
|
|
# medium JS: replace the quoted string "noitacol" ("location" spelled
# backwards) with the reversed wombat name
- url_prefix: 'com,medium'
  rewrite:
    js_regexs:
      - match: '"noitacol"'
        replace: '"noitacol_tabmow_BW"'
|
|
|
|
|
|
|
|
|
2014-10-17 08:27:56 -07:00
|
|
|
# vimeo rules
|
|
|
|
#=================================================================
|
|
|
|
|
|
|
|
- url_prefix: 'com,vimeo,av)/'
  # ignore the query: only the non-query part of the url is used
  fuzzy_lookup: '()'
|
|
|
|
|
2014-12-11 00:20:43 -08:00
|
|
|
# vimeo cdn hosts: empty capture group, i.e. match on the non-query
# part of the url only
- url_prefix: 'com,vimeocdn,'
  fuzzy_lookup: '()'
|
|
|
|
|
|
|
|
# vimeo player metadata records: empty capture group with '&' as the
# `replace` marker
- url_prefix: 'metadata)/player.vimeo.com/'
  fuzzy_lookup:
    match: '()'
    replace: '&'
|
|
|
|
|
2014-10-17 08:27:56 -07:00
|
|
|
# vimeo player log requests: fuzzy lookup on the listed query arguments
- url_prefix: 'com,vimeo,player)/log/'
  fuzzy_lookup: [id, ownerId, videoFileId, signature]
|
2014-10-28 10:36:48 -07:00
|
|
|
|
2015-01-22 16:45:09 -05:00
|
|
|
# vine
|
|
|
|
# vine cdn videos: the regex tolerates an optional "_<suffix>" after
# "videos" and captures the rest of the path up to the query
- url_prefix: 'co,vine,cdn,'
  fuzzy_lookup:
    match: 'videos(?:_[^/]+)?/([^?]+)'
    replace: 'videos'
|
|
|
|
|
2015-05-13 13:48:08 -07:00
|
|
|
|
2015-06-18 02:49:26 -04:00
|
|
|
# disqus embedded comments: fuzzy lookup on the listed query arguments
- url_prefix: 'com,disqus)/embed/comments'
  fuzzy_lookup: [base, version, t_i]
|
|
|
|
|
|
|
|
|
2015-05-13 13:48:08 -07:00
|
|
|
|
2014-10-28 10:36:48 -07:00
|
|
|
# youtube rules
|
|
|
|
#=================================================================
|
|
|
|
|
2015-05-03 22:09:33 -07:00
|
|
|
# get_video_info on youtube and youtube-nocookie: fuzzy lookup on the
# listed query arguments
- url_prefix:
    - 'com,youtube)/get_video_info'
    - 'com,youtube-nocookie)/get_video_info'
  fuzzy_lookup: [video_id, html5]
|
|
|
|
|
2014-11-04 22:11:25 -08:00
|
|
|
# youtube qoe stats: fuzzy lookup on the `docid` query argument
- url_prefix: 'com,youtube,s)/api/stats/qoe'
  fuzzy_lookup: [docid]
|
|
|
|
|
|
|
|
# youtube watch stats: fuzzy lookup on the `docid` query argument
- url_prefix: 'com,youtube,s)/api/stats/watch'
  fuzzy_lookup: [docid]
|
2014-10-28 10:36:48 -07:00
|
|
|
|
2014-11-23 18:39:58 -08:00
|
|
|
# youtube "c" hosts: videogoodput urls, fuzzy lookup on the `id` argument
- url_prefix: 'com,youtube,c'
  fuzzy_lookup:
    match:
      regex: 'com,youtube,c.*/videogoodput.*'
      args: [id]
|
2014-11-23 18:39:58 -08:00
|
|
|
|
2015-05-21 17:20:40 -07:00
|
|
|
# yt comments POST
|
2015-05-21 23:52:09 +00:00
|
|
|
# youtube watch fragments: fuzzy lookup on the listed query arguments
- url_prefix: 'com,youtube)/watch_fragments_ajax'
  fuzzy_lookup: [v, frags]
|
2015-05-21 17:20:40 -07:00
|
|
|
|
|
|
|
# youtube comment ajax: fuzzy lookup on the listed query arguments
- url_prefix: 'com,youtube)/comment_ajax'
  fuzzy_lookup: [video_id, page_token, action_load_comments, filter]
|
|
|
|
|
2015-05-21 23:52:09 +00:00
|
|
|
|
2014-10-28 10:36:48 -07:00
|
|
|
# googlevideo videoplayback urls: fuzzy lookup on the id and itag
# arguments, with a custom filter list and match type
- url_prefix: 'com,googlevideo,'
  fuzzy_lookup:
    match:
      regex: 'com,googlevideo.*/videoplayback.*'
      args:
        - id
        - itag
        # disabled:
        #- mime

    # NOTE(review): `filter` and `type` are placed under `fuzzy_lookup`;
    # nesting was reconstructed from a source that lost its
    # indentation -- confirm
    filter:
      - '~urlkey:{0}'
      - '!mimetype:text/plain'

    type: 'domain'
|
2014-10-28 10:36:48 -07:00
|
|
|
|
2014-12-07 21:21:51 -08:00
|
|
|
# comments support
|
|
|
|
# google plus comments widget: `location` JS rewrite mode plus a fuzzy
# lookup on the listed query arguments
- url_prefix: 'com,googleapis,plus)/u/0/_/widget/render/comments'
  rewrite:
    js_rewrite_location: location
  fuzzy_lookup: [href, stream_id, substream_id]
|
|
|
|
|
2015-09-14 19:25:32 -07:00
|
|
|
# youtube html5 player JS: the replacement sets c.dash to "0" and
# c.dashmpd to "" ahead of {0}, where the `"0"==c.dash` check is matched
- url_prefix: 'com,ytimg,s)/yts/jsbin/html5player-new'
  rewrite:
    js_regexs:
      - match: '\"0\"\=\=c\.dash'
        replace: 'c.dash="0";c.dashmpd="";{0}'
|
2014-06-10 19:21:46 -07:00
|
|
|
|
2015-01-03 12:51:09 -08:00
|
|
|
# watch and embed config changes
|
2015-05-03 22:09:33 -07:00
|
|
|
# youtube / youtube-nocookie pages: both replacements set the player's
# dash settings ("dash": "0", empty dashmpd)
- url_prefix:
    - 'com,youtube)/'
    - 'com,youtube-nocookie)/'
  rewrite:
    js_regexs:
      # set the dash args just before the ytplayer.load() call; the dot
      # is escaped so only the literal "ytplayer.load();" matches
      - match: 'ytplayer\.load\(\);'
        replace: 'ytplayer.config.args.dash = "0"; ytplayer.config.args.dashmpd = ""; {0}'

      # add the dash settings right after the opening of the
      # PLAYER_CONFIG "args" object
      - match: 'yt\.setConfig.*PLAYER_CONFIG.*args":\s*{'
        replace: '{0} "dash": "0", dashmpd: "", '
|
|
|
|
|
2014-02-26 18:02:01 -08:00
|
|
|
# testing rules -- not for valid domain
|
|
|
|
#=================================================================
|
|
|
|
# the rules in this block use a non-existent url prefix and exist only for testing
|
2015-08-07 12:02:48 -07:00
|
|
|
|
|
|
|
# test-only prefix: js_rewrite_location mode `all`
- url_prefix: 'example,example,test,all)/'
  rewrite:
    js_rewrite_location: all
|
|
|
|
|
2014-12-07 21:09:37 -08:00
|
|
|
# test-only prefix: js_rewrite_location mode `location`
- url_prefix: 'example,example,test,loconly)/'
  rewrite:
    js_rewrite_location: location
|
|
|
|
|
2015-02-11 15:01:29 -08:00
|
|
|
# test-only prefix: js_rewrite_location mode `none`
- url_prefix: 'example,example,test,norewrite)/'
  rewrite:
    js_rewrite_location: none
|
|
|
|
|
2014-02-28 09:40:51 -08:00
|
|
|
# test-only prefix combining canonicalize, fuzzy_lookup and rewrite settings
- url_prefix: 'example,example,test)/'
  # keep the part before '?' plus the literal id=value argument
  canonicalize:
    match: '(example,example,test\)/.*?)[?].*?(id=value).*'
    replace: '\1?\2'
  fuzzy_lookup: [param1, id]
  rewrite:
    js_rewrite_location: urls
|
2014-02-26 18:02:01 -08:00
|
|
|
|
2017-03-20 14:41:12 -07:00
|
|
|
# all domain rules -- fallback to this dataset
|
2014-02-26 18:02:01 -08:00
|
|
|
#=================================================================
|
|
|
|
# Applies to all urls -- should be last
|
|
|
|
# catch-all: the empty prefix applies to every url, so this rule must
# stay last
- url_prefix: ''
  fuzzy_lookup:
    match: '()'
|
2017-03-14 11:39:36 -07:00
|
|
|
|