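# NOTE: the lines in this header come from the web view this file was copied
# from; they are not part of the YAML document.
# Mirror of https://github.com/webrecorder/pywb.git (synced 2025-03-15 00:03:28 +01:00)
# File: pywb/pywb/rules.yaml (121 lines, 3.1 KiB, YAML)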

rules:
# twitter rules
#=================================================================
# Each list item pairs a url_prefix with the settings that apply to it.
# The fuzzy_lookup regex here captures the `max_id=...` query parameter of
# `/profiles/show/...with_replies?...` URLs.
- url_prefix: 'com,twitter)/i/profiles/show/'
  fuzzy_lookup: '/profiles/show/.*with_replies\?.*(max_id=[^&]+)'
# facebook rules
#=================================================================
# The regex captures the value of the `data=` query parameter (up to the next
# `&` or through a `query_type...` fragment).
- url_prefix: 'com,facebook)/ajax/pagelet/generic.php/'
  fuzzy_lookup: 'com,facebook\)/.*[?&]data=(.*?(?:[&]|query_type[^,]+))'

# List form of fuzzy_lookup: plain names rather than a regex
# (presumably query parameter names -- confirm against the consumer).
- url_prefix: 'com,facebook)/ajax/ufi/'
  fuzzy_lookup:
  - ft_ent_identifier
  - lsd

- url_prefix: 'com,facebook)/ajax/chat/hovercard/sidebar.php'
  fuzzy_lookup:
  - ids[0]

- url_prefix: 'com,facebook)/login.php'
  fuzzy_lookup:
  - email
  - lgnrnd
  - lsd

# fallback for all /ajax/
# The regex matches one or more `?name=value` / `&name=value` pairs whose name
# does not start with an underscore.
- url_prefix: 'com,facebook)/ajax/'
  fuzzy_lookup: '([?&][^_]\w+=[^&]+)+'

- url_prefix: 'com,facebook)/'
  rewrite:
    js_regexs:
    # Matches `Bootloader.configurePage...;`; the replacement wraps `{0}` in a
    # JavaScript block comment.
    - match: 'Bootloader\.configurePage.*?;'
      replace: '/* {0} */'
    # NOTE(review): the nesting level of parse_comments was lost in the
    # flattened source; it is placed beside js_regexs under rewrite -- confirm.
    parse_comments: true

- url_prefix: 'com,facebook'
  rewrite:
    cookie_scope: root
# instagram rules
#=================================================================
- url_prefix: 'net,cloudfront,'
  rewrite:
    js_regexs:
    # Matches `url(//...)`; capture group 1 is the protocol-relative URL inside
    # the parentheses. `group: 1` presumably selects that capture for
    # rewriting -- confirm against the consumer.
    - match: '\burl\((//[^)]+)\)'
      rewrite: true
      group: 1
# flickr rules
#=================================================================
# One rule shared by several combo-loader prefixes; the regex captures a path
# segment ending in `.css` or `.js`.
- url_prefix:
  - 'com,yimg,l)/g/combo'
  - 'com,yimg,s)/pw/combo'
  - 'com,yahooapis,yui)/combo'
  fuzzy_lookup: '([^/]+(?:\.css|\.js))'

- url_prefix: 'com,staticflickr,'
  fuzzy_lookup:
    # Captures `<digits>_<lowercase alphanumerics>` ahead of the jpg suffix.
    # NOTE(review): the `.` before `jpg` is unescaped, so it matches any
    # character -- confirm whether `\.jpg` was intended.
    match: '([0-9]+_[a-z0-9]+).*?.jpg'
    replace: '/'
# google plus rules
#=================================================================
# Every regex in this section captures an `f.sid=...` parameter; the first
# three also capture an additional token earlier in the URL.
# NOTE(review): the `.` in `f.sid` is unescaped and matches any character --
# confirm whether `f\.sid` was intended.
- url_prefix: 'com,google,plus)/_/stream/getactivities'
  fuzzy_lookup: '(egk[^"]+).*(f.sid=[^&]+)'

- url_prefix: 'com,google,plus)/_/stream/squarestream'
  fuzzy_lookup: '(cai[^"]+).*(f.sid=[^&]+)'

# The first capture here is a 13-digit number followed by `]`.
- url_prefix: 'com,google,plus)/_/communities/rt/landing'
  fuzzy_lookup: 'com,google,plus\)/_/.*?.*\,(\d{13}\])&.*(f.sid=[^&]+).*'

# Broader `/_/` prefix, listed after the more specific rules above.
- url_prefix: 'com,google,plus)/_/'
  fuzzy_lookup: 'com,google,plus\)/_/.*?.*(f.sid=[^&]+)'
# testing rules -- not for valid domain
#=================================================================
# this rule block is a non-existent prefix merely for testing
- url_prefix: 'example,example,test)/'
  canonicalize:
    # Keeps the part before `?` (group 1) and the literal `id=value` pair
    # (group 2); the replacement joins them as `\1?\2`.
    match: '(example,example,test\)/.*?)[?].*?(id=value).*'
    replace: '\1?\2'
  fuzzy_lookup:
  - param1
  - id
  rewrite:
    # Canonical lowercase boolean; same value as the previous `False`.
    js_rewrite_location: false
# all domain rules -- fallback to this dataset
#=================================================================
# Applies to all urls -- should be last
- url_prefix: ''
  fuzzy_lookup:
    # Captures everything before a `_=<digits>` or `uncache=<digits>` query
    # parameter (introduced by `?` or `&`).
    match: '(.*)[&?](?:_|uncache)=[\d]+[&]?'
    # `{0}` is a placeholder filled in by the consumer (presumably with the
    # captured prefix -- confirm).
    filter: '=urlkey:{0}'
    replace: '?'