From 68878fa72a26906c5360f8a4abec9fe2c954e9e2 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Sat, 8 Mar 2014 15:53:52 -0800 Subject: [PATCH] update domain-specific rules to make flickr replay work better! --- pywb/cdx/cdxdomainspecific.py | 8 +++++++- pywb/rules.yaml | 9 ++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/pywb/cdx/cdxdomainspecific.py b/pywb/cdx/cdxdomainspecific.py index e77c4666..d576f8a8 100644 --- a/pywb/cdx/cdxdomainspecific.py +++ b/pywb/cdx/cdxdomainspecific.py @@ -95,7 +95,11 @@ class FuzzyQuery: if not matched_rule: return None - inx = url.find('?') + repl = '?' + if matched_rule.replace: + repl = matched_rule.replace + + inx = url.rfind(repl) if inx > 0: url = url[:inx + 1] @@ -104,6 +108,8 @@ class FuzzyQuery: 'filter': filter_, 'output': output} + print params + return CDXQuery(**params) diff --git a/pywb/rules.yaml b/pywb/rules.yaml index c1d6a7e7..e9892962 100644 --- a/pywb/rules.yaml +++ b/pywb/rules.yaml @@ -27,13 +27,20 @@ rules: replace: '/* {0} */' - # yahoo rules + # flickr rules #================================================================= - url_prefix: ['com,yimg,l)/g/combo', 'com,yahooapis,yui)/combo'] fuzzy_lookup: '([^/]+(?:\.css|\.js))' + - url_prefix: 'com,staticflickr,' + + fuzzy_lookup: + match: '([0-9]+_[a-z0-9]+).*?.jpg' + replace: '/' + + # testing rules -- not for valid domain #================================================================= # this rule block is a non-existent prefix merely for testing