diff --git a/pywb/cdx/cdxdomainspecific.py b/pywb/cdx/cdxdomainspecific.py index e77c4666..d576f8a8 100644 --- a/pywb/cdx/cdxdomainspecific.py +++ b/pywb/cdx/cdxdomainspecific.py @@ -95,7 +95,11 @@ class FuzzyQuery: if not matched_rule: return None - inx = url.find('?') + repl = '?' + if matched_rule.replace: + repl = matched_rule.replace + + inx = url.rfind(repl) if inx > 0: url = url[:inx + 1] @@ -104,6 +108,8 @@ class FuzzyQuery: 'filter': filter_, 'output': output} + print params + return CDXQuery(**params) diff --git a/pywb/rules.yaml b/pywb/rules.yaml index c1d6a7e7..e9892962 100644 --- a/pywb/rules.yaml +++ b/pywb/rules.yaml @@ -27,13 +27,20 @@ rules: replace: '/* {0} */' - # yahoo rules + # flickr rules #================================================================= - url_prefix: ['com,yimg,l)/g/combo', 'com,yahooapis,yui)/combo'] fuzzy_lookup: '([^/]+(?:\.css|\.js))' + - url_prefix: 'com,staticflickr,' + + fuzzy_lookup: + match: '([0-9]+_[a-z0-9]+).*?.jpg' + replace: '/' + + # testing rules -- not for valid domain #================================================================= # this rule block is a non-existent prefix merely for testing