mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-14 15:53:28 +01:00
proxy mode: don't rewrite xml for ajax requests. Support python 3.8 (#563)
* rewrite:
  - don't rewrite xml in proxy mode / html-insert only mode
  - ajax: if sec-fetch-mode is set to non-navigate, also treat as 'ajax'
* ci: build python 3.8, ignore 2.7 failures
* reqs: use released ujson for extra_reqs
* hmac: add digestmod, fix for py3.8
This commit is contained in:
parent
ed89fcc6f8
commit
5e9b13e267
@ -5,6 +5,7 @@ python:
|
||||
- "3.5"
|
||||
- "3.6"
|
||||
- "3.7"
|
||||
- "3.8"
|
||||
|
||||
dist: xenial
|
||||
|
||||
@ -39,6 +40,7 @@ after_success:
|
||||
matrix:
|
||||
allow_failures:
|
||||
- env: WR_TEST=yes
|
||||
- python: "2.7"
|
||||
|
||||
exclude:
|
||||
- env: WR_TEST=yes
|
||||
|
@ -2,6 +2,6 @@ certauth
|
||||
youtube-dl
|
||||
boto3
|
||||
uwsgi
|
||||
git+https://github.com/esnme/ultrajson.git
|
||||
ujson
|
||||
pysocks
|
||||
lxml
|
||||
|
@ -822,6 +822,12 @@ class RewriterApp(object):
|
||||
if value and value.lower() == 'xmlhttprequest':
|
||||
return True
|
||||
|
||||
# if Chrome Sec-Fetch-Mode is set and is not 'navigate', then this is likely
|
||||
# a fetch / ajax request
|
||||
sec_fetch_mode = environ.get('HTTP_SEC_FETCH_MODE')
|
||||
if sec_fetch_mode and sec_fetch_mode != 'navigate':
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def is_preflight(self, environ):
|
||||
|
@ -9,13 +9,22 @@ class HTMLInsertOnlyRewriter(StreamingRewriter):
|
||||
"""
|
||||
NOT_HEAD_REGEX = re.compile(r'(<\s*\b)(?!(html|head))', re.I)
|
||||
|
||||
XML_HEADER = re.compile(r'<\?xml.*\?>')
|
||||
|
||||
def __init__(self, url_rewriter, **kwargs):
|
||||
super(HTMLInsertOnlyRewriter, self).__init__(url_rewriter, False)
|
||||
self.head_insert = kwargs['head_insert']
|
||||
|
||||
self.done = False
|
||||
self.first = True
|
||||
|
||||
def rewrite(self, string):
|
||||
if self.first:
|
||||
if self.url_rewriter.rewrite_opts.get('is_ajax') and self.XML_HEADER.search(string):
|
||||
self.done = True
|
||||
|
||||
self.first = False
|
||||
|
||||
if self.done:
|
||||
return string
|
||||
|
||||
|
@ -16,14 +16,24 @@ r'''
|
||||
|
||||
>>> parse('<head></head>text')
|
||||
'<head></head>text<!--Insert-->'
|
||||
|
||||
>>> parse('<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n<html xmlns="http://www.w3.org/1999/xhtml"><body></body></html>')
|
||||
'<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n<html xmlns="http://www.w3.org/1999/xhtml"><!--Insert--><body></body></html>'
|
||||
|
||||
# ajax leave unchanged?
|
||||
>>> parse('<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n<html xmlns="http://www.w3.org/1999/xhtml"><body></body></html>', is_ajax=True)
|
||||
'<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n<html xmlns="http://www.w3.org/1999/xhtml"><body></body></html>'
|
||||
'''
|
||||
|
||||
from pywb.rewrite.url_rewriter import UrlRewriter
|
||||
from pywb.rewrite.html_insert_rewriter import HTMLInsertOnlyRewriter
|
||||
|
||||
def parse(html_text):
|
||||
def parse(html_text, is_ajax=False):
|
||||
urlrewriter = UrlRewriter('20131226101010/https://example.com/some/path.html', '/web/')
|
||||
|
||||
if is_ajax:
|
||||
urlrewriter.rewrite_opts['is_ajax'] = True
|
||||
|
||||
rewriter = HTMLInsertOnlyRewriter(urlrewriter, head_insert='<!--Insert-->')
|
||||
|
||||
return rewriter.rewrite(html_text) + rewriter.final_read()
|
||||
|
@ -7,6 +7,7 @@ local and remote access
|
||||
|
||||
import os
|
||||
import hmac
|
||||
import hashlib
|
||||
import requests
|
||||
import yaml
|
||||
|
||||
@ -485,7 +486,7 @@ class HMACCookieMaker(object):
|
||||
else:
|
||||
msg = expire
|
||||
|
||||
hmacdigest = hmac.new(self.key.encode('utf-8'), msg.encode('utf-8'))
|
||||
hmacdigest = hmac.new(self.key.encode('utf-8'), msg.encode('utf-8'), digestmod=hashlib.md5)
|
||||
hexdigest = hmacdigest.hexdigest()
|
||||
|
||||
if extra_id:
|
||||
|
Loading…
x
Reference in New Issue
Block a user