mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-14 15:53:28 +01:00
proxy mode: don't rewrite xml for ajax requests. Support python 3.8 (#563)
* rewrite:
  - don't rewrite xml in proxy mode / html-insert only mode
  - ajax: if sec-fetch-mode is set to non-navigate, also treat as 'ajax'
* ci: build python 3.8, ignore 2.7 failures
* reqs: use released ujson for extra_reqs
* hmac: add digestmod, fix for py3.8
This commit is contained in:
parent
ed89fcc6f8
commit
5e9b13e267
@ -5,6 +5,7 @@ python:
|
|||||||
- "3.5"
|
- "3.5"
|
||||||
- "3.6"
|
- "3.6"
|
||||||
- "3.7"
|
- "3.7"
|
||||||
|
- "3.8"
|
||||||
|
|
||||||
dist: xenial
|
dist: xenial
|
||||||
|
|
||||||
@ -39,6 +40,7 @@ after_success:
|
|||||||
matrix:
|
matrix:
|
||||||
allow_failures:
|
allow_failures:
|
||||||
- env: WR_TEST=yes
|
- env: WR_TEST=yes
|
||||||
|
- python: "2.7"
|
||||||
|
|
||||||
exclude:
|
exclude:
|
||||||
- env: WR_TEST=yes
|
- env: WR_TEST=yes
|
||||||
|
@ -2,6 +2,6 @@ certauth
|
|||||||
youtube-dl
|
youtube-dl
|
||||||
boto3
|
boto3
|
||||||
uwsgi
|
uwsgi
|
||||||
git+https://github.com/esnme/ultrajson.git
|
ujson
|
||||||
pysocks
|
pysocks
|
||||||
lxml
|
lxml
|
||||||
|
@ -822,6 +822,12 @@ class RewriterApp(object):
|
|||||||
if value and value.lower() == 'xmlhttprequest':
|
if value and value.lower() == 'xmlhttprequest':
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
# if Chrome Sec-Fetch-Mode is set and is not 'navigate', then this is likely
|
||||||
|
# a fetch / ajax request
|
||||||
|
sec_fetch_mode = environ.get('HTTP_SEC_FETCH_MODE')
|
||||||
|
if sec_fetch_mode and sec_fetch_mode != 'navigate':
|
||||||
|
return True
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def is_preflight(self, environ):
|
def is_preflight(self, environ):
|
||||||
|
@ -9,13 +9,22 @@ class HTMLInsertOnlyRewriter(StreamingRewriter):
|
|||||||
"""
|
"""
|
||||||
NOT_HEAD_REGEX = re.compile(r'(<\s*\b)(?!(html|head))', re.I)
|
NOT_HEAD_REGEX = re.compile(r'(<\s*\b)(?!(html|head))', re.I)
|
||||||
|
|
||||||
|
XML_HEADER = re.compile(r'<\?xml.*\?>')
|
||||||
|
|
||||||
def __init__(self, url_rewriter, **kwargs):
|
def __init__(self, url_rewriter, **kwargs):
|
||||||
super(HTMLInsertOnlyRewriter, self).__init__(url_rewriter, False)
|
super(HTMLInsertOnlyRewriter, self).__init__(url_rewriter, False)
|
||||||
self.head_insert = kwargs['head_insert']
|
self.head_insert = kwargs['head_insert']
|
||||||
|
|
||||||
self.done = False
|
self.done = False
|
||||||
|
self.first = True
|
||||||
|
|
||||||
def rewrite(self, string):
|
def rewrite(self, string):
|
||||||
|
if self.first:
|
||||||
|
if self.url_rewriter.rewrite_opts.get('is_ajax') and self.XML_HEADER.search(string):
|
||||||
|
self.done = True
|
||||||
|
|
||||||
|
self.first = False
|
||||||
|
|
||||||
if self.done:
|
if self.done:
|
||||||
return string
|
return string
|
||||||
|
|
||||||
|
@ -16,14 +16,24 @@ r'''
|
|||||||
|
|
||||||
>>> parse('<head></head>text')
|
>>> parse('<head></head>text')
|
||||||
'<head></head>text<!--Insert-->'
|
'<head></head>text<!--Insert-->'
|
||||||
|
|
||||||
|
>>> parse('<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n<html xmlns="http://www.w3.org/1999/xhtml"><body></body></html>')
|
||||||
|
'<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n<html xmlns="http://www.w3.org/1999/xhtml"><!--Insert--><body></body></html>'
|
||||||
|
|
||||||
|
# ajax leave unchanged?
|
||||||
|
>>> parse('<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n<html xmlns="http://www.w3.org/1999/xhtml"><body></body></html>', is_ajax=True)
|
||||||
|
'<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n<html xmlns="http://www.w3.org/1999/xhtml"><body></body></html>'
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from pywb.rewrite.url_rewriter import UrlRewriter
|
from pywb.rewrite.url_rewriter import UrlRewriter
|
||||||
from pywb.rewrite.html_insert_rewriter import HTMLInsertOnlyRewriter
|
from pywb.rewrite.html_insert_rewriter import HTMLInsertOnlyRewriter
|
||||||
|
|
||||||
def parse(html_text):
|
def parse(html_text, is_ajax=False):
|
||||||
urlrewriter = UrlRewriter('20131226101010/https://example.com/some/path.html', '/web/')
|
urlrewriter = UrlRewriter('20131226101010/https://example.com/some/path.html', '/web/')
|
||||||
|
|
||||||
|
if is_ajax:
|
||||||
|
urlrewriter.rewrite_opts['is_ajax'] = True
|
||||||
|
|
||||||
rewriter = HTMLInsertOnlyRewriter(urlrewriter, head_insert='<!--Insert-->')
|
rewriter = HTMLInsertOnlyRewriter(urlrewriter, head_insert='<!--Insert-->')
|
||||||
|
|
||||||
return rewriter.rewrite(html_text) + rewriter.final_read()
|
return rewriter.rewrite(html_text) + rewriter.final_read()
|
||||||
|
@ -7,6 +7,7 @@ local and remote access
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import hmac
|
import hmac
|
||||||
|
import hashlib
|
||||||
import requests
|
import requests
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
@ -485,7 +486,7 @@ class HMACCookieMaker(object):
|
|||||||
else:
|
else:
|
||||||
msg = expire
|
msg = expire
|
||||||
|
|
||||||
hmacdigest = hmac.new(self.key.encode('utf-8'), msg.encode('utf-8'))
|
hmacdigest = hmac.new(self.key.encode('utf-8'), msg.encode('utf-8'), digestmod=hashlib.md5)
|
||||||
hexdigest = hmacdigest.hexdigest()
|
hexdigest = hmacdigest.hexdigest()
|
||||||
|
|
||||||
if extra_id:
|
if extra_id:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user