mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
windows support work: fix loaders to use pathname2url to convert to
file:/// url, use urlopen to open file paths; fix some tests to use universal line breaks
This commit is contained in:
parent
06b2ea498e
commit
1eb0f96f92
@ -6,8 +6,10 @@ import requests
|
||||
import datetime
|
||||
import mimetypes
|
||||
import logging
|
||||
import os
|
||||
|
||||
from urlparse import urlsplit
|
||||
from urlparse import urlsplit, urljoin
|
||||
from urllib import pathname2url
|
||||
|
||||
from pywb.utils.loaders import is_http, LimitReader, BlockLoader
|
||||
from pywb.utils.loaders import extract_client_cookie
|
||||
@ -180,16 +182,25 @@ class LiveRewriter(object):
|
||||
if url.startswith('//'):
|
||||
url = 'http:' + url
|
||||
|
||||
if is_http(url):
|
||||
is_remote = True
|
||||
else:
|
||||
is_remote = False
|
||||
if not url.startswith('file:'):
|
||||
url = os.path.abspath(url)
|
||||
url = urljoin('file:', pathname2url(url))
|
||||
print(url)
|
||||
|
||||
# explicit urlkey may be passed in (say for testing)
|
||||
if not urlkey:
|
||||
urlkey = canonicalize(url)
|
||||
|
||||
if is_http(url):
|
||||
if is_remote:
|
||||
(status_headers, stream) = self.fetch_http(url, urlkey, env,
|
||||
req_headers,
|
||||
follow_redirects,
|
||||
ignore_proxies)
|
||||
else:
|
||||
else:
|
||||
(status_headers, stream) = self.fetch_local_file(url)
|
||||
|
||||
if timestamp is None:
|
||||
|
@ -129,13 +129,13 @@ class BlockLoader(object):
|
||||
# if starting with . or /, can only be a file path..
|
||||
file_only = url.startswith(('/', '.'))
|
||||
|
||||
if url.startswith('file://'):
|
||||
url = url[len('file://'):]
|
||||
file_only = True
|
||||
|
||||
try:
|
||||
# first, try as file
|
||||
afile = open(url, 'rb')
|
||||
if url.startswith('file://'):
|
||||
file_only = True
|
||||
afile = urllib.urlopen(url)
|
||||
else:
|
||||
afile = open(url, 'rb')
|
||||
|
||||
except IOError:
|
||||
if file_only:
|
||||
|
@ -3,11 +3,11 @@ r"""
|
||||
#=================================================================
|
||||
|
||||
# DecompressingBufferedReader readline()
|
||||
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb')).readline()
|
||||
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU')).readline()
|
||||
' CDX N b a m s k r M S V g\n'
|
||||
|
||||
# detect not compressed
|
||||
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb'), decomp_type = 'gzip').readline()
|
||||
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU'), decomp_type = 'gzip').readline()
|
||||
' CDX N b a m s k r M S V g\n'
|
||||
|
||||
# decompress with on the fly compression, default gzip compression
|
||||
|
@ -25,7 +25,7 @@ True
|
||||
100
|
||||
|
||||
# no length specified, read full amount requested
|
||||
>>> len(BlockLoader().load('file://' + test_cdx_dir + 'example.cdx', 0, -1).read(400))
|
||||
>>> len(BlockLoader().load('file:' + pathname2url(test_cdx_dir + 'example.cdx'), 0, -1).read(400))
|
||||
400
|
||||
|
||||
# HMAC Cookie Maker
|
||||
@ -65,6 +65,8 @@ from io import BytesIO
|
||||
from pywb.utils.loaders import BlockLoader, HMACCookieMaker
|
||||
from pywb.utils.loaders import LimitReader, extract_client_cookie
|
||||
|
||||
from urllib import pathname2url
|
||||
|
||||
from pywb import get_test_dir
|
||||
|
||||
test_cdx_dir = get_test_dir() + 'cdx/'
|
||||
|
@ -160,7 +160,7 @@ TEST_CDX_DIR = get_test_dir() + 'cdx/'
|
||||
TEST_WARC_DIR = get_test_dir() + 'warcs/'
|
||||
|
||||
def read_fully(cdx):
|
||||
with open(TEST_CDX_DIR + cdx) as fh:
|
||||
with open(TEST_CDX_DIR + cdx, 'rU') as fh:
|
||||
curr = BytesIO()
|
||||
while True:
|
||||
b = fh.read()
|
||||
@ -172,7 +172,7 @@ def read_fully(cdx):
|
||||
def cdx_index(warc, **options):
|
||||
buff = BytesIO()
|
||||
|
||||
with open(TEST_WARC_DIR + warc) as fh:
|
||||
with open(TEST_WARC_DIR + warc, 'rU') as fh:
|
||||
write_cdx_index(buff, fh, warc, **options)
|
||||
|
||||
return buff.getvalue()
|
||||
@ -213,7 +213,7 @@ def cli_lines_with_dir(input_):
|
||||
|
||||
print filename
|
||||
|
||||
with open(os.path.join(tmp_dir, filename), 'r') as fh:
|
||||
with open(os.path.join(tmp_dir, filename), 'rU') as fh:
|
||||
lines = fh.read(8192).rstrip().split('\n')
|
||||
|
||||
finally:
|
||||
|
8
setup.py
8
setup.py
@ -58,10 +58,10 @@ setup(
|
||||
'pywb': ['static/flowplayer/*', 'static/*.*', 'ui/*', '*.yaml'],
|
||||
},
|
||||
data_files=[
|
||||
('sample_archive/cdx/', glob.glob('sample_archive/cdx/*')),
|
||||
('sample_archive/zipcdx/', glob.glob('sample_archive/zipcdx/*')),
|
||||
('sample_archive/warcs/', glob.glob('sample_archive/warcs/*')),
|
||||
('sample_archive/text_content/',
|
||||
('sample_archive/cdx', glob.glob('sample_archive/cdx/*')),
|
||||
('sample_archive/zipcdx', glob.glob('sample_archive/zipcdx/*')),
|
||||
('sample_archive/warcs', glob.glob('sample_archive/warcs/*')),
|
||||
('sample_archive/text_content',
|
||||
glob.glob('sample_archive/text_content/*')),
|
||||
],
|
||||
install_requires=[
|
||||
|
Loading…
x
Reference in New Issue
Block a user