1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

Windows support work: fix loaders to use pathname2url to convert file paths to file:/// URLs, and use urlopen to open file paths.
Fix some tests to use universal line breaks.
This commit is contained in:
Ilya Kreymer 2015-01-10 14:06:15 -08:00
parent 06b2ea498e
commit 1eb0f96f92
6 changed files with 31 additions and 18 deletions

View File

@ -6,8 +6,10 @@ import requests
import datetime
import mimetypes
import logging
import os
from urlparse import urlsplit
from urlparse import urlsplit, urljoin
from urllib import pathname2url
from pywb.utils.loaders import is_http, LimitReader, BlockLoader
from pywb.utils.loaders import extract_client_cookie
@ -180,11 +182,20 @@ class LiveRewriter(object):
if url.startswith('//'):
url = 'http:' + url
if is_http(url):
is_remote = True
else:
is_remote = False
if not url.startswith('file:'):
url = os.path.abspath(url)
url = urljoin('file:', pathname2url(url))
print(url)
# explicit urlkey may be passed in (say for testing)
if not urlkey:
urlkey = canonicalize(url)
if is_http(url):
if is_remote:
(status_headers, stream) = self.fetch_http(url, urlkey, env,
req_headers,
follow_redirects,

View File

@ -129,13 +129,13 @@ class BlockLoader(object):
# if starting with . or /, can only be a file path..
file_only = url.startswith(('/', '.'))
if url.startswith('file://'):
url = url[len('file://'):]
file_only = True
try:
# first, try as file
afile = open(url, 'rb')
if url.startswith('file://'):
file_only = True
afile = urllib.urlopen(url)
else:
afile = open(url, 'rb')
except IOError:
if file_only:

View File

@ -3,11 +3,11 @@ r"""
#=================================================================
# DecompressingBufferedReader readline()
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb')).readline()
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU')).readline()
' CDX N b a m s k r M S V g\n'
# detect not compressed
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb'), decomp_type = 'gzip').readline()
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU'), decomp_type = 'gzip').readline()
' CDX N b a m s k r M S V g\n'
# decompress with on the fly compression, default gzip compression

View File

@ -25,7 +25,7 @@ True
100
# no length specified, read full amount requested
>>> len(BlockLoader().load('file://' + test_cdx_dir + 'example.cdx', 0, -1).read(400))
>>> len(BlockLoader().load('file:' + pathname2url(test_cdx_dir + 'example.cdx'), 0, -1).read(400))
400
# HMAC Cookie Maker
@ -65,6 +65,8 @@ from io import BytesIO
from pywb.utils.loaders import BlockLoader, HMACCookieMaker
from pywb.utils.loaders import LimitReader, extract_client_cookie
from urllib import pathname2url
from pywb import get_test_dir
test_cdx_dir = get_test_dir() + 'cdx/'

View File

@ -160,7 +160,7 @@ TEST_CDX_DIR = get_test_dir() + 'cdx/'
TEST_WARC_DIR = get_test_dir() + 'warcs/'
def read_fully(cdx):
with open(TEST_CDX_DIR + cdx) as fh:
with open(TEST_CDX_DIR + cdx, 'rU') as fh:
curr = BytesIO()
while True:
b = fh.read()
@ -172,7 +172,7 @@ def read_fully(cdx):
def cdx_index(warc, **options):
buff = BytesIO()
with open(TEST_WARC_DIR + warc) as fh:
with open(TEST_WARC_DIR + warc, 'rU') as fh:
write_cdx_index(buff, fh, warc, **options)
return buff.getvalue()
@ -213,7 +213,7 @@ def cli_lines_with_dir(input_):
print filename
with open(os.path.join(tmp_dir, filename), 'r') as fh:
with open(os.path.join(tmp_dir, filename), 'rU') as fh:
lines = fh.read(8192).rstrip().split('\n')
finally:

View File

@ -58,10 +58,10 @@ setup(
'pywb': ['static/flowplayer/*', 'static/*.*', 'ui/*', '*.yaml'],
},
data_files=[
('sample_archive/cdx/', glob.glob('sample_archive/cdx/*')),
('sample_archive/zipcdx/', glob.glob('sample_archive/zipcdx/*')),
('sample_archive/warcs/', glob.glob('sample_archive/warcs/*')),
('sample_archive/text_content/',
('sample_archive/cdx', glob.glob('sample_archive/cdx/*')),
('sample_archive/zipcdx', glob.glob('sample_archive/zipcdx/*')),
('sample_archive/warcs', glob.glob('sample_archive/warcs/*')),
('sample_archive/text_content',
glob.glob('sample_archive/text_content/*')),
],
install_requires=[