mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
Windows support work: fix loaders to use pathname2url to convert to
file:/// URL, use urlopen to open file paths; fix some tests to use universal line breaks
This commit is contained in:
parent
06b2ea498e
commit
1eb0f96f92
@ -6,8 +6,10 @@ import requests
|
|||||||
import datetime
|
import datetime
|
||||||
import mimetypes
|
import mimetypes
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
|
|
||||||
from urlparse import urlsplit
|
from urlparse import urlsplit, urljoin
|
||||||
|
from urllib import pathname2url
|
||||||
|
|
||||||
from pywb.utils.loaders import is_http, LimitReader, BlockLoader
|
from pywb.utils.loaders import is_http, LimitReader, BlockLoader
|
||||||
from pywb.utils.loaders import extract_client_cookie
|
from pywb.utils.loaders import extract_client_cookie
|
||||||
@ -180,16 +182,25 @@ class LiveRewriter(object):
|
|||||||
if url.startswith('//'):
|
if url.startswith('//'):
|
||||||
url = 'http:' + url
|
url = 'http:' + url
|
||||||
|
|
||||||
|
if is_http(url):
|
||||||
|
is_remote = True
|
||||||
|
else:
|
||||||
|
is_remote = False
|
||||||
|
if not url.startswith('file:'):
|
||||||
|
url = os.path.abspath(url)
|
||||||
|
url = urljoin('file:', pathname2url(url))
|
||||||
|
print(url)
|
||||||
|
|
||||||
# explicit urlkey may be passed in (say for testing)
|
# explicit urlkey may be passed in (say for testing)
|
||||||
if not urlkey:
|
if not urlkey:
|
||||||
urlkey = canonicalize(url)
|
urlkey = canonicalize(url)
|
||||||
|
|
||||||
if is_http(url):
|
if is_remote:
|
||||||
(status_headers, stream) = self.fetch_http(url, urlkey, env,
|
(status_headers, stream) = self.fetch_http(url, urlkey, env,
|
||||||
req_headers,
|
req_headers,
|
||||||
follow_redirects,
|
follow_redirects,
|
||||||
ignore_proxies)
|
ignore_proxies)
|
||||||
else:
|
else:
|
||||||
(status_headers, stream) = self.fetch_local_file(url)
|
(status_headers, stream) = self.fetch_local_file(url)
|
||||||
|
|
||||||
if timestamp is None:
|
if timestamp is None:
|
||||||
|
@ -129,13 +129,13 @@ class BlockLoader(object):
|
|||||||
# if starting with . or /, can only be a file path..
|
# if starting with . or /, can only be a file path..
|
||||||
file_only = url.startswith(('/', '.'))
|
file_only = url.startswith(('/', '.'))
|
||||||
|
|
||||||
if url.startswith('file://'):
|
|
||||||
url = url[len('file://'):]
|
|
||||||
file_only = True
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# first, try as file
|
# first, try as file
|
||||||
afile = open(url, 'rb')
|
if url.startswith('file://'):
|
||||||
|
file_only = True
|
||||||
|
afile = urllib.urlopen(url)
|
||||||
|
else:
|
||||||
|
afile = open(url, 'rb')
|
||||||
|
|
||||||
except IOError:
|
except IOError:
|
||||||
if file_only:
|
if file_only:
|
||||||
|
@ -3,11 +3,11 @@ r"""
|
|||||||
#=================================================================
|
#=================================================================
|
||||||
|
|
||||||
# DecompressingBufferedReader readline()
|
# DecompressingBufferedReader readline()
|
||||||
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb')).readline()
|
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU')).readline()
|
||||||
' CDX N b a m s k r M S V g\n'
|
' CDX N b a m s k r M S V g\n'
|
||||||
|
|
||||||
# detect not compressed
|
# detect not compressed
|
||||||
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb'), decomp_type = 'gzip').readline()
|
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU'), decomp_type = 'gzip').readline()
|
||||||
' CDX N b a m s k r M S V g\n'
|
' CDX N b a m s k r M S V g\n'
|
||||||
|
|
||||||
# decompress with on the fly compression, default gzip compression
|
# decompress with on the fly compression, default gzip compression
|
||||||
|
@ -25,7 +25,7 @@ True
|
|||||||
100
|
100
|
||||||
|
|
||||||
# no length specified, read full amount requested
|
# no length specified, read full amount requested
|
||||||
>>> len(BlockLoader().load('file://' + test_cdx_dir + 'example.cdx', 0, -1).read(400))
|
>>> len(BlockLoader().load('file:' + pathname2url(test_cdx_dir + 'example.cdx'), 0, -1).read(400))
|
||||||
400
|
400
|
||||||
|
|
||||||
# HMAC Cookie Maker
|
# HMAC Cookie Maker
|
||||||
@ -65,6 +65,8 @@ from io import BytesIO
|
|||||||
from pywb.utils.loaders import BlockLoader, HMACCookieMaker
|
from pywb.utils.loaders import BlockLoader, HMACCookieMaker
|
||||||
from pywb.utils.loaders import LimitReader, extract_client_cookie
|
from pywb.utils.loaders import LimitReader, extract_client_cookie
|
||||||
|
|
||||||
|
from urllib import pathname2url
|
||||||
|
|
||||||
from pywb import get_test_dir
|
from pywb import get_test_dir
|
||||||
|
|
||||||
test_cdx_dir = get_test_dir() + 'cdx/'
|
test_cdx_dir = get_test_dir() + 'cdx/'
|
||||||
|
@ -160,7 +160,7 @@ TEST_CDX_DIR = get_test_dir() + 'cdx/'
|
|||||||
TEST_WARC_DIR = get_test_dir() + 'warcs/'
|
TEST_WARC_DIR = get_test_dir() + 'warcs/'
|
||||||
|
|
||||||
def read_fully(cdx):
|
def read_fully(cdx):
|
||||||
with open(TEST_CDX_DIR + cdx) as fh:
|
with open(TEST_CDX_DIR + cdx, 'rU') as fh:
|
||||||
curr = BytesIO()
|
curr = BytesIO()
|
||||||
while True:
|
while True:
|
||||||
b = fh.read()
|
b = fh.read()
|
||||||
@ -172,7 +172,7 @@ def read_fully(cdx):
|
|||||||
def cdx_index(warc, **options):
|
def cdx_index(warc, **options):
|
||||||
buff = BytesIO()
|
buff = BytesIO()
|
||||||
|
|
||||||
with open(TEST_WARC_DIR + warc) as fh:
|
with open(TEST_WARC_DIR + warc, 'rU') as fh:
|
||||||
write_cdx_index(buff, fh, warc, **options)
|
write_cdx_index(buff, fh, warc, **options)
|
||||||
|
|
||||||
return buff.getvalue()
|
return buff.getvalue()
|
||||||
@ -213,7 +213,7 @@ def cli_lines_with_dir(input_):
|
|||||||
|
|
||||||
print filename
|
print filename
|
||||||
|
|
||||||
with open(os.path.join(tmp_dir, filename), 'r') as fh:
|
with open(os.path.join(tmp_dir, filename), 'rU') as fh:
|
||||||
lines = fh.read(8192).rstrip().split('\n')
|
lines = fh.read(8192).rstrip().split('\n')
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
|
8
setup.py
8
setup.py
@ -58,10 +58,10 @@ setup(
|
|||||||
'pywb': ['static/flowplayer/*', 'static/*.*', 'ui/*', '*.yaml'],
|
'pywb': ['static/flowplayer/*', 'static/*.*', 'ui/*', '*.yaml'],
|
||||||
},
|
},
|
||||||
data_files=[
|
data_files=[
|
||||||
('sample_archive/cdx/', glob.glob('sample_archive/cdx/*')),
|
('sample_archive/cdx', glob.glob('sample_archive/cdx/*')),
|
||||||
('sample_archive/zipcdx/', glob.glob('sample_archive/zipcdx/*')),
|
('sample_archive/zipcdx', glob.glob('sample_archive/zipcdx/*')),
|
||||||
('sample_archive/warcs/', glob.glob('sample_archive/warcs/*')),
|
('sample_archive/warcs', glob.glob('sample_archive/warcs/*')),
|
||||||
('sample_archive/text_content/',
|
('sample_archive/text_content',
|
||||||
glob.glob('sample_archive/text_content/*')),
|
glob.glob('sample_archive/text_content/*')),
|
||||||
],
|
],
|
||||||
install_requires=[
|
install_requires=[
|
||||||
|
Loading…
x
Reference in New Issue
Block a user