1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

Windows support work: fix loaders to use pathname2url to convert file paths to file:/// URLs, and use urlopen to open file paths.
Fix some tests to open files in universal-newline mode ('rU') so that line breaks are handled consistently across platforms.
This commit is contained in:
Ilya Kreymer 2015-01-10 14:06:15 -08:00
parent 06b2ea498e
commit 1eb0f96f92
6 changed files with 31 additions and 18 deletions

View File

@ -6,8 +6,10 @@ import requests
import datetime import datetime
import mimetypes import mimetypes
import logging import logging
import os
from urlparse import urlsplit from urlparse import urlsplit, urljoin
from urllib import pathname2url
from pywb.utils.loaders import is_http, LimitReader, BlockLoader from pywb.utils.loaders import is_http, LimitReader, BlockLoader
from pywb.utils.loaders import extract_client_cookie from pywb.utils.loaders import extract_client_cookie
@ -180,16 +182,25 @@ class LiveRewriter(object):
if url.startswith('//'): if url.startswith('//'):
url = 'http:' + url url = 'http:' + url
if is_http(url):
is_remote = True
else:
is_remote = False
if not url.startswith('file:'):
url = os.path.abspath(url)
url = urljoin('file:', pathname2url(url))
print(url)
# explicit urlkey may be passed in (say for testing) # explicit urlkey may be passed in (say for testing)
if not urlkey: if not urlkey:
urlkey = canonicalize(url) urlkey = canonicalize(url)
if is_http(url): if is_remote:
(status_headers, stream) = self.fetch_http(url, urlkey, env, (status_headers, stream) = self.fetch_http(url, urlkey, env,
req_headers, req_headers,
follow_redirects, follow_redirects,
ignore_proxies) ignore_proxies)
else: else:
(status_headers, stream) = self.fetch_local_file(url) (status_headers, stream) = self.fetch_local_file(url)
if timestamp is None: if timestamp is None:

View File

@ -129,13 +129,13 @@ class BlockLoader(object):
# if starting with . or /, can only be a file path.. # if starting with . or /, can only be a file path..
file_only = url.startswith(('/', '.')) file_only = url.startswith(('/', '.'))
if url.startswith('file://'):
url = url[len('file://'):]
file_only = True
try: try:
# first, try as file # first, try as file
afile = open(url, 'rb') if url.startswith('file://'):
file_only = True
afile = urllib.urlopen(url)
else:
afile = open(url, 'rb')
except IOError: except IOError:
if file_only: if file_only:

View File

@ -3,11 +3,11 @@ r"""
#================================================================= #=================================================================
# DecompressingBufferedReader readline() # DecompressingBufferedReader readline()
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb')).readline() >>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU')).readline()
' CDX N b a m s k r M S V g\n' ' CDX N b a m s k r M S V g\n'
# detect not compressed # detect not compressed
>>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rb'), decomp_type = 'gzip').readline() >>> DecompressingBufferedReader(open(test_cdx_dir + 'iana.cdx', 'rU'), decomp_type = 'gzip').readline()
' CDX N b a m s k r M S V g\n' ' CDX N b a m s k r M S V g\n'
# decompress with on the fly compression, default gzip compression # decompress with on the fly compression, default gzip compression

View File

@ -25,7 +25,7 @@ True
100 100
# no length specified, read full amount requested # no length specified, read full amount requested
>>> len(BlockLoader().load('file://' + test_cdx_dir + 'example.cdx', 0, -1).read(400)) >>> len(BlockLoader().load('file:' + pathname2url(test_cdx_dir + 'example.cdx'), 0, -1).read(400))
400 400
# HMAC Cookie Maker # HMAC Cookie Maker
@ -65,6 +65,8 @@ from io import BytesIO
from pywb.utils.loaders import BlockLoader, HMACCookieMaker from pywb.utils.loaders import BlockLoader, HMACCookieMaker
from pywb.utils.loaders import LimitReader, extract_client_cookie from pywb.utils.loaders import LimitReader, extract_client_cookie
from urllib import pathname2url
from pywb import get_test_dir from pywb import get_test_dir
test_cdx_dir = get_test_dir() + 'cdx/' test_cdx_dir = get_test_dir() + 'cdx/'

View File

@ -160,7 +160,7 @@ TEST_CDX_DIR = get_test_dir() + 'cdx/'
TEST_WARC_DIR = get_test_dir() + 'warcs/' TEST_WARC_DIR = get_test_dir() + 'warcs/'
def read_fully(cdx): def read_fully(cdx):
with open(TEST_CDX_DIR + cdx) as fh: with open(TEST_CDX_DIR + cdx, 'rU') as fh:
curr = BytesIO() curr = BytesIO()
while True: while True:
b = fh.read() b = fh.read()
@ -172,7 +172,7 @@ def read_fully(cdx):
def cdx_index(warc, **options): def cdx_index(warc, **options):
buff = BytesIO() buff = BytesIO()
with open(TEST_WARC_DIR + warc) as fh: with open(TEST_WARC_DIR + warc, 'rU') as fh:
write_cdx_index(buff, fh, warc, **options) write_cdx_index(buff, fh, warc, **options)
return buff.getvalue() return buff.getvalue()
@ -213,7 +213,7 @@ def cli_lines_with_dir(input_):
print filename print filename
with open(os.path.join(tmp_dir, filename), 'r') as fh: with open(os.path.join(tmp_dir, filename), 'rU') as fh:
lines = fh.read(8192).rstrip().split('\n') lines = fh.read(8192).rstrip().split('\n')
finally: finally:

View File

@ -58,10 +58,10 @@ setup(
'pywb': ['static/flowplayer/*', 'static/*.*', 'ui/*', '*.yaml'], 'pywb': ['static/flowplayer/*', 'static/*.*', 'ui/*', '*.yaml'],
}, },
data_files=[ data_files=[
('sample_archive/cdx/', glob.glob('sample_archive/cdx/*')), ('sample_archive/cdx', glob.glob('sample_archive/cdx/*')),
('sample_archive/zipcdx/', glob.glob('sample_archive/zipcdx/*')), ('sample_archive/zipcdx', glob.glob('sample_archive/zipcdx/*')),
('sample_archive/warcs/', glob.glob('sample_archive/warcs/*')), ('sample_archive/warcs', glob.glob('sample_archive/warcs/*')),
('sample_archive/text_content/', ('sample_archive/text_content',
glob.glob('sample_archive/text_content/*')), glob.glob('sample_archive/text_content/*')),
], ],
install_requires=[ install_requires=[