mirror of
https://github.com/webrecorder/pywb.git
synced 2025-03-15 00:03:28 +01:00
S3 loader to use boto3 built-in credential configuration (#723)
* S3Loader: allow authenticated S3 access using boto3's built-in configuration methods, without explicitly passing credentials; cf. https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html#configuring-credentials
* S3Loader tests: re-enable the tests reading from s3://commoncrawl/ in order to test authenticated reads. The tests are skipped if no AWS credentials are configured.
This commit is contained in:
parent
fbed87aa46
commit
510c9dc9f1
@ -185,7 +185,8 @@ class BlockLoader(BaseLoader):
|
||||
"""
|
||||
a loader which can stream blocks of content
|
||||
given a uri, offset and optional length.
|
||||
Currently supports: http/https and file/local file system
|
||||
Currently supports: http/https, file/local file system,
|
||||
pkg, WebHDFS, S3
|
||||
"""
|
||||
|
||||
loaders = {}
|
||||
@ -393,14 +394,15 @@ class S3Loader(BaseLoader):
|
||||
|
||||
def s3_load(anon=False):
|
||||
if not self.client:
|
||||
s3_client_args = {}
|
||||
if anon:
|
||||
config = Config(signature_version=UNSIGNED)
|
||||
else:
|
||||
config = None
|
||||
s3_client_args['config'] = Config(signature_version=UNSIGNED)
|
||||
if aws_access_key_id:
|
||||
s3_client_args['aws_access_key_id'] = aws_access_key_id
|
||||
s3_client_args['aws_secret_access_key'] = aws_secret_access_key
|
||||
|
||||
client = boto3.client('s3', **s3_client_args)
|
||||
|
||||
client = boto3.client('s3', aws_access_key_id=aws_access_key_id,
|
||||
aws_secret_access_key=aws_secret_access_key,
|
||||
config=config)
|
||||
else:
|
||||
client = self.client
|
||||
|
||||
|
@ -97,10 +97,19 @@ from pywb import get_test_dir
|
||||
|
||||
test_cdx_dir = get_test_dir() + 'cdx/'
|
||||
|
||||
@pytest.mark.skip("skip for now, made need different s3 source")
|
||||
def test_s3_read_1():
|
||||
def s3_authenticated_access_verification(bucket):
|
||||
import boto3, botocore
|
||||
s3_client = boto3.client('s3')
|
||||
try:
|
||||
s3_client.head_bucket(Bucket=bucket)
|
||||
except botocore.exceptions.NoCredentialsError:
|
||||
pytest.skip("Skipping S3Loader test for authenticated reads: no credentials configured")
|
||||
|
||||
def test_s3_read_authenticated_1():
|
||||
pytest.importorskip('boto3')
|
||||
|
||||
s3_authenticated_access_verification('commoncrawl')
|
||||
|
||||
res = BlockLoader().load('s3://commoncrawl/crawl-data/CC-MAIN-2015-11/segments/1424936462700.28/warc/CC-MAIN-20150226074102-00159-ip-10-28-5-156.ec2.internal.warc.gz',
|
||||
offset=53235662,
|
||||
length=2526)
|
||||
@ -112,10 +121,11 @@ def test_s3_read_1():
|
||||
assert reader.readline() == b'WARC/1.0\r\n'
|
||||
assert reader.readline() == b'WARC-Type: response\r\n'
|
||||
|
||||
@pytest.mark.skip("skip for now, made need different s3 source")
|
||||
def test_s3_read_2():
|
||||
def test_s3_read_authenticated_2():
|
||||
pytest.importorskip('boto3')
|
||||
|
||||
s3_authenticated_access_verification('commoncrawl')
|
||||
|
||||
res = BlockLoader().load('s3://commoncrawl/crawl-data/CC-MAIN-2015-11/index.html')
|
||||
|
||||
buff = res.read()
|
||||
|
Loading…
x
Reference in New Issue
Block a user