I'm trying to upload a file of about 1 GB to Amazon Glacier. Somewhat arbitrarily, I've decided to break it into 32 MiB (2**25-byte) parts and upload them one after another.
import math
import boto3
from botocore.utils import calculate_tree_hash
# Start a Glacier multipart upload for a local file and send it as a series
# of 32 MiB byte ranges, then complete the upload with a tree-hash checksum.
client = boto3.client('glacier')
vault_name = 'my-vault'
local_file = 'filename'
size = 1073745600  # total size in bytes
size_mb = size / (2**20)  # the same size expressed in megabytes

multi_up = client.initiate_multipart_upload(
    vaultName=vault_name,
    archiveDescription=local_file,
    partSize=str(2**25))  # 32 MiB, given in bytes as a string

# Number of whole 32 MiB parts (rounded down).
parts = math.floor(size_mb / 32)

with open("/Users/alexchase/Desktop/{}".format(local_file), 'rb') as upload:
    for p in range(parts):
        # Byte range for this part; the final range runs to `size`, so it
        # is larger than the ranges before it.
        lower = p * (2**25)
        if p + 1 < parts:
            upper = ((p + 1) * (2**25)) - 1
        else:
            upper = size
        # The open file object itself is handed over as the body.
        up_part = client.upload_multipart_part(
            vaultName=vault_name,
            uploadId=multi_up['uploadId'],
            range='bytes {}-{}/*'.format(lower, upper),
            body=upload)
    checksum = calculate_tree_hash(upload)

complete_up = client.complete_multipart_upload(
    archiveSize=str(size),
    checksum=checksum,
    uploadId=multi_up['uploadId'],
    vaultName=vault_name)
This generates an error about the first byte range.
---------------------------------------------------------------------------
InvalidParameterValueException Traceback (most recent call last)
<ipython-input-2-9dd3ac986601> in <module>()
93 uploadId=multi_up['uploadId'],
94 range='bytes {}-{}/*'.format(lower, upper),
---> 95 body=upload)
96 upload_info.append(up_part)
97 checksum = calculate_tree_hash(upload)
~/anaconda/lib/python3.5/site-packages/botocore/client.py in _api_call(self, *args, **kwargs)
251 "%s() only accepts keyword arguments." % py_operation_name)
252 # The "self" in this scope is referring to the BaseClient.
--> 253 return self._make_api_call(operation_name, kwargs)
254
255 _api_call.__name__ = str(py_operation_name)
~/anaconda/lib/python3.5/site-packages/botocore/client.py in _make_api_call(self, operation_name, api_params)
555 error_code = parsed_response.get("Error", {}).get("Code")
556 error_class = self.exceptions.from_code(error_code)
--> 557 raise error_class(parsed_response, operation_name)
558 else:
559 return parsed_response
InvalidParameterValueException: An error occurred (InvalidParameterValueException) when calling the UploadMultipartPart operation:
Content-Range: bytes 0-33554431/* is incompatible with Content-Length: 1073745600
Can anyone see what I'm doing wrong?
@Michael-sqlbot is quite right: the issue with the `Content-Range` was that I was passing the whole file instead of a single part. I fixed this by using the `read()` method, but then I discovered a separate issue: per the docs, the final part has to be the same size as or smaller than the preceding parts. This means using `math.ceil()` instead of `math.floor()` to compute the number of parts.
The working code is:
import math
import boto3
from botocore.utils import calculate_tree_hash
# Upload a local file to an Amazon Glacier vault as a multipart archive:
# initiate the upload, send fixed-size parts one after another, then
# complete it with the tree hash of the whole file.
client = boto3.client('glacier')
vault_name = 'my-vault'
size = 1073745600  # in bytes; must match the real size of the file on disk
size_mb = size / (2**20)  # Convert to megabytes for readability
local_file = 'filename'
partSize = 2**25  # 32 MiB in bytes
multi_up = client.initiate_multipart_upload(vaultName=vault_name,
                                            archiveDescription=local_file,
                                            partSize=str(partSize))
# The number of <= partSize parts we need. Derived from partSize itself so
# the count stays correct if the part size is changed.
parts = math.ceil(size / partSize)
with open("/Users/alexchase/Desktop/{}".format(local_file), 'rb') as upload:
    for p in range(parts):
        # Inclusive byte range of this part. min() clamps the final part to
        # the last byte of the file, so it is the same size as or smaller
        # than the parts before it.
        lower = p * partSize
        upper = min(lower + partSize, size) - 1
        read_size = upper - lower + 1
        # Send only this part's bytes, not the whole file object.
        file_part = upload.read(read_size)
        if len(file_part) != read_size:
            # The hard-coded `size` does not match the file; fail here
            # instead of sending a range that disagrees with the body.
            raise ValueError(
                'expected {} bytes for part {}, read {}'.format(
                    read_size, p, len(file_part)))
        up_part = client.upload_multipart_part(vaultName=vault_name,
                                               uploadId=multi_up['uploadId'],
                                               range='bytes {}-{}/*'.format(lower, upper),
                                               body=file_part)
    # The loop above left the file position at EOF. Rewind so the tree hash
    # covers the whole file rather than zero bytes.
    upload.seek(0)
    checksum = calculate_tree_hash(upload)
complete_up = client.complete_multipart_upload(archiveSize=str(size),
                                               checksum=checksum,
                                               uploadId=multi_up['uploadId'],
                                               vaultName=vault_name)
Content-Range: bytes 0-33554431/* is incompatible with Content-Length: 1073745600
You're telling the API that you're sending the first 32 MiB, but you're actually sending (proposing to send) the entire file, since you pass `body=upload`, and `upload` isn't just the first part, it's the entire file. The `Content-Length` refers to the size of this part upload, which should be 33554432 (32 MiB).
The docs are admittedly ambiguous...
> `body` (bytes or seekable file-like object) -- The data to upload.
...but the "data to upload" seems to refer to the data for only this part, in spite of the word "seekable."
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With