comparison venv/lib/python2.7/site-packages/boto/s3/key.py @ 0:d67268158946 draft

planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
author bcclaywell
date Mon, 12 Oct 2015 17:43:33 -0400
1 # Copyright (c) 2006-2012 Mitch Garnaat http://garnaat.org/
2 # Copyright (c) 2011, Nexenta Systems Inc.
3 # Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining a
6 # copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish, dis-
9 # tribute, sublicense, and/or sell copies of the Software, and to permit
10 # persons to whom the Software is furnished to do so, subject to the fol-
11 # lowing conditions:
12 #
13 # The above copyright notice and this permission notice shall be included
14 # in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
18 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
19 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 # IN THE SOFTWARE.
23 import email.utils
24 import errno
25 import hashlib
26 import mimetypes
27 import os
28 import re
29 import base64
30 import binascii
31 import math
32 from hashlib import md5
33 import boto.utils
34 from boto.compat import BytesIO, six, urllib, encodebytes
35
36 from boto.exception import BotoClientError
37 from boto.exception import StorageDataError
38 from boto.exception import PleaseRetryException
39 from boto.provider import Provider
40 from boto.s3.keyfile import KeyFile
41 from boto.s3.user import User
42 from boto import UserAgent
43 from boto.utils import compute_md5, compute_hash
44 from boto.utils import find_matching_headers
45 from boto.utils import merge_headers_by_name
46
47
48 class Key(object):
49 """
50 Represents a key (object) in an S3 bucket.
51
52 :ivar bucket: The parent :class:`boto.s3.bucket.Bucket`.
53 :ivar name: The name of this Key object.
54 :ivar metadata: A dictionary containing user metadata that you
55 wish to store with the object or that has been retrieved from
56 an existing object.
57 :ivar cache_control: The value of the `Cache-Control` HTTP header.
58 :ivar content_type: The value of the `Content-Type` HTTP header.
59 :ivar content_encoding: The value of the `Content-Encoding` HTTP header.
60 :ivar content_disposition: The value of the `Content-Disposition` HTTP
61 header.
62 :ivar content_language: The value of the `Content-Language` HTTP header.
63 :ivar etag: The `etag` associated with this object.
64 :ivar last_modified: The string timestamp representing the last
65 time this object was modified in S3.
66 :ivar owner: The ID of the owner of this object.
67 :ivar storage_class: The storage class of the object. Currently, one of:
68 STANDARD | REDUCED_REDUNDANCY | GLACIER
69 :ivar md5: The MD5 hash of the contents of the object.
70 :ivar size: The size, in bytes, of the object.
71 :ivar version_id: The version ID of this object, if it is a versioned
72 object.
73 :ivar encrypted: Whether the object is encrypted while at rest on
74 the server.
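
Example (an illustrative sketch, not part of the original docstring; the
bucket name 'mybucket', the key name, and configured AWS credentials are
assumptions)::

    import boto
    conn = boto.connect_s3()
    bucket = conn.get_bucket('mybucket')
    k = bucket.new_key('hello.txt')              # create a Key in the bucket
    k.set_contents_from_string('Hello, world')   # upload the data
    print(k.get_contents_as_string())            # read it back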
75 """
76
77 DefaultContentType = 'application/octet-stream'
78
79 RestoreBody = """<?xml version="1.0" encoding="UTF-8"?>
80 <RestoreRequest xmlns="http://s3.amazonaws.com/doc/2006-03-01">
81 <Days>%s</Days>
82 </RestoreRequest>"""
83
84
85 BufferSize = boto.config.getint('Boto', 'key_buffer_size', 8192)
86
87 # The object metadata fields a user can set, other than custom metadata
88 # fields (i.e., those beginning with a provider-specific prefix like
89 # x-amz-meta).
90 base_user_settable_fields = set(["cache-control", "content-disposition",
91 "content-encoding", "content-language",
92 "content-md5", "content-type",
93 "x-robots-tag", "expires"])
94 _underscore_base_user_settable_fields = set()
95 for f in base_user_settable_fields:
96 _underscore_base_user_settable_fields.add(f.replace('-', '_'))
97 # Metadata fields, whether user-settable or not, other than custom
98 # metadata fields (i.e., those beginning with a provider specific prefix
99 # like x-amz-meta).
100 base_fields = (base_user_settable_fields |
101 set(["last-modified", "content-length", "date", "etag"]))
102
103
104
105 def __init__(self, bucket=None, name=None):
106 self.bucket = bucket
107 self.name = name
108 self.metadata = {}
109 self.cache_control = None
110 self.content_type = self.DefaultContentType
111 self.content_encoding = None
112 self.content_disposition = None
113 self.content_language = None
114 self.filename = None
115 self.etag = None
116 self.is_latest = False
117 self.last_modified = None
118 self.owner = None
119 self._storage_class = None
120 self.path = None
121 self.resp = None
122 self.mode = None
123 self.size = None
124 self.version_id = None
125 self.source_version_id = None
126 self.delete_marker = False
127 self.encrypted = None
128 # If the object is being restored, this attribute will be set to True.
129 # If the object is restored, it will be set to False. Otherwise this
130 # value will be None. If the restore is completed (ongoing_restore =
131 # False), the expiry_date will be populated with the expiry date of the
132 # restored object.
133 self.ongoing_restore = None
134 self.expiry_date = None
135 self.local_hashes = {}
136
137 def __repr__(self):
138 if self.bucket:
139 name = u'<Key: %s,%s>' % (self.bucket.name, self.name)
140 else:
141 name = u'<Key: None,%s>' % self.name
142
143 # Encode to bytes for Python 2 to prevent display decoding issues
144 if not isinstance(name, str):
145 name = name.encode('utf-8')
146
147 return name
148
149 def __iter__(self):
150 return self
151
152 @property
153 def provider(self):
154 provider = None
155 if self.bucket and self.bucket.connection:
156 provider = self.bucket.connection.provider
157 return provider
158
159 def _get_key(self):
160 return self.name
161
162 def _set_key(self, value):
163 self.name = value
164
165 key = property(_get_key, _set_key)
166
167 def _get_md5(self):
168 if 'md5' in self.local_hashes and self.local_hashes['md5']:
169 return binascii.b2a_hex(self.local_hashes['md5'])
170
171 def _set_md5(self, value):
172 if value:
173 self.local_hashes['md5'] = binascii.a2b_hex(value)
174 elif 'md5' in self.local_hashes:
175 self.local_hashes.pop('md5', None)
176
177 md5 = property(_get_md5, _set_md5)
178
179 def _get_base64md5(self):
180 if 'md5' in self.local_hashes and self.local_hashes['md5']:
181 md5 = self.local_hashes['md5']
182 if not isinstance(md5, bytes):
183 md5 = md5.encode('utf-8')
184 return binascii.b2a_base64(md5).decode('utf-8').rstrip('\n')
185
186 def _set_base64md5(self, value):
187 if value:
188 if not isinstance(value, six.string_types):
189 value = value.decode('utf-8')
190 self.local_hashes['md5'] = binascii.a2b_base64(value)
191 elif 'md5' in self.local_hashes:
192 del self.local_hashes['md5']
193
194 base64md5 = property(_get_base64md5, _set_base64md5)
195
196 def _get_storage_class(self):
197 if self._storage_class is None and self.bucket:
198 # Attempt to fetch storage class
199 list_items = list(self.bucket.list(self.name.encode('utf-8')))
200 if len(list_items) and getattr(list_items[0], '_storage_class',
201 None):
202 self._storage_class = list_items[0]._storage_class
203 else:
204 # Key is not yet saved? Just use default...
205 self._storage_class = 'STANDARD'
206
207 return self._storage_class
208
209 def _set_storage_class(self, value):
210 self._storage_class = value
211
212 storage_class = property(_get_storage_class, _set_storage_class)
213
214 def get_md5_from_hexdigest(self, md5_hexdigest):
215 """
216 A utility function to create the 2-tuple (md5hexdigest, base64md5)
217 from just having a precalculated md5_hexdigest.
218 """
219 digest = binascii.unhexlify(md5_hexdigest)
220 base64md5 = encodebytes(digest)
221 if base64md5[-1] == '\n':
222 base64md5 = base64md5[0:-1]
223 return (md5_hexdigest, base64md5)
224
225 def handle_encryption_headers(self, resp):
226 provider = self.bucket.connection.provider
227 if provider.server_side_encryption_header:
228 self.encrypted = resp.getheader(
229 provider.server_side_encryption_header, None)
230 else:
231 self.encrypted = None
232
233 def handle_version_headers(self, resp, force=False):
234 provider = self.bucket.connection.provider
235 # If the Key object already has a version_id attribute value, it
236 # means that it represents an explicit version and the user is
237 # doing a get_contents_*(version_id=<foo>) to retrieve another
238 # version of the Key. In that case, we don't really want to
239 # overwrite the version_id in this Key object. Comprende?
240 if self.version_id is None or force:
241 self.version_id = resp.getheader(provider.version_id, None)
242 self.source_version_id = resp.getheader(provider.copy_source_version_id,
243 None)
244 if resp.getheader(provider.delete_marker, 'false') == 'true':
245 self.delete_marker = True
246 else:
247 self.delete_marker = False
248
249 def handle_restore_headers(self, response):
250 provider = self.bucket.connection.provider
251 header = response.getheader(provider.restore_header)
252 if header is None:
253 return
254 parts = header.split(',', 1)
255 for part in parts:
256 key, val = [i.strip() for i in part.split('=')]
257 val = val.replace('"', '')
258 if key == 'ongoing-request':
259 self.ongoing_restore = True if val.lower() == 'true' else False
260 elif key == 'expiry-date':
261 self.expiry_date = val
262
263 def handle_addl_headers(self, headers):
264 """
265 Used by Key subclasses to do additional, provider-specific
266 processing of response headers. No-op for this base class.
267 """
268 pass
269
270 def open_read(self, headers=None, query_args='',
271 override_num_retries=None, response_headers=None):
272 """
273 Open this key for reading
274
275 :type headers: dict
276 :param headers: Headers to pass in the web request
277
278 :type query_args: string
279 :param query_args: Arguments to pass in the query string
280 (e.g., 'torrent')
281
282 :type override_num_retries: int
283 :param override_num_retries: If not None will override configured
284 num_retries parameter for underlying GET.
285
286 :type response_headers: dict
287 :param response_headers: A dictionary containing HTTP
288 headers/values that will override any headers associated
289 with the stored object in the response. See
290 http://goo.gl/EWOPb for details.
291 """
292 if self.resp is None:
293 self.mode = 'r'
294
295 provider = self.bucket.connection.provider
296 self.resp = self.bucket.connection.make_request(
297 'GET', self.bucket.name, self.name, headers,
298 query_args=query_args,
299 override_num_retries=override_num_retries)
300 if self.resp.status < 199 or self.resp.status > 299:
301 body = self.resp.read()
302 raise provider.storage_response_error(self.resp.status,
303 self.resp.reason, body)
304 response_headers = self.resp.msg
305 self.metadata = boto.utils.get_aws_metadata(response_headers,
306 provider)
307 for name, value in response_headers.items():
308 # To get correct size for Range GETs, use Content-Range
309 # header if one was returned. If not, use Content-Length
310 # header.
311 if (name.lower() == 'content-length' and
312 'Content-Range' not in response_headers):
313 self.size = int(value)
314 elif name.lower() == 'content-range':
315 end_range = re.sub('.*/(.*)', '\\1', value)
316 self.size = int(end_range)
317 elif name.lower() in Key.base_fields:
318 self.__dict__[name.lower().replace('-', '_')] = value
319 self.handle_version_headers(self.resp)
320 self.handle_encryption_headers(self.resp)
321 self.handle_restore_headers(self.resp)
322 self.handle_addl_headers(self.resp.getheaders())
323
324 def open_write(self, headers=None, override_num_retries=None):
325 """
326 Open this key for writing.
327 Not yet implemented
328
329 :type headers: dict
330 :param headers: Headers to pass in the write request
331
332 :type override_num_retries: int
333 :param override_num_retries: If not None will override configured
334 num_retries parameter for underlying PUT.
335 """
336 raise BotoClientError('Not Implemented')
337
338 def open(self, mode='r', headers=None, query_args=None,
339 override_num_retries=None):
340 if mode == 'r':
341 self.mode = 'r'
342 self.open_read(headers=headers, query_args=query_args,
343 override_num_retries=override_num_retries)
344 elif mode == 'w':
345 self.mode = 'w'
346 self.open_write(headers=headers,
347 override_num_retries=override_num_retries)
348 else:
349 raise BotoClientError('Invalid mode: %s' % mode)
350
351 closed = False
352
353 def close(self, fast=False):
354 """
355 Close this key.
356
357 :type fast: bool
358 :param fast: True if you want the connection to be closed without first
359 reading the content. This should only be used in cases where subsequent
360 calls don't need to return the content from the open HTTP connection.
361 Note: As explained at
362 http://docs.python.org/2/library/httplib.html#httplib.HTTPConnection.getresponse,
363 callers must read the whole response before sending a new request to the
364 server. Calling Key.close(fast=True) and making a subsequent request to
365 the server will work because boto will get an httplib exception and
366 close/reopen the connection.
367
368 """
369 if self.resp and not fast:
370 self.resp.read()
371 self.resp = None
372 self.mode = None
373 self.closed = True
374
375 def next(self):
376 """
377 By providing a next method, the key object supports use as an iterator.
378 For example, you can now say:
379
380 for bytes in key:
381 write bytes to a file or whatever
382
383 All of the HTTP connection stuff is handled for you.
384 """
385 self.open_read()
386 data = self.resp.read(self.BufferSize)
387 if not data:
388 self.close()
389 raise StopIteration
390 return data
391
392 # Python 3 iterator support
393 __next__ = next
394
395 def read(self, size=0):
396 self.open_read()
397 if size == 0:
398 data = self.resp.read()
399 else:
400 data = self.resp.read(size)
401 if not data:
402 self.close()
403 return data
404
405 def change_storage_class(self, new_storage_class, dst_bucket=None,
406 validate_dst_bucket=True):
407 """
408 Change the storage class of an existing key.
409 Depending on whether a different destination bucket is supplied
410 or not, this will either move the item within the bucket, preserving
411 all metadata and ACL info while changing the storage class, or it
412 will copy the item to the provided destination bucket, also
413 preserving metadata and ACL info.
414
415 :type new_storage_class: string
416 :param new_storage_class: The new storage class for the Key.
417 Possible values are:
418 * STANDARD
419 * REDUCED_REDUNDANCY
420
421 :type dst_bucket: string
422 :param dst_bucket: The name of a destination bucket. If not
423 provided the current bucket of the key will be used.
424
425 :type validate_dst_bucket: bool
426 :param validate_dst_bucket: If True, will validate the dst_bucket
427 by using an extra list request.
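
Example (an illustrative sketch added here; 'mybucket' and 'archive.zip'
are hypothetical names and an existing S3 connection ``conn`` is assumed)::

    key = conn.get_bucket('mybucket').get_key('archive.zip')
    key.change_storage_class('REDUCED_REDUNDANCY')  # copy in place using RRS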
428 """
429 bucket_name = dst_bucket or self.bucket.name
430 if new_storage_class == 'STANDARD':
431 return self.copy(bucket_name, self.name,
432 reduced_redundancy=False, preserve_acl=True,
433 validate_dst_bucket=validate_dst_bucket)
434 elif new_storage_class == 'REDUCED_REDUNDANCY':
435 return self.copy(bucket_name, self.name,
436 reduced_redundancy=True, preserve_acl=True,
437 validate_dst_bucket=validate_dst_bucket)
438 else:
439 raise BotoClientError('Invalid storage class: %s' %
440 new_storage_class)
441
442 def copy(self, dst_bucket, dst_key, metadata=None,
443 reduced_redundancy=False, preserve_acl=False,
444 encrypt_key=False, validate_dst_bucket=True):
445 """
446 Copy this Key to another bucket.
447
448 :type dst_bucket: string
449 :param dst_bucket: The name of the destination bucket
450
451 :type dst_key: string
452 :param dst_key: The name of the destination key
453
454 :type metadata: dict
455 :param metadata: Metadata to be associated with new key. If
456 metadata is supplied, it will replace the metadata of the
457 source key being copied. If no metadata is supplied, the
458 source key's metadata will be copied to the new key.
459
460 :type reduced_redundancy: bool
461 :param reduced_redundancy: If True, this will force the
462 storage class of the new Key to be REDUCED_REDUNDANCY
463 regardless of the storage class of the key being copied.
464 The Reduced Redundancy Storage (RRS) feature of S3
465 provides lower redundancy at lower storage cost.
466
467 :type preserve_acl: bool
468 :param preserve_acl: If True, the ACL from the source key will
469 be copied to the destination key. If False, the
470 destination key will have the default ACL. Note that
471 preserving the ACL in the new key object will require two
472 additional API calls to S3, one to retrieve the current
473 ACL and one to set that ACL on the new object. If you
474 don't care about the ACL, a value of False will be
475 significantly more efficient.
476
477 :type encrypt_key: bool
478 :param encrypt_key: If True, the new copy of the object will
479 be encrypted on the server-side by S3 and will be stored
480 in an encrypted form while at rest in S3.
481
482 :type validate_dst_bucket: bool
483 :param validate_dst_bucket: If True, will validate the dst_bucket
484 by using an extra list request.
485
486 :rtype: :class:`boto.s3.key.Key` or subclass
487 :returns: An instance of the newly created key object
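
Example (an illustrative sketch; the bucket and key names are hypothetical
and an existing connection ``conn`` is assumed)::

    src = conn.get_bucket('source-bucket').get_key('photos/cat.jpg')
    new_key = src.copy('backup-bucket', 'photos/cat.jpg', preserve_acl=True)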
488 """
489 dst_bucket = self.bucket.connection.lookup(dst_bucket,
490 validate_dst_bucket)
491 if reduced_redundancy:
492 storage_class = 'REDUCED_REDUNDANCY'
493 else:
494 storage_class = self.storage_class
495 return dst_bucket.copy_key(dst_key, self.bucket.name,
496 self.name, metadata,
497 storage_class=storage_class,
498 preserve_acl=preserve_acl,
499 encrypt_key=encrypt_key,
500 src_version_id=self.version_id)
501
502 def startElement(self, name, attrs, connection):
503 if name == 'Owner':
504 self.owner = User(self)
505 return self.owner
506 else:
507 return None
508
509 def endElement(self, name, value, connection):
510 if name == 'Key':
511 self.name = value
512 elif name == 'ETag':
513 self.etag = value
514 elif name == 'IsLatest':
515 if value == 'true':
516 self.is_latest = True
517 else:
518 self.is_latest = False
519 elif name == 'LastModified':
520 self.last_modified = value
521 elif name == 'Size':
522 self.size = int(value)
523 elif name == 'StorageClass':
524 self.storage_class = value
525 elif name == 'Owner':
526 pass
527 elif name == 'VersionId':
528 self.version_id = value
529 else:
530 setattr(self, name, value)
531
532 def exists(self, headers=None):
533 """
534 Returns True if the key exists
535
536 :rtype: bool
537 :return: Whether the key exists on S3
538 """
539 return bool(self.bucket.lookup(self.name, headers=headers))
540
541 def delete(self, headers=None):
542 """
543 Delete this key from S3
544 """
545 return self.bucket.delete_key(self.name, version_id=self.version_id,
546 headers=headers)
547
548 def get_metadata(self, name):
549 return self.metadata.get(name)
550
551 def set_metadata(self, name, value):
552 # Ensure that metadata that is vital to signing is in the correct
553 # case. Applies to ``Content-Type`` & ``Content-MD5``.
554 if name.lower() == 'content-type':
555 self.metadata['Content-Type'] = value
556 elif name.lower() == 'content-md5':
557 self.metadata['Content-MD5'] = value
558 else:
559 self.metadata[name] = value
560 if name.lower() in Key.base_user_settable_fields:
561 self.__dict__[name.lower().replace('-', '_')] = value
562
563 def update_metadata(self, d):
564 self.metadata.update(d)
565
566 # convenience methods for setting/getting ACL
567 def set_acl(self, acl_str, headers=None):
568 if self.bucket is not None:
569 self.bucket.set_acl(acl_str, self.name, headers=headers)
570
571 def get_acl(self, headers=None):
572 if self.bucket is not None:
573 return self.bucket.get_acl(self.name, headers=headers)
574
575 def get_xml_acl(self, headers=None):
576 if self.bucket is not None:
577 return self.bucket.get_xml_acl(self.name, headers=headers)
578
579 def set_xml_acl(self, acl_str, headers=None):
580 if self.bucket is not None:
581 return self.bucket.set_xml_acl(acl_str, self.name, headers=headers)
582
583 def set_canned_acl(self, acl_str, headers=None):
584 return self.bucket.set_canned_acl(acl_str, self.name, headers)
585
586 def get_redirect(self):
587 """Return the redirect location configured for this key.
588
589 If no redirect is configured (via set_redirect), then None
590 will be returned.
591
592 """
593 response = self.bucket.connection.make_request(
594 'HEAD', self.bucket.name, self.name)
595 if response.status == 200:
596 return response.getheader('x-amz-website-redirect-location')
597 else:
598 raise self.provider.storage_response_error(
599 response.status, response.reason, response.read())
600
601 def set_redirect(self, redirect_location, headers=None):
602 """Configure this key to redirect to another location.
603
604 When the bucket associated with this key is accessed from the website
605 endpoint, a 301 redirect will be issued to the specified
606 `redirect_location`.
607
608 :type redirect_location: string
609 :param redirect_location: The location to redirect.
610
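Example (an illustrative sketch; the key name and target URL are
hypothetical)::

    k = bucket.new_key('old-page.html')
    k.set_redirect('http://example.com/new-page.html')
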
611 """
612 if headers is None:
613 headers = {}
614 else:
615 headers = headers.copy()
616
617 headers['x-amz-website-redirect-location'] = redirect_location
618 response = self.bucket.connection.make_request('PUT', self.bucket.name,
619 self.name, headers)
620 if response.status == 200:
621 return True
622 else:
623 raise self.provider.storage_response_error(
624 response.status, response.reason, response.read())
625
626 def make_public(self, headers=None):
627 return self.bucket.set_canned_acl('public-read', self.name, headers)
628
629 def generate_url(self, expires_in, method='GET', headers=None,
630 query_auth=True, force_http=False, response_headers=None,
631 expires_in_absolute=False, version_id=None,
632 policy=None, reduced_redundancy=False, encrypt_key=False):
633 """
634 Generate a URL to access this key.
635
636 :type expires_in: int
637 :param expires_in: How long the url is valid for, in seconds
638
639 :type method: string
640 :param method: The method to use for retrieving the file
641 (default is GET)
642
643 :type headers: dict
644 :param headers: Any headers to pass along in the request
645
646 :type query_auth: bool
647 :param query_auth:
648
649 :type force_http: bool
650 :param force_http: If True, http will be used instead of https.
651
652 :type response_headers: dict
653 :param response_headers: A dictionary containing HTTP
654 headers/values that will override any headers associated
655 with the stored object in the response. See
656 http://goo.gl/EWOPb for details.
657
658 :type expires_in_absolute: bool
659 :param expires_in_absolute:
660
661 :type version_id: string
662 :param version_id: The version_id of the object to GET. If specified
663 this overrides any value in the key.
664
665 :type policy: :class:`boto.s3.acl.CannedACLStrings`
666 :param policy: A canned ACL policy that will be applied to the
667 new key in S3.
668
669 :type reduced_redundancy: bool
670 :param reduced_redundancy: If True, this will set the storage
671 class of the new Key to be REDUCED_REDUNDANCY. The Reduced
672 Redundancy Storage (RRS) feature of S3 provides lower
673 redundancy at lower storage cost.
674
675 :type encrypt_key: bool
676 :param encrypt_key: If True, the new copy of the object will
677 be encrypted on the server-side by S3 and will be stored
678 in an encrypted form while at rest in S3.
679
680 :rtype: string
681 :return: The URL to access the key
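
Example (an illustrative sketch; assumes the key already exists in the
bucket)::

    url = key.generate_url(3600)   # signed GET URL valid for one hour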
682 """
683 provider = self.bucket.connection.provider
684 version_id = version_id or self.version_id
685 if headers is None:
686 headers = {}
687 else:
688 headers = headers.copy()
689
690 # add headers accordingly (usually PUT case)
691 if policy:
692 headers[provider.acl_header] = policy
693 if reduced_redundancy:
694 self.storage_class = 'REDUCED_REDUNDANCY'
695 if provider.storage_class_header:
696 headers[provider.storage_class_header] = self.storage_class
697 if encrypt_key:
698 headers[provider.server_side_encryption_header] = 'AES256'
699 headers = boto.utils.merge_meta(headers, self.metadata, provider)
700
701 return self.bucket.connection.generate_url(expires_in, method,
702 self.bucket.name, self.name,
703 headers, query_auth,
704 force_http,
705 response_headers,
706 expires_in_absolute,
707 version_id)
708
709 def send_file(self, fp, headers=None, cb=None, num_cb=10,
710 query_args=None, chunked_transfer=False, size=None):
711 """
712 Upload a file to a key into a bucket on S3.
713
714 :type fp: file
715 :param fp: The file pointer to upload. The file pointer must
716 point at the offset from which you wish to upload.
717 i.e., if uploading the full file, it should point at the
718 start of the file. Normally when a file is opened for
719 reading, the fp will point at the first byte. See the
720 size parameter below for more info.
721
722 :type headers: dict
723 :param headers: The headers to pass along with the PUT request
724
725 :type num_cb: int
726 :param num_cb: (optional) If a callback is specified with the
727 cb parameter this parameter determines the granularity of
728 the callback by defining the maximum number of times the
729 callback will be called during the file
730 transfer. Providing a negative integer will cause your
731 callback to be called with each buffer read.
732
733 :type query_args: string
734 :param query_args: (optional) Arguments to pass in the query string.
735
736 :type chunked_transfer: boolean
737 :param chunked_transfer: (optional) If true, we use chunked
738 Transfer-Encoding.
739
740 :type size: int
741 :param size: (optional) The Maximum number of bytes to read
742 from the file pointer (fp). This is useful when uploading
743 a file in multiple parts where you are splitting the file
744 up into different ranges to be uploaded. If not specified,
745 the default behaviour is to read all bytes from the file
746 pointer. Fewer bytes may be available.
747 """
748 self._send_file_internal(fp, headers=headers, cb=cb, num_cb=num_cb,
749 query_args=query_args,
750 chunked_transfer=chunked_transfer, size=size)
751
752 def _send_file_internal(self, fp, headers=None, cb=None, num_cb=10,
753 query_args=None, chunked_transfer=False, size=None,
754 hash_algs=None):
755 provider = self.bucket.connection.provider
756 try:
757 spos = fp.tell()
758 except IOError:
759 spos = None
760 self.read_from_stream = False
761
762 # If hash_algs is unset and the MD5 hasn't already been computed,
763 # default to an MD5 hash_alg to hash the data on-the-fly.
764 if hash_algs is None and not self.md5:
765 hash_algs = {'md5': md5}
766 digesters = dict((alg, hash_algs[alg]()) for alg in hash_algs or {})
767
768 def sender(http_conn, method, path, data, headers):
769 # This function is called repeatedly for temporary retries
770 # so we must be sure the file pointer is pointing at the
771 # start of the data.
772 if spos is not None and spos != fp.tell():
773 fp.seek(spos)
774 elif spos is None and self.read_from_stream:
775 # if seek is not supported, and we've read from this
776 # stream already, then we need to abort retries to
777 # avoid setting bad data.
778 raise provider.storage_data_error(
779 'Cannot retry failed request. fp does not support seeking.')
780
781 # If the caller explicitly specified host header, tell putrequest
782 # not to add a second host header. Similarly for accept-encoding.
783 skips = {}
784 if boto.utils.find_matching_headers('host', headers):
785 skips['skip_host'] = 1
786 if boto.utils.find_matching_headers('accept-encoding', headers):
787 skips['skip_accept_encoding'] = 1
788 http_conn.putrequest(method, path, **skips)
789 for key in headers:
790 http_conn.putheader(key, headers[key])
791 http_conn.endheaders()
792
793 save_debug = self.bucket.connection.debug
794 self.bucket.connection.debug = 0
795 # If the debuglevel < 4 we don't want to show connection
796 # payload, so turn off HTTP connection-level debug output (to
797 # be restored below).
798 # Use the getattr approach to allow this to work in AppEngine.
799 if getattr(http_conn, 'debuglevel', 0) < 4:
800 http_conn.set_debuglevel(0)
801
802 data_len = 0
803 if cb:
804 if size:
805 cb_size = size
806 elif self.size:
807 cb_size = self.size
808 else:
809 cb_size = 0
810 if chunked_transfer and cb_size == 0:
811 # For chunked Transfer, we call the cb for every 1MB
812 # of data transferred, except when we know size.
813 cb_count = (1024 * 1024) / self.BufferSize
814 elif num_cb > 1:
815 cb_count = int(
816 math.ceil(cb_size / self.BufferSize / (num_cb - 1.0)))
817 elif num_cb < 0:
818 cb_count = -1
819 else:
820 cb_count = 0
821 i = 0
822 cb(data_len, cb_size)
823
824 bytes_togo = size
825 if bytes_togo and bytes_togo < self.BufferSize:
826 chunk = fp.read(bytes_togo)
827 else:
828 chunk = fp.read(self.BufferSize)
829
830 if not isinstance(chunk, bytes):
831 chunk = chunk.encode('utf-8')
832
833 if spos is None:
834 # read at least something from a non-seekable fp.
835 self.read_from_stream = True
836 while chunk:
837 chunk_len = len(chunk)
838 data_len += chunk_len
839 if chunked_transfer:
840 http_conn.send('%x;\r\n' % chunk_len)
841 http_conn.send(chunk)
842 http_conn.send('\r\n')
843 else:
844 http_conn.send(chunk)
845 for alg in digesters:
846 digesters[alg].update(chunk)
847 if bytes_togo:
848 bytes_togo -= chunk_len
849 if bytes_togo <= 0:
850 break
851 if cb:
852 i += 1
853 if i == cb_count or cb_count == -1:
854 cb(data_len, cb_size)
855 i = 0
856 if bytes_togo and bytes_togo < self.BufferSize:
857 chunk = fp.read(bytes_togo)
858 else:
859 chunk = fp.read(self.BufferSize)
860
861 if not isinstance(chunk, bytes):
862 chunk = chunk.encode('utf-8')
863
864 self.size = data_len
865
866 for alg in digesters:
867 self.local_hashes[alg] = digesters[alg].digest()
868
869 if chunked_transfer:
870 http_conn.send('0\r\n')
871 # http_conn.send("Content-MD5: %s\r\n" % self.base64md5)
872 http_conn.send('\r\n')
873
874 if cb and (cb_count <= 1 or i > 0) and data_len > 0:
875 cb(data_len, cb_size)
876
877 http_conn.set_debuglevel(save_debug)
878 self.bucket.connection.debug = save_debug
879 response = http_conn.getresponse()
880 body = response.read()
881
882 if not self.should_retry(response, chunked_transfer):
883 raise provider.storage_response_error(
884 response.status, response.reason, body)
885
886 return response
887
888 if not headers:
889 headers = {}
890 else:
891 headers = headers.copy()
892 # Overwrite user-supplied user-agent.
893 for header in find_matching_headers('User-Agent', headers):
894 del headers[header]
895 headers['User-Agent'] = UserAgent
896 # If storage_class is None, then a user has not explicitly requested
897 # a storage class, so we can assume STANDARD here
898 if self._storage_class not in [None, 'STANDARD']:
899 headers[provider.storage_class_header] = self.storage_class
900 if find_matching_headers('Content-Encoding', headers):
901 self.content_encoding = merge_headers_by_name(
902 'Content-Encoding', headers)
903 if find_matching_headers('Content-Language', headers):
904 self.content_language = merge_headers_by_name(
905 'Content-Language', headers)
906 content_type_headers = find_matching_headers('Content-Type', headers)
907 if content_type_headers:
908 # Some use cases need to suppress sending of the Content-Type
909 # header and depend on the receiving server to set the content
910 # type. This can be achieved by setting headers['Content-Type']
911 # to None when calling this method.
912 if (len(content_type_headers) == 1 and
913 headers[content_type_headers[0]] is None):
914 # Delete null Content-Type value to skip sending that header.
915 del headers[content_type_headers[0]]
916 else:
917 self.content_type = merge_headers_by_name(
918 'Content-Type', headers)
919 elif self.path:
920 self.content_type = mimetypes.guess_type(self.path)[0]
921 if self.content_type is None:
922 self.content_type = self.DefaultContentType
923 headers['Content-Type'] = self.content_type
924 else:
925 headers['Content-Type'] = self.content_type
926 if self.base64md5:
927 headers['Content-MD5'] = self.base64md5
928 if chunked_transfer:
929 headers['Transfer-Encoding'] = 'chunked'
930 #if not self.base64md5:
931 # headers['Trailer'] = "Content-MD5"
932 else:
933 headers['Content-Length'] = str(self.size)
934 # This is terrible. We need a SHA256 of the body for SigV4, but to do
935 # the chunked ``sender`` behavior above, the ``fp`` isn't available to
936 # the auth mechanism (because closures). Detect if it's SigV4 & embellish
937 # while we can before the auth calculations occur.
938 if 'hmac-v4-s3' in self.bucket.connection._required_auth_capability():
939 kwargs = {'fp': fp, 'hash_algorithm': hashlib.sha256}
940 if size is not None:
941 kwargs['size'] = size
942 headers['_sha256'] = compute_hash(**kwargs)[0]
943 headers['Expect'] = '100-Continue'
944 headers = boto.utils.merge_meta(headers, self.metadata, provider)
945 resp = self.bucket.connection.make_request(
946 'PUT',
947 self.bucket.name,
948 self.name,
949 headers,
950 sender=sender,
951 query_args=query_args
952 )
953 self.handle_version_headers(resp, force=True)
954 self.handle_addl_headers(resp.getheaders())
955
956 def should_retry(self, response, chunked_transfer=False):
957 provider = self.bucket.connection.provider
958
959 if not chunked_transfer:
960 if response.status in [500, 503]:
961 # 500 & 503 can be plain retries.
962 return True
963
964 if response.getheader('location'):
965 # If there's a redirect, plain retry.
966 return True
967
968 if 200 <= response.status <= 299:
969 self.etag = response.getheader('etag')
970 md5 = self.md5
971 if isinstance(md5, bytes):
972 md5 = md5.decode('utf-8')
973
974 # If you use customer-provided encryption keys, the ETag value that
975 # Amazon S3 returns in the response will not be the MD5 of the
976 # object.
977 server_side_encryption_customer_algorithm = response.getheader(
978 'x-amz-server-side-encryption-customer-algorithm', None)
979 if server_side_encryption_customer_algorithm is None:
980 if self.etag != '"%s"' % md5:
981 raise provider.storage_data_error(
982 'ETag from S3 did not match computed MD5. '
983 '%s vs. %s' % (self.etag, self.md5))
984
985 return True
986
987 if response.status == 400:
988 # The 400 must be trapped so the retry handler can check to
989 # see if it was a timeout.
990 # If ``RequestTimeout`` is present, we'll retry. Otherwise, bomb
991 # out.
992 body = response.read()
993 err = provider.storage_response_error(
994 response.status,
995 response.reason,
996 body
997 )
998
999 if err.error_code in ['RequestTimeout']:
1000 raise PleaseRetryException(
1001 "Saw %s, retrying" % err.error_code,
1002 response=response
1003 )
1004
1005 return False
1006
1007 def compute_md5(self, fp, size=None):
1008 """
1009 :type fp: file
1010 :param fp: File pointer to the file to MD5 hash. The file
1011 pointer will be reset to the same position before the
1012 method returns.
1013
1014 :type size: int
1015 :param size: (optional) The Maximum number of bytes to read
1016 from the file pointer (fp). This is useful when uploading
1017 a file in multiple parts where the file is being split
1018 in place into different parts. Fewer bytes may be available.
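
Example (an illustrative sketch; '/tmp/data.bin' is a hypothetical path)::

    with open('/tmp/data.bin', 'rb') as fp:
        hex_md5, b64_md5 = key.compute_md5(fp)   # fp position is restored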
1019 """
1020 hex_digest, b64_digest, data_size = compute_md5(fp, size=size)
1021 # Returned values are MD5 hash, base64 encoded MD5 hash, and data size.
1022 # The internal implementation of compute_md5() needs to return the
1023 # data size but we don't want to return that value to the external
1024 # caller because it changes the class interface (i.e. it might
1025 # break some code) so we consume the third tuple value here and
1026 # return the remainder of the tuple to the caller, thereby preserving
1027 # the existing interface.
1028 self.size = data_size
1029 return (hex_digest, b64_digest)
1030
1031 def set_contents_from_stream(self, fp, headers=None, replace=True,
1032 cb=None, num_cb=10, policy=None,
1033 reduced_redundancy=False, query_args=None,
1034 size=None):
1035 """
1036 Store an object using the name of the Key object as the key in
1037 cloud and the contents of the data stream pointed to by 'fp' as
1038 the contents.
1039
1040 The stream object is not seekable and total size is not known.
1041 This has the implication that we can't specify the
1042 Content-Length and Content-MD5 in the header. So for huge
1043 uploads, the delay in calculating MD5 is avoided but with a
1044 penalty of inability to verify the integrity of the uploaded
1045 data.
1046
1047 :type fp: file
1048 :param fp: the file whose contents are to be uploaded
1049
1050 :type headers: dict
1051 :param headers: additional HTTP headers to be sent with the
1052 PUT request.
1053
1054 :type replace: bool
1055 :param replace: If this parameter is False, the method will first check
1056 to see if an object exists in the bucket with the same key. If it
1057 does, it won't overwrite it. The default value is True which will
1058 overwrite the object.
1059
1060 :type cb: function
1061 :param cb: a callback function that will be called to report
1062 progress on the upload. The callback should accept two integer
1063 parameters, the first representing the number of bytes that have
1064 been successfully transmitted to GS and the second representing the
1065 total number of bytes that need to be transmitted.
1066
1067 :type num_cb: int
1068 :param num_cb: (optional) If a callback is specified with the
1069 cb parameter, this parameter determines the granularity of
1070 the callback by defining the maximum number of times the
1071 callback will be called during the file transfer.
1072
1073 :type policy: :class:`boto.gs.acl.CannedACLStrings`
1074 :param policy: A canned ACL policy that will be applied to the new key
1075 in GS.
1076
1077 :type reduced_redundancy: bool
1078 :param reduced_redundancy: If True, this will set the storage
1079 class of the new Key to be REDUCED_REDUNDANCY. The Reduced
1080 Redundancy Storage (RRS) feature of S3 provides lower
1081 redundancy at lower storage cost.
1082
1083 :type size: int
1084 :param size: (optional) The Maximum number of bytes to read from
1085 the file pointer (fp). This is useful when uploading a
1086 file in multiple parts where you are splitting the file up
1087 into different ranges to be uploaded. If not specified,
1088 the default behaviour is to read all bytes from the file
1089 pointer. Fewer bytes may be available.
1090 """
1091
1092 provider = self.bucket.connection.provider
1093 if not provider.supports_chunked_transfer():
1094 raise BotoClientError('%s does not support chunked transfer'
1095 % provider.get_provider_name())
1096
1097 # Name of the Object should be specified explicitly for Streams.
1098 if not self.name or self.name == '':
1099 raise BotoClientError('Cannot determine the destination '
1100 'object name for the given stream')
1101
1102 if headers is None:
1103 headers = {}
1104 if policy:
1105 headers[provider.acl_header] = policy
1106
1107 if reduced_redundancy:
1108 self.storage_class = 'REDUCED_REDUNDANCY'
1109 if provider.storage_class_header:
1110 headers[provider.storage_class_header] = self.storage_class
1111
1112 if self.bucket is not None:
1113 if not replace:
1114 if self.bucket.lookup(self.name):
1115 return
1116 self.send_file(fp, headers, cb, num_cb, query_args,
1117 chunked_transfer=True, size=size)
1118
1119 def set_contents_from_file(self, fp, headers=None, replace=True,
1120 cb=None, num_cb=10, policy=None, md5=None,
1121 reduced_redundancy=False, query_args=None,
1122 encrypt_key=False, size=None, rewind=False):
1123 """
1124 Store an object in S3 using the name of the Key object as the
1125 key in S3 and the contents of the file pointed to by 'fp' as the
1126 contents. The data is read from 'fp' from its current position until
1127 'size' bytes have been read or EOF.
1128
1129 :type fp: file
1130 :param fp: the file whose contents to upload
1131
1132 :type headers: dict
1133 :param headers: Additional HTTP headers that will be sent with
1134 the PUT request.
1135
1136 :type replace: bool
1137 :param replace: If this parameter is False, the method will
1138 first check to see if an object exists in the bucket with
1139 the same key. If it does, it won't overwrite it. The
1140 default value is True which will overwrite the object.
1141
1142 :type cb: function
1143 :param cb: a callback function that will be called to report
1144 progress on the upload. The callback should accept two
1145 integer parameters, the first representing the number of
1146 bytes that have been successfully transmitted to S3 and
1147 the second representing the size of the to be transmitted
1148 object.
1149
1150 :type num_cb: int
1151 :param num_cb: (optional) If a callback is specified with the
1152 cb parameter this parameter determines the granularity of
1153 the callback by defining the maximum number of times the
1154 callback will be called during the file transfer.
1155
1156 :type policy: :class:`boto.s3.acl.CannedACLStrings`
1157 :param policy: A canned ACL policy that will be applied to the
1158 new key in S3.
1159
1160 :type md5: A tuple containing the hexdigest version of the MD5
1161 checksum of the file as the first element and the
1162 Base64-encoded version of the plain checksum as the second
1163 element. This is the same format returned by the
1164 compute_md5 method.
1165 :param md5: If you need to compute the MD5 for any reason
1166 prior to upload, it's silly to have to do it twice so this
1167 param, if present, will be used as the MD5 values of the
1168 file. Otherwise, the checksum will be computed.
1169
1170 :type reduced_redundancy: bool
1171 :param reduced_redundancy: If True, this will set the storage
1172 class of the new Key to be REDUCED_REDUNDANCY. The Reduced
1173 Redundancy Storage (RRS) feature of S3 provides lower
1174 redundancy at lower storage cost.
1175
1176 :type encrypt_key: bool
1177 :param encrypt_key: If True, the new copy of the object will
1178 be encrypted on the server-side by S3 and will be stored
1179 in an encrypted form while at rest in S3.
1180
1181 :type size: int
1182 :param size: (optional) The Maximum number of bytes to read
1183 from the file pointer (fp). This is useful when uploading
1184 a file in multiple parts where you are splitting the file
1185 up into different ranges to be uploaded. If not specified,
1186 the default behaviour is to read all bytes from the file
1187 pointer. Fewer bytes may be available.
1188
1189 :type rewind: bool
1190 :param rewind: (optional) If True, the file pointer (fp) will
1191 be rewound to the start before any bytes are read from
1192 it. The default behaviour is False which reads from the
1193 current position of the file pointer (fp).
1194
1195 :rtype: int
1196 :return: The number of bytes written to the key.
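
Example (an illustrative sketch; the local path and key name are
hypothetical)::

    k = bucket.new_key('uploads/report.pdf')
    with open('/tmp/report.pdf', 'rb') as fp:
        bytes_written = k.set_contents_from_file(fp)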
1197 """
1198 provider = self.bucket.connection.provider
1199 headers = headers or {}
1200 if policy:
1201 headers[provider.acl_header] = policy
1202 if encrypt_key:
1203 headers[provider.server_side_encryption_header] = 'AES256'
1204
1205 if rewind:
1206 # caller requests reading from beginning of fp.
1207 fp.seek(0, os.SEEK_SET)
1208 else:
1209 # The following seek/tell/seek logic is intended
1210 # to detect applications using the older interface to
1211 # set_contents_from_file(), which automatically rewound the
1212 # file each time the Key was reused. This changed with commit
1213 # 14ee2d03f4665fe20d19a85286f78d39d924237e, to support uploads
1214 # split into multiple parts and uploaded in parallel, and at
1215 # the time of that commit this check was added because otherwise
1216 # older programs would get a success status and upload an empty
1217 # object. Unfortunately, it's very inefficient for fp's implemented
1218 # by KeyFile (used, for example, by gsutil when copying between
1219 # providers). So, we skip the check for the KeyFile case.
1220 # TODO: At some point consider removing this seek/tell/seek
1221 # logic, after enough time has passed that it's unlikely any
1222 # programs remain that assume the older auto-rewind interface.
1223 if not isinstance(fp, KeyFile):
1224 spos = fp.tell()
1225 fp.seek(0, os.SEEK_END)
1226 if fp.tell() == spos:
1227 fp.seek(0, os.SEEK_SET)
1228 if fp.tell() != spos:
1229 # Raise an exception as this is likely a programming
1230 # error whereby there is data before the fp but nothing
1231 # after it.
1232 fp.seek(spos)
1233 raise AttributeError('fp is at EOF. Use rewind option '
1234 'or seek() to data start.')
1235 # seek back to the correct position.
1236 fp.seek(spos)
1237
1238 if reduced_redundancy:
1239 self.storage_class = 'REDUCED_REDUNDANCY'
1240 if provider.storage_class_header:
1241 headers[provider.storage_class_header] = self.storage_class
1242 # TODO - What if provider doesn't support reduced redundancy?
1243 # What if different providers provide different classes?
1244 if hasattr(fp, 'name'):
1245 self.path = fp.name
1246 if self.bucket is not None:
1247 if not md5 and provider.supports_chunked_transfer():
1248 # defer md5 calculation to on the fly and
1249 # we don't know anything about size yet.
1250 chunked_transfer = True
1251 self.size = None
1252 else:
1253 chunked_transfer = False
1254 if isinstance(fp, KeyFile):
1255 # Avoid EOF seek for KeyFile case as it's very inefficient.
1256 key = fp.getkey()
1257 size = key.size - fp.tell()
1258 self.size = size
1259 # At present both GCS and S3 use MD5 for the etag for
1260 # non-multipart-uploaded objects. If the etag is 32 hex
1261 # chars use it as an MD5, to avoid having to read the file
1262 # twice while transferring.
1263 if (re.match('^"[a-fA-F0-9]{32}"$', key.etag)):
1264 etag = key.etag.strip('"')
1265 md5 = (etag, base64.b64encode(binascii.unhexlify(etag)))
1266 if not md5:
1267 # compute_md5() and also set self.size to actual
1268 # size of the bytes read computing the md5.
1269 md5 = self.compute_md5(fp, size)
1270 # adjust size if required
1271 size = self.size
1272 elif size:
1273 self.size = size
1274 else:
1275 # If md5 is provided, we still need the size, so
1276 # calculate it based on bytes to end of content
1277 spos = fp.tell()
1278 fp.seek(0, os.SEEK_END)
1279 self.size = fp.tell() - spos
1280 fp.seek(spos)
1281 size = self.size
1282 self.md5 = md5[0]
1283 self.base64md5 = md5[1]
1284
1285 if self.name is None:
1286 self.name = self.md5
1287 if not replace:
1288 if self.bucket.lookup(self.name):
1289 return
1290
1291 self.send_file(fp, headers=headers, cb=cb, num_cb=num_cb,
1292 query_args=query_args,
1293 chunked_transfer=chunked_transfer, size=size)
1294 # return number of bytes written.
1295 return self.size
1296
1297 def set_contents_from_filename(self, filename, headers=None, replace=True,
1298 cb=None, num_cb=10, policy=None, md5=None,
1299 reduced_redundancy=False,
1300 encrypt_key=False):
1301 """
1302 Store an object in S3 using the name of the Key object as the
1303 key in S3 and the contents of the file named by 'filename'.
1304 See set_contents_from_file method for details about the
1305 parameters.
1306
1307 :type filename: string
1308 :param filename: The name of the file that you want to put onto S3
1309
1310 :type headers: dict
1311 :param headers: Additional headers to pass along with the
1312 request to AWS.
1313
1314 :type replace: bool
1315 :param replace: If True, replaces the contents of the file
1316 if it already exists.
1317
1318 :type cb: function
1319 :param cb: a callback function that will be called to report
1320 progress on the upload. The callback should accept two
1321 integer parameters, the first representing the number of
1322 bytes that have been successfully transmitted to S3 and
1323 the second representing the size of the to be transmitted
1324 object.
1325
1326 :type num_cb: int
1327 :param num_cb: (optional) If a callback is specified with the
1328 cb parameter this parameter determines the granularity of
1329 the callback by defining the maximum number of times the
1330 callback will be called during the file transfer.
1331
1332 :type policy: :class:`boto.s3.acl.CannedACLStrings`
1333 :param policy: A canned ACL policy that will be applied to the
1334 new key in S3.
1335
1336 :type md5: A tuple containing the hexdigest version of the MD5
1337 checksum of the file as the first element and the
1338 Base64-encoded version of the plain checksum as the second
1339 element. This is the same format returned by the
1340 compute_md5 method.
1341 :param md5: If you need to compute the MD5 for any reason
1342 prior to upload, it's silly to have to do it twice so this
1343 param, if present, will be used as the MD5 values of the
1344 file. Otherwise, the checksum will be computed.
1345
1346 :type reduced_redundancy: bool
1347 :param reduced_redundancy: If True, this will set the storage
1348 class of the new Key to be REDUCED_REDUNDANCY. The Reduced
1349 Redundancy Storage (RRS) feature of S3 provides lower
1350 redundancy at lower storage cost.

:type encrypt_key: bool
1351 :param encrypt_key: If True, the new copy of the object
1352 will be encrypted on the server-side by S3 and will be
1353 stored in an encrypted form while at rest in S3.
1354
1355 :rtype: int
1356 :return: The number of bytes written to the key.
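
Example (an illustrative sketch; the local path and key name are
hypothetical)::

    k = bucket.new_key('backups/data.tar.gz')
    k.set_contents_from_filename('/tmp/data.tar.gz', encrypt_key=True)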
1357 """
1358 with open(filename, 'rb') as fp:
1359 return self.set_contents_from_file(fp, headers, replace, cb,
1360 num_cb, policy, md5,
1361 reduced_redundancy,
1362 encrypt_key=encrypt_key)
1363
1364 def set_contents_from_string(self, string_data, headers=None, replace=True,
1365 cb=None, num_cb=10, policy=None, md5=None,
1366 reduced_redundancy=False,
1367 encrypt_key=False):
1368 """
1369 Store an object in S3 using the name of the Key object as the
1370 key in S3 and the string 's' as the contents.
1371 See set_contents_from_file method for details about the
1372 parameters.
1373
1374 :type headers: dict
1375 :param headers: Additional headers to pass along with the
1376 request to AWS.
1377
1378 :type replace: bool
1379 :param replace: If True, replaces the contents of the file if
1380 it already exists.
1381
1382 :type cb: function
1383 :param cb: a callback function that will be called to report
1384 progress on the upload. The callback should accept two
1385 integer parameters, the first representing the number of
1386 bytes that have been successfully transmitted to S3 and
1387 the second representing the size of the to be transmitted
1388 object.
1389
1390 :type num_cb: int
1391 :param num_cb: (optional) If a callback is specified with the
1392 cb parameter this parameter determines the granularity of
1393 the callback by defining the maximum number of times the
1394 callback will be called during the file transfer.
1395
1396 :type policy: :class:`boto.s3.acl.CannedACLStrings`
1397 :param policy: A canned ACL policy that will be applied to the
1398 new key in S3.
1399
1400 :type md5: A tuple containing the hexdigest version of the MD5
1401 checksum of the file as the first element and the
1402 Base64-encoded version of the plain checksum as the second
1403 element. This is the same format returned by the
1404 compute_md5 method.
1405 :param md5: If you need to compute the MD5 for any reason
1406 prior to upload, it's silly to have to do it twice so this
1407 param, if present, will be used as the MD5 values of the
1408 file. Otherwise, the checksum will be computed.
1409
1410 :type reduced_redundancy: bool
1411 :param reduced_redundancy: If True, this will set the storage
1412 class of the new Key to be REDUCED_REDUNDANCY. The Reduced
1413 Redundancy Storage (RRS) feature of S3, provides lower
1414 redundancy at lower storage cost.
1415
1416 :type encrypt_key: bool
1417 :param encrypt_key: If True, the new copy of the object will
1418 be encrypted on the server-side by S3 and will be stored
1419 in an encrypted form while at rest in S3.
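
Example (an illustrative sketch; the key name and contents are
hypothetical)::

    k = bucket.new_key('notes/todo.txt')
    k.set_contents_from_string('buy milk')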
1420 """
1421 if not isinstance(string_data, bytes):
1422 string_data = string_data.encode("utf-8")
1423 fp = BytesIO(string_data)
1424 r = self.set_contents_from_file(fp, headers, replace, cb, num_cb,
1425 policy, md5, reduced_redundancy,
1426 encrypt_key=encrypt_key)
1427 fp.close()
1428 return r
1429
1430 def get_file(self, fp, headers=None, cb=None, num_cb=10,
1431 torrent=False, version_id=None, override_num_retries=None,
1432 response_headers=None):
1433 """
1434 Retrieves a file from an S3 Key
1435
1436 :type fp: file
1437 :param fp: File pointer to put the data into
1438
1439 :type headers: dict
1440 :param headers: headers to send when retrieving the files
1441
1442 :type cb: function
1443 :param cb: a callback function that will be called to report
1444 progress on the download. The callback should accept two
1445 integer parameters, the first representing the number of
1446 bytes that have been successfully received from S3 and
1447 the second representing the size of the object being
1448 transferred.
1449
1450 :type num_cb: int
1451 :param num_cb: (optional) If a callback is specified with the
1452 cb parameter this parameter determines the granularity of
1453 the callback by defining the maximum number of times the
1454 callback will be called during the file transfer.
1455
1456 :type torrent: bool
1457 :param torrent: Flag for whether to get a torrent for the file
1458
1459 :type override_num_retries: int
1460 :param override_num_retries: If not None will override configured
1461 num_retries parameter for underlying GET.
1462
1463 :type response_headers: dict
1464 :param response_headers: A dictionary containing HTTP
1465 headers/values that will override any headers associated
1466 with the stored object in the response. See
1467 http://goo.gl/EWOPb for details.
1468
1469 :type version_id: str
1470 :param version_id: The ID of a particular version of the object.
1471 If this parameter is not supplied but the Key object has
1472 a ``version_id`` attribute, that value will be used when
1473 retrieving the object. You can set the Key object's
1474 ``version_id`` attribute to None to always grab the latest
1475 version from a version-enabled bucket.
1476 """
1477 self._get_file_internal(fp, headers=headers, cb=cb, num_cb=num_cb,
1478 torrent=torrent, version_id=version_id,
1479 override_num_retries=override_num_retries,
1480 response_headers=response_headers,
1481 hash_algs=None,
1482 query_args=None)
1483
1484 def _get_file_internal(self, fp, headers=None, cb=None, num_cb=10,
1485 torrent=False, version_id=None, override_num_retries=None,
1486 response_headers=None, hash_algs=None, query_args=None):
1487 if headers is None:
1488 headers = {}
1489 save_debug = self.bucket.connection.debug
1490 if self.bucket.connection.debug == 1:
1491 self.bucket.connection.debug = 0
1492
1493 query_args = query_args or []
1494 if torrent:
1495 query_args.append('torrent')
1496
1497 if hash_algs is None and not torrent:
1498 hash_algs = {'md5': md5}
1499 digesters = dict((alg, hash_algs[alg]()) for alg in hash_algs or {})
1500
1501 # If a version_id is passed in, use that. If not, check to see
1502 # if the Key object has an explicit version_id and, if so, use that.
1503 # Otherwise, don't pass a version_id query param.
1504 if version_id is None:
1505 version_id = self.version_id
1506 if version_id:
1507 query_args.append('versionId=%s' % version_id)
1508 if response_headers:
1509 for key in response_headers:
1510 query_args.append('%s=%s' % (
1511 key, urllib.parse.quote(response_headers[key])))
1512 query_args = '&'.join(query_args)
1513 self.open('r', headers, query_args=query_args,
1514 override_num_retries=override_num_retries)
1515
1516 data_len = 0
1517 if cb:
1518 if self.size is None:
1519 cb_size = 0
1520 else:
1521 cb_size = self.size
1522 if self.size is None and num_cb != -1:
1523 # If size is not available due to chunked transfer for example,
1524 # we'll call the cb for every 1MB of data transferred.
1525 cb_count = (1024 * 1024) / self.BufferSize
1526 elif num_cb > 1:
1527 cb_count = int(math.ceil(cb_size/self.BufferSize/(num_cb-1.0)))
1528 elif num_cb < 0:
1529 cb_count = -1
1530 else:
1531 cb_count = 0
1532 i = 0
1533 cb(data_len, cb_size)
1534 try:
1535 for bytes in self:
1536 fp.write(bytes)
1537 data_len += len(bytes)
1538 for alg in digesters:
1539 digesters[alg].update(bytes)
1540 if cb:
1541 if cb_size > 0 and data_len >= cb_size:
1542 break
1543 i += 1
1544 if i == cb_count or cb_count == -1:
1545 cb(data_len, cb_size)
1546 i = 0
1547 except IOError as e:
1548 if e.errno == errno.ENOSPC:
1549 raise StorageDataError('Out of space for destination file '
1550 '%s' % fp.name)
1551 raise
1552 if cb and (cb_count <= 1 or i > 0) and data_len > 0:
1553 cb(data_len, cb_size)
1554 for alg in digesters:
1555 self.local_hashes[alg] = digesters[alg].digest()
1556 if self.size is None and not torrent and "Range" not in headers:
1557 self.size = data_len
1558 self.close()
1559 self.bucket.connection.debug = save_debug
1560
1561 def get_torrent_file(self, fp, headers=None, cb=None, num_cb=10):
1562 """
1563 Get a torrent file (see get_file)
1564
1565 :type fp: file
1566 :param fp: The file pointer to which the torrent file will be written
1567
1568 :type headers: dict
1569 :param headers: Headers to be passed with the request
1570
1571 :type cb: function
1572 :param cb: a callback function that will be called to report
1573 progress on the download. The callback should accept two
1574 integer parameters, the first representing the number of
1575 bytes that have been successfully transmitted from S3 and
1576 the second representing the total size of the object to
1577 be transmitted.
1578
1579 :type num_cb: int
1580 :param num_cb: (optional) If a callback is specified with the
1581 cb parameter, this parameter determines the granularity of
1582 the callback by defining the maximum number of times the
1583 callback will be called during the file transfer.
1584
1585 """
1586 return self.get_file(fp, headers, cb, num_cb, torrent=True)
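# Illustrative usage of get_torrent_file (editor's sketch, not part of the
# original source; the key object and file name are hypothetical):
#
#     with open('example.torrent', 'wb') as fp:
#         key.get_torrent_file(fp)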
1587
1588 def get_contents_to_file(self, fp, headers=None,
1589 cb=None, num_cb=10,
1590 torrent=False,
1591 version_id=None,
1592 res_download_handler=None,
1593 response_headers=None):
1594 """
1595 Retrieve an object from S3 using the name of the Key object as the
1596 key in S3. Write the contents of the object to the file pointed
1597 to by 'fp'.
1598
1599 :type fp: file-like object
1600 :param fp: The file-like object to which the object's contents are written
1601
1602 :type headers: dict
1603 :param headers: additional HTTP headers that will be sent with
1604 the GET request.
1605
1606 :type cb: function
1607 :param cb: a callback function that will be called to report
1608 progress on the download. The callback should accept two
1609 integer parameters, the first representing the number of
1610 bytes that have been successfully transmitted from S3 and
1611 the second representing the total size of the object to
1612 be transmitted.
1613
1614 :type num_cb: int
1615 :param num_cb: (optional) If a callback is specified with the
1616 cb parameter, this parameter determines the granularity of
1617 the callback by defining the maximum number of times the
1618 callback will be called during the file transfer.
1619
1620 :type torrent: bool
1621 :param torrent: If True, retrieve the torrent file for the
1622 object rather than the object itself.
1623
1624 :type res_download_handler: ResumableDownloadHandler
1625 :param res_download_handler: If provided, this handler will
1626 perform the download.
1627
1628 :type response_headers: dict
1629 :param response_headers: A dictionary containing HTTP
1630 headers/values that will override any headers associated
1631 with the stored object in the response. See
1632 http://goo.gl/EWOPb for details.
1633
1634 :type version_id: str
1635 :param version_id: The ID of a particular version of the object.
1636 If this parameter is not supplied but the Key object has
1637 a ``version_id`` attribute, that value will be used when
1638 retrieving the object. You can set the Key object's
1639 ``version_id`` attribute to None to always grab the latest
1640 version from a version-enabled bucket.
1641 """
1642 if self.bucket is not None:
1643 if res_download_handler:
1644 res_download_handler.get_file(self, fp, headers, cb, num_cb,
1645 torrent=torrent,
1646 version_id=version_id)
1647 else:
1648 self.get_file(fp, headers, cb, num_cb, torrent=torrent,
1649 version_id=version_id,
1650 response_headers=response_headers)
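# Illustrative usage of get_contents_to_file with a progress callback
# (editor's sketch, not part of the original source; the callback, key
# object and path are hypothetical):
#
#     def progress(transmitted, total):
#         print('%d of %d bytes received' % (transmitted, total))
#
#     with open('/tmp/example.dat', 'wb') as fp:
#         key.get_contents_to_file(fp, cb=progress, num_cb=20)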
1651
1652 def get_contents_to_filename(self, filename, headers=None,
1653 cb=None, num_cb=10,
1654 torrent=False,
1655 version_id=None,
1656 res_download_handler=None,
1657 response_headers=None):
1658 """
1659 Retrieve an object from S3 using the name of the Key object as the
1660 key in S3. Store contents of the object to a file named by 'filename'.
1661 See get_contents_to_file method for details about the
1662 parameters.
1663
1664 :type filename: string
1665 :param filename: The filename of where to put the file contents
1666
1667 :type headers: dict
1668 :param headers: Any additional headers to send in the request
1669
1670 :type cb: function
1671 :param cb: a callback function that will be called to report
1672 progress on the download. The callback should accept two
1673 integer parameters, the first representing the number of
1674 bytes that have been successfully transmitted from S3 and
1675 the second representing the total size of the object to
1676 be transmitted.
1677
1678 :type num_cb: int
1679 :param num_cb: (optional) If a callback is specified with the
1680 cb parameter, this parameter determines the granularity of
1681 the callback by defining the maximum number of times the
1682 callback will be called during the file transfer.
1683
1684 :type torrent: bool
1685 :param torrent: If True, retrieve the torrent file for the
1686 object rather than the object itself.
1687
1688 :type res_download_handler: ResumableDownloadHandler
1689 :param res_download_handler: If provided, this handler will
1690 perform the download.
1691
1692 :type response_headers: dict
1693 :param response_headers: A dictionary containing HTTP
1694 headers/values that will override any headers associated
1695 with the stored object in the response. See
1696 http://goo.gl/EWOPb for details.
1697
1698 :type version_id: str
1699 :param version_id: The ID of a particular version of the object.
1700 If this parameter is not supplied but the Key object has
1701 a ``version_id`` attribute, that value will be used when
1702 retrieving the object. You can set the Key object's
1703 ``version_id`` attribute to None to always grab the latest
1704 version from a version-enabled bucket.
1705 """
1706 try:
1707 with open(filename, 'wb') as fp:
1708 self.get_contents_to_file(fp, headers, cb, num_cb,
1709 torrent=torrent,
1710 version_id=version_id,
1711 res_download_handler=res_download_handler,
1712 response_headers=response_headers)
1713 except Exception:
1714 os.remove(filename)
1715 raise
1716 # if last_modified date was sent from s3, try to set file's timestamp
1717 if self.last_modified is not None:
1718 try:
1719 modified_tuple = email.utils.parsedate_tz(self.last_modified)
1720 modified_stamp = int(email.utils.mktime_tz(modified_tuple))
1721 os.utime(fp.name, (modified_stamp, modified_stamp))
1722 except Exception:
1723 pass
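# Illustrative usage of get_contents_to_filename (editor's sketch, not part
# of the original source; the key object and destination path are
# hypothetical):
#
#     key.get_contents_to_filename('/tmp/example.dat')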
1724
1725 def get_contents_as_string(self, headers=None,
1726 cb=None, num_cb=10,
1727 torrent=False,
1728 version_id=None,
1729 response_headers=None, encoding=None):
1730 """
1731 Retrieve an object from S3 using the name of the Key object as the
1732 key in S3. Return the contents of the object as a string.
1733 See get_contents_to_file method for details about the
1734 parameters.
1735
1736 :type headers: dict
1737 :param headers: Any additional headers to send in the request
1738
1739 :type cb: function
1740 :param cb: a callback function that will be called to report
1741 progress on the download. The callback should accept two
1742 integer parameters, the first representing the number of
1743 bytes that have been successfully transmitted from S3 and
1744 the second representing the total size of the object to
1745 be transmitted.
1746
1747 :type num_cb: int
1748 :param num_cb: (optional) If a callback is specified with the
1749 cb parameter, this parameter determines the granularity of
1750 the callback by defining the maximum number of times the
1751 callback will be called during the file transfer.
1752
1753 :type torrent: bool
1754 :param torrent: If True, returns the contents of a torrent file
1755 as a string.
1756
1757 :type response_headers: dict
1758 :param response_headers: A dictionary containing HTTP
1759 headers/values that will override any headers associated
1760 with the stored object in the response. See
1761 http://goo.gl/EWOPb for details.
1762
1763 :type version_id: str
1764 :param version_id: The ID of a particular version of the object.
1765 If this parameter is not supplied but the Key object has
1766 a ``version_id`` attribute, that value will be used when
1767 retrieving the object. You can set the Key object's
1768 ``version_id`` attribute to None to always grab the latest
1769 version from a version-enabled bucket.
1770
1771 :type encoding: str
1772 :param encoding: The text encoding to use, such as ``utf-8``
1773 or ``iso-8859-1``. If set, then a string will be returned.
1774 Defaults to ``None`` and returns bytes.
1775
1776 :rtype: bytes or str
1777 :returns: The contents of the file as bytes or a string
1778 """
1779 fp = BytesIO()
1780 self.get_contents_to_file(fp, headers, cb, num_cb, torrent=torrent,
1781 version_id=version_id,
1782 response_headers=response_headers)
1783 value = fp.getvalue()
1784
1785 if encoding is not None:
1786 value = value.decode(encoding)
1787
1788 return value
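# Illustrative usage of get_contents_as_string (editor's sketch, not part of
# the original source; the key object is hypothetical):
#
#     raw = key.get_contents_as_string()                    # bytes
#     text = key.get_contents_as_string(encoding='utf-8')   # decoded text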
1789
1790 def add_email_grant(self, permission, email_address, headers=None):
1791 """
1792 Convenience method that provides a quick way to add an email grant
1793 to a key. This method retrieves the current ACL, creates a new
1794 grant based on the parameters passed in, adds that grant to the ACL
1795 and then PUT's the new ACL back to S3.
1796
1797 :type permission: string
1798 :param permission: The permission being granted. Should be one of:
1799 (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL).
1800
1801 :type email_address: string
1802 :param email_address: The email address associated with the AWS
1803 account you are granting the permission to.
1804
1805 :type recursive: boolean
1806 :param recursive: A boolean value that controls whether the
1807 command will apply the grant to all keys within the bucket
1808 or not. The default value is False. By passing a True
1809 value, the call will iterate through all keys in the
1810 bucket and apply the same grant to each key. CAUTION: If
1811 you have a lot of keys, this could take a long time!
1812 """
1813 policy = self.get_acl(headers=headers)
1814 policy.acl.add_email_grant(permission, email_address)
1815 self.set_acl(policy, headers=headers)
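# Illustrative usage of add_email_grant (editor's sketch, not part of the
# original source; the key object and email address are hypothetical):
#
#     key.add_email_grant('READ', 'user@example.com')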
1816
1817 def add_user_grant(self, permission, user_id, headers=None,
1818 display_name=None):
1819 """
1820 Convenience method that provides a quick way to add a canonical
1821 user grant to a key. This method retrieves the current ACL,
1822 creates a new grant based on the parameters passed in, adds that
1823 grant to the ACL and then PUT's the new ACL back to S3.
1824
1825 :type permission: string
1826 :param permission: The permission being granted. Should be one of:
1827 (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL).
1828
1829 :type user_id: string
1830 :param user_id: The canonical user id associated with the AWS
1831 account you are granting the permission to.
1832
1833 :type display_name: string
1834 :param display_name: An optional string containing the user's
1835 Display Name. Only required on Walrus.
1836 """
1837 policy = self.get_acl(headers=headers)
1838 policy.acl.add_user_grant(permission, user_id,
1839 display_name=display_name)
1840 self.set_acl(policy, headers=headers)
1841
1842 def _normalize_metadata(self, metadata):
1843 if type(metadata) == set:
1844 norm_metadata = set()
1845 for k in metadata:
1846 norm_metadata.add(k.lower())
1847 else:
1848 norm_metadata = {}
1849 for k in metadata:
1850 norm_metadata[k.lower()] = metadata[k]
1851 return norm_metadata
1852
1853 def _get_remote_metadata(self, headers=None):
1854 """
1855 Extracts metadata from existing URI into a dict, so we can
1856 overwrite/delete from it to form the new set of metadata to apply to a
1857 key.
1858 """
1859 metadata = {}
1860 for underscore_name in self._underscore_base_user_settable_fields:
1861 if hasattr(self, underscore_name):
1862 value = getattr(self, underscore_name)
1863 if value:
1864 # Generate HTTP field name corresponding to "_" named field.
1865 field_name = underscore_name.replace('_', '-')
1866 metadata[field_name.lower()] = value
1867 # self.metadata contains custom metadata, which are all user-settable.
1868 prefix = self.provider.metadata_prefix
1869 for underscore_name in self.metadata:
1870 field_name = underscore_name.replace('_', '-')
1871 metadata['%s%s' % (prefix, field_name.lower())] = (
1872 self.metadata[underscore_name])
1873 return metadata
1874
1875 def set_remote_metadata(self, metadata_plus, metadata_minus, preserve_acl,
1876 headers=None):
1877 metadata_plus = self._normalize_metadata(metadata_plus)
1878 metadata_minus = self._normalize_metadata(metadata_minus)
1879 metadata = self._get_remote_metadata()
1880 metadata.update(metadata_plus)
1881 for h in metadata_minus:
1882 if h in metadata:
1883 del metadata[h]
1884 src_bucket = self.bucket
1885 # Boto prepends the meta prefix when adding headers, so strip the prefix
1886 # from the metadata before passing it back to the copy_key() call.
1887 rewritten_metadata = {}
1888 for h in metadata:
1889 if (h.startswith('x-goog-meta-') or h.startswith('x-amz-meta-')):
1890 rewritten_h = (h.replace('x-goog-meta-', '')
1891 .replace('x-amz-meta-', ''))
1892 else:
1893 rewritten_h = h
1894 rewritten_metadata[rewritten_h] = metadata[h]
1895 metadata = rewritten_metadata
1896 src_bucket.copy_key(self.name, self.bucket.name, self.name,
1897 metadata=metadata, preserve_acl=preserve_acl,
1898 headers=headers)
1899
1900 def restore(self, days, headers=None):
1901 """Restore an object from an archive.
1902
1903 :type days: int
1904 :param days: The lifetime of the restored object (must
1905 be at least 1 day). If the object is already restored
1906 then this parameter can be used to readjust the lifetime
1907 of the restored object. In this case, the days
1908 param is with respect to the initial time of the request.
1909 If the object has not been restored, this param is with
1910 respect to the completion time of the request.
1911
1912 """
1913 response = self.bucket.connection.make_request(
1914 'POST', self.bucket.name, self.name,
1915 data=self.RestoreBody % days,
1916 headers=headers, query_args='restore')
1917 if response.status not in (200, 202):
1918 provider = self.bucket.connection.provider
1919 raise provider.storage_response_error(response.status,
1920 response.reason,
1921 response.read())
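# Illustrative usage of restore for an object archived to Glacier (editor's
# sketch, not part of the original source; the key object is hypothetical):
#
#     key.restore(days=5)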