Mercurial repository: bcclaywell / argo_navis
File: venv/lib/python2.7/site-packages/requests_toolbelt/multipart/encoder.py
Changeset: 0:d67268158946 (draft) — file added (compared against -1:000000000000)
Commit message: planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
Author: bcclaywell
Date: Mon, 12 Oct 2015 17:43:33 -0400
Parents: none — Children: none
1 # -*- coding: utf-8 -*- | |
2 """ | |
3 | |
4 requests_toolbelt.multipart.encoder | |
5 =================================== | |
6 | |
7 This holds all of the implementation details of the MultipartEncoder | |
8 | |
9 """ | |
10 | |
11 from requests.utils import super_len | |
12 from requests.packages.urllib3 import fields | |
13 from uuid import uuid4 | |
14 | |
15 import contextlib | |
16 import io | |
17 | |
18 | |
class MultipartEncoder(object):

    """

    The ``MultipartEncoder`` object is a generic interface to the engine that
    will create a ``multipart/form-data`` body for you.

    The basic usage is:

    .. code-block:: python

        import requests
        from requests_toolbelt import MultipartEncoder

        encoder = MultipartEncoder({'field': 'value',
                                    'other_field': 'other_value'})
        r = requests.post('https://httpbin.org/post', data=encoder,
                          headers={'Content-Type': encoder.content_type})

    If you do not need to take advantage of streaming the post body, you can
    also do:

    .. code-block:: python

        r = requests.post('https://httpbin.org/post',
                          data=encoder.to_string(),
                          headers={'Content-Type': encoder.content_type})

    If you want the encoder to use a specific order, you can use an
    OrderedDict or more simply, a list of tuples:

    .. code-block:: python

        encoder = MultipartEncoder([('field', 'value'),
                                    ('other_field', 'other_value')])

    .. versionchanged:: 0.4.0

    You can also provide tuples as part values as you would provide them to
    requests' ``files`` parameter.

    .. code-block:: python

        encoder = MultipartEncoder({
            'field': ('file_name', b'{"a": "b"}', 'application/json',
                      {'X-My-Header': 'my-value'})
        })

    .. warning::

        This object will end up directly in :mod:`httplib`. Currently,
        :mod:`httplib` has a hard-coded read size of **8192 bytes**. This
        means that it will loop until the file has been read and your upload
        could take a while. This is **not** a bug in requests. A feature is
        being considered for this object to allow you, the user, to specify
        what size should be returned on a read. If you have opinions on this,
        please weigh in on `this issue`_.

    .. _this issue:
        https://github.com/sigmavirus24/requests-toolbelt/issues/75

    """

    def __init__(self, fields, boundary=None, encoding='utf-8'):
        """
        :param fields: dict (or iterable of 2-tuples) mapping field names to
            values; a value may also be a 2/3/4-tuple of
            (filename, data[, content_type[, headers]]) as accepted by
            requests' ``files`` parameter (see ``_iter_fields``)
        :param str boundary: (optional) boundary value; a random UUID hex
            string is generated when omitted
        :param str encoding: (optional) encoding applied to any text values
        """
        #: Boundary value either passed in by the user or created
        self.boundary_value = boundary or uuid4().hex

        # Computed boundary
        self.boundary = '--{0}'.format(self.boundary_value)

        #: Encoding of the data being passed in
        self.encoding = encoding

        # Pre-encoded boundary
        self._encoded_boundary = b''.join([
            encode_with(self.boundary, self.encoding),
            encode_with('\r\n', self.encoding)
        ])

        #: Fields provided by the user
        self.fields = fields

        #: Whether or not the encoder is finished
        self.finished = False

        #: Pre-computed parts of the upload
        self.parts = []

        # Pre-computed parts iterator
        self._iter_parts = iter([])

        # The part we're currently working with
        self._current_part = None

        # Cached computation of the body's length
        self._len = None

        # Our buffer
        self._buffer = CustomBytesIO(encoding=encoding)

        # Pre-compute each part's headers
        self._prepare_parts()

        # Load boundary into buffer
        self._write_boundary()

    @property
    def len(self):
        """Length of the multipart/form-data body.

        requests will first attempt to get the length of the body by calling
        ``len(body)`` and then by checking for the ``len`` attribute.

        On 32-bit systems, the ``__len__`` method cannot return anything
        larger than an integer (in C) can hold. If the total size of the body
        is even slightly larger than 4GB users will see an OverflowError. This
        manifested itself in `bug #80`_.

        As such, we now calculate the length lazily as a property.

        .. _bug #80:
            https://github.com/sigmavirus24/requests-toolbelt/issues/80
        """
        # If _len isn't already calculated, calculate, return, and set it
        return self._len or self._calculate_length()

    def __repr__(self):
        return '<MultipartEncoder: {0!r}>'.format(self.fields)

    def _calculate_length(self):
        """
        This uses the parts to calculate the length of the body.

        This returns the calculated length so __len__ can be lazy.
        """
        boundary_len = len(self.boundary)  # Length of --{boundary}
        # boundary length + header length + body length + len('\r\n') * 2
        self._len = sum(
            (boundary_len + super_len(p) + 4) for p in self.parts
        ) + boundary_len + 4
        return self._len

    def _calculate_load_amount(self, read_size):
        """This calculates how many bytes need to be added to the buffer.

        When a consumer read's ``x`` from the buffer, there are two cases to
        satisfy:

            1. Enough data in the buffer to return the requested amount
            2. Not enough data

        This function uses the amount of unread bytes in the buffer and
        determines how much the Encoder has to load before it can return the
        requested amount of bytes.

        :param int read_size: the number of bytes the consumer requests
        :returns: int -- the number of bytes that must be loaded into the
            buffer before the read can be satisfied. This will be strictly
            non-negative
        """
        amount = read_size - super_len(self._buffer)
        return amount if amount > 0 else 0

    def _load(self, amount):
        """Load ``amount`` number of bytes into the buffer.

        An ``amount`` of -1 means "load everything that remains".
        """
        # Discard bytes the consumer has already read before buffering more.
        self._buffer.smart_truncate()
        part = self._current_part or self._next_part()
        while amount == -1 or amount > 0:
            written = 0
            if not part.bytes_left_to_write():
                # Current part is exhausted: terminate it and start the next.
                written += self._write(b'\r\n')
                written += self._write_boundary()
                part = self._next_part()

                if not part:
                    # No parts remain: emit the closing boundary and stop.
                    written += self._write_closing_boundary()
                    self.finished = True
                    break

            written += part.write_to(self._buffer, amount)

            if amount != -1:
                amount -= written

    def _next_part(self):
        # Advance the parts iterator; returns None when exhausted.
        try:
            p = self._current_part = next(self._iter_parts)
        except StopIteration:
            p = None
        return p

    def _iter_fields(self):
        """Yield a urllib3 ``RequestField`` for each user-supplied field."""
        _fields = self.fields
        if hasattr(self.fields, 'items'):
            _fields = list(self.fields.items())
        for k, v in _fields:
            file_name = None
            file_type = None
            file_headers = None
            if isinstance(v, (list, tuple)):
                # Tuple values mirror requests' ``files`` parameter:
                # (filename, data[, content_type[, custom_headers]])
                if len(v) == 2:
                    file_name, file_pointer = v
                elif len(v) == 3:
                    file_name, file_pointer, file_type = v
                else:
                    file_name, file_pointer, file_type, file_headers = v
            else:
                file_pointer = v

            field = fields.RequestField(name=k, data=file_pointer,
                                        filename=file_name,
                                        headers=file_headers)
            field.make_multipart(content_type=file_type)
            yield field

    def _prepare_parts(self):
        """This uses the fields provided by the user and creates Part objects.

        It populates the `parts` attribute and uses that to create a
        generator for iteration.
        """
        enc = self.encoding
        self.parts = [Part.from_field(f, enc) for f in self._iter_fields()]
        self._iter_parts = iter(self.parts)

    def _write(self, bytes_to_write):
        """Write the bytes to the end of the buffer.

        :param bytes bytes_to_write: byte-string (or bytearray) to append to
            the buffer
        :returns: int -- the number of bytes written
        """
        return self._buffer.append(bytes_to_write)

    def _write_boundary(self):
        """Write the boundary to the end of the buffer."""
        return self._write(self._encoded_boundary)

    def _write_closing_boundary(self):
        """Write the bytes necessary to finish a multipart/form-data body."""
        # Rewind over the trailing '\r\n' and replace it with '--\r\n',
        # turning the last boundary into the closing one (net +2 bytes).
        with reset(self._buffer):
            self._buffer.seek(-2, 2)
            self._buffer.write(b'--\r\n')
        return 2

    def _write_headers(self, headers):
        """Write the current part's headers to the buffer."""
        return self._write(encode_with(headers, self.encoding))

    @property
    def content_type(self):
        # str() keeps the header value a native string on Python 2.
        return str(
            'multipart/form-data; boundary={0}'.format(self.boundary_value)
        )

    def to_string(self):
        """Render the entire body as a single byte string."""
        return self.read()

    def read(self, size=-1):
        """Read data from the streaming encoder.

        :param int size: (optional), If provided, ``read`` will return exactly
            that many bytes. If it is not provided, it will return the
            remaining bytes.
        :returns: bytes
        """
        if self.finished:
            return self._buffer.read(size)

        bytes_to_load = size
        if bytes_to_load != -1 and bytes_to_load is not None:
            bytes_to_load = self._calculate_load_amount(int(size))

        # NOTE(review): when ``size`` is None, ``_load(None)`` only terminates
        # because Python 2 evaluates ``None > 0`` as False — confirm before
        # porting this module to Python 3.
        self._load(bytes_to_load)
        return self._buffer.read(size)
294 | |
295 | |
def IDENTITY(monitor):
    """Default no-op callback: hand the monitor straight back."""
    return monitor
298 | |
299 | |
class MultipartEncoderMonitor(object):

    """
    An object used to monitor the progress of a :class:`MultipartEncoder`.

    The :class:`MultipartEncoder` should only be responsible for preparing
    and streaming the data. For anyone who wishes to monitor it, they
    shouldn't be using that instance to manage that as well. Using this
    class, they can monitor an encoder and register a callback. The callback
    receives the instance of the monitor.

    To use this monitor, you construct your :class:`MultipartEncoder` as you
    normally would.

    .. code-block:: python

        from requests_toolbelt import (MultipartEncoder,
                                       MultipartEncoderMonitor)
        import requests

        def callback(monitor):
            # Do something with monitor.bytes_read
            pass

        m = MultipartEncoder(fields={'field0': 'value0'})
        monitor = MultipartEncoderMonitor(m, callback)
        headers = {'Content-Type': monitor.content_type}
        r = requests.post('https://httpbin.org/post', data=monitor,
                          headers=headers)

    Alternatively, if your use case is very simple, you can use the following
    pattern.

    .. code-block:: python

        from requests_toolbelt import MultipartEncoderMonitor
        import requests

        def callback(monitor):
            # Do something with monitor.bytes_read
            pass

        monitor = MultipartEncoderMonitor.from_fields(
            fields={'field0': 'value0'}, callback=callback
        )
        headers = {'Content-Type': monitor.content_type}
        r = requests.post('https://httpbin.org/post', data=monitor,
                          headers=headers)

    """

    def __init__(self, encoder, callback=None):
        """
        :param encoder: the :class:`MultipartEncoder` being monitored
        :param callback: (optional) function of one argument; invoked with
            this monitor after every ``read``
        """
        #: Instance of the :class:`MultipartEncoder` being monitored
        self.encoder = encoder

        #: Optional function to call after a read
        self.callback = callback or IDENTITY

        #: Number of bytes already read from the :class:`MultipartEncoder`
        #: instance
        self.bytes_read = 0

        #: Avoid the same problem in bug #80
        self.len = self.encoder.len

    @classmethod
    def from_fields(cls, fields, boundary=None, encoding='utf-8',
                    callback=None):
        """Build a :class:`MultipartEncoder` and wrap it in a monitor."""
        encoder = MultipartEncoder(fields, boundary, encoding)
        return cls(encoder, callback)

    @property
    def content_type(self):
        """Content-Type header value of the wrapped encoder."""
        return self.encoder.content_type

    def to_string(self):
        """Render the whole body at once (the callback still fires)."""
        return self.read()

    def read(self, size=-1):
        """Read from the wrapped encoder, record progress, fire callback."""
        chunk = self.encoder.read(size)
        self.bytes_read += len(chunk)
        self.callback(self)
        return chunk
383 | |
384 | |
def encode_with(string, encoding):
    """Return ``string`` as bytes, encoding only when necessary.

    ``None`` and ``bytes`` values pass through untouched; anything else
    (i.e. text) is encoded with the provided encoding.

    :param str string: value to encode; bytes/None are returned as-is
    :param str encoding: codec used for the encode step
    :returns: encoded bytes object (or None when given None)
    """
    if string is None or isinstance(string, bytes):
        return string
    return string.encode(encoding)
396 | |
397 | |
def readable_data(data, encoding):
    """Coerce the data to an object with a ``read`` method.

    File-like inputs are passed straight through; anything else is
    wrapped in a :class:`CustomBytesIO` using ``encoding``.
    """
    is_file_like = hasattr(data, 'read')
    return data if is_file_like else CustomBytesIO(data, encoding)
404 | |
405 | |
@contextlib.contextmanager
def reset(buffer):
    """Keep track of the buffer's current position and write to the end.

    This is a context manager meant to be used when adding data to the
    buffer. It eliminates the need for every function to be concerned with
    the position of the cursor in the buffer: the current position is saved,
    the cursor moves to the end for the body to append, and the original
    position is restored afterwards.

    :param buffer: a seekable file-like object (e.g. a ``CustomBytesIO``)
    """
    original_position = buffer.tell()
    buffer.seek(0, 2)  # 2 == SEEK_END: appends always go at the end
    try:
        yield
    finally:
        # Restore even when the body raises, so a failed write can never
        # leave the buffer's read position dangling at the end (the
        # previous version skipped this seek on an exception).
        buffer.seek(original_position, 0)
418 | |
419 | |
def coerce_data(data, encoding):
    """Ensure that every object's __len__ behaves uniformly.

    Already-wrapped :class:`CustomBytesIO` objects pass straight through.
    In-memory streams (``getvalue``) are re-wrapped by value, real files
    (``fileno``) are wrapped in a :class:`FileWrapper`, and plain non-file
    values become a :class:`CustomBytesIO`.
    """
    if isinstance(data, CustomBytesIO):
        return data

    if hasattr(data, 'getvalue'):
        return CustomBytesIO(data.getvalue(), encoding)

    if hasattr(data, 'fileno'):
        return FileWrapper(data)

    if hasattr(data, 'read'):
        return data

    return CustomBytesIO(data, encoding)
433 | |
434 | |
def to_list(fields):
    """Return ``fields`` as a list of pairs, accepting dict-likes too."""
    pairs = fields.items() if hasattr(fields, 'items') else fields
    return list(pairs)
439 | |
440 | |
class Part(object):

    """One part of the multipart body: encoded headers plus a body stream.

    The encoder iterates over these, writing each part's headers once and
    then draining its body into the shared buffer.
    """

    def __init__(self, headers, body):
        # Rendered, pre-encoded header block for this part (bytes)
        self.headers = headers
        # Body coerced by ``coerce_data``; its ``len`` shrinks as it is read
        self.body = body
        # The headers are written into the buffer only once
        self.headers_unread = True
        # Total remaining size; consumed by the encoder's length calculation
        self.len = len(self.headers) + super_len(self.body)

    @classmethod
    def from_field(cls, field, encoding):
        """Create a part from a Request Field generated by urllib3."""
        headers = encode_with(field.render_headers(), encoding)
        body = coerce_data(field.data, encoding)
        return cls(headers, body)

    def bytes_left_to_write(self):
        """Determine if there are bytes left to write.

        :returns: bool -- ``True`` if there are bytes left to write, otherwise
            ``False``
        """
        to_read = 0
        if self.headers_unread:
            to_read += len(self.headers)

        # ``super_len(self.body)`` reports only the not-yet-read portion
        return (to_read + super_len(self.body)) > 0

    def write_to(self, buffer, size):
        """Write the requested amount of bytes to the buffer provided.

        The number of bytes written may exceed size on the first read since we
        load the headers ambitiously.

        :param CustomBytesIO buffer: buffer we want to write bytes to
        :param int size: number of bytes requested to be written to the buffer
        :returns: int -- number of bytes actually written
        """
        written = 0
        if self.headers_unread:
            # Headers go out in full, even if that overshoots ``size``
            written += buffer.append(self.headers)
            self.headers_unread = False

        # ``size == -1`` means "write everything that is left"
        while super_len(self.body) > 0 and (size == -1 or written < size):
            amount_to_read = size
            if size != -1:
                amount_to_read = size - written
            written += buffer.append(self.body.read(amount_to_read))

        return written
489 | |
490 | |
class CustomBytesIO(io.BytesIO):

    """A BytesIO whose ``len`` reports only the bytes not yet read.

    The encoder treats this as a FIFO: appends always land at the end (via
    :func:`reset`), reads advance the cursor, and :meth:`smart_truncate`
    discards data that has already been consumed.
    """

    def __init__(self, buffer=None, encoding='utf-8'):
        super(CustomBytesIO, self).__init__(encode_with(buffer, encoding))

    def _get_end(self):
        """Return the total stream size without disturbing the cursor."""
        marker = self.tell()
        self.seek(0, 2)
        end = self.tell()
        self.seek(marker, 0)
        return end

    @property
    def len(self):
        # Unread bytes only: total size minus the current read position.
        return self._get_end() - self.tell()

    def append(self, bytes):
        """Append ``bytes`` at the end; the read cursor stays put."""
        with reset(self):
            written = self.write(bytes)
        return written

    def smart_truncate(self):
        """Drop already-read data once it outweighs what remains unread."""
        unread = super_len(self)
        consumed = self._get_end() - unread

        if consumed >= unread:
            remainder = self.read()
            self.seek(0, 0)
            self.truncate()
            self.write(remainder)
            self.seek(0, 0)  # leave the cursor at the start of the data
523 | |
524 | |
class FileWrapper(object):

    """Wrap a real file so that ``len`` reports only the unread bytes."""

    def __init__(self, file_object):
        # Underlying file-like object being streamed
        self.fd = file_object

    @property
    def len(self):
        # Remaining length: total size minus how far we have already read.
        return super_len(self.fd) - self.fd.tell()

    def read(self, length=-1):
        """Read up to ``length`` bytes (everything when -1) from the file."""
        return self.fd.read(length)