comparison venv/lib/python2.7/site-packages/requests_toolbelt/multipart/encoder.py @ 0:d67268158946 draft

planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
author bcclaywell
date Mon, 12 Oct 2015 17:43:33 -0400
parents
children
# -*- coding: utf-8 -*-
"""

requests_toolbelt.multipart.encoder
===================================

This holds all of the implementation details of the MultipartEncoder

"""

from requests.utils import super_len
from requests.packages.urllib3 import fields
from uuid import uuid4

import contextlib
import io


class MultipartEncoder(object):

    """

    The ``MultipartEncoder`` object is a generic interface to the engine that
    will create a ``multipart/form-data`` body for you.

    The basic usage is:

    .. code-block:: python

        import requests
        from requests_toolbelt import MultipartEncoder

        encoder = MultipartEncoder({'field': 'value',
                                    'other_field': 'other_value'})
        r = requests.post('https://httpbin.org/post', data=encoder,
                          headers={'Content-Type': encoder.content_type})

    If you do not need to take advantage of streaming the post body, you can
    also do:

    .. code-block:: python

        r = requests.post('https://httpbin.org/post',
                          data=encoder.to_string(),
                          headers={'Content-Type': encoder.content_type})

    If you want the encoder to use a specific order, you can use an
    OrderedDict or more simply, a list of tuples:

    .. code-block:: python

        encoder = MultipartEncoder([('field', 'value'),
                                    ('other_field', 'other_value')])

    .. versionchanged:: 0.4.0

    You can also provide tuples as part values as you would provide them to
    requests' ``files`` parameter.

    .. code-block:: python

        encoder = MultipartEncoder({
            'field': ('file_name', b'{"a": "b"}', 'application/json',
                      {'X-My-Header': 'my-value'})
        })

    .. warning::

        This object will end up directly in :mod:`httplib`. Currently,
        :mod:`httplib` has a hard-coded read size of **8192 bytes**. This
        means that it will loop until the file has been read and your upload
        could take a while. This is **not** a bug in requests. A feature is
        being considered for this object to allow you, the user, to specify
        what size should be returned on a read. If you have opinions on this,
        please weigh in on `this issue`_.

    .. _this issue:
        https://github.com/sigmavirus24/requests-toolbelt/issues/75

    """

    def __init__(self, fields, boundary=None, encoding='utf-8'):
        #: Boundary value either passed in by the user or created
        self.boundary_value = boundary or uuid4().hex

        # Computed boundary
        self.boundary = '--{0}'.format(self.boundary_value)

        #: Encoding of the data being passed in
        self.encoding = encoding

        # Pre-encoded boundary
        self._encoded_boundary = b''.join([
            encode_with(self.boundary, self.encoding),
            encode_with('\r\n', self.encoding)
        ])

        #: Fields provided by the user
        self.fields = fields

        #: Whether or not the encoder is finished
        self.finished = False

        #: Pre-computed parts of the upload
        self.parts = []

        # Pre-computed parts iterator
        self._iter_parts = iter([])

        # The part we're currently working with
        self._current_part = None

        # Cached computation of the body's length
        self._len = None

        # Our buffer
        self._buffer = CustomBytesIO(encoding=encoding)

        # Pre-compute each part's headers
        self._prepare_parts()

        # Load boundary into buffer
        self._write_boundary()

    @property
    def len(self):
        """Length of the multipart/form-data body.

        requests will first attempt to get the length of the body by calling
        ``len(body)`` and then by checking for the ``len`` attribute.

        On 32-bit systems, the ``__len__`` method cannot return anything
        larger than an integer (in C) can hold. If the total size of the body
        is even slightly larger than 4GB, users will see an OverflowError.
        This manifested itself in `bug #80`_.

        As such, we now calculate the length lazily as a property.

        .. _bug #80:
            https://github.com/sigmavirus24/requests-toolbelt/issues/80
        """
        # If _len isn't already calculated, calculate, return, and set it
        return self._len or self._calculate_length()

    def __repr__(self):
        return '<MultipartEncoder: {0!r}>'.format(self.fields)

    def _calculate_length(self):
        """
        This uses the parts to calculate the length of the body.

        This returns the calculated length so __len__ can be lazy.
        """
        boundary_len = len(self.boundary)  # Length of --{boundary}
        # boundary length + header length + body length + len('\r\n') * 2
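        # plus the length of the closing '--{boundary}--\r\n' line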
        self._len = sum(
            (boundary_len + super_len(p) + 4) for p in self.parts
        ) + boundary_len + 4
        return self._len

    def _calculate_load_amount(self, read_size):
        """This calculates how many bytes need to be added to the buffer.

        When a consumer reads ``x`` from the buffer, there are two cases to
        satisfy:

        1. Enough data in the buffer to return the requested amount
        2. Not enough data

        This function uses the amount of unread bytes in the buffer and
        determines how much the Encoder has to load before it can return the
        requested amount of bytes.

        :param int read_size: the number of bytes the consumer requests
        :returns: int -- the number of bytes that must be loaded into the
            buffer before the read can be satisfied. This will be strictly
            non-negative
        """
        amount = read_size - super_len(self._buffer)
        return amount if amount > 0 else 0

    def _load(self, amount):
        """Load ``amount`` number of bytes into the buffer."""
        self._buffer.smart_truncate()
        part = self._current_part or self._next_part()
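        # An ``amount`` of -1 (a ``read()`` with no size) means "load
        # everything that remains".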
        while amount == -1 or amount > 0:
            written = 0
            if not part.bytes_left_to_write():
                written += self._write(b'\r\n')
                written += self._write_boundary()
                part = self._next_part()

                if not part:
                    written += self._write_closing_boundary()
                    self.finished = True
                    break

            written += part.write_to(self._buffer, amount)

            if amount != -1:
                amount -= written

    def _next_part(self):
        try:
            p = self._current_part = next(self._iter_parts)
        except StopIteration:
            p = None
        return p

    def _iter_fields(self):
        _fields = self.fields
        if hasattr(self.fields, 'items'):
            _fields = list(self.fields.items())
        for k, v in _fields:
            file_name = None
            file_type = None
            file_headers = None
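            # Tuple/list values mirror requests' ``files`` parameter:
            # (filename, data[, content_type[, headers]])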
            if isinstance(v, (list, tuple)):
                if len(v) == 2:
                    file_name, file_pointer = v
                elif len(v) == 3:
                    file_name, file_pointer, file_type = v
                else:
                    file_name, file_pointer, file_type, file_headers = v
            else:
                file_pointer = v

            field = fields.RequestField(name=k, data=file_pointer,
                                        filename=file_name,
                                        headers=file_headers)
            field.make_multipart(content_type=file_type)
            yield field

    def _prepare_parts(self):
        """This uses the fields provided by the user and creates Part objects.

        It populates the `parts` attribute and uses that to create a
        generator for iteration.
        """
        enc = self.encoding
        self.parts = [Part.from_field(f, enc) for f in self._iter_fields()]
        self._iter_parts = iter(self.parts)

    def _write(self, bytes_to_write):
        """Write the bytes to the end of the buffer.

        :param bytes bytes_to_write: byte-string (or bytearray) to append to
            the buffer
        :returns: int -- the number of bytes written
        """
        return self._buffer.append(bytes_to_write)

    def _write_boundary(self):
        """Write the boundary to the end of the buffer."""
        return self._write(self._encoded_boundary)

    def _write_closing_boundary(self):
        """Write the bytes necessary to finish a multipart/form-data body."""
        with reset(self._buffer):
            self._buffer.seek(-2, 2)
            self._buffer.write(b'--\r\n')
        return 2

    def _write_headers(self, headers):
        """Write the current part's headers to the buffer."""
        return self._write(encode_with(headers, self.encoding))

    @property
    def content_type(self):
        return str(
            'multipart/form-data; boundary={0}'.format(self.boundary_value)
        )

    def to_string(self):
        return self.read()

    def read(self, size=-1):
        """Read data from the streaming encoder.

        :param int size: (optional) If provided, ``read`` will return exactly
            that many bytes. If it is not provided, it will return the
            remaining bytes.
        :returns: bytes
        """
        if self.finished:
            return self._buffer.read(size)

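        # Work out how much fresh data has to be generated so this read can
        # be satisfied entirely from the buffer.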
        bytes_to_load = size
        if bytes_to_load != -1 and bytes_to_load is not None:
            bytes_to_load = self._calculate_load_amount(int(size))

        self._load(bytes_to_load)
        return self._buffer.read(size)


def IDENTITY(monitor):
    return monitor


class MultipartEncoderMonitor(object):

    """
    An object used to monitor the progress of a :class:`MultipartEncoder`.

    The :class:`MultipartEncoder` should only be responsible for preparing and
    streaming the data. Anyone who wishes to monitor the upload should not
    have to manage that through the encoder itself. Using this class, they
    can monitor an encoder and register a callback. The callback receives the
    instance of the monitor.

    To use this monitor, you construct your :class:`MultipartEncoder` as you
    normally would.

    .. code-block:: python

        from requests_toolbelt import (MultipartEncoder,
                                       MultipartEncoderMonitor)
        import requests

        def callback(monitor):
            # Do something with this information
            pass

        m = MultipartEncoder(fields={'field0': 'value0'})
        monitor = MultipartEncoderMonitor(m, callback)
        headers = {'Content-Type': monitor.content_type}
        r = requests.post('https://httpbin.org/post', data=monitor,
                          headers=headers)

    Alternatively, if your use case is very simple, you can use the following
    pattern.

    .. code-block:: python

        from requests_toolbelt import MultipartEncoderMonitor
        import requests

        def callback(monitor):
            # Do something with this information
            pass

        monitor = MultipartEncoderMonitor.from_fields(
            fields={'field0': 'value0'}, callback=callback
        )
        headers = {'Content-Type': monitor.content_type}
        r = requests.post('https://httpbin.org/post', data=monitor,
                          headers=headers)

    """

    def __init__(self, encoder, callback=None):
        #: Instance of the :class:`MultipartEncoder` being monitored
        self.encoder = encoder

        #: Optional function to call after a read
        self.callback = callback or IDENTITY

        #: Number of bytes already read from the :class:`MultipartEncoder`
        #: instance
        self.bytes_read = 0

        #: Avoid the same overflow problem described in bug #80
        self.len = self.encoder.len

    @classmethod
    def from_fields(cls, fields, boundary=None, encoding='utf-8',
                    callback=None):
        encoder = MultipartEncoder(fields, boundary, encoding)
        return cls(encoder, callback)

    @property
    def content_type(self):
        return self.encoder.content_type

    def to_string(self):
        return self.read()

    def read(self, size=-1):
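        # Read from the wrapped encoder, keep a running total, and hand this
        # monitor to the callback so it can inspect ``bytes_read``.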
        string = self.encoder.read(size)
        self.bytes_read += len(string)
        self.callback(self)
        return string


def encode_with(string, encoding):
    """Encode ``string`` with ``encoding`` if necessary.

    :param str string: If ``string`` is a bytes object, it is returned
        unchanged. Otherwise, it is encoded with the provided encoding.
    :param str encoding: The encoding with which to encode ``string``.
    :returns: encoded bytes object
    """
    if not (string is None or isinstance(string, bytes)):
        return string.encode(encoding)
    return string


def readable_data(data, encoding):
    """Coerce the data to an object with a ``read`` method."""
    if hasattr(data, 'read'):
        return data

    return CustomBytesIO(data, encoding)


@contextlib.contextmanager
def reset(buffer):
    """Keep track of the buffer's current position and write to the end.

    This is a context manager meant to be used when adding data to the buffer.
    It eliminates the need for every function to be concerned with the
    position of the cursor in the buffer.
    """
    original_position = buffer.tell()
    buffer.seek(0, 2)
    yield
    buffer.seek(original_position, 0)


def coerce_data(data, encoding):
    """Ensure that every object's __len__ behaves uniformly."""
    if not isinstance(data, CustomBytesIO):
        if hasattr(data, 'getvalue'):
            return CustomBytesIO(data.getvalue(), encoding)

        if hasattr(data, 'fileno'):
            return FileWrapper(data)

        if not hasattr(data, 'read'):
            return CustomBytesIO(data, encoding)

    return data


def to_list(fields):
    if hasattr(fields, 'items'):
        return list(fields.items())
    return list(fields)


class Part(object):
    def __init__(self, headers, body):
        self.headers = headers
        self.body = body
        self.headers_unread = True
        self.len = len(self.headers) + super_len(self.body)

    @classmethod
    def from_field(cls, field, encoding):
        """Create a part from a Request Field generated by urllib3."""
        headers = encode_with(field.render_headers(), encoding)
        body = coerce_data(field.data, encoding)
        return cls(headers, body)

    def bytes_left_to_write(self):
        """Determine if there are bytes left to write.

        :returns: bool -- ``True`` if there are bytes left to write, otherwise
            ``False``
        """
        to_read = 0
        if self.headers_unread:
            to_read += len(self.headers)

        return (to_read + super_len(self.body)) > 0

    def write_to(self, buffer, size):
        """Write the requested amount of bytes to the buffer provided.

        The number of bytes written may exceed size on the first read since
        we load the headers eagerly.

        :param CustomBytesIO buffer: buffer we want to write bytes to
        :param int size: number of bytes requested to be written to the buffer
        :returns: int -- number of bytes actually written
        """
        written = 0
        if self.headers_unread:
            written += buffer.append(self.headers)
            self.headers_unread = False

        while super_len(self.body) > 0 and (size == -1 or written < size):
            amount_to_read = size
            if size != -1:
                amount_to_read = size - written
            written += buffer.append(self.body.read(amount_to_read))

        return written


class CustomBytesIO(io.BytesIO):
    def __init__(self, buffer=None, encoding='utf-8'):
        buffer = encode_with(buffer, encoding)
        super(CustomBytesIO, self).__init__(buffer)

    def _get_end(self):
        current_pos = self.tell()
        self.seek(0, 2)
        length = self.tell()
        self.seek(current_pos, 0)
        return length

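    # ``len`` reports the number of *unread* bytes rather than the total
    # size of the buffer; requests' ``super_len`` picks this attribute up.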
    @property
    def len(self):
        length = self._get_end()
        return length - self.tell()

    def append(self, bytes):
        with reset(self):
            written = self.write(bytes)
        return written

    def smart_truncate(self):
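        # Once at least as many bytes have been consumed as remain unread,
        # copy the unread remainder to the front of the buffer and discard
        # the consumed prefix so memory use stays bounded.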
        to_be_read = super_len(self)
        already_read = self._get_end() - to_be_read

        if already_read >= to_be_read:
            old_bytes = self.read()
            self.seek(0, 0)
            self.truncate()
            self.write(old_bytes)
            self.seek(0, 0)  # We want to be at the beginning


class FileWrapper(object):
    def __init__(self, file_object):
        self.fd = file_object

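    # As with CustomBytesIO, ``len`` reports only the bytes that have not
    # yet been read from the underlying file.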
    @property
    def len(self):
        return super_len(self.fd) - self.fd.tell()

    def read(self, length=-1):
        return self.fd.read(length)