# upload_zip_file: comparison of uploadzip.py @ 2:30d16d36d536 (draft)

Commit message: Uploaded

| field | value |
|---|---|
| author | davidvanzessen |
| date | Mon, 30 Mar 2015 07:58:53 -0400 |
| parents | 4f3d79062c18 |
| children | 6f24bce6817e |
Changes from parent revision 1:fb547483e7bd (left) to 2:30d16d36d536 (right). Hunk 1 (old lines 115-127, new lines 115-285) replaces rev 1's unconditional `shutil.move` of the upload with Galaxy's standard decompression and content-check cascade; it is shown below in sections, with the new-file line ranges noted for each.
Shared context, the removed rev 1 logic, and the new gzip branch (new lines 115-153):

```diff
     else:
         type_info = Binary.is_sniffable_binary( dataset.path )
         if type_info:
             data_type = type_info[0]
             ext = type_info[1]
-            data_type="binary"
     if not data_type:
-        shutil.move( dataset.path, output_path )
-        #data_type = "data"
+        # See if we have a gzipped file, which, if it passes our restrictions, we'll uncompress
+        is_gzipped, is_valid = check_gzip( dataset.path )
+        if is_gzipped and not is_valid:
+            file_err( 'The gzipped uploaded file contains inappropriate content', dataset, json_file )
+            return
+        elif is_gzipped and is_valid:
+            if link_data_only == 'copy_files':
+                # We need to uncompress the temp_name file, but BAM files must remain compressed in the BGZF format
+                CHUNK_SIZE = 2**20  # 1Mb
+                fd, uncompressed = tempfile.mkstemp( prefix='data_id_%s_upload_gunzip_' % dataset.dataset_id, dir=os.path.dirname( output_path ), text=False )
+                gzipped_file = gzip.GzipFile( dataset.path, 'rb' )
+                while 1:
+                    try:
+                        chunk = gzipped_file.read( CHUNK_SIZE )
+                    except IOError:
+                        os.close( fd )
+                        os.remove( uncompressed )
+                        file_err( 'Problem decompressing gzipped data', dataset, json_file )
+                        return
+                    if not chunk:
+                        break
+                    os.write( fd, chunk )
+                os.close( fd )
+                gzipped_file.close()
+                # Replace the gzipped file with the decompressed file if it's safe to do so
+                if dataset.type in ( 'server_dir', 'path_paste' ) or not in_place:
+                    dataset.path = uncompressed
+                else:
+                    shutil.move( uncompressed, dataset.path )
+                os.chmod(dataset.path, 0644)
+            dataset.name = dataset.name.rstrip( '.gz' )
+            data_type = 'gzip'
```
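The gzip branch streams the archive into a temp file in 1 MB chunks, so a corrupt archive never clobbers the original upload. A minimal standalone sketch of the same pattern on modern Python (the helper name `decompress_gzip_to_temp` is illustrative, not part of the tool):

```python
import gzip
import os
import shutil
import tempfile

CHUNK_SIZE = 2 ** 20  # 1 MB, the same read size the tool uses

def decompress_gzip_to_temp(src_path, work_dir):
    # Stream the gzip payload into a named temp file; the caller decides
    # whether the temp file replaces the original upload.
    fd, tmp_path = tempfile.mkstemp(prefix='upload_gunzip_', dir=work_dir)
    try:
        with os.fdopen(fd, 'wb') as out, gzip.open(src_path, 'rb') as gz:
            shutil.copyfileobj(gz, out, CHUNK_SIZE)
    except (OSError, EOFError):
        os.remove(tmp_path)  # mirror the tool's cleanup on a bad archive
        raise
    return tmp_path
```

In the tool, the temp file then either becomes `dataset.path` outright or is moved over it and chmod'ed, depending on whether in-place modification of the upload is allowed.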
The bz2 branch, which mirrors the gzip logic (new lines 154-186):

```diff
+    if not data_type and bz2 is not None:
+        # See if we have a bz2 file, much like gzip
+        is_bzipped, is_valid = check_bz2( dataset.path )
+        if is_bzipped and not is_valid:
+            file_err( 'The bz2-compressed uploaded file contains inappropriate content', dataset, json_file )
+            return
+        elif is_bzipped and is_valid:
+            if link_data_only == 'copy_files':
+                # We need to uncompress the temp_name file
+                CHUNK_SIZE = 2**20  # 1Mb
+                fd, uncompressed = tempfile.mkstemp( prefix='data_id_%s_upload_bunzip2_' % dataset.dataset_id, dir=os.path.dirname( output_path ), text=False )
+                bzipped_file = bz2.BZ2File( dataset.path, 'rb' )
+                while 1:
+                    try:
+                        chunk = bzipped_file.read( CHUNK_SIZE )
+                    except IOError:
+                        os.close( fd )
+                        os.remove( uncompressed )
+                        file_err( 'Problem decompressing bz2 compressed data', dataset, json_file )
+                        return
+                    if not chunk:
+                        break
+                    os.write( fd, chunk )
+                os.close( fd )
+                bzipped_file.close()
+                # Replace the bzipped file with the decompressed file if it's safe to do so
+                if dataset.type in ( 'server_dir', 'path_paste' ) or not in_place:
+                    dataset.path = uncompressed
+                else:
+                    shutil.move( uncompressed, dataset.path )
+                os.chmod(dataset.path, 0644)
+            dataset.name = dataset.name.rstrip( '.bz2' )
+            data_type = 'bz2'
```
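One caveat in both branches above: `str.rstrip` strips a *set of characters*, not a suffix, so `dataset.name.rstrip( '.gz' )` can trim too much when the stem itself ends in `g`, `z`, or `.`:

```python
>>> 'reads.gz'.rstrip('.gz')
'reads'
>>> 'log.gz'.rstrip('.gz')    # stem ends in 'g', so it over-strips
'lo'
>>> 'log.gz'[:-len('.gz')]    # a suffix slice avoids the problem
'log'
```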
The zip branch (new lines 187-243):

```diff
+    if not data_type:
+        # See if we have a zip archive
+        is_zipped = check_zip( dataset.path )
+        if is_zipped:
+            if link_data_only == 'copy_files':
+                CHUNK_SIZE = 2**20  # 1Mb
+                uncompressed = None
+                uncompressed_name = None
+                unzipped = False
+                z = zipfile.ZipFile( dataset.path )
+                for name in z.namelist():
+                    if name.endswith('/'):
+                        continue
+                    if unzipped:
+                        stdout = 'ZIP file contained more than one file, only the first file was added to Galaxy.'
+                        break
+                    fd, uncompressed = tempfile.mkstemp( prefix='data_id_%s_upload_zip_' % dataset.dataset_id, dir=os.path.dirname( output_path ), text=False )
+                    if sys.version_info[:2] >= ( 2, 6 ):
+                        zipped_file = z.open( name )
+                        while 1:
+                            try:
+                                chunk = zipped_file.read( CHUNK_SIZE )
+                            except IOError:
+                                os.close( fd )
+                                os.remove( uncompressed )
+                                file_err( 'Problem decompressing zipped data', dataset, json_file )
+                                return
+                            if not chunk:
+                                break
+                            os.write( fd, chunk )
+                        os.close( fd )
+                        zipped_file.close()
+                        uncompressed_name = name
+                        unzipped = True
+                    else:
+                        # python < 2.5 doesn't have a way to read members in chunks(!)
+                        try:
+                            outfile = open( uncompressed, 'wb' )
+                            outfile.write( z.read( name ) )
+                            outfile.close()
+                            uncompressed_name = name
+                            unzipped = True
+                        except IOError:
+                            os.close( fd )
+                            os.remove( uncompressed )
+                            file_err( 'Problem decompressing zipped data', dataset, json_file )
+                            return
+                z.close()
+                # Replace the zipped file with the decompressed file if it's safe to do so
+                if uncompressed is not None:
+                    if dataset.type in ( 'server_dir', 'path_paste' ) or not in_place:
+                        dataset.path = uncompressed
+                    else:
+                        shutil.move( uncompressed, dataset.path )
+                    os.chmod(dataset.path, 0644)
+                    dataset.name = uncompressed_name
+            data_type = 'zip'
```
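On Python >= 2.6 the branch above streams each member through `ZipFile.open`, while the `z.read( name )` fallback loads a whole member into memory. A sketch of the same first-member-only policy on current Python, using `shutil.copyfileobj` in place of the hand-written chunk loop (the helper name is illustrative):

```python
import shutil
import tempfile
import zipfile

def extract_first_member(zip_path, work_dir):
    # Take the first non-directory member, stream it to a temp file,
    # and report both the temp path and the member's name.
    with zipfile.ZipFile(zip_path) as z:
        for info in z.infolist():
            if info.filename.endswith('/'):
                continue  # skip directory entries, as the tool does
            with z.open(info) as member, \
                    tempfile.NamedTemporaryFile(dir=work_dir, delete=False) as out:
                shutil.copyfileobj(member, out, 2 ** 20)
            return out.name, info.filename
    return None, None
```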
Binary and HTML content checks (new lines 244-263):

```diff
+    if not data_type:
+        if check_binary( dataset.path ):
+            # We have a binary dataset, but it is not Bam, Sff or Pdf
+            data_type = 'binary'
+            #binary_ok = False
+            parts = dataset.name.split( "." )
+            if len( parts ) > 1:
+                ext = parts[-1].strip().lower()
+                if not Binary.is_ext_unsniffable(ext):
+                    file_err( 'The uploaded binary file contains inappropriate content', dataset, json_file )
+                    return
+                elif Binary.is_ext_unsniffable(ext) and dataset.file_type != ext:
+                    err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % ( ext.capitalize(), ext )
+                    file_err( err_msg, dataset, json_file )
+                    return
+    if not data_type:
+        # We must have a text file
+        if check_html( dataset.path ):
+            file_err( 'The uploaded file contains inappropriate HTML content', dataset, json_file )
+            return
```
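`check_binary`, `check_html`, and `Binary.is_ext_unsniffable` are Galaxy helpers, so their logic is not part of this diff. A common heuristic of the kind `check_binary` implements, offered as an assumption rather than Galaxy's actual code, is to scan a leading sample for NUL bytes:

```python
def looks_binary(path, sample_size=1024):
    # Assumed heuristic, not Galaxy's check_binary: treat the file as
    # binary if a NUL byte appears in the first sample_size bytes.
    with open(path, 'rb') as fh:
        return b'\x00' in fh.read(sample_size)
```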
Line-ending conversion, extension resolution, and shared trailing context (new lines 264-285, rejoining old line 123):

```diff
+    if data_type != 'binary':
+        if link_data_only == 'copy_files':
+            if dataset.type in ( 'server_dir', 'path_paste' ) and data_type not in [ 'gzip', 'bz2', 'zip' ]:
+                in_place = False
+            # Convert universal line endings to Posix line endings, but allow the user to turn it off,
+            # so that it becomes possible to upload gzip, bz2 or zip files with binary data without
+            # corrupting the content of those files.
+            if dataset.to_posix_lines:
+                if dataset.space_to_tab:
+                    line_count, converted_path = sniff.convert_newlines_sep2tabs( dataset.path, in_place=in_place )
+                else:
+                    line_count, converted_path = sniff.convert_newlines( dataset.path, in_place=in_place )
+        if dataset.file_type == 'auto':
+            ext = sniff.guess_ext( dataset.path, registry.sniff_order )
+        else:
+            ext = dataset.file_type
+        data_type = ext
     # Save job info for the framework
     if ext == 'auto' and dataset.ext:
         ext = dataset.ext
     if ext == 'auto':
         ext = 'data'
```
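`sniff.convert_newlines` and `sniff.convert_newlines_sep2tabs` come from Galaxy's sniff module and return a line count plus the path of the converted file. A simplified stand-in for the plain newline case, showing the contract the caller relies on (a sketch, not Galaxy's implementation):

```python
import os
import shutil
import tempfile

def convert_newlines(path, in_place=True):
    # Normalize CR/CRLF line endings to LF and return
    # (line_count, converted_path), as the caller expects.
    fd, tmp = tempfile.mkstemp(dir=os.path.dirname(path))
    count = 0
    with open(path, 'r', newline=None) as src, os.fdopen(fd, 'w', newline='\n') as dst:
        for count, line in enumerate(src, 1):
            dst.write(line)  # universal-newline reads already end in '\n'
    if in_place:
        shutil.move(tmp, path)
        tmp = path
    return count, tmp
```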
Hunk 2 (old lines 143-154, new lines 301-311) drops the existence guard before the final move:

```diff
                 pass
         else:
             # This should not happen, but it's here just in case
             shutil.copy( dataset.path, output_path )
     elif link_data_only == 'copy_files':
-        if os.path.exists(dataset.path):
-            shutil.move( dataset.path, output_path )
+        shutil.move( dataset.path, output_path )
     # Write the job info
     stdout = stdout or 'uploaded %s file' % data_type
     info = dict( type = 'dataset',
                  dataset_id = dataset.dataset_id,
                  ext = ext,
```
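The second hunk tightens failure behavior: rev 1 silently skipped the final move when the uploaded temp file had vanished, while rev 2 lets the error surface. The contrast, for reference:

```python
# rev 1: a missing dataset.path was silently ignored
if os.path.exists(dataset.path):
    shutil.move(dataset.path, output_path)

# rev 2: shutil.move raises (IOError on Python 2) if dataset.path is gone,
# so a lost upload fails loudly instead of producing no output file
shutil.move(dataset.path, output_path)
```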
