Mercurial > repos > davidvanzessen > upload_zip_file
comparison uploadzip.py @ 3:6f24bce6817e draft
Uploaded
| author | davidvanzessen | 
|---|---|
| date | Mon, 30 Mar 2015 10:13:25 -0400 | 
| parents | 30d16d36d536 | 
| children | a2f200121dda | 
   comparison
  equal
  deleted
  inserted
  replaced
| 2:30d16d36d536 | 3:6f24bce6817e | 
|---|---|
| 34 | 34 | 
| 35 def stop_err( msg, ret=1 ): | 35 def stop_err( msg, ret=1 ): | 
| 36 sys.stderr.write( msg ) | 36 sys.stderr.write( msg ) | 
| 37 sys.exit( ret ) | 37 sys.exit( ret ) | 
| 38 def file_err( msg, dataset, json_file ): | 38 def file_err( msg, dataset, json_file ): | 
| 39 json_file.write( to_json_string( dict( type = 'dataset', | 39 json_file.write( dumps( dict( type = 'dataset', | 
| 40 ext = 'data', | 40 ext = 'data', | 
| 41 dataset_id = dataset.dataset_id, | 41 dataset_id = dataset.dataset_id, | 
| 42 stderr = msg ) ) + "\n" ) | 42 stderr = msg ) ) + "\n" ) | 
| 43 # never remove a server-side upload | 43 # never remove a server-side upload | 
| 44 if dataset.type in ( 'server_dir', 'path_paste' ): | 44 if dataset.type in ( 'server_dir', 'path_paste' ): | 
| 111 elif dataset.is_multi_byte: | 111 elif dataset.is_multi_byte: | 
| 112 data_type = 'multi-byte char' | 112 data_type = 'multi-byte char' | 
| 113 ext = sniff.guess_ext( dataset.path, is_multi_byte=True ) | 113 ext = sniff.guess_ext( dataset.path, is_multi_byte=True ) | 
| 114 # Is dataset content supported sniffable binary? | 114 # Is dataset content supported sniffable binary? | 
| 115 else: | 115 else: | 
| 116 # FIXME: This ignores the declared sniff order in datatype_conf.xml | |
| 117 # resulting in improper behavior | |
| 116 type_info = Binary.is_sniffable_binary( dataset.path ) | 118 type_info = Binary.is_sniffable_binary( dataset.path ) | 
| 117 if type_info: | 119 if type_info: | 
| 118 data_type = type_info[0] | 120 data_type = type_info[0] | 
| 119 ext = type_info[1] | 121 ext = type_info[1] | 
| 120 data_type="binary" | 122 data_type = 'compressed archive' #upload zip file modification | 
| 121 if not data_type: | 123 if not data_type: | 
| 122 # See if we have a gzipped file, which, if it passes our restrictions, we'll uncompress | 124 root_datatype = registry.get_datatype_by_extension( dataset.file_type ) | 
| 123 is_gzipped, is_valid = check_gzip( dataset.path ) | 125 if getattr( root_datatype, 'compressed', False ): | 
| 124 if is_gzipped and not is_valid: | 126 data_type = 'compressed archive' | 
| 125 file_err( 'The gzipped uploaded file contains inappropriate content', dataset, json_file ) | 127 ext = dataset.file_type | 
| 126 return | 128 else: | 
| 127 elif is_gzipped and is_valid: | 129 # See if we have a gzipped file, which, if it passes our restrictions, we'll uncompress | 
| 128 if link_data_only == 'copy_files': | 130 is_gzipped, is_valid = check_gzip( dataset.path ) | 
| 129 # We need to uncompress the temp_name file, but BAM files must remain compressed in the BGZF format | 131 if is_gzipped and not is_valid: | 
| 130 CHUNK_SIZE = 2**20 # 1Mb | |
| 131 fd, uncompressed = tempfile.mkstemp( prefix='data_id_%s_upload_gunzip_' % dataset.dataset_id, dir=os.path.dirname( output_path ), text=False ) | |
| 132 gzipped_file = gzip.GzipFile( dataset.path, 'rb' ) | |
| 133 while 1: | |
| 134 try: | |
| 135 chunk = gzipped_file.read( CHUNK_SIZE ) | |
| 136 except IOError: | |
| 137 os.close( fd ) | |
| 138 os.remove( uncompressed ) | |
| 139 file_err( 'Problem decompressing gzipped data', dataset, json_file ) | |
| 140 return | |
| 141 if not chunk: | |
| 142 break | |
| 143 os.write( fd, chunk ) | |
| 144 os.close( fd ) | |
| 145 gzipped_file.close() | |
| 146 # Replace the gzipped file with the decompressed file if it's safe to do so | |
| 147 if dataset.type in ( 'server_dir', 'path_paste' ) or not in_place: | |
| 148 dataset.path = uncompressed | |
| 149 else: | |
| 150 shutil.move( uncompressed, dataset.path ) | |
| 151 os.chmod(dataset.path, 0644) | |
| 152 dataset.name = dataset.name.rstrip( '.gz' ) | |
| 153 data_type = 'gzip' | |
| 154 if not data_type and bz2 is not None: | |
| 155 # See if we have a bz2 file, much like gzip | |
| 156 is_bzipped, is_valid = check_bz2( dataset.path ) | |
| 157 if is_bzipped and not is_valid: | |
| 158 file_err( 'The gzipped uploaded file contains inappropriate content', dataset, json_file ) | 132 file_err( 'The gzipped uploaded file contains inappropriate content', dataset, json_file ) | 
| 159 return | 133 return | 
| 160 elif is_bzipped and is_valid: | 134 elif is_gzipped and is_valid: | 
| 161 if link_data_only == 'copy_files': | 135 if link_data_only == 'copy_files': | 
| 162 # We need to uncompress the temp_name file | 136 # We need to uncompress the temp_name file, but BAM files must remain compressed in the BGZF format | 
| 163 CHUNK_SIZE = 2**20 # 1Mb | 137 CHUNK_SIZE = 2**20 # 1Mb | 
| 164 fd, uncompressed = tempfile.mkstemp( prefix='data_id_%s_upload_bunzip2_' % dataset.dataset_id, dir=os.path.dirname( output_path ), text=False ) | 138 fd, uncompressed = tempfile.mkstemp( prefix='data_id_%s_upload_gunzip_' % dataset.dataset_id, dir=os.path.dirname( output_path ), text=False ) | 
| 165 bzipped_file = bz2.BZ2File( dataset.path, 'rb' ) | 139 gzipped_file = gzip.GzipFile( dataset.path, 'rb' ) | 
| 166 while 1: | 140 while 1: | 
| 167 try: | 141 try: | 
| 168 chunk = bzipped_file.read( CHUNK_SIZE ) | 142 chunk = gzipped_file.read( CHUNK_SIZE ) | 
| 169 except IOError: | 143 except IOError: | 
| 170 os.close( fd ) | 144 os.close( fd ) | 
| 171 os.remove( uncompressed ) | 145 os.remove( uncompressed ) | 
| 172 file_err( 'Problem decompressing bz2 compressed data', dataset, json_file ) | 146 file_err( 'Problem decompressing gzipped data', dataset, json_file ) | 
| 173 return | 147 return | 
| 174 if not chunk: | 148 if not chunk: | 
| 175 break | 149 break | 
| 176 os.write( fd, chunk ) | 150 os.write( fd, chunk ) | 
| 177 os.close( fd ) | 151 os.close( fd ) | 
| 178 bzipped_file.close() | 152 gzipped_file.close() | 
| 179 # Replace the bzipped file with the decompressed file if it's safe to do so | 153 # Replace the gzipped file with the decompressed file if it's safe to do so | 
| 180 if dataset.type in ( 'server_dir', 'path_paste' ) or not in_place: | 154 if dataset.type in ( 'server_dir', 'path_paste' ) or not in_place: | 
| 181 dataset.path = uncompressed | 155 dataset.path = uncompressed | 
| 182 else: | 156 else: | 
| 183 shutil.move( uncompressed, dataset.path ) | 157 shutil.move( uncompressed, dataset.path ) | 
| 184 os.chmod(dataset.path, 0644) | 158 os.chmod(dataset.path, 0644) | 
| 185 dataset.name = dataset.name.rstrip( '.bz2' ) | 159 dataset.name = dataset.name.rstrip( '.gz' ) | 
| 186 data_type = 'bz2' | 160 data_type = 'gzip' | 
| 187 if not data_type: | 161 if not data_type and bz2 is not None: | 
| 188 # See if we have a zip archive | 162 # See if we have a bz2 file, much like gzip | 
| 189 is_zipped = check_zip( dataset.path ) | 163 is_bzipped, is_valid = check_bz2( dataset.path ) | 
| 190 if is_zipped: | 164 if is_bzipped and not is_valid: | 
| 191 if link_data_only == 'copy_files': | 165 file_err( 'The gzipped uploaded file contains inappropriate content', dataset, json_file ) | 
| 192 CHUNK_SIZE = 2**20 # 1Mb | 166 return | 
| 193 uncompressed = None | 167 elif is_bzipped and is_valid: | 
| 194 uncompressed_name = None | 168 if link_data_only == 'copy_files': | 
| 195 unzipped = False | 169 # We need to uncompress the temp_name file | 
| 196 z = zipfile.ZipFile( dataset.path ) | 170 CHUNK_SIZE = 2**20 # 1Mb | 
| 197 for name in z.namelist(): | 171 fd, uncompressed = tempfile.mkstemp( prefix='data_id_%s_upload_bunzip2_' % dataset.dataset_id, dir=os.path.dirname( output_path ), text=False ) | 
| 198 if name.endswith('/'): | 172 bzipped_file = bz2.BZ2File( dataset.path, 'rb' ) | 
| 199 continue | 173 while 1: | 
| 200 if unzipped: | 174 try: | 
| 201 stdout = 'ZIP file contained more than one file, only the first file was added to Galaxy.' | 175 chunk = bzipped_file.read( CHUNK_SIZE ) | 
| 202 break | 176 except IOError: | 
| 203 fd, uncompressed = tempfile.mkstemp( prefix='data_id_%s_upload_zip_' % dataset.dataset_id, dir=os.path.dirname( output_path ), text=False ) | 177 os.close( fd ) | 
| 204 if sys.version_info[:2] >= ( 2, 6 ): | 178 os.remove( uncompressed ) | 
| 205 zipped_file = z.open( name ) | 179 file_err( 'Problem decompressing bz2 compressed data', dataset, json_file ) | 
| 206 while 1: | 180 return | 
| 181 if not chunk: | |
| 182 break | |
| 183 os.write( fd, chunk ) | |
| 184 os.close( fd ) | |
| 185 bzipped_file.close() | |
| 186 # Replace the bzipped file with the decompressed file if it's safe to do so | |
| 187 if dataset.type in ( 'server_dir', 'path_paste' ) or not in_place: | |
| 188 dataset.path = uncompressed | |
| 189 else: | |
| 190 shutil.move( uncompressed, dataset.path ) | |
| 191 os.chmod(dataset.path, 0644) | |
| 192 dataset.name = dataset.name.rstrip( '.bz2' ) | |
| 193 data_type = 'bz2' | |
| 194 if not data_type: | |
| 195 # See if we have a zip archive | |
| 196 is_zipped = check_zip( dataset.path ) | |
| 197 if is_zipped: | |
| 198 if link_data_only == 'copy_files': | |
| 199 CHUNK_SIZE = 2**20 # 1Mb | |
| 200 uncompressed = None | |
| 201 uncompressed_name = None | |
| 202 unzipped = False | |
| 203 z = zipfile.ZipFile( dataset.path ) | |
| 204 for name in z.namelist(): | |
| 205 if name.endswith('/'): | |
| 206 continue | |
| 207 if unzipped: | |
| 208 stdout = 'ZIP file contained more than one file, only the first file was added to Galaxy.' | |
| 209 break | |
| 210 fd, uncompressed = tempfile.mkstemp( prefix='data_id_%s_upload_zip_' % dataset.dataset_id, dir=os.path.dirname( output_path ), text=False ) | |
| 211 if sys.version_info[:2] >= ( 2, 6 ): | |
| 212 zipped_file = z.open( name ) | |
| 213 while 1: | |
| 214 try: | |
| 215 chunk = zipped_file.read( CHUNK_SIZE ) | |
| 216 except IOError: | |
| 217 os.close( fd ) | |
| 218 os.remove( uncompressed ) | |
| 219 file_err( 'Problem decompressing zipped data', dataset, json_file ) | |
| 220 return | |
| 221 if not chunk: | |
| 222 break | |
| 223 os.write( fd, chunk ) | |
| 224 os.close( fd ) | |
| 225 zipped_file.close() | |
| 226 uncompressed_name = name | |
| 227 unzipped = True | |
| 228 else: | |
| 229 # python < 2.6 doesn't have a way to read members in chunks(!) | |
| 207 try: | 230 try: | 
| 208 chunk = zipped_file.read( CHUNK_SIZE ) | 231 outfile = open( uncompressed, 'wb' ) | 
| 232 outfile.write( z.read( name ) ) | |
| 233 outfile.close() | |
| 234 uncompressed_name = name | |
| 235 unzipped = True | |
| 209 except IOError: | 236 except IOError: | 
| 210 os.close( fd ) | 237 os.close( fd ) | 
| 211 os.remove( uncompressed ) | 238 os.remove( uncompressed ) | 
| 212 file_err( 'Problem decompressing zipped data', dataset, json_file ) | 239 file_err( 'Problem decompressing zipped data', dataset, json_file ) | 
| 213 return | 240 return | 
| 214 if not chunk: | 241 z.close() | 
| 215 break | 242 # Replace the zipped file with the decompressed file if it's safe to do so | 
| 216 os.write( fd, chunk ) | 243 if uncompressed is not None: | 
| 217 os.close( fd ) | 244 if dataset.type in ( 'server_dir', 'path_paste' ) or not in_place: | 
| 218 zipped_file.close() | 245 dataset.path = uncompressed | 
| 219 uncompressed_name = name | 246 else: | 
| 220 unzipped = True | 247 shutil.move( uncompressed, dataset.path ) | 
| 248 os.chmod(dataset.path, 0644) | |
| 249 dataset.name = uncompressed_name | |
| 250 data_type = 'zip' | |
| 251 if not data_type: | |
| 252 # TODO refactor this logic. check_binary isn't guaranteed to be | |
| 253 # correct since it only looks at whether the first 100 chars are | |
| 254 # printable or not. If someone specifies a known unsniffable | |
| 255 # binary datatype and check_binary fails, the file gets mangled. | |
| 256 if check_binary( dataset.path ) or Binary.is_ext_unsniffable(dataset.file_type): | |
| 257 # We have a binary dataset, but it is not Bam, Sff or Pdf | |
| 258 data_type = 'binary' | |
| 259 #binary_ok = False | |
| 260 parts = dataset.name.split( "." ) | |
| 261 if len( parts ) > 1: | |
| 262 ext = parts[-1].strip().lower() | |
| 263 if not Binary.is_ext_unsniffable(ext): | |
| 264 file_err( 'The uploaded binary file contains inappropriate content', dataset, json_file ) | |
| 265 return | |
| 266 elif Binary.is_ext_unsniffable(ext) and dataset.file_type != ext: | |
| 267 err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % ( ext.capitalize(), ext ) | |
| 268 file_err( err_msg, dataset, json_file ) | |
| 269 return | |
| 270 if not data_type: | |
| 271 # We must have a text file | |
| 272 if check_html( dataset.path ): | |
| 273 file_err( 'The uploaded file contains inappropriate HTML content', dataset, json_file ) | |
| 274 return | |
| 275 if data_type != 'binary': | |
| 276 if link_data_only == 'copy_files': | |
| 277 if dataset.type in ( 'server_dir', 'path_paste' ) and data_type not in [ 'gzip', 'bz2', 'zip' ]: | |
| 278 in_place = False | |
| 279 # Convert universal line endings to Posix line endings, but allow the user to turn it off, | |
| 280 # so that it becomes possible to upload gzip, bz2 or zip files with binary data without | |
| 281 # corrupting the content of those files. | |
| 282 if dataset.to_posix_lines: | |
| 283 tmpdir = output_adjacent_tmpdir( output_path ) | |
| 284 tmp_prefix = 'data_id_%s_convert_' % dataset.dataset_id | |
| 285 if dataset.space_to_tab: | |
| 286 line_count, converted_path = sniff.convert_newlines_sep2tabs( dataset.path, in_place=in_place, tmp_dir=tmpdir, tmp_prefix=tmp_prefix ) | |
| 221 else: | 287 else: | 
| 222 # python < 2.6 doesn't have a way to read members in chunks(!) | 288 line_count, converted_path = sniff.convert_newlines( dataset.path, in_place=in_place, tmp_dir=tmpdir, tmp_prefix=tmp_prefix ) | 
| 223 try: | 289 if dataset.file_type == 'auto': | 
| 224 outfile = open( uncompressed, 'wb' ) | 290 ext = sniff.guess_ext( dataset.path, registry.sniff_order ) | 
| 225 outfile.write( z.read( name ) ) | 291 else: | 
| 226 outfile.close() | 292 ext = dataset.file_type | 
| 227 uncompressed_name = name | 293 data_type = ext | 
| 228 unzipped = True | |
| 229 except IOError: | |
| 230 os.close( fd ) | |
| 231 os.remove( uncompressed ) | |
| 232 file_err( 'Problem decompressing zipped data', dataset, json_file ) | |
| 233 return | |
| 234 z.close() | |
| 235 # Replace the zipped file with the decompressed file if it's safe to do so | |
| 236 if uncompressed is not None: | |
| 237 if dataset.type in ( 'server_dir', 'path_paste' ) or not in_place: | |
| 238 dataset.path = uncompressed | |
| 239 else: | |
| 240 shutil.move( uncompressed, dataset.path ) | |
| 241 os.chmod(dataset.path, 0644) | |
| 242 dataset.name = uncompressed_name | |
| 243 data_type = 'zip' | |
| 244 if not data_type: | |
| 245 if check_binary( dataset.path ): | |
| 246 # We have a binary dataset, but it is not Bam, Sff or Pdf | |
| 247 data_type = 'binary' | |
| 248 #binary_ok = False | |
| 249 parts = dataset.name.split( "." ) | |
| 250 if len( parts ) > 1: | |
| 251 ext = parts[-1].strip().lower() | |
| 252 if not Binary.is_ext_unsniffable(ext): | |
| 253 file_err( 'The uploaded binary file contains inappropriate content', dataset, json_file ) | |
| 254 return | |
| 255 elif Binary.is_ext_unsniffable(ext) and dataset.file_type != ext: | |
| 256 err_msg = "You must manually set the 'File Format' to '%s' when uploading %s files." % ( ext.capitalize(), ext ) | |
| 257 file_err( err_msg, dataset, json_file ) | |
| 258 return | |
| 259 if not data_type: | |
| 260 # We must have a text file | |
| 261 if check_html( dataset.path ): | |
| 262 file_err( 'The uploaded file contains inappropriate HTML content', dataset, json_file ) | |
| 263 return | |
| 264 if data_type != 'binary': | |
| 265 if link_data_only == 'copy_files': | |
| 266 if dataset.type in ( 'server_dir', 'path_paste' ) and data_type not in [ 'gzip', 'bz2', 'zip' ]: | |
| 267 in_place = False | |
| 268 # Convert universal line endings to Posix line endings, but allow the user to turn it off, | |
| 269 # so that it becomes possible to upload gzip, bz2 or zip files with binary data without | |
| 270 # corrupting the content of those files. | |
| 271 if dataset.to_posix_lines: | |
| 272 if dataset.space_to_tab: | |
| 273 line_count, converted_path = sniff.convert_newlines_sep2tabs( dataset.path, in_place=in_place ) | |
| 274 else: | |
| 275 line_count, converted_path = sniff.convert_newlines( dataset.path, in_place=in_place ) | |
| 276 if dataset.file_type == 'auto': | |
| 277 ext = sniff.guess_ext( dataset.path, registry.sniff_order ) | |
| 278 else: | |
| 279 ext = dataset.file_type | |
| 280 data_type = ext | |
| 281 # Save job info for the framework | 294 # Save job info for the framework | 
| 282 if ext == 'auto' and dataset.ext: | 295 if ext == 'auto' and dataset.ext: | 
| 283 ext = dataset.ext | 296 ext = dataset.ext | 
| 284 if ext == 'auto': | 297 if ext == 'auto': | 
| 285 ext = 'data' | 298 ext = 'data' | 
| 312 stdout = stdout, | 325 stdout = stdout, | 
| 313 name = dataset.name, | 326 name = dataset.name, | 
| 314 line_count = line_count ) | 327 line_count = line_count ) | 
| 315 if dataset.get('uuid', None) is not None: | 328 if dataset.get('uuid', None) is not None: | 
| 316 info['uuid'] = dataset.get('uuid') | 329 info['uuid'] = dataset.get('uuid') | 
| 317 json_file.write( to_json_string( info ) + "\n" ) | 330 json_file.write( dumps( info ) + "\n" ) | 
| 318 | 331 | 
| 319 if link_data_only == 'copy_files' and datatype.dataset_content_needs_grooming( output_path ): | 332 if link_data_only == 'copy_files' and datatype.dataset_content_needs_grooming( output_path ): | 
| 320 # Groom the dataset content if necessary | 333 # Groom the dataset content if necessary | 
| 321 datatype.groom_dataset_content( output_path ) | 334 datatype.groom_dataset_content( output_path ) | 
| 322 | 335 | 
| 338 file_err( 'Unable to fetch %s\n%s' % ( dp, str( e ) ), dataset, json_file ) | 351 file_err( 'Unable to fetch %s\n%s' % ( dp, str( e ) ), dataset, json_file ) | 
| 339 return | 352 return | 
| 340 dataset.path = temp_name | 353 dataset.path = temp_name | 
| 341 dp = temp_name | 354 dp = temp_name | 
| 342 if not value.is_binary: | 355 if not value.is_binary: | 
| 356 tmpdir = output_adjacent_tmpdir( output_path ) | |
| 357 tmp_prefix = 'data_id_%s_convert_' % dataset.dataset_id | |
| 343 if dataset.composite_file_paths[ value.name ].get( 'space_to_tab', value.space_to_tab ): | 358 if dataset.composite_file_paths[ value.name ].get( 'space_to_tab', value.space_to_tab ): | 
| 344 sniff.convert_newlines_sep2tabs( dp ) | 359 sniff.convert_newlines_sep2tabs( dp, tmp_dir=tmpdir, tmp_prefix=tmp_prefix ) | 
| 345 else: | 360 else: | 
| 346 sniff.convert_newlines( dp ) | 361 sniff.convert_newlines( dp, tmp_dir=tmpdir, tmp_prefix=tmp_prefix ) | 
| 347 shutil.move( dp, os.path.join( files_path, name ) ) | 362 shutil.move( dp, os.path.join( files_path, name ) ) | 
| 348 # Move the dataset to its "real" path | 363 # Move the dataset to its "real" path | 
| 349 shutil.move( dataset.primary_file, output_path ) | 364 shutil.move( dataset.primary_file, output_path ) | 
| 350 # Write the job info | 365 # Write the job info | 
| 351 info = dict( type = 'dataset', | 366 info = dict( type = 'dataset', | 
| 352 dataset_id = dataset.dataset_id, | 367 dataset_id = dataset.dataset_id, | 
| 353 stdout = 'uploaded %s file' % dataset.file_type ) | 368 stdout = 'uploaded %s file' % dataset.file_type ) | 
| 354 json_file.write( to_json_string( info ) + "\n" ) | 369 json_file.write( dumps( info ) + "\n" ) | 
| 370 | |
| 371 | |
| 372 def output_adjacent_tmpdir( output_path ): | |
| 373 """ For temp files that will ultimately be moved to output_path anyway | |
| 374 just create the file directly in output_path's directory so shutil.move | |
| 375 will work optimally. | |
| 376 """ | |
| 377 return os.path.dirname( output_path ) | |
| 378 | |
| 355 | 379 | 
| 356 def __main__(): | 380 def __main__(): | 
| 357 | 381 | 
| 358 if len( sys.argv ) < 4: | 382 if len( sys.argv ) < 4: | 
| 359 print >>sys.stderr, 'usage: upload.py <root> <datatypes_conf> <json paramfile> <output spec> ...' | 383 print >>sys.stderr, 'usage: upload.py <root> <datatypes_conf> <json paramfile> <output spec> ...' | 
| 364 | 388 | 
| 365 registry = Registry() | 389 registry = Registry() | 
| 366 registry.load_datatypes( root_dir=sys.argv[1], config=sys.argv[2] ) | 390 registry.load_datatypes( root_dir=sys.argv[1], config=sys.argv[2] ) | 
| 367 | 391 | 
| 368 for line in open( sys.argv[3], 'r' ): | 392 for line in open( sys.argv[3], 'r' ): | 
| 369 dataset = from_json_string( line ) | 393 dataset = loads( line ) | 
| 370 dataset = util.bunch.Bunch( **safe_dict( dataset ) ) | 394 dataset = util.bunch.Bunch( **safe_dict( dataset ) ) | 
| 371 try: | 395 try: | 
| 372 output_path = output_paths[int( dataset.dataset_id )][0] | 396 output_path = output_paths[int( dataset.dataset_id )][0] | 
| 373 except: | 397 except: | 
| 374 print >>sys.stderr, 'Output path for dataset %s not found on command line' % dataset.dataset_id | 398 print >>sys.stderr, 'Output path for dataset %s not found on command line' % dataset.dataset_id | 
