comparison cwpair2_util.py @ 5:2e0ddcc726f9 draft

Uploaded
author greg
date Tue, 24 Nov 2015 08:15:40 -0500
parents 279cdc63bcff
children 1fc26b8e618d
comparison
equal deleted inserted replaced
4:9ed566138ecb 5:2e0ddcc726f9
223 os.mkdir('%s_%s' % (match_method, SIMPLES)) 223 os.mkdir('%s_%s' % (match_method, SIMPLES))
224 os.mkdir('%s_%s' % (match_method, STATS_GRAPH)) 224 os.mkdir('%s_%s' % (match_method, STATS_GRAPH))
225 225
226 226
227 def process_file(dataset_path, galaxy_hid, method, threshold, up_distance, 227 def process_file(dataset_path, galaxy_hid, method, threshold, up_distance,
228 down_distance, binsize, output_files, sort_chromosome, sort_score): 228 down_distance, binsize, output_files, sort_score):
229 if method == 'all': 229 if method == 'all':
230 match_methods = METHODS.keys() 230 match_methods = METHODS.keys()
231 else: 231 else:
232 match_methods = [method] 232 match_methods = [method]
233 statistics = [] 233 statistics = []
238 threshold, 238 threshold,
239 up_distance, 239 up_distance,
240 down_distance, 240 down_distance,
241 binsize, 241 binsize,
242 output_files, 242 output_files,
243 sort_chromosome,
244 sort_score) 243 sort_score)
245 statistics.append(stats) 244 statistics.append(stats)
246 if output_files == 'all' and method == 'all': 245 if output_files == 'all' and method == 'all':
247 frequency_plot([s['dist'] for s in statistics], 246 frequency_plot([s['dist'] for s in statistics],
248 statistics[0]['graph_path'], 247 statistics[0]['graph_path'],
249 labels=METHODS.keys()) 248 labels=METHODS.keys())
250 return statistics 249 return statistics
251 250
252 251
253 def perform_process(dataset_path, galaxy_hid, method, threshold, up_distance, 252 def perform_process(dataset_path, galaxy_hid, method, threshold, up_distance,
254 down_distance, binsize, output_files, sort_chromosome, sort_score): 253 down_distance, binsize, output_files, sort_score):
255 output_details = output_files in ["all", "simple_orphan_detail"] 254 output_details = output_files in ["all", "simple_orphan_detail"]
256 output_plots = output_files in ["all"] 255 output_plots = output_files in ["all"]
257 output_orphans = output_files in ["all", "simple_orphan", "simple_orphan_detail"] 256 output_orphans = output_files in ["all", "simple_orphan", "simple_orphan_detail"]
258 # Keep track of statistics for the output file 257 # Keep track of statistics for the output file
259 statistics = {} 258 statistics = {}
365 if output_orphans: 364 if output_orphans:
366 for cpeak in crick: 365 for cpeak in crick:
367 orphan_output.writerow((cname, cpeak[0], cpeak[1], cpeak[2], cpeak[3])) 366 orphan_output.writerow((cname, cpeak[0], cpeak[1], cpeak[2], cpeak[3]))
368 # Keep track of orphans for statistics. 367 # Keep track of orphans for statistics.
369 orphans += len(crick) 368 orphans += len(crick)
370 # Sort output by chromosome if specified.
371 if sort_chromosome == "asc":
372 try:
373 x.sort(key=lambda data: int(data[3]))
374 x.sort(key=lambda data: int(data[0]))
375 except:
376 # Cannot sort because chromosome number is not a numeric.
377 pass
378 elif sort_chromosome == "desc":
379 try:
380 x.sort(key=lambda data: int(data[0]), reverse=True)
381 x.sort(key=lambda data: int(data[3]), reverse=True)
382 except:
383 # Cannot sort because chromosome number is not a numeric.
384 pass
385 # Sort output by score if specified. 369 # Sort output by score if specified.
386 if sort_score == "desc": 370 if sort_score == "desc":
387 x.sort(key=lambda data: float(data[5]), reverse=True) 371 x.sort(key=lambda data: float(data[5]), reverse=True)
388 elif sort_score == "asc": 372 elif sort_score == "asc":
389 x.sort(key=lambda data: float(data[5])) 373 x.sort(key=lambda data: float(data[5]))
390 # Writing a summary to txt or gff format file 374 # Writing a summary to gff format file
391 for row in x: 375 for row in x:
392 row_tmp = list(row) 376 row_tmp = list(row)
393 # Dataset in tuple cannot be modified in Python, so row will 377 # Dataset in tuple cannot be modified in Python, so row will
394 # be converted to list format to add 'chr'. 378 # be converted to list format to add 'chr'.
395 if row_tmp[0] == "999": 379 if row_tmp[0] == "999":