annotate compare_humann2_output.py @ 0:035a848cb73e draft

planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/compare_humann2_output commit 5c45ed58045ce1686aa069403f8a9426ea20bac5-dirty
author bebatut
date Tue, 12 Apr 2016 02:56:13 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
035a848cb73e planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/compare_humann2_output commit 5c45ed58045ce1686aa069403f8a9426ea20bac5-dirty
bebatut
parents:
diff changeset
1 #!/usr/bin/env python
035a848cb73e planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/compare_humann2_output commit 5c45ed58045ce1686aa069403f8a9426ea20bac5-dirty
bebatut
parents:
diff changeset
2 # -*- coding: utf-8 -*-
035a848cb73e planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/compare_humann2_output commit 5c45ed58045ce1686aa069403f8a9426ea20bac5-dirty
bebatut
parents:
diff changeset
3
035a848cb73e planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/compare_humann2_output commit 5c45ed58045ce1686aa069403f8a9426ea20bac5-dirty
bebatut
parents:
diff changeset
4 import sys
035a848cb73e planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/compare_humann2_output commit 5c45ed58045ce1686aa069403f8a9426ea20bac5-dirty
bebatut
parents:
diff changeset
5 import os
035a848cb73e planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/compare_humann2_output commit 5c45ed58045ce1686aa069403f8a9426ea20bac5-dirty
bebatut
parents:
diff changeset
6 import argparse
035a848cb73e planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/compare_humann2_output commit 5c45ed58045ce1686aa069403f8a9426ea20bac5-dirty
bebatut
parents:
diff changeset
7 import re
035a848cb73e planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/compare_humann2_output commit 5c45ed58045ce1686aa069403f8a9426ea20bac5-dirty
bebatut
parents:
diff changeset
8
035a848cb73e planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/compare_humann2_output commit 5c45ed58045ce1686aa069403f8a9426ea20bac5-dirty
bebatut
parents:
diff changeset
def extract_abundances(filepath, nb_charact_to_extract):
    """Read one abundance table and pick its most abundant characteristics.

    The file is parsed as tab-separated text. The first line is skipped as a
    header; on every other line the first column is the characteristic
    identifier and the second column is its abundance. Each abundance is
    multiplied by 100 before being stored (presumably the inputs are
    fractions that are turned into percentages -- confirm against the tool
    producing the files).

    Args:
        filepath: path of the tab-separated abundance file.
        nb_charact_to_extract: maximum number of identifiers to return;
            zero or a negative value yields an empty list.

    Returns:
        A tuple ``(abundances, more_abund_charact)``. ``abundances`` maps
        every identifier to its scaled abundance. ``more_abund_charact`` is
        the list of the (at most) ``nb_charact_to_extract`` identifiers with
        the highest abundance, most abundant first; ties keep file order.

    Raises:
        IndexError: if a non-blank line has fewer than two columns.
        ValueError: if the second column is not a number.
    """
    abundances = {}
    # File order is remembered separately so that ties are broken the same
    # way on every interpreter, whatever the dict iteration order is.
    ids_in_file_order = []
    with open(filepath, 'r') as abundance_file:
        next(abundance_file, None)  # skip the header line, if any
        for line in abundance_file:
            # Only strip the line terminator: blindly dropping the last
            # character would eat a digit when the file lacks a final newline.
            line = line.rstrip('\r\n')
            if not line.strip():
                continue
            split_line = line.split('\t')
            charact_id = split_line[0]
            if charact_id not in abundances:
                ids_in_file_order.append(charact_id)
            abundances[charact_id] = 100 * float(split_line[1])

    # sorted() is stable, also with reverse=True, so equal abundances stay in
    # the order they were read.
    ranked = sorted(ids_in_file_order, key=abundances.get, reverse=True)
    return abundances, ranked[:max(nb_charact_to_extract, 0)]
035a848cb73e planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/compare_humann2_output commit 5c45ed58045ce1686aa069403f8a9426ea20bac5-dirty
bebatut
parents:
diff changeset
36
035a848cb73e planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/compare_humann2_output commit 5c45ed58045ce1686aa069403f8a9426ea20bac5-dirty
bebatut
parents:
diff changeset
def format_characteristic_name(all_name):
    """Split a characteristic label into an identifier and a cleaned name.

    The label is cut at its first ``:``. The text before it is the
    identifier; the text after it, minus the single character following the
    colon (presumably a space -- confirm against the input files), is the
    name. Later colons are kept as part of the name. A label without any
    colon is returned as the identifier with an empty name.

    The name is then cleaned: ``/`` and ``-`` become spaces, apostrophes are
    removed, and the first parenthesised group ``(...)`` is dropped.

    Args:
        all_name: full label, e.g. ``"ID: some name"``.

    Returns:
        A tuple ``(charact_id, charact_name)``.
    """
    charact_id, separator, remainder = all_name.partition(':')
    # Skip the one character that follows the colon.
    charact_name = remainder[1:] if separator else ''

    for unwanted, replacement in (('/', ' '), ('-', ' '), ("'", '')):
        charact_name = charact_name.replace(unwanted, replacement)

    open_bracket = charact_name.find('(')
    if open_bracket != -1:
        # Look for the closing bracket only after the opening one, so that a
        # stray ')' earlier in the name cannot scramble the slices.
        close_bracket = charact_name.find(')', open_bracket)
        if close_bracket != -1:
            charact_name = (charact_name[:open_bracket]
                            + charact_name[close_bracket + 1:])
    return charact_id, charact_name
035a848cb73e planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/compare_humann2_output commit 5c45ed58045ce1686aa069403f8a9426ea20bac5-dirty
bebatut
parents:
diff changeset
53
035a848cb73e planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/compare_humann2_output commit 5c45ed58045ce1686aa069403f8a9426ea20bac5-dirty
bebatut
parents:
diff changeset
def write_more_abundant_charat(abundances, more_abund_charact, output_filepath):
    """Write the abundance of selected characteristics for every sample.

    The output is a tab-separated table. Its header is ``id``, ``name`` and
    then one column per key of ``abundances``. Each following row holds the
    identifier and formatted name of one entry of ``more_abund_charact``
    followed by its abundance in each sample, ``0`` when the sample does not
    contain it.

    Args:
        abundances: mapping of sample name to a mapping of characteristic
            label to abundance.
        more_abund_charact: iterable of characteristic labels to report.
        output_filepath: path of the file to (over)write.
    """
    with open(output_filepath, 'w') as output_file:
        sample_names = list(abundances.keys())
        output_file.write('id\tname\t' + '\t'.join(sample_names) + '\n')

        for mac in more_abund_charact:
            charact_id, charact_name = format_characteristic_name(mac)
            # One '\t<value>' cell per sample, in header order.
            cells = ''.join(
                '\t' + str(abundances[sample].get(mac, 0))
                for sample in sample_names)
            output_file.write(charact_id + '\t' + charact_name + cells + '\n')
035a848cb73e planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/compare_humann2_output commit 5c45ed58045ce1686aa069403f8a9426ea20bac5-dirty
bebatut
parents:
diff changeset
66
035a848cb73e planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/compare_humann2_output commit 5c45ed58045ce1686aa069403f8a9426ea20bac5-dirty
bebatut
parents:
diff changeset
def extract_similar_characteristics(abundances, sim_output_filepath,
                                    specific_output_files):
    """Report characteristics shared by all samples and those that are not.

    Characteristics present in every sample are written to
    ``sim_output_filepath`` as a tab-separated table (``id``, ``name``, then
    one abundance column per sample). For each sample, the characteristics it
    contains that are not shared by all samples are written to the file at
    the same position in ``specific_output_files`` (``id``, ``name``,
    ``abundances``). Summary counts are printed to standard output.

    Rows are written in sorted label order so the output is reproducible.
    The body runs under both Python 2 and Python 3.

    Args:
        abundances: mapping of sample name to a mapping of characteristic
            label to abundance. Samples are processed in its iteration order.
        sim_output_filepath: path of the table of shared characteristics.
        specific_output_files: sequence of output paths, one per sample, in
            the iteration order of ``abundances``.

    Returns:
        The set of characteristic labels present in every sample (empty when
        ``abundances`` is empty).

    Raises:
        IndexError: if ``specific_output_files`` has fewer entries than
            ``abundances`` has samples.
    """
    samples = list(abundances)

    sim_characteristics = set()
    if samples:
        sim_characteristics.update(abundances[samples[0]])
        for sample in samples[1:]:
            sim_characteristics.intersection_update(abundances[sample])
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Similar between all samples: %s' % len(sim_characteristics))

    with open(sim_output_filepath, 'w') as sim_output_file:
        sim_output_file.write('id\tname\t' + '\t'.join(samples) + '\n')
        for charact in sorted(sim_characteristics):
            charact_id, charact_name = format_characteristic_name(charact)
            row = [charact_id, charact_name]
            row.extend(str(abundances[sample][charact]) for sample in samples)
            sim_output_file.write('\t'.join(row) + '\n')

    print('Specific to samples:')
    for i, sample in enumerate(samples):
        sample_abundances = abundances[sample]
        nb_all = len(sample_abundances)
        specific = set(sample_abundances) - sim_characteristics

        print('  %s ' % (sample,))
        print(' All: %s' % nb_all)
        print(' Number of specific characteristics: %s' % len(specific))
        # A sample without any characteristic has nothing specific either;
        # avoid dividing by zero in that case.
        percentage = 100 * len(specific) / (1. * nb_all) if nb_all else 0.0
        print(' Percentage of specific characteristics: %s' % percentage)

        relative_abundance = 0
        with open(specific_output_files[i], 'w') as output_file:
            output_file.write('id\tname\tabundances\n')
            for charact in sorted(specific):
                charact_id, charact_name = format_characteristic_name(charact)
                output_file.write(charact_id + '\t' + charact_name + '\t')
                output_file.write(str(sample_abundances[charact]) + '\n')
                relative_abundance += sample_abundances[charact]
        print(' Relative abundance of specific characteristics(%%): %s'
              % relative_abundance)

    return sim_characteristics
035a848cb73e planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/compare_humann2_output commit 5c45ed58045ce1686aa069403f8a9426ea20bac5-dirty
bebatut
parents:
diff changeset
107
035a848cb73e planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/compare_humann2_output commit 5c45ed58045ce1686aa069403f8a9426ea20bac5-dirty
bebatut
parents:
diff changeset
def compare_humann2_output(args):
    """Load every sample's abundances and write the comparison outputs.

    For each sample the abundance file is read with ``extract_abundances``.
    The most abundant characteristics of all samples are merged (duplicates
    removed, first occurrence kept) and written with
    ``write_more_abundant_charat``; shared and sample-specific
    characteristics are written with ``extract_similar_characteristics``.

    Args:
        args: object with the attributes ``sample_name``,
            ``charact_input_file`` and ``specific_output_file`` (parallel
            sequences), ``most_abundant_characteristics_to_extract`` (int),
            ``more_abundant_output_file`` and ``similar_output_file`` (paths).

    Raises:
        IndexError: if ``charact_input_file`` is shorter than
            ``sample_name``.

    NOTE(review): two identical sample names collapse into one entry, which
    shifts the pairing with ``specific_output_file``; callers should pass
    unique names.
    """
    from collections import OrderedDict

    # The per-sample output files are matched to samples by position, so the
    # mapping must keep the command-line order of the sample names; a plain
    # dict does not guarantee that on every interpreter.
    abundances = OrderedDict()
    more_abund_charact = []
    already_listed = set()

    for i, sample_name in enumerate(args.sample_name):
        abundances[sample_name], mac = extract_abundances(
            args.charact_input_file[i],
            args.most_abundant_characteristics_to_extract)
        # De-duplicate while keeping first-seen order, so the rows of the
        # output table come out in a reproducible order.
        for charact in mac:
            if charact not in already_listed:
                already_listed.add(charact)
                more_abund_charact.append(charact)

    write_more_abundant_charat(abundances, more_abund_charact,
                               args.more_abundant_output_file)
    extract_similar_characteristics(abundances, args.similar_output_file,
                                    args.specific_output_file)
035a848cb73e planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/compare_humann2_output commit 5c45ed58045ce1686aa069403f8a9426ea20bac5-dirty
bebatut
parents:
diff changeset
121
035a848cb73e planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/compare_humann2_output commit 5c45ed58045ce1686aa069403f8a9426ea20bac5-dirty
bebatut
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point. --sample_name, --charact_input_file and
    # --specific_output_file are repeatable and are matched by position, so
    # they must be given the same number of times.
    parser = argparse.ArgumentParser()
    for flag in ('--sample_name', '--charact_input_file'):
        parser.add_argument(flag, action='append', required=True)
    parser.add_argument('--most_abundant_characteristics_to_extract',
                        type=int, required=True)
    for flag in ('--more_abundant_output_file', '--similar_output_file'):
        parser.add_argument(flag, required=True)
    parser.add_argument('--specific_output_file', action='append',
                        required=True)
    args = parser.parse_args()

    # Reject mismatched argument counts before any file is read or written.
    nb_samples = len(args.sample_name)
    if nb_samples != len(args.charact_input_file):
        raise ValueError("Same number of values (in same order) are expected for --sample_name and --charact_input_file")
    if nb_samples != len(args.specific_output_file):
        raise ValueError("Same number of values (in same order) are expected for --sample_name and --specific_output_file")

    compare_humann2_output(args)