Mercurial > repos > bgruening > sucos_clustering
comparison sucos_max.py @ 1:dbfcc048cbbc draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit 6f1ee2812cca091561a2b2e464498dae2f913b8d"
| author | bgruening |
|---|---|
| date | Thu, 19 Mar 2020 13:09:24 +0000 |
| parents | c0e3a335dbfc |
| children | 3120058a3058 |
comparison legend:
- equal
- deleted
- inserted
- replaced
| 0:c0e3a335dbfc | 1:dbfcc048cbbc |
|---|---|
| 37 import sucos, utils | 37 import sucos, utils |
| 38 import argparse, gzip, os | 38 import argparse, gzip, os |
| 39 from rdkit import Chem | 39 from rdkit import Chem |
| 40 | 40 |
| 41 | 41 |
| 42 def process(inputfilename, clusterfilenames, outputfilename, mode): | 42 def process(inputfilename, clusterfilenames, outputfilename): |
| 43 | |
| 44 all_clusters = {} | 43 all_clusters = {} |
| 45 for filename in clusterfilenames: | 44 for filename in clusterfilenames: |
| 46 cluster = [] | 45 cluster = [] |
| 47 cluster_file = utils.open_file_for_reading(filename) | 46 cluster_file = utils.open_file_for_reading(filename) |
| 48 suppl = Chem.ForwardSDMolSupplier(cluster_file) | 47 suppl = Chem.ForwardSDMolSupplier(cluster_file) |
| 77 try: | 76 try: |
| 78 query_features = sucos.getRawFeatures(mol) | 77 query_features = sucos.getRawFeatures(mol) |
| 79 except: | 78 except: |
| 80 utils.log("WARNING: failed to generate features for molecule", mol_num, "in input") | 79 utils.log("WARNING: failed to generate features for molecule", mol_num, "in input") |
| 81 continue | 80 continue |
| 82 scores = [0, 0, 0] | 81 scores_max = [0, 0, 0] |
| 82 scores_cum = [0, 0, 0] | |
| 83 for clusterfilename in all_clusters: | 83 for clusterfilename in all_clusters: |
| 84 cluster = all_clusters[clusterfilename] | 84 cluster = all_clusters[clusterfilename] |
| 85 index = 0 | 85 index = 0 |
| 86 for entry in cluster: | 86 for entry in cluster: |
| 87 hit = entry[0] | 87 hit = entry[0] |
| 88 ref_features = entry[1] | 88 ref_features = entry[1] |
| 89 index += 1 | 89 index += 1 |
| 90 comparisons += 1 | 90 comparisons += 1 |
| 91 sucos_score, fm_score, vol_score = sucos.get_SucosScore(hit, mol, | 91 sucos_score, fm_score, vol_score = sucos.get_SucosScore(hit, mol, |
| 92 tani=False, ref_features=ref_features, query_features=query_features) | 92 tani=False, ref_features=ref_features, |
| 93 if mode == 'max': | 93 query_features=query_features) |
| 94 if sucos_score > scores[0]: | |
| 95 scores[0] = sucos_score | |
| 96 scores[1] = fm_score | |
| 97 scores[2] = vol_score | |
| 98 cluster_name = clusterfilename | |
| 99 cluster_index = index | |
| 100 elif mode == 'cum': | |
| 101 scores[0] += sucos_score | |
| 102 scores[1] += fm_score | |
| 103 scores[2] += vol_score | |
| 104 else: | |
| 105 raise ValueError("Invalid mode: " + mode) | |
| 106 | 94 |
| 107 if scores[0] > 0: | 95 if sucos_score > scores_max[0]: |
| 108 if mode == 'max': | 96 scores_max[0] = sucos_score |
| 109 cluster_file_name_only = cluster_name.split(os.sep)[-1] | 97 scores_max[1] = fm_score |
| 110 #utils.log("Max SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2],"File:", cluster_file_name_only, "Index:", cluster_index) | 98 scores_max[2] = vol_score |
| 111 mol.SetDoubleProp("Max_SuCOS_Score", scores[0]) | 99 cluster_name = clusterfilename |
| 112 mol.SetDoubleProp("Max_SuCOS_FeatureMap_Score", scores[1]) | 100 cluster_index = index |
| 113 mol.SetDoubleProp("Max_SuCOS_Protrude_Score", scores[2]) | |
| 114 mol.SetProp("Max_SuCOS_Cluster", cluster_file_name_only) | |
| 115 mol.SetIntProp("Max_SuCOS_Index", cluster_index) | |
| 116 | 101 |
| 117 else: | 102 scores_cum[0] += sucos_score |
| 118 #utils.log("Cum SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2]) | 103 scores_cum[1] += fm_score |
| 119 mol.SetDoubleProp("Cum_SuCOS_Score", scores[0]) | 104 scores_cum[2] += vol_score |
| 120 mol.SetDoubleProp("Cum_SuCOS_FeatureMap_Score", scores[1]) | |
| 121 mol.SetDoubleProp("Cum_SuCOS_Protrude_Score", scores[2]) | |
| 122 | 105 |
| 123 writer.write(mol) | 106 if scores_max[0] > 0: |
| 107 cluster_file_name_only = cluster_name.split(os.sep)[-1] | |
| 108 # utils.log("Max SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2],"File:", cluster_file_name_only, "Index:", cluster_index) | |
| 109 mol.SetDoubleProp("Max_SuCOS_Score", scores_max[0]) | |
| 110 mol.SetDoubleProp("Max_SuCOS_FeatureMap_Score", scores_max[1]) | |
| 111 mol.SetDoubleProp("Max_SuCOS_Protrude_Score", scores_max[2]) | |
| 112 mol.SetProp("Max_SuCOS_Cluster", cluster_file_name_only) | |
| 113 mol.SetIntProp("Max_SuCOS_Index", cluster_index) | |
| 124 | 114 |
| 125 else: | 115 # utils.log("Cum SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2]) |
| 126 utils.log("Molecule", mol_num, "did not overlay. Omitting from results") | 116 mol.SetDoubleProp("Cum_SuCOS_Score", scores_cum[0]) |
| 117 mol.SetDoubleProp("Cum_SuCOS_FeatureMap_Score", scores_cum[1]) | |
| 118 mol.SetDoubleProp("Cum_SuCOS_Protrude_Score", scores_cum[2]) | |
| 127 | 119 |
| 120 writer.write(mol) | |
| 128 | 121 |
| 129 input_file.close() | 122 input_file.close() |
| 130 writer.flush() | 123 writer.flush() |
| 131 writer.close() | 124 writer.close() |
| 132 output_file.close() | 125 output_file.close() |
| 138 | 131 |
| 139 def main(): | 132 def main(): |
| 140 parser = argparse.ArgumentParser(description='Max SuCOS scores with RDKit') | 133 parser = argparse.ArgumentParser(description='Max SuCOS scores with RDKit') |
| 141 parser.add_argument('-i', '--input', help='Input file to score in SDF format. Can be gzipped (*.gz).') | 134 parser.add_argument('-i', '--input', help='Input file to score in SDF format. Can be gzipped (*.gz).') |
| 142 parser.add_argument('-o', '--output', help='Output file in SDF format. Can be gzipped (*.gz).') | 135 parser.add_argument('-o', '--output', help='Output file in SDF format. Can be gzipped (*.gz).') |
| 143 parser.add_argument('-m', '--mode', choices=['max', 'cum'], | |
| 144 default='max', help='Score mode: max = best score, cum = sum of all scores') | |
| 145 parser.add_argument('clusters', nargs='*', help="One or more SDF files with the clustered hits") | 136 parser.add_argument('clusters', nargs='*', help="One or more SDF files with the clustered hits") |
| 146 | 137 |
| 147 args = parser.parse_args() | 138 args = parser.parse_args() |
| 148 utils.log("Max SuCOS Args: ", args) | 139 utils.log("Max SuCOS Args: ", args) |
| 149 | 140 |
| 150 process(args.input, args.clusters, args.output, args.mode) | 141 process(args.input, args.clusters, args.output) |
| 151 | 142 |
| 152 | 143 |
| 153 if __name__ == "__main__": | 144 if __name__ == "__main__": |
| 154 main() | 145 main() |
