Mercurial > repos > bgruening > sucos_clustering
comparison sucos_max.py @ 6:ba1a2eba3f3d draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit 05dc325ce687441e5d3bdbdedcc0e3529cd5e070"
| author | bgruening |
|---|---|
| date | Wed, 14 Apr 2021 09:29:01 +0000 |
| parents | 2b60b2b236a4 |
| children |
comparison
equal
deleted
inserted
replaced
| 5:aaa3f95add3e | 6:ba1a2eba3f3d |
|---|---|
| 32 SuCOS is the work of Susan Leung. | 32 SuCOS is the work of Susan Leung. |
| 33 GitHub: https://github.com/susanhleung/SuCOS | 33 GitHub: https://github.com/susanhleung/SuCOS |
| 34 Publication: https://doi.org/10.26434/chemrxiv.8100203.v1 | 34 Publication: https://doi.org/10.26434/chemrxiv.8100203.v1 |
| 35 """ | 35 """ |
| 36 | 36 |
| 37 import sucos, utils | 37 import argparse |
| 38 import argparse, gzip, os | 38 import os |
| 39 | |
| 40 import sucos | |
| 41 import utils | |
| 39 from rdkit import Chem | 42 from rdkit import Chem |
| 40 | 43 |
| 41 | 44 |
| 42 def process(inputfilename, clusterfilenames, outputfilename, filter_value, filter_field): | 45 def process( |
| 46 inputfilename, clusterfilenames, outputfilename, filter_value, filter_field | |
| 47 ): | |
| 43 all_clusters = {} | 48 all_clusters = {} |
| 44 for filename in clusterfilenames: | 49 for filename in clusterfilenames: |
| 45 cluster = [] | 50 cluster = [] |
| 46 cluster_file = utils.open_file_for_reading(filename) | 51 cluster_file = utils.open_file_for_reading(filename) |
| 47 suppl = Chem.ForwardSDMolSupplier(cluster_file) | 52 suppl = Chem.ForwardSDMolSupplier(cluster_file) |
| 48 i = 0 | 53 i = 0 |
| 49 for mol in suppl: | 54 for mol in suppl: |
| 50 i += 1 | 55 i += 1 |
| 51 if not mol: | 56 if not mol: |
| 52 utils.log("WARNING: failed to generate molecule", i, "in cluster", filename) | 57 utils.log( |
| 58 "WARNING: failed to generate molecule", i, "in cluster", filename | |
| 59 ) | |
| 53 continue | 60 continue |
| 54 try: | 61 try: |
| 55 features = sucos.getRawFeatures(mol) | 62 features = sucos.getRawFeatures(mol) |
| 56 cluster.append((mol, features)) | 63 cluster.append((mol, features)) |
| 57 except: | 64 except Exception: |
| 58 utils.log("WARNING: failed to generate features for molecule", i, "in cluster", filename) | 65 utils.log( |
| 66 "WARNING: failed to generate features for molecule", | |
| 67 i, | |
| 68 "in cluster", | |
| 69 filename, | |
| 70 ) | |
| 59 | 71 |
| 60 cluster_file.close() | 72 cluster_file.close() |
| 61 all_clusters[filename] = cluster | 73 all_clusters[filename] = cluster |
| 62 | 74 |
| 63 input_file = utils.open_file_for_reading(inputfilename) | 75 input_file = utils.open_file_for_reading(inputfilename) |
| 73 if not mol: | 85 if not mol: |
| 74 utils.log("WARNING: failed to generate molecule", mol_num, "in input") | 86 utils.log("WARNING: failed to generate molecule", mol_num, "in input") |
| 75 continue | 87 continue |
| 76 try: | 88 try: |
| 77 query_features = sucos.getRawFeatures(mol) | 89 query_features = sucos.getRawFeatures(mol) |
| 78 except: | 90 except Exception: |
| 79 utils.log("WARNING: failed to generate features for molecule", mol_num, "in input") | 91 utils.log( |
| 92 "WARNING: failed to generate features for molecule", mol_num, "in input" | |
| 93 ) | |
| 80 continue | 94 continue |
| 81 scores_max = [0, 0, 0] | 95 scores_max = [0, 0, 0] |
| 82 scores_cum = [0, 0, 0] | 96 scores_cum = [0, 0, 0] |
| 83 cluster_name = None | 97 cluster_name = None |
| 84 for clusterfilename in all_clusters: | 98 for clusterfilename in all_clusters: |
| 87 for entry in cluster: | 101 for entry in cluster: |
| 88 hit = entry[0] | 102 hit = entry[0] |
| 89 ref_features = entry[1] | 103 ref_features = entry[1] |
| 90 index += 1 | 104 index += 1 |
| 91 comparisons += 1 | 105 comparisons += 1 |
| 92 sucos_score, fm_score, vol_score = sucos.get_SucosScore(hit, mol, | 106 sucos_score, fm_score, vol_score = sucos.get_SucosScore( |
| 93 tani=False, ref_features=ref_features, | 107 hit, |
| 94 query_features=query_features) | 108 mol, |
| 109 tani=False, | |
| 110 ref_features=ref_features, | |
| 111 query_features=query_features, | |
| 112 ) | |
| 95 | 113 |
| 96 if sucos_score > scores_max[0]: | 114 if sucos_score > scores_max[0]: |
| 97 scores_max[0] = sucos_score | 115 scores_max[0] = sucos_score |
| 98 scores_max[1] = fm_score | 116 scores_max[1] = fm_score |
| 99 scores_max[2] = vol_score | 117 scores_max[2] = vol_score |
| 102 | 120 |
| 103 scores_cum[0] += sucos_score | 121 scores_cum[0] += sucos_score |
| 104 scores_cum[1] += fm_score | 122 scores_cum[1] += fm_score |
| 105 scores_cum[2] += vol_score | 123 scores_cum[2] += vol_score |
| 106 | 124 |
| 107 | |
| 108 # utils.log("Max SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2],"File:", cluster_file_name_only, "Index:", cluster_index) | 125 # utils.log("Max SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2],"File:", cluster_file_name_only, "Index:", cluster_index) |
| 109 mol.SetDoubleProp("Max_SuCOS_Score", scores_max[0] if scores_max[0] > 0 else 0) | 126 mol.SetDoubleProp("Max_SuCOS_Score", scores_max[0] if scores_max[0] > 0 else 0) |
| 110 mol.SetDoubleProp("Max_SuCOS_FeatureMap_Score", scores_max[1] if scores_max[1] > 0 else 0) | 127 mol.SetDoubleProp( |
| 111 mol.SetDoubleProp("Max_SuCOS_Protrude_Score", scores_max[2] if scores_max[2] > 0 else 0) | 128 "Max_SuCOS_FeatureMap_Score", scores_max[1] if scores_max[1] > 0 else 0 |
| 129 ) | |
| 130 mol.SetDoubleProp( | |
| 131 "Max_SuCOS_Protrude_Score", scores_max[2] if scores_max[2] > 0 else 0 | |
| 132 ) | |
| 112 | 133 |
| 113 if cluster_name: | 134 if cluster_name: |
| 114 cluster_file_name_only = cluster_name.split(os.sep)[-1] | 135 cluster_file_name_only = cluster_name.split(os.sep)[-1] |
| 115 mol.SetProp("Max_SuCOS_Cluster", cluster_file_name_only) | 136 mol.SetProp("Max_SuCOS_Cluster", cluster_file_name_only) |
| 116 mol.SetIntProp("Max_SuCOS_Index", cluster_index) | 137 mol.SetIntProp("Max_SuCOS_Index", cluster_index) |
| 117 | 138 |
| 118 # utils.log("Cum SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2]) | 139 # utils.log("Cum SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2]) |
| 119 mol.SetDoubleProp("Cum_SuCOS_Score", scores_cum[0] if scores_cum[0] > 0 else 0) | 140 mol.SetDoubleProp("Cum_SuCOS_Score", scores_cum[0] if scores_cum[0] > 0 else 0) |
| 120 mol.SetDoubleProp("Cum_SuCOS_FeatureMap_Score", scores_cum[1] if scores_cum[1] > 0 else 0) | 141 mol.SetDoubleProp( |
| 121 mol.SetDoubleProp("Cum_SuCOS_Protrude_Score", scores_cum[2] if scores_cum[2] > 0 else 0) | 142 "Cum_SuCOS_FeatureMap_Score", scores_cum[1] if scores_cum[1] > 0 else 0 |
| 143 ) | |
| 144 mol.SetDoubleProp( | |
| 145 "Cum_SuCOS_Protrude_Score", scores_cum[2] if scores_cum[2] > 0 else 0 | |
| 146 ) | |
| 122 | 147 |
| 123 if filter_value and filter_field: | 148 if filter_value and filter_field: |
| 124 if mol.HasProp(filter_field): | 149 if mol.HasProp(filter_field): |
| 125 val = mol.GetDoubleProp(filter_field) | 150 val = mol.GetDoubleProp(filter_field) |
| 126 if val > filter_value: | 151 if val > filter_value: |
| 134 output_file.close() | 159 output_file.close() |
| 135 | 160 |
| 136 utils.log("Completed", comparisons, "comparisons") | 161 utils.log("Completed", comparisons, "comparisons") |
| 137 | 162 |
| 138 | 163 |
| 139 ### start main execution ######################################### | 164 # start main execution ######################################### |
| 165 | |
| 140 | 166 |
| 141 def main(): | 167 def main(): |
| 142 parser = argparse.ArgumentParser(description='Max SuCOS scores with RDKit') | 168 parser = argparse.ArgumentParser(description="Max SuCOS scores with RDKit") |
| 143 parser.add_argument('-i', '--input', help='Input file to score in SDF format. Can be gzipped (*.gz).') | 169 parser.add_argument( |
| 144 parser.add_argument('-o', '--output', help='Output file in SDF format. Can be gzipped (*.gz).') | 170 "-i", |
| 145 parser.add_argument('clusters', nargs='*', help="One or more SDF files with the clustered hits") | 171 "--input", |
| 146 parser.add_argument('--filter-value', type=float, help='Filter out values with scores less than this.') | 172 help="Input file to score in SDF format. Can be gzipped (*.gz).", |
| 147 parser.add_argument('--filter-field', help='Field to use to filter values.') | 173 ) |
| 174 parser.add_argument( | |
| 175 "-o", "--output", help="Output file in SDF format. Can be gzipped (*.gz)." | |
| 176 ) | |
| 177 parser.add_argument( | |
| 178 "clusters", nargs="*", help="One or more SDF files with the clustered hits" | |
| 179 ) | |
| 180 parser.add_argument( | |
| 181 "--filter-value", | |
| 182 type=float, | |
| 183 help="Filter out values with scores less than this.", | |
| 184 ) | |
| 185 parser.add_argument("--filter-field", help="Field to use to filter values.") | |
| 148 | 186 |
| 149 args = parser.parse_args() | 187 args = parser.parse_args() |
| 150 utils.log("Max SuCOS Args: ", args) | 188 utils.log("Max SuCOS Args: ", args) |
| 151 | 189 |
| 152 process(args.input, args.clusters, args.output, args.filter_value, args.filter_field) | 190 process( |
| 191 args.input, args.clusters, args.output, args.filter_value, args.filter_field | |
| 192 ) | |
| 153 | 193 |
| 154 | 194 |
| 155 if __name__ == "__main__": | 195 if __name__ == "__main__": |
| 156 main() | 196 main() |
