Mercurial > repos > bgruening > chembl_structure_pipeline
comparison chembl.py @ 0:e32734922d34 draft default tip
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
| author | bgruening |
|---|---|
| date | Sat, 10 Oct 2020 09:42:09 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:e32734922d34 |
|---|---|
| 1 import argparse | |
| 2 | |
| 3 from chembl_webresource_client.new_client import new_client | |
| 4 from chembl_webresource_client.settings import Settings | |
| 5 | |
# Set the ChEMBL web client's CACHING flag to False for this process.
# NOTE(review): presumably this makes every query go to the web service
# instead of a local response cache - confirm against the client docs.
Settings.Instance().CACHING = False
| 7 | |
| 8 | |
def open_file(filename):
    """
    Read a SMILES string from the first line of a text file.

    Only the first whitespace-delimited token of the first line is returned;
    any further columns on that line and all later lines are ignored.

    :param filename: path of the file to read
    :return: first token found on the first line
    :raises IOError: if the first line is empty or holds only whitespace
    """
    with open(filename) as f:
        tokens = f.readline().split()

    if not tokens:
        # An empty first line used to surface as a bare IndexError from
        # ``split()[0]``; report the actual problem instead.
        raise IOError('No SMILES found on the first line of {}'.format(filename))
    return tokens[0]
| 12 | |
| 13 | |
def get_smiles(res):
    """
    Build the set of output lines for a collection of search results.

    Each line has the form ``<canonical SMILES><TAB><ChEMBL ID>``. Because a
    set is returned, duplicate lines collapse into one. A record whose lookups
    raise TypeError (for instance when its 'molecule_structures' entry is
    None) is skipped.
    """
    template = '{}\t{}'
    lines = set()
    for record in res:
        try:
            structures = record['molecule_structures']
            lines.add(template.format(structures['canonical_smiles'], record['molecule_chembl_id']))
        except TypeError:
            # Record is not subscriptable where expected; leave it out.
            pass
    return lines
| 25 | |
| 26 | |
def sim_search(smiles, tanimoto):
    """
    Query the ChEMBL similarity resource for the given SMILES.

    The ``tanimoto`` value is passed through as the ``similarity`` filter, and
    the result is restricted to the 'molecule_structures' and
    'molecule_chembl_id' fields.
    """
    wanted_fields = ['molecule_structures', 'molecule_chembl_id']
    hits = new_client.similarity.filter(smiles=smiles, similarity=tanimoto)
    return hits.only(wanted_fields)
| 33 | |
| 34 | |
def substr_search(smiles):
    """
    Query the ChEMBL substructure resource for the given SMILES.

    The result is restricted to the 'molecule_structures' and
    'molecule_chembl_id' fields.
    """
    wanted_fields = ['molecule_structures', 'molecule_chembl_id']
    hits = new_client.substructure.filter(smiles=smiles)
    return hits.only(wanted_fields)
| 41 | |
| 42 | |
def filter_drugs(mols):
    """
    Narrow a query to approved drugs by filtering on ``max_phase=4``.
    """
    criteria = {'max_phase': 4}
    return mols.filter(**criteria)
| 48 | |
| 49 | |
def filter_biotherapeutic(mols):
    """
    Narrow a query to biotherapeutic molecules by filtering on
    ``biotherapeutic__isnull=False``.
    """
    criteria = {'biotherapeutic__isnull': False}
    return mols.filter(**criteria)
| 55 | |
| 56 | |
def filter_nat_prod(mols):
    """
    Narrow a query to natural products by filtering on ``natural_product=1``.
    """
    criteria = {'natural_product': 1}
    return mols.filter(**criteria)
| 62 | |
| 63 | |
def filter_ro5(mols):
    """
    Narrow a query to compounds without RO5 violations by filtering on
    ``molecule_properties__num_ro5_violations=0``.
    """
    criteria = {'molecule_properties__num_ro5_violations': 0}
    return mols.filter(**criteria)
| 69 | |
| 70 | |
def main(argv=None):
    """
    Command-line entry point: search ChEMBL and write the hits to a file.

    The query SMILES comes from ``-i`` or, if ``-f`` is given, from the first
    token of that file. A substructure search is run when ``-s`` is set,
    otherwise a similarity search using the ``-t`` value. The optional filters
    (``-d``, ``-b``, ``-n``, ``-r``) are applied in that order, and the
    resulting "SMILES<TAB>ChEMBL ID" lines are written to the ``-o`` file.

    :param argv: list of argument strings; when None, argparse falls back to
                 the process command line
    :raises IOError: if the SMILES is shorter than 5 characters
    :raises SystemExit: raised by argparse when the SMILES, the output path,
                        or (for a similarity search) the Tanimoto value is missing
    """
    parser = argparse.ArgumentParser(description='Search ChEMBL database for compounds')
    parser.add_argument('-i', '--input', help='SMILES input')
    parser.add_argument('-f', '--file', help='SMILES input as file')
    parser.add_argument('-o', '--output', help="SMILES output")
    parser.add_argument('-t', '--tanimoto', type=int, help='Tanimoto similarity score')
    parser.add_argument('-s', '--substructure', action='store_true', help='Substructure search using the SMILES input.')
    parser.add_argument('-d', '--drugs', action='store_true', help='Filter approved drugs')
    parser.add_argument('-b', '--biotherapeutic', action='store_true', help='Filter biotherapeutic molecules')
    parser.add_argument('-n', '--nat-prod', action='store_true', help='Filter natural products')
    parser.add_argument('-r', '--ro5', action='store_true', help='Filter compounds that pass Lipinski RO5')

    args = parser.parse_args(argv)

    if args.file:  # get SMILES from file rather than -i option
        args.input = open_file(args.file)

    # Without either source of SMILES, len() below would fail with an opaque
    # TypeError; report a usage error instead.
    if args.input is None:
        parser.error('a SMILES is required: use -i/--input or -f/--file')

    if len(args.input) < 5:
        raise IOError('SMILES must be at least 5 characters long.')

    # Validate the remaining options before any web query is issued, so a bad
    # invocation fails immediately rather than after the search has run.
    if not args.output:
        parser.error('an output file is required: use -o/--output')

    if not args.substructure and args.tanimoto is None:
        parser.error('-t/--tanimoto is required for a similarity search')

    if args.substructure:  # specify search type: substructure or similarity
        mols = substr_search(args.input)
    else:
        mols = sim_search(args.input, args.tanimoto)

    # filter options:
    if args.drugs:
        mols = filter_drugs(mols)

    if args.biotherapeutic:
        mols = filter_biotherapeutic(mols)

    if args.nat_prod:
        mols = filter_nat_prod(mols)

    if args.ro5:
        mols = filter_ro5(mols)

    # get SMILES from search output
    mols = get_smiles(mols)

    # write to file; the lines come from a set, so sort them to make the
    # output order reproducible between runs
    with open(args.output, 'w') as f:
        f.write('\n'.join(sorted(mols)))
| 115 | |
| 116 | |
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
