Mercurial > repos > bgruening > enumerate_charges
annotate sdf_to_tab.py @ 4:9a2b0af78abc draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
author | bgruening |
---|---|
date | Wed, 17 Feb 2021 12:57:32 +0000 |
parents | 5981893078ad |
children | a9b3a2912c08 |
rev | line source |
---|---|
0
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
2 import argparse |
4
9a2b0af78abc
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
bgruening
parents:
2
diff
changeset
|
3 |
0
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
4 import pandas as pd |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
5 from rdkit import Chem |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
6 |
4
9a2b0af78abc
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
bgruening
parents:
2
diff
changeset
|
7 |
0
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
8 def sdf_to_tab(vars): |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
9 mols = Chem.SDMolSupplier(vars.inp, sanitize=False) |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
10 df = pd.DataFrame() # for output |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
11 |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
12 for n in range(len(mols)): |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
13 if mols[n]: |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
14 d = mols[n].GetPropsAsDict() |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
15 # filter dict for desired props |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
16 if vars.props.strip() == '': # none specified, return all |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
17 d = {prop: val for (prop, val) in d.items() if not any(x in str(val) for x in ['\n', '\t'])} # remove items containing newlines or tabs |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
18 else: |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
19 d = {prop: val for (prop, val) in d.items() if prop in vars.props.replace(' ', '').split(',')} # remove items not requested via CLI |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
20 if vars.name: |
1
d84dc786ccb9
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 09b22cceacb34dd4c6c1b42890f93232df128208"
bgruening
parents:
0
diff
changeset
|
21 d['SDFMoleculeName'] = mols[n].GetProp('_Name') |
0
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
22 if vars.smiles: |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
23 d['SMILES'] = Chem.MolToSmiles(mols[n], isomericSmiles=False) |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
24 d['Index'] = int(n) |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
25 |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
26 df = df.append(d, ignore_index=True) |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
27 else: |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
28 print("Molecule could not be read - skipped.") |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
29 |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
30 df = df.astype({'Index': int}).set_index('Index') |
2
5981893078ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit a03b1b7b283901a1510562f1e6eba41f70afaac4"
bgruening
parents:
1
diff
changeset
|
31 sorted_cols = sorted(df.columns.values.tolist()) |
5981893078ad
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit a03b1b7b283901a1510562f1e6eba41f70afaac4"
bgruening
parents:
1
diff
changeset
|
32 df.to_csv(vars.out, sep='\t', header=vars.header, columns=sorted_cols) |
0
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
33 |
4
9a2b0af78abc
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
bgruening
parents:
2
diff
changeset
|
34 |
0
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
35 def main(): |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
36 parser = argparse.ArgumentParser(description="Convert SDF to tabular") |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
37 parser.add_argument('--inp', '-i', help="The input file", required=True) |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
38 parser.add_argument('--out', '-o', help="The output file", required=True) |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
39 parser.add_argument('--props', '-p', help="Properties to filter (leave blank for all)", required=True) |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
40 parser.add_argument('--header', '-t', action='store_true', |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
41 help="Write property name as the first row.") |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
42 parser.add_argument('--smiles', '-s', action='store_true', |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
43 help="Include SMILES in output.") |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
44 parser.add_argument('--name', '-n', action='store_true', |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
45 help="Include molecule name in output.") |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
46 sdf_to_tab(parser.parse_args()) |
4
9a2b0af78abc
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
bgruening
parents:
2
diff
changeset
|
47 |
0
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
48 |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
49 if __name__ == "__main__": |
759010d2e9cd
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
bgruening
parents:
diff
changeset
|
50 main() |