Mercurial > repos > bgruening > enumerate_charges
comparison sdf_to_tab.py @ 0:759010d2e9cd draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdkit commit 20df7e562341cd30e89a14d6bde9054956fadc06"
| author | bgruening |
|---|---|
| date | Tue, 10 Mar 2020 16:55:50 +0000 |
| parents | |
| children | d84dc786ccb9 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:759010d2e9cd |
|---|---|
| 1 #!/usr/bin/env python3 | |
| 2 import argparse | |
| 3 import pandas as pd | |
| 4 from rdkit import Chem | |
| 5 | |
def sdf_to_tab(vars):
    """Convert the molecules of an SDF file into a tab-separated table.

    One row is written per readable molecule, indexed by the molecule's
    position in the input file. Entries that cannot be read are reported
    on stdout and skipped.

    Args:
        vars: Parsed command-line options providing ``inp`` (input SDF
            path), ``out`` (output path), ``props`` (comma-separated
            property names; blank selects every property), and the flags
            ``header``, ``name`` and ``smiles``. The parameter name
            shadows the builtin ``vars``; it is kept so existing callers
            keep working.
    """
    mols = Chem.SDMolSupplier(vars.inp, sanitize=False)

    # Parse the property selection once rather than once per property of
    # every molecule. All spaces are removed from the selection, so a
    # property whose name contains a space cannot be requested.
    keep_all = not vars.props.strip()
    requested = set(vars.props.replace(' ', '').split(','))

    rows = []
    # Iterate the supplier directly so each record is fetched only once.
    for n, mol in enumerate(mols):
        if mol is None:
            print("Molecule could not be read - skipped.")
            continue

        props = mol.GetPropsAsDict()
        if keep_all:
            # Drop values containing newlines or tabs.
            row = {
                prop: val for prop, val in props.items()
                if not any(x in str(val) for x in ['\n', '\t'])
            }
        else:
            # Keep only the properties requested on the command line.
            row = {
                prop: val for prop, val in props.items()
                if prop in requested
            }

        if vars.name:
            row['Name'] = mol.GetProp('_Name')
        if vars.smiles:
            row['SMILES'] = Chem.MolToSmiles(mol, isomericSmiles=False)
        row['Index'] = n
        rows.append(row)

    # Build the frame in one step: DataFrame.append was removed in pandas
    # 2.0 and copied the whole frame on every call. An input without any
    # readable molecule still needs an 'Index' column for set_index below.
    if rows:
        df = pd.DataFrame(rows)
    else:
        df = pd.DataFrame(columns=['Index'])

    df = df.astype({'Index': int}).set_index('Index')
    df.to_csv(vars.out, sep='\t', header=vars.header)
| 30 | |
def main():
    """Parse the command-line options and run the SDF-to-tabular conversion."""
    cli = argparse.ArgumentParser(description="Convert SDF to tabular")

    # Mandatory options that take a value, registered in display order.
    valued_options = (
        ('--inp', '-i', "The input file"),
        ('--out', '-o', "The output file"),
        ('--props', '-p', "Properties to filter (leave blank for all)"),
    )
    for long_flag, short_flag, description in valued_options:
        cli.add_argument(long_flag, short_flag, help=description, required=True)

    # Optional boolean switches; each defaults to False when absent.
    switches = (
        ('--header', '-t', "Write property name as the first row."),
        ('--smiles', '-s', "Include SMILES in output."),
        ('--name', '-n', "Include molecule name in output."),
    )
    for long_flag, short_flag, description in switches:
        cli.add_argument(long_flag, short_flag, action='store_true',
                         help=description)

    options = cli.parse_args()
    sdf_to_tab(options)
| 43 | |
| 44 | |
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
