Mercurial > repos > recetox > filter_compounds
comparison filter_compounds.py @ 0:987357c6941c draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/filter_compounds commit ad6ca51c5fee52d533cfd6b8ccef5e44027ed298"
| author | recetox |
|---|---|
| date | Thu, 07 Jan 2021 11:30:13 +0000 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:987357c6941c |
|---|---|
| 1 import argparse | |
| 2 import re | |
| 3 | |
| 4 from openbabel import openbabel, pybel | |
| 5 openbabel.obErrorLog.StopLogging() | |
| 6 | |
| 7 | |
def parse_command_line(argv=None):
    """Parse the command-line options of the compound filter.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            what existing callers rely on.

    Returns:
        argparse.Namespace with the attributes ``input`` and ``output``
        (file names, both required) and the boolean flags ``met`` and
        ``anorg`` (both default to ``False``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', required=True, help='Input file name')
    parser.add_argument('-o', '--output', required=True, help='Output file name')
    parser.add_argument('-m', '--met', required=False, action='store_true', help='Remove organometallic compounds')
    parser.add_argument('-a', '--anorg', required=False, action='store_true', help='Remove anorganic compounds')
    # Forwarding argv lets the parser be driven without touching sys.argv.
    return parser.parse_args(argv)
| 15 | |
| 16 | |
def _formula_from_smiles(smiles):
    """Return the second '/'-separated field of the InChI for *smiles*.

    An empty string is returned when *smiles* is blank, cannot be read by
    pybel, or yields an InChI string without a second field.
    """
    if not smiles.strip():
        return ''
    try:
        inchi = pybel.readstring('smi', smiles).write('inchi')
    except OSError:
        # NOTE(review): pybel is expected to raise IOError (an alias of
        # OSError) for input it cannot parse - confirm against the installed
        # Open Babel version.
        return ''
    layers = inchi.split('/')
    return layers[1] if len(layers) > 1 else ''


def filter_compounds(args, pattern):
    """Copy the lines of ``args.input`` that pass every regex to ``args.output``.

    Each input line is split at its first tab. When the first field is
    numeric the line is treated as an indexed row (index, SMILES); otherwise
    the first field itself is taken as the SMILES. The SMILES is converted to
    InChI and the field after the first '/' is searched with every pattern.

    * A line whose field matches all patterns is written unchanged.
    * An indexed row that fails is written as the index followed by an empty
      column, so the row numbering is preserved.
    * A non-indexed line that fails is dropped.

    Rows whose SMILES is blank or cannot be converted are tested with an
    empty string instead of aborting the whole run.

    Args:
        args: Object with ``input`` and ``output`` attributes (file names).
        pattern: Iterable of regular expressions that must all match. A
            single pattern string is also accepted and treated as one regex.
            An empty iterable lets every line through.
    """
    print(pattern)
    # A bare string would otherwise be iterated character by character.
    patterns = [pattern] if isinstance(pattern, str) else list(pattern)
    # Compile once; the same patterns are applied to every line.
    compiled = [re.compile(expr) for expr in patterns]

    with open(args.input, "r") as infile, open(args.output, "w") as outfile:
        for line in infile:
            values = line.split('\t', 1)

            # check if input is list of SMILES or indexed table of SMILES
            indexed = values[0].isnumeric()
            if indexed:
                # An index-only last line (no tab) has no SMILES column.
                smiles = values[1] if len(values) > 1 else ''
            else:
                smiles = values[0]

            mol = _formula_from_smiles(smiles)

            # write original line if every selected filter passes
            if all(regex.search(mol) for regex in compiled):
                outfile.write(line)
            elif indexed:
                outfile.write(f'{values[0]}\t\n')
| 37 | |
| 38 | |
def __main__():
    """
    Filter organometallics and/or anorganic compounds.

    Reads the command-line options, builds the list of regular expressions
    for the selected filters and hands it to ``filter_compounds``. When
    neither ``--met`` nor ``--anorg`` is given the list stays empty, so no
    pattern can reject a line and the input is passed through.
    """
    args = parse_command_line()

    # The pattern collection must stay a list: filter_compounds iterates it
    # and treats every element as one regular expression.
    sel_pattern = []

    # check if user selected something to filter out, if not output file == input file
    if not (args.met or args.anorg):
        print("No filtering selected - user did not specify what to filter out.")

    # select patterns for filtering
    if args.met:
        # whole string built only from the listed element symbols, digits and dots
        sel_pattern.append(r'^(?:C|N|O|P|F|S|I|B|Si|Se|Cl|Br|Li|Na|H|K|[0-9]|\.)+$')
    if args.anorg:
        # a 'C' followed by any character other than a, b or d-z
        # (presumably to tell carbon apart from symbols such as Cl or Ca)
        sel_pattern.append(r'[C][^abd-z]')

    filter_compounds(args, sel_pattern)
| 57 | |
| 58 | |
# Run the filter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    __main__()
