comparison peptide_pi_annotator.py @ 0:4e84bf65f99a draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
author galaxyp
date Mon, 22 May 2017 05:08:04 -0400
parents
children 70757404c4f6
comparison
equal deleted inserted replaced
-1:000000000000 0:4e84bf65f99a
1 #!/usr/bin/env python
2
3 import re
4 import sys
5 import argparse
6
7
def main():
    """Annotate a peptide table with pI values and write it out as TSV.

    Parses the command line, pairs every strip pattern with its intercept
    and fraction width, and writes each row produced by
    ``annotate_peptable`` to the ``--out`` file, tab-separated, one row
    per line.

    Exits with an error message when no strip pattern is given or when
    there are fewer intercepts or widths than strip patterns.
    """
    # Show the usage text rather than failing when run without arguments.
    if not sys.argv[1:]:
        sys.argv.append('-h')
    args = parse_commandline()

    patterns = args.pipatterns or []
    intercepts = args.intercepts or []
    widths = args.fr_width or []
    # These cases used to surface as a bare TypeError/IndexError traceback.
    if not patterns:
        sys.exit('No strip patterns given, pass at least one with '
                 '--strippatterns')
    if len(intercepts) < len(patterns) or len(widths) < len(patterns):
        sys.exit('Each pattern in --strippatterns needs a matching value '
                 'in --intercepts and --widths')

    # Surplus intercepts/widths are ignored, as before.
    strips = {}
    for pattern, intercept, width in zip(patterns, intercepts, widths):
        strips[pattern] = {'intercept': intercept, 'fr_width': width}

    with open(args.outpeptable, 'w') as fp:
        for outline in annotate_peptable(args.pipeps, args.peptable,
                                         args.pepcol, args.frac_col,
                                         args.stripcol, strips,
                                         args.ignoremods):
            fp.write('\t'.join(str(x) for x in outline))
            fp.write('\n')
23
24
def get_first_matching_pattern(patterns, string):
    """Return the first pattern that matches somewhere in ``string``.

    Patterns are tried in iteration order using ``re.search``, so a
    pattern matches when it is found anywhere in ``string``.

    Args:
        patterns: Iterable of regular expression patterns.
        string: Text to search.

    Returns:
        The first matching pattern, or ``False`` when none matches.
    """
    return next((pattern for pattern in patterns
                 if re.search(pattern, string)), False)
30
31
def annotate_peptable(predicted_peps_fn, peptable, seqcol, frac_col, stripcol,
                      strips, ignoremods):
    """Yield the rows of a peptide table extended with three pI columns.

    The first item yielded is the header row with 'Experimental pI',
    'Predicted pI' and 'Delta pI' appended. Every following item is a data
    row with the matching three values appended. The experimental pI is
    ``fr_width * fraction + intercept`` of the strip whose pattern matches
    the row's strip column. The predicted pI is looked up by the peptide
    sequence after the ignored modification weights have been removed.
    Rows without a prediction get 'NA' for the predicted and delta values.
    A summary of predicted/unpredicted counts is printed at the end.

    An empty peptide table (no header line) yields nothing.

    Args:
        predicted_peps_fn: Path to a tab-separated file whose first two
            columns are used as sequence and predicted pI.
        peptable: Path to the tab-separated peptide table with a header.
        seqcol: Column number of the peptide sequence, first column is 1.
        frac_col: Column number of the fraction number. Values above zero
            are treated as 1-based, other values are used as-is as an
            index.
        stripcol: Column number of the strip name, first column is 1.
        strips: Dict mapping a strip pattern to a dict with the keys
            'intercept' and 'fr_width'.
        ignoremods: Modification weights to strip from sequences. Each is
            inserted into a regex after a sign; '*' strips any signed
            decimal number.

    Raises:
        KeyError: If no strip pattern matches the strip column of a row.
    """
    if frac_col > 0:
        frac_col -= 1
    predicted_peps = {}
    with open(predicted_peps_fn) as fp:
        for line in fp:
            fields = line.strip('\n').split('\t')
            predicted_peps[fields[0]] = fields[1]
    not_predicted_count, predicted_count = 0, 0
    with open(peptable) as fp:
        # A bare next(fp) on an empty file would leak StopIteration out of
        # this generator, which Python 3.7+ turns into a RuntimeError.
        header_line = next(fp, None)
        if header_line is None:
            return
        yield (header_line.strip('\n').split('\t') +
               ['Experimental pI', 'Predicted pI', 'Delta pI'])
        # Compile the modification regexes once instead of for every row.
        mod_regexes = [
            re.compile(r'[+-]\d*\.\d*' if weight == '*'
                       else '[+-]{}'.format(weight))
            for weight in ignoremods]
        for line in fp:
            line = line.strip('\n').split('\t')
            strip_name = line[stripcol - 1]
            pattern = get_first_matching_pattern(strips, strip_name)
            # Compare against the False sentinel; an empty pattern is falsy
            # but is still a valid match.
            if pattern is False:
                raise KeyError('No strip pattern matches {!r}'.format(
                    strip_name))
            strip = strips[pattern]
            exp_pi = (strip['fr_width'] * int(line[frac_col]) +
                      strip['intercept'])

            sequence = line[seqcol - 1]
            for mod_regex in mod_regexes:
                sequence = mod_regex.sub('', sequence)
            try:
                pred_pi = float(predicted_peps[sequence])
            except KeyError:
                print('CANNOT PREDICT', sequence)
                not_predicted_count += 1
                pred_pi, delta_pi = 'NA', 'NA'
            else:
                delta_pi = exp_pi - pred_pi
                predicted_count += 1
            yield line + [exp_pi, pred_pi, delta_pi]
    print('Number of peptides without pI prediction: {}\n'
          'Number of peptides with predicion: {}\n'.format(not_predicted_count,
                                                           predicted_count))
72
73
def parse_commandline():
    """Parse the command line arguments in ``sys.argv``.

    Returns:
        argparse.Namespace with the attributes outpeptable, peptable,
        pipeps, pepcol, frac_col, ignoremods, stripcol, pipatterns,
        intercepts and fr_width. pepcol and stripcol default to False,
        ignoremods to an empty list, and the rest to None.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('--out', dest='outpeptable', help='Output peptide '
                        'table')
    parser.add_argument('-p', dest='peptable', help='Peptide/PSM table with '
                        'peptides, FDR, fraction numbers. Used to calculate '
                        'pI shift.')
    parser.add_argument('-i', dest='pipeps', help='A tab-separated txt file '
                        'with peptide seq, pI value')
    parser.add_argument('--pepcol', dest='pepcol', help='Peptide sequence '
                        'column number in peptide table. First column is 1.',
                        default=False, type=int)
    parser.add_argument('--fraccol', dest='frac_col', help='Fraction number '
                        'column number in peptide table. First column is 1.',
                        type=int)
    parser.add_argument('--ignoremods', dest='ignoremods', help='Regex to '
                        'identify modification weights to be ignored.',
                        default=[], nargs='+', type=str)
    parser.add_argument('--stripcol', dest='stripcol', help='Strip name '
                        'column number in peptide table. Will be used to '
                        'detect strips if multiple are present using pattern '
                        'passed with --strippatterns. First column is nr. 1.',
                        default=False, type=int)
    parser.add_argument('--strippatterns', dest='pipatterns',
                        help='Patterns to detect different pI ranges from e.g.'
                        ' file name in peptide table', nargs='+')
    parser.add_argument('--intercepts', dest='intercepts',
                        help='pI Intercept of strips', nargs='+', type=float)
    parser.add_argument('--widths', dest='fr_width', nargs='+',
                        help='Strip fraction widths in pI', type=float)
    return parser.parse_args(sys.argv[1:])
106
107
# Run the annotator only when executed as a script, not when imported.
if __name__ == '__main__':
    main()