Mercurial > repos > galaxyp > hirieftools
comparison peptide_pi_annotator.py @ 0:4e84bf65f99a draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
author | galaxyp |
---|---|
date | Mon, 22 May 2017 05:08:04 -0400 |
parents | |
children | 70757404c4f6 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4e84bf65f99a |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import re | |
4 import sys | |
5 import argparse | |
6 | |
7 | |
def main():
    """Run the pI annotator as a command line script.

    When no arguments are given the help text is requested. Otherwise the
    options are parsed, one strip definition is built per strip pattern, and
    every row produced by ``annotate_peptable`` is written to the output
    file as a tab-separated line.
    """
    if not sys.argv[1:]:
        sys.argv.append('-h')
    args = parse_commandline()
    # Patterns, intercepts and fraction widths are paired up by position.
    strips = {
        pattern: {'intercept': args.intercepts[idx],
                  'fr_width': args.fr_width[idx]}
        for idx, pattern in enumerate(args.pipatterns)
    }
    with open(args.outpeptable, 'w') as out_fp:
        rows = annotate_peptable(args.pipeps, args.peptable, args.pepcol,
                                 args.frac_col, args.stripcol, strips,
                                 args.ignoremods)
        for row in rows:
            out_fp.write('\t'.join(str(field) for field in row))
            out_fp.write('\n')
23 | |
24 | |
def get_first_matching_pattern(patterns, string):
    """Return the first pattern found in ``string``.

    The patterns are tried in iteration order with ``re.search``. ``False``
    is returned when none of them matches.
    """
    matching = (candidate for candidate in patterns
                if re.search(candidate, string))
    return next(matching, False)
30 | |
31 | |
def annotate_peptable(predicted_peps_fn, peptable, seqcol, frac_col, stripcol,
                      strips, ignoremods):
    """Yield the rows of a peptide table with pI columns appended.

    The first row yielded is the table header extended with
    'Experimental pI', 'Predicted pI' and 'Delta pI'. Every following row is
    the original row extended with those three values. Nothing is yielded
    for an empty peptide table. A summary of predicted and unpredicted
    peptide counts is printed after the last row.

    Arguments:
    predicted_peps_fn -- path to a tab-separated file; the first field is
        used as lookup key and the second field as predicted pI.
    peptable -- path to the tab-separated peptide table with a header line.
    seqcol -- column number of the peptide sequence, first column is 1.
    frac_col -- column number of the fraction number, first column is 1.
    stripcol -- column number of the field the strip patterns are matched
        against, first column is 1.
    strips -- dict mapping a strip regex pattern to a dict with the keys
        'intercept' and 'fr_width'.
    ignoremods -- modification weight regexes to remove from the sequence
        before lookup; '*' removes any signed decimal number.

    Raises KeyError when no strip pattern matches a row.
    """
    # Positive column numbers are 1-based; zero and negative values are used
    # as list indices unchanged.
    if frac_col > 0:
        frac_col -= 1
    # Compile the modification regexes once instead of for every row. The
    # wildcard pattern is a raw string to avoid invalid escape sequences.
    mod_regexes = [
        re.compile(r'[+-]\d*\.\d*' if weight == '*'
                   else '[+-]{}'.format(weight))
        for weight in ignoremods
    ]
    predicted_peps = {}
    with open(predicted_peps_fn) as fp:
        for line in fp:
            fields = line.strip('\n').split('\t')
            predicted_peps[fields[0]] = fields[1]
    not_predicted_count, predicted_count = 0, 0
    with open(peptable) as fp:
        # A bare next(fp) would leak StopIteration out of this generator on
        # an empty file, which is a RuntimeError since PEP 479.
        header_line = next(fp, None)
        if header_line is None:
            return
        header = header_line.strip('\n').split('\t')
        yield header + ['Experimental pI', 'Predicted pI', 'Delta pI']
        for line in fp:
            line = line.strip('\n').split('\t')
            strip_name = line[stripcol - 1]
            pattern = get_first_matching_pattern(strips.keys(), strip_name)
            # The helper signals "no match" with False; fail with a message
            # that names the offending value instead of "KeyError: False".
            if pattern is False:
                raise KeyError('No strip pattern matches "{}"'.format(
                    strip_name))
            strip = strips[pattern]
            exp_pi = (strip['fr_width'] * int(line[frac_col]) +
                      strip['intercept'])

            sequence = line[seqcol - 1]
            for mod_regex in mod_regexes:
                sequence = mod_regex.sub('', sequence)
            try:
                pred_pi = float(predicted_peps[sequence])
            except KeyError:
                print('CANNOT PREDICT', sequence)
                not_predicted_count += 1
                pred_pi, delta_pi = 'NA', 'NA'
            else:
                delta_pi = exp_pi - pred_pi
                predicted_count += 1
            yield line + [exp_pi, pred_pi, delta_pi]
    print('Number of peptides without pI prediction: {}\n'
          'Number of peptides with predicion: {}\n'.format(not_predicted_count,
                                                           predicted_count))
72 | |
73 | |
def parse_commandline():
    """Parse the options in ``sys.argv[1:]`` and return the namespace.

    The output table, peptide table, pI file, fraction column, strip
    patterns, intercepts and widths are required, so argparse reports a
    missing one with a usage message. The peptide and strip columns keep
    their default of ``False``.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('--out', dest='outpeptable', help='Output peptide '
                        'table', required=True)
    parser.add_argument('-p', dest='peptable', help='Peptide/PSM table with '
                        'peptides, FDR, fraction numbers. Used to calculate '
                        'pI shift.', required=True)
    parser.add_argument('-i', dest='pipeps', help='A tab-separated txt file '
                        'with peptide seq, pI value', required=True)
    parser.add_argument('--pepcol', dest='pepcol', help='Peptide sequence '
                        'column number in peptide table. First column is 1.',
                        default=False, type=int)
    parser.add_argument('--fraccol', dest='frac_col', help='Fraction number '
                        'column number in peptide table. First column is 1.',
                        type=int, required=True)
    parser.add_argument('--ignoremods', dest='ignoremods', help='Regex to '
                        'identify modification weights to be ignored.',
                        default=[], nargs='+', type=str)
    parser.add_argument('--stripcol', dest='stripcol', help='Strip name '
                        'column number in peptide table. Will be used to '
                        'detect strips if multiple are present using pattern '
                        'passed with --strippatterns. First column is nr. 1.',
                        default=False, type=int)
    parser.add_argument('--strippatterns', dest='pipatterns',
                        help='Patterns to detect different pI ranges from e.g.'
                        ' file name in peptide table', nargs='+',
                        required=True)
    parser.add_argument('--intercepts', dest='intercepts',
                        help='pI Intercept of strips', nargs='+', type=float,
                        required=True)
    parser.add_argument('--widths', dest='fr_width', nargs='+',
                        help='Strip fraction widths in pI', type=float,
                        required=True)
    return parser.parse_args(sys.argv[1:])
106 | |
107 | |
# Run the annotator only when this file is executed as a script.
if __name__ == '__main__':
    main()