Mercurial > repos > galaxyp > hirieftools
annotate peptide_pi_annotator.py @ 1:70757404c4f6 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
| author | galaxyp | 
|---|---|
| date | Mon, 24 Jul 2017 05:25:05 -0400 | 
| parents | 4e84bf65f99a | 
| children | a6341e757422 | 
| rev | line source | 
|---|---|
| 
0
 
4e84bf65f99a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
 
galaxyp 
parents:  
diff
changeset
 | 
1 #!/usr/bin/env python | 
| 
 
4e84bf65f99a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
 
galaxyp 
parents:  
diff
changeset
 | 
2 | 
| 
 
4e84bf65f99a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
 
galaxyp 
parents:  
diff
changeset
 | 
3 import re | 
| 
 
4e84bf65f99a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
 
galaxyp 
parents:  
diff
changeset
 | 
4 import sys | 
| 
 
4e84bf65f99a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
 
galaxyp 
parents:  
diff
changeset
 | 
5 import argparse | 
| 
 
4e84bf65f99a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
 
galaxyp 
parents:  
diff
changeset
 | 
6 | 
| 
 
4e84bf65f99a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
 
galaxyp 
parents:  
diff
changeset
 | 
7 | 
| 
 
4e84bf65f99a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
 
galaxyp 
parents:  
diff
changeset
 | 
def _resolve_column(colnr, colpattern, peptable, description):
    """Return the 0-based index of a peptide table column.

    Args:
        colnr: Column number given on the command line, or a falsy value
            (``False``/``0``) when not given. Positive numbers are 1-based
            and converted to a 0-based index; negative numbers are used
            unchanged, i.e. as Python-style indices counted from the end.
        colpattern: Substring to look for in the header fields of the table,
            used only when ``colnr`` is falsy.
        peptable: Path to the peptide table whose header is searched.
        description: Human-readable column name used in error messages.

    Raises:
        RuntimeError: If neither a number nor a pattern is given, or if the
            pattern does not match any header field.
    """
    if colnr:
        return colnr - 1 if colnr > 0 else colnr
    if colpattern:
        column = get_col_by_pattern(peptable, colpattern)
        if column is None:
            # Without this check the missing column would only surface later
            # as a TypeError when a table row is indexed with None.
            raise RuntimeError(
                'No column header containing "{}" found for {} '
                'column'.format(colpattern, description))
        return column
    raise RuntimeError('Must define {} column'.format(description))


def _build_strips(patterns, intercepts, widths):
    """Map each strip pattern to its ``intercept`` and ``fr_width``.

    The three sequences are matched by position. Surplus intercepts or widths
    are ignored, as before; missing ones raise a ``RuntimeError``.
    """
    patterns = patterns or []
    intercepts = intercepts or []
    widths = widths or []
    if not patterns:
        raise RuntimeError('Must define at least one strip pattern')
    if len(intercepts) < len(patterns) or len(widths) < len(patterns):
        raise RuntimeError('Must define an intercept and a fraction width '
                           'for each strip pattern')
    return {pattern: {'intercept': intercept, 'fr_width': width}
            for pattern, intercept, width in zip(patterns, intercepts, widths)}


def main():
    """Command line entry point.

    Parses the command line (showing the help text when no arguments are
    given), resolves the fraction, strip and peptide sequence columns, and
    writes the annotated peptide table as tab-separated text to the output
    file.

    Raises:
        RuntimeError: If a required column or the strip definitions are
            missing or incomplete.
    """
    if not sys.argv[1:]:
        sys.argv.append('-h')
    args = parse_commandline()
    frac_col = _resolve_column(args.frac_col, args.frac_colpattern,
                               args.peptable, 'fraction')
    stripcol = _resolve_column(args.stripcol, args.stripcolpattern,
                               args.peptable, 'strip')
    pepcol = _resolve_column(args.pepcol, args.pepcolpattern,
                             args.peptable, 'peptide sequence')
    strips = _build_strips(args.pipatterns, args.intercepts, args.fr_width)
    with open(args.outpeptable, 'w') as fp:
        for outline in annotate_peptable(args.pipeps, args.peptable, pepcol,
                                         frac_col, stripcol, strips,
                                         args.ignoremods):
            fp.write('\t'.join(str(x) for x in outline))
            fp.write('\n')
| 
 
4e84bf65f99a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
 
galaxyp 
parents:  
diff
changeset
 | 
44 | 
| 
 
4e84bf65f99a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
 
galaxyp 
parents:  
diff
changeset
 | 
45 | 
| 
 
4e84bf65f99a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
 
galaxyp 
parents:  
diff
changeset
 | 
def get_first_matching_pattern(patterns, string):
    """Return the first regex of ``patterns`` that is found in ``string``.

    Patterns are tried in iteration order with ``re.search``. When none of
    them matches (or ``patterns`` is empty) ``False`` is returned.
    """
    hits = (candidate for candidate in patterns
            if re.search(candidate, string))
    return next(hits, False)
| 
 
4e84bf65f99a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
 
galaxyp 
parents:  
diff
changeset
 | 
51 | 
| 
 
4e84bf65f99a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
 
galaxyp 
parents:  
diff
changeset
 | 
52 | 
| 
1
 
70757404c4f6
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
 
galaxyp 
parents: 
0 
diff
changeset
 | 
def get_col_by_pattern(peptable, colpattern):
    """Return the 0-based index of the first header field containing a text.

    Args:
        peptable: Path to a tab-separated table; only its first line (the
            header) is read.
        colpattern: Plain substring (not a regex) to look for in each header
            field.

    Returns:
        The index of the first matching header field, or ``None`` when no
        field matches or the file is empty.
    """
    with open(peptable) as fp:
        # A default avoids leaking a bare StopIteration for an empty file.
        header_line = next(fp, None)
    if header_line is None:
        return None
    header = header_line.strip('\n').split('\t')
    for ix, field in enumerate(header):
        if colpattern in field:
            return ix
    return None
| 
 
70757404c4f6
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
 
galaxyp 
parents: 
0 
diff
changeset
 | 
59 | 
| 
 
70757404c4f6
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
 
galaxyp 
parents: 
0 
diff
changeset
 | 
60 | 
| 
0
 
4e84bf65f99a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
 
galaxyp 
parents:  
diff
changeset
 | 
def annotate_peptable(predicted_peps_fn, peptable, seqcol, frac_col, stripcol,
                      strips, ignoremods):
    """Yield the rows of a peptide table extended with pI annotation.

    The first yielded item is the table header followed by the column names
    'Experimental pI', 'Predicted pI' and 'Delta pI'. Every following item is
    a table row (list of strings) followed by the three values for that row.
    The experimental pI is ``fr_width * fraction number + intercept`` of the
    strip whose pattern matches the row, and delta pI is experimental minus
    predicted pI. Rows whose peptide has no predicted pI get 'NA' for the
    predicted and delta values. Progress messages and a final summary are
    printed to stdout. An empty peptide table yields nothing.

    Args:
        predicted_peps_fn: Path to a tab-separated file with the peptide
            sequence in the first and its predicted pI in the second column.
        peptable: Path to the tab-separated peptide table with a header line.
        seqcol: Index of the peptide sequence column.
        frac_col: Index of the fraction number column (values parsed as int).
        stripcol: Index of the column the strip patterns are matched against.
        strips: Dict mapping a strip regex to a dict with the keys
            'intercept' and 'fr_width'.
        ignoremods: Iterable of modification weights (used as regex fragments)
            to remove from sequences, each preceded by '+' or '-'. The value
            '*' removes any signed decimal number.

    Raises:
        KeyError: If no strip pattern matches the strip column of a row.
    """
    predicted_peps = {}
    with open(predicted_peps_fn) as fp:
        for line in fp:
            fields = line.strip('\n').split('\t')
            if fields == ['']:
                # Blank line, e.g. an extra newline at the end of the file.
                continue
            predicted_peps[fields[0]] = fields[1]
    # The modification patterns do not depend on the row, so compile them
    # once instead of rebuilding them for every peptide.
    mod_regexes = [re.compile(r'[+-]\d*\.\d*' if weight == '*'
                              else '[+-]{}'.format(weight))
                   for weight in ignoremods]
    not_predicted_count, predicted_count = 0, 0
    with open(peptable) as fp:
        # next() without a default would raise StopIteration on an empty
        # file, which a generator turns into a RuntimeError (PEP 479).
        header_line = next(fp, None)
        if header_line is None:
            return
        header = header_line.strip('\n').split('\t')
        yield header + ['Experimental pI', 'Predicted pI', 'Delta pI']
        for line in fp:
            fields = line.strip('\n').split('\t')
            pattern = get_first_matching_pattern(strips.keys(),
                                                 fields[stripcol])
            if pattern is False:
                # Same exception type as the former strips[False] lookup,
                # but with a message that names the offending value.
                raise KeyError('No strip pattern matches "{}"'.format(
                    fields[stripcol]))
            strip = strips[pattern]
            exp_pi = (strip['fr_width'] * int(fields[frac_col]) +
                      strip['intercept'])

            sequence = fields[seqcol]
            for regex in mod_regexes:
                sequence = regex.sub('', sequence)
            try:
                pred_pi = float(predicted_peps[sequence])
            except KeyError:
                print('CANNOT PREDICT', sequence)
                not_predicted_count += 1
                pred_pi, delta_pi = 'NA', 'NA'
            else:
                delta_pi = exp_pi - pred_pi
                predicted_count += 1
            yield fields + [exp_pi, pred_pi, delta_pi]
    print('Number of peptides without pI prediction: {}\n'
          'Number of peptides with prediction: {}\n'.format(
              not_predicted_count, predicted_count))
| 
 
4e84bf65f99a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
 
galaxyp 
parents:  
diff
changeset
 | 
99 | 
| 
 
4e84bf65f99a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
 
galaxyp 
parents:  
diff
changeset
 | 
100 | 
| 
 
4e84bf65f99a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
 
galaxyp 
parents:  
diff
changeset
 | 
def parse_commandline(argv=None):
    """Parse the command line options of the pI annotator.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            the arguments are taken from ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` with the parsed options. Column number and
        column pattern options default to ``False``, ``ignoremods`` to an
        empty list, and all other options to ``None``.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('--out', dest='outpeptable',
                        help='Output peptide table')
    parser.add_argument('-p', dest='peptable',
                        help='Peptide/PSM table with peptides, FDR, fraction '
                        'numbers. Used to calculate pI shift.')
    parser.add_argument('-i', dest='pipeps',
                        help='A tab-separated txt file with peptide seq, '
                        'pI value')
    parser.add_argument('--pepcolpattern', dest='pepcolpattern',
                        help='Peptide sequence column pattern in peptide '
                        'table.', default=False, type=str)
    parser.add_argument('--pepcol', dest='pepcol',
                        help='Peptide sequence column number in peptide '
                        'table. First column is 1.',
                        default=False, type=int)
    parser.add_argument('--fraccolpattern', dest='frac_colpattern',
                        help='Fraction number column pattern in peptide '
                        'table.', default=False, type=str)
    parser.add_argument('--fraccol', dest='frac_col',
                        help='Fraction number column number in peptide '
                        'table. First column is 1.',
                        default=False, type=int)
    parser.add_argument('--ignoremods', dest='ignoremods',
                        help='Regex to identify modification weights to be '
                        'ignored.', default=[], nargs='+', type=str)
    parser.add_argument('--stripcolpattern', dest='stripcolpattern',
                        help='Strip name column pattern in peptide '
                        'table.', type=str, default=False)
    parser.add_argument('--stripcol', dest='stripcol',
                        help='Strip name column number in peptide table. '
                        'Will be used to detect strips if multiple are '
                        'present using pattern passed with --strippatterns. '
                        'First column is nr. 1.',
                        default=False, type=int)
    parser.add_argument('--strippatterns', dest='pipatterns',
                        help='Patterns to detect different pI ranges from '
                        'e.g. file name in peptide table', nargs='+')
    parser.add_argument('--intercepts', dest='intercepts',
                        help='pI Intercept of strips', nargs='+', type=float)
    parser.add_argument('--widths', dest='fr_width', nargs='+',
                        help='Strip fraction widths in pI', type=float)
    return parser.parse_args(sys.argv[1:] if argv is None else argv)
| 
 
4e84bf65f99a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
 
galaxyp 
parents:  
diff
changeset
 | 
142 | 
| 
 
4e84bf65f99a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
 
galaxyp 
parents:  
diff
changeset
 | 
143 | 
| 
 
4e84bf65f99a
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
 
galaxyp 
parents:  
diff
changeset
 | 
# Run the annotator only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
