Mercurial > repos > galaxyp > hirieftools
Side-by-side comparison of peptide_pi_annotator.py @ 1:70757404c4f6 (draft)
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
| author | galaxyp |
|---|---|
| date | Mon, 24 Jul 2017 05:25:05 -0400 |
| parents | 4e84bf65f99a |
| children | a6341e757422 |
Comparison legend: equal, deleted, inserted, replaced
| 0:4e84bf65f99a | 1:70757404c4f6 |
|---|---|
| 8 def main(): | 8 def main(): |
| 9 if sys.argv[1:] == []: | 9 if sys.argv[1:] == []: |
| 10 sys.argv.append('-h') | 10 sys.argv.append('-h') |
| 11 args = parse_commandline() | 11 args = parse_commandline() |
| 12 strips = {} | 12 strips = {} |
| 13 if args.frac_col > 0: | |
| 14 frac_col = args.frac_col - 1 | |
| 15 elif args.frac_col: | |
| 16 frac_col = args.frac_col | |
| 17 elif args.frac_colpattern: | |
| 18 frac_col = get_col_by_pattern(args.peptable, args.frac_colpattern) | |
| 19 else: | |
| 20 raise RuntimeError('Must define fraction column') | |
| 21 if args.stripcol > 0: | |
| 22 stripcol = args.stripcol - 1 | |
| 23 elif args.stripcol: | |
| 24 stripcol = args.stripcol | |
| 25 elif args.stripcolpattern: | |
| 26 stripcol = get_col_by_pattern(args.peptable, args.stripcolpattern) | |
| 27 else: | |
| 28 raise RuntimeError('Must define strip column') | |
| 29 if args.pepcol: | |
| 30 pepcol = args.pepcol - 1 | |
| 31 elif args.pepcolpattern: | |
| 32 pepcol = get_col_by_pattern(args.peptable, args.pepcolpattern) | |
| 33 else: | |
| 34 raise RuntimeError('Must define peptide sequence column') | |
| 13 for i, strip in enumerate(args.pipatterns): | 35 for i, strip in enumerate(args.pipatterns): |
| 14 strips[strip] = {'intercept': args.intercepts[i], | 36 strips[strip] = {'intercept': args.intercepts[i], |
| 15 'fr_width': args.fr_width[i]} | 37 'fr_width': args.fr_width[i]} |
| 16 with open(args.outpeptable, 'w') as fp: | 38 with open(args.outpeptable, 'w') as fp: |
| 17 for outline in annotate_peptable(args.pipeps, args.peptable, | 39 for outline in annotate_peptable(args.pipeps, args.peptable, pepcol, |
| 18 args.pepcol, args.frac_col, | 40 frac_col, stripcol, strips, |
| 19 args.stripcol, strips, | |
| 20 args.ignoremods): | 41 args.ignoremods): |
| 21 fp.write('\t'.join([str(x) for x in outline])) | 42 fp.write('\t'.join([str(x) for x in outline])) |
| 22 fp.write('\n') | 43 fp.write('\n') |
| 23 | 44 |
| 24 | 45 |
| 27 if re.search(pattern, string): | 48 if re.search(pattern, string): |
| 28 return pattern | 49 return pattern |
| 29 return False | 50 return False |
| 30 | 51 |
| 31 | 52 |
| 53 def get_col_by_pattern(peptable, colpattern): | |
| 54 with open(peptable) as fp: | |
| 55 header = next(fp).strip('\n').split('\t') | |
| 56 for ix, field in enumerate(header): | |
| 57 if colpattern in field: | |
| 58 return ix | |
| 59 | |
| 60 | |
| 32 def annotate_peptable(predicted_peps_fn, peptable, seqcol, frac_col, stripcol, | 61 def annotate_peptable(predicted_peps_fn, peptable, seqcol, frac_col, stripcol, |
| 33 strips, ignoremods): | 62 strips, ignoremods): |
| 34 if frac_col > 0: | |
| 35 frac_col -= 1 | |
| 36 predicted_peps = {} | 63 predicted_peps = {} |
| 37 with open(predicted_peps_fn) as fp: | 64 with open(predicted_peps_fn) as fp: |
| 38 for line in fp: | 65 for line in fp: |
| 39 line = line.strip('\n').split('\t') | 66 line = line.strip('\n').split('\t') |
| 40 predicted_peps[line[0]] = line[1] | 67 predicted_peps[line[0]] = line[1] |
| 43 header = next(fp).strip('\n').split('\t') | 70 header = next(fp).strip('\n').split('\t') |
| 44 yield header + ['Experimental pI', 'Predicted pI', 'Delta pI'] | 71 yield header + ['Experimental pI', 'Predicted pI', 'Delta pI'] |
| 45 for line in fp: | 72 for line in fp: |
| 46 line = line.strip('\n').split('\t') | 73 line = line.strip('\n').split('\t') |
| 47 strip = strips[get_first_matching_pattern(strips.keys(), | 74 strip = strips[get_first_matching_pattern(strips.keys(), |
| 48 line[stripcol - 1])] | 75 line[stripcol])] |
| 49 exp_pi = (strip['fr_width'] * int(line[frac_col]) + | 76 exp_pi = (strip['fr_width'] * int(line[frac_col]) + |
| 50 strip['intercept']) | 77 strip['intercept']) |
| 51 | 78 |
| 52 sequence = line[seqcol - 1] | 79 sequence = line[seqcol] |
| 53 for weight in ignoremods: | 80 for weight in ignoremods: |
| 54 if weight == '*': | 81 if weight == '*': |
| 55 regex = '[+-]\d*\.\d*' | 82 regex = '[+-]\d*\.\d*' |
| 56 else: | 83 else: |
| 57 regex = '[+-]{}'.format(weight) | 84 regex = '[+-]{}'.format(weight) |
| 79 parser.add_argument('-p', dest='peptable', help='Peptide/PSM table with ' | 106 parser.add_argument('-p', dest='peptable', help='Peptide/PSM table with ' |
| 80 'peptides, FDR, fraction numbers. Used to calculate' | 107 'peptides, FDR, fraction numbers. Used to calculate' |
| 81 'pI shift.') | 108 'pI shift.') |
| 82 parser.add_argument('-i', dest='pipeps', help='A tab-separated txt file ' | 109 parser.add_argument('-i', dest='pipeps', help='A tab-separated txt file ' |
| 83 'with peptide seq, pI value') | 110 'with peptide seq, pI value') |
| 111 parser.add_argument('--pepcolpattern', dest='pepcolpattern', | |
| 112 help='Peptide sequence column pattern in peptide ' | |
| 113 'table.', default=False, type=str) | |
| 84 parser.add_argument('--pepcol', dest='pepcol', help='Peptide sequence ' | 114 parser.add_argument('--pepcol', dest='pepcol', help='Peptide sequence ' |
| 85 'column number in peptide table. First column is 1.', | 115 'column number in peptide table. First column is 1.', |
| 86 default=False, type=int) | 116 default=False, type=int) |
| 117 parser.add_argument('--fraccolpattern', dest='frac_colpattern', | |
| 118 help='Fraction number column pattern in peptide ' | |
| 119 'table.', default=False, type=str) | |
| 87 parser.add_argument('--fraccol', dest='frac_col', help='Fraction number ' | 120 parser.add_argument('--fraccol', dest='frac_col', help='Fraction number ' |
| 88 'column number in peptide table. First column is 1.', | 121 'column number in peptide table. First column is 1.', |
| 89 type=int) | 122 default=False, type=int) |
| 90 parser.add_argument('--ignoremods', dest='ignoremods', help='Regex to ' | 123 parser.add_argument('--ignoremods', dest='ignoremods', help='Regex to ' |
| 91 'identify modification weights to be ignored.', | 124 'identify modification weights to be ignored.', |
| 92 default=[], nargs='+', type=str) | 125 default=[], nargs='+', type=str) |
| 126 parser.add_argument('--stripcolpattern', dest='stripcolpattern', | |
| 127 help='Strip name column pattern in peptide ' | |
| 128 'table.', type=str, default=False) | |
| 93 parser.add_argument('--stripcol', dest='stripcol', help='Strip name ' | 129 parser.add_argument('--stripcol', dest='stripcol', help='Strip name ' |
| 94 'column number in peptide table. Will be used to ' | 130 'column number in peptide table. Will be used to ' |
| 95 'detect strips if multiple are present using pattern ' | 131 'detect strips if multiple are present using pattern ' |
| 96 'passed with --strippatterns. First column is nr. 1.', | 132 'passed with --strippatterns. First column is nr. 1.', |
| 97 default=False, type=int) | 133 default=False, type=int) |
