Mercurial > repos > galaxyp > hirieftools
comparison peptide_pi_annotator.py @ 0:4e84bf65f99a draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
| author | galaxyp |
|---|---|
| date | Mon, 22 May 2017 05:08:04 -0400 |
| parents | |
| children | 70757404c4f6 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4e84bf65f99a |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import re | |
| 4 import sys | |
| 5 import argparse | |
| 6 | |
| 7 | |
def main():
    """Command-line entry point: write a pI-annotated copy of a peptide table.

    When the script is run without arguments, '-h' is appended so that the
    help text is shown. Otherwise each strip pattern is paired by position
    with its intercept and fraction width, and every row produced by
    annotate_peptable is written tab-separated to the output peptide table.
    """
    if not sys.argv[1:]:
        # No arguments given: ask argparse for the help text instead.
        sys.argv.append('-h')
    args = parse_commandline()
    # Positional indexing (rather than zip) so that a missing intercept or
    # width for a pattern still raises instead of being silently dropped.
    strips = {
        pattern: {'intercept': args.intercepts[idx],
                  'fr_width': args.fr_width[idx]}
        for idx, pattern in enumerate(args.pipatterns)
    }
    with open(args.outpeptable, 'w') as fp:
        rows = annotate_peptable(args.pipeps, args.peptable, args.pepcol,
                                 args.frac_col, args.stripcol, strips,
                                 args.ignoremods)
        for row in rows:
            fp.write('\t'.join(map(str, row)))
            fp.write('\n')
| 23 | |
| 24 | |
def get_first_matching_pattern(patterns, string):
    """Return the first regex in ``patterns`` that is found in ``string``.

    Patterns are tried in iteration order using ``re.search``. ``False`` is
    returned when none of them matches.
    """
    hits = (candidate for candidate in patterns
            if re.search(candidate, string))
    return next(hits, False)
| 30 | |
| 31 | |
def annotate_peptable(predicted_peps_fn, peptable, seqcol, frac_col, stripcol,
                      strips, ignoremods):
    """Yield the rows of a peptide table with three pI columns appended.

    The first item yielded is the header row extended with
    'Experimental pI', 'Predicted pI' and 'Delta pI'. Every following item is
    a data row (list of strings) extended with the experimental pI, the
    predicted pI and their difference (experimental minus predicted). When no
    prediction exists for the (modification-stripped) sequence, the last two
    values are the string 'NA' and a 'CANNOT PREDICT' line is printed.
    A summary of predicted/unpredicted counts is printed after the last row.

    Args:
        predicted_peps_fn: Path to a tab-separated file whose first column is
            a peptide sequence and whose second column is its pI. Lines with
            fewer than two fields (e.g. blank lines) are skipped.
        peptable: Path to a tab-separated peptide table with a header line.
            An empty file yields nothing.
        seqcol: 1-based column number of the peptide sequence.
        frac_col: Column number of the fraction number. Positive values are
            treated as 1-based; other values are used unchanged as indices.
        stripcol: 1-based column number of the text the strip patterns are
            matched against.
        strips: Dict mapping a regex pattern to a dict with the keys
            'intercept' and 'fr_width'.
        ignoremods: Iterable of modification weights to remove from the
            sequence before lookup. '*' removes any signed decimal number;
            any other value is inserted into a regex after a '[+-]' prefix.

    Raises:
        KeyError: If no pattern in ``strips`` matches a row's strip column.
    """
    if frac_col > 0:
        frac_col -= 1

    # Compile the modification-stripping patterns once instead of rebuilding
    # them for every row. The raw string avoids invalid escape sequences.
    mod_regexes = [
        re.compile(r'[+-]\d*\.\d*' if weight == '*'
                   else '[+-]{}'.format(weight))
        for weight in ignoremods
    ]

    predicted_peps = {}
    with open(predicted_peps_fn) as fp:
        for line in fp:
            fields = line.strip('\n').split('\t')
            if len(fields) < 2:
                # Blank or truncated line: there is no pI value to store.
                continue
            predicted_peps[fields[0]] = fields[1]

    not_predicted_count, predicted_count = 0, 0
    with open(peptable) as fp:
        header_line = next(fp, None)
        if header_line is None:
            # Empty table: end the generator cleanly rather than letting
            # StopIteration escape from next().
            return
        header = header_line.strip('\n').split('\t')
        yield header + ['Experimental pI', 'Predicted pI', 'Delta pI']
        for line in fp:
            fields = line.strip('\n').split('\t')
            strip_name = fields[stripcol - 1]
            matched = get_first_matching_pattern(strips.keys(), strip_name)
            if matched not in strips:
                raise KeyError(
                    'No strip pattern matches {!r}'.format(strip_name))
            strip = strips[matched]
            exp_pi = (strip['fr_width'] * int(fields[frac_col]) +
                      strip['intercept'])

            sequence = fields[seqcol - 1]
            for mod_regex in mod_regexes:
                sequence = mod_regex.sub('', sequence)
            try:
                pred_pi = float(predicted_peps[sequence])
            except KeyError:
                print('CANNOT PREDICT', sequence)
                not_predicted_count += 1
                pred_pi, delta_pi = 'NA', 'NA'
            else:
                delta_pi = exp_pi - pred_pi
                predicted_count += 1
            yield fields + [exp_pi, pred_pi, delta_pi]
        print('Number of peptides without pI prediction: {}\n'
              'Number of peptides with predicion: {}\n'.format(
                  not_predicted_count, predicted_count))
| 72 | |
| 73 | |
def parse_commandline():
    """Parse the script's command-line options from ``sys.argv``.

    Returns:
        The argparse namespace with these attributes: outpeptable, peptable,
        pipeps, pepcol, frac_col, ignoremods, stripcol, pipatterns,
        intercepts and fr_width. The list-valued options (ignoremods,
        pipatterns, intercepts, fr_width) accept one or more values;
        intercepts and fr_width are converted to float.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('--out', dest='outpeptable', help='Output peptide '
                        'table')
    # The trailing space after "calculate" keeps the concatenated help text
    # from running the two words together.
    parser.add_argument('-p', dest='peptable', help='Peptide/PSM table with '
                        'peptides, FDR, fraction numbers. Used to calculate '
                        'pI shift.')
    parser.add_argument('-i', dest='pipeps', help='A tab-separated txt file '
                        'with peptide seq, pI value')
    parser.add_argument('--pepcol', dest='pepcol', help='Peptide sequence '
                        'column number in peptide table. First column is 1.',
                        default=False, type=int)
    parser.add_argument('--fraccol', dest='frac_col', help='Fraction number '
                        'column number in peptide table. First column is 1.',
                        type=int)
    parser.add_argument('--ignoremods', dest='ignoremods', help='Regex to '
                        'identify modification weights to be ignored.',
                        default=[], nargs='+', type=str)
    parser.add_argument('--stripcol', dest='stripcol', help='Strip name '
                        'column number in peptide table. Will be used to '
                        'detect strips if multiple are present using pattern '
                        'passed with --strippatterns. First column is nr. 1.',
                        default=False, type=int)
    parser.add_argument('--strippatterns', dest='pipatterns',
                        help='Patterns to detect different pI ranges from e.g.'
                        ' file name in peptide table', nargs='+')
    parser.add_argument('--intercepts', dest='intercepts',
                        help='pI Intercept of strips', nargs='+', type=float)
    parser.add_argument('--widths', dest='fr_width', nargs='+',
                        help='Strip fraction widths in pI', type=float)
    return parser.parse_args(sys.argv[1:])
| 106 | |
| 107 | |
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()
