Mercurial > repos > galaxyp > hirieftools
annotate align_dbspec.py @ 2:c093af6f2a6c draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
author | galaxyp |
---|---|
date | Fri, 01 Sep 2017 03:14:37 -0400 |
parents | |
children |
rev | line source |
---|---|
2
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
1 #!/usr/bin/env python |
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
2 import sys |
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
3 import os |
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
4 import argparse |
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
5 import re |
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
6 from Bio import SeqIO |
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
7 |
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
8 |
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
def create_spectra_maps(specfiles, dbfiles, frregex, firstfr):
    """Align database files with spectra files by fraction number.

    The db file at position ``i`` of ``dbfiles`` is treated as fraction
    ``i + firstfr``. Fraction numbers of the spectra files are extracted
    with ``frregex`` (see ``get_fn_fractionmap``).

    Returns a tuple ``(normal_fns, rerun_map, poolmap)``:

    normal_fns -- set of ``(dbfile, specfile)`` tuples for fractions that
                  have spectra and are not the end point of a pool
    rerun_map  -- list of lists of ``specfiles`` indices, one list per
                  fraction number shared by more than one spectra file,
                  ordered by fraction number
    poolmap    -- dict mapping the ``specfiles`` index of the first spectra
                  file of a fraction to the list of ``dbfiles`` indices that
                  are pooled into it (the preceding fractions without
                  spectra plus the fraction itself)
    """
    specrange = get_fn_fractionmap(specfiles, frregex)
    poolmap, normal_fns = {}, set()
    to_pool = []
    for i, dbfile in enumerate(dbfiles):
        num = i + firstfr
        if num not in specrange:
            # No spectra for this fraction: hold it until the next fraction
            # that does have spectra shows up.
            to_pool.append(i)
            continue
        spec_ix = specrange[num][0]
        if to_pool:
            # This fraction closes a run of spectra-less fractions.
            to_pool.append(i)
            poolmap[spec_ix] = to_pool
            to_pool = []
        else:
            # Index by loop position, exactly like the pooled indices above;
            # indexing with ``num - 1`` is only correct when firstfr == 1.
            normal_fns.add((dbfile, specfiles[spec_ix]))
    # NOTE(review): db files left in to_pool here (fractions after the last
    # one with spectra) end up in none of the returned maps -- confirm that
    # dropping them is intended.
    rerun_map = [specrange[num] for num in sorted(specrange)
                 if len(specrange[num]) > 1]
    return normal_fns, rerun_map, poolmap
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
38 |
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
39 |
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
def get_fn_fractionmap(files, frregex):
    """Group file indices by the fraction number found in each filename.

    ``frregex`` is applied with ``re.sub`` and every match is replaced by
    its first capture group; the resulting string must be an integer, so
    the regex has to consume everything in the name except the number.

    Returns a dict mapping fraction number (int) to the list of positions
    in ``files`` carrying that number, in input order.

    Raises ValueError when a filename is not reduced to an integer.
    """
    pattern = re.compile(frregex)
    fnfrmap = {}
    for f_ix, fn in enumerate(files):
        reduced = pattern.sub(r'\1', fn)
        try:
            fnum = int(reduced)
        except ValueError:
            # Same exception type as a bare int() failure, but the message
            # points at the regex/filename pair that needs fixing.
            raise ValueError(
                'Fraction regex {!r} did not reduce filename {!r} to a '
                'fraction number (got {!r})'.format(frregex, fn, reduced))
        fnfrmap.setdefault(fnum, []).append(f_ix)
    return fnfrmap
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
49 |
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
50 |
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
def pool_fasta_files(poolfiles):
    """Yield the FASTA records of all files in poolfiles, skipping repeats.

    A record is a repeat when an earlier record had the same id and the
    same upper-cased sequence; the first occurrence is the one yielded.
    """
    seen = set()
    for fasta_fn in poolfiles:
        for record in SeqIO.parse(fasta_fn, 'fasta'):
            key = (record.id, str(record.seq.upper()))
            if key in seen:
                continue
            seen.add(key)
            yield record
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
65 |
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
66 |
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
def write_pooled_fasta(poolmap, specnames, dbfiles):
    """Write one de-duplicated FASTA file per entry of poolmap.

    Each key of poolmap is an index into specnames and each value a list of
    indices into dbfiles. The output file is placed in 'aligned_out' and
    named after the basename of the spectra file.
    """
    for spec_ix, db_indices in poolmap.items():
        pooled_fn = os.path.join('aligned_out',
                                 os.path.basename(specnames[spec_ix]))
        first_db = dbfiles[db_indices[0]]
        last_db = dbfiles[db_indices[-1]]
        print('Pooling FASTA files {} - {} into: {}'.format(
            first_db, last_db, pooled_fn))
        with open(pooled_fn, 'w') as out_fp:
            sources = [dbfiles[db_ix] for db_ix in db_indices]
            SeqIO.write(pool_fasta_files(sources), out_fp, 'fasta')
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
77 |
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
78 |
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
def write_nonpooled_fasta(fractions):
    """Symlink non-pooled db files into 'aligned_out'.

    fractions is an iterable of (dbfile, specfile) pairs. Each db file is
    linked under the basename of its spectra file.
    """
    links = [(fr[0], os.path.join('aligned_out', os.path.basename(fr[1])))
             for fr in fractions]
    print('Symlinking non-pooled non-rerun files', links)
    for dbfile, linkname in links:
        # A relative symlink target is resolved from the directory holding
        # the link (aligned_out), not from the working directory, so the
        # target is made absolute to keep relative db paths from dangling.
        os.symlink(os.path.abspath(dbfile), linkname)
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
86 |
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
87 |
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
def copy_rerun_fasta(rerun_map, specnames):
    """Symlink the db of a fraction's first run to each of its reruns.

    rerun_map is a list of lists of indices into specnames; in every list
    the first index is the original run and the rest are reruns. For each
    rerun a link named after the rerun's basename is created in
    'aligned_out'. The link target is the bare basename of the first run,
    i.e. a relative link to the sibling entry in 'aligned_out'.
    """
    for spec_indices in rerun_map:
        # Basenames are used here for the same reason as in the other
        # writers: entries in 'aligned_out' are named by basename, so a
        # spectra name with a directory part must not leak into the link.
        src = os.path.basename(specnames[spec_indices[0]])
        for spec_ix in spec_indices[1:]:
            outfn = os.path.basename(specnames[spec_ix])
            print('Symlinking {} to {}'.format(src, outfn))
            os.symlink(src, os.path.join('aligned_out', outfn))
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
94 |
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
95 |
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
def main():
    """Read the spectra name list and write the aligned db files.

    The file given by --specnames is read as one spectra filename per
    line. The names are aligned with the db files, after which pooled,
    non-pooled and rerun outputs are written in that order.
    """
    opts = parse_commandline()
    with open(opts.spectranames) as names_fp:
        raw_names = names_fp.read().strip().split('\n')
    spectranames = [name.strip() for name in raw_names]
    maps = create_spectra_maps(spectranames, opts.dbfiles,
                               opts.frspecregex, opts.firstfr)
    vanilla_fr, rerun_map, poolmap = maps
    write_pooled_fasta(poolmap, spectranames, opts.dbfiles)
    write_nonpooled_fasta(vanilla_fr)
    copy_rerun_fasta(rerun_map, spectranames)
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
107 |
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
108 |
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
def parse_commandline():
    """Parse sys.argv and return the argparse namespace.

    All four options are required: --specnames (stored as spectranames),
    --dbfiles (one or more values), --frspec (stored as frspecregex) and
    --firstfr (converted to int).
    """
    specnames_help = ('File '
                      'containing spectra filenames with fractions. '
                      'Test data example illustrates reruns (fr03b, 09b) and'
                      ' pooled samples (fr05-09 are inside fr09 and fr09b).')
    frspec_help = ('Fraction regex '
                   'to detect spectra fraction numbers')

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('--specnames', dest='spectranames', required=True,
                        help=specnames_help)
    parser.add_argument('--dbfiles', dest='dbfiles', nargs='+',
                        required=True, help='FASTA db files')
    parser.add_argument('--frspec', dest='frspecregex', required=True,
                        help=frspec_help)
    parser.add_argument('--firstfr', dest='firstfr', type=int,
                        required=True, help='First fraction nr')
    cli_args = sys.argv[1:]
    return parser.parse_args(cli_args)
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
124 |
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
125 |
c093af6f2a6c
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
galaxyp
parents:
diff
changeset
|
# Run the alignment only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()