Mercurial > repos > galaxyp > openms_idposteriorerrorprobability
annotate test-data/examples/simulation/FASTAProteinAbundanceSampling.py @ 18:6daaa75ccb99 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
| author | galaxyp |
|---|---|
| date | Sun, 13 Dec 2020 15:03:50 +0000 |
| parents | |
| children |
| rev | line source |
|---|---|
|
18
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
1 # -------------------------------------------------------------------------- |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
2 # OpenMS -- Open-Source Mass Spectrometry |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
3 # -------------------------------------------------------------------------- |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
4 # Copyright The OpenMS Team -- Eberhard Karls University Tuebingen, |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
5 # ETH Zurich, and Freie Universitaet Berlin 2002-2020. |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
6 # |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
7 # This software is released under a three-clause BSD license: |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
8 # * Redistributions of source code must retain the above copyright |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
9 # notice, this list of conditions and the following disclaimer. |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
10 # * Redistributions in binary form must reproduce the above copyright |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
11 # notice, this list of conditions and the following disclaimer in the |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
12 # documentation and/or other materials provided with the distribution. |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
13 # * Neither the name of any author or any participating institution |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
14 # may be used to endorse or promote products derived from this software |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
15 # without specific prior written permission. |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
16 # For a full list of authors, refer to the file AUTHORS. |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
17 # -------------------------------------------------------------------------- |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
19 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
20 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
21 # ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
22 # INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
23 # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
24 # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
25 # OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
26 # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
27 # OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
28 # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
29 # |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
30 # -------------------------------------------------------------------------- |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
31 # $Maintainer: Chris Bielow $ |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
32 # $Authors: Chris Bielow $ |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
33 # -------------------------------------------------------------------------- |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
34 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
35 import re |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
36 import random |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
37 import math |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
38 import sys |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
39 import argparse |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
40 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
41 ## holds FASTA header + sequence |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
class FASTAEntry:
    """Container for one FASTA record.

    Attributes:
        header: the header line exactly as read from the file (including
            its trailing newline, if any).
        sequence: all sequence lines of the record concatenated, with their
            newlines preserved.
    """

    def __init__(self, header="", sequence=""):
        # Defaults keep the historical no-argument construction working.
        self.header = header
        self.sequence = sequence


## grab entries from FASTA file
def nextEntry(fileobj):
    """Yield one FASTAEntry per record found in an open FASTA file.

    The first line of the file is taken as the header of the first record.
    Every following line that starts with '>' begins a new record; all other
    lines are appended (unchanged, newline included) to the current record's
    sequence.

    Args:
        fileobj: an open text file (or file-like object) supporting
            readline() and line iteration.

    Yields:
        A new FASTAEntry for each record. Each yielded object is independent,
        so callers may modify or store it without it being overwritten when
        the generator advances.

    An empty file yields nothing.
    """
    header = fileobj.readline()
    if not header:
        # Nothing to read: do not fabricate an empty record.
        return

    chunks = []
    for line in fileobj:
        if line.startswith('>'):
            # A new header closes the record collected so far.
            yield FASTAEntry(header, "".join(chunks))
            header = line
            chunks = []
        else:
            chunks.append(line)

    # Emit the final record (no trailing header line terminates it).
    yield FASTAEntry(header, "".join(chunks))
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
58 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
59 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
60 ## sample abundance from Gaussian in log space |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
def sampleAbundance(mu=3, sigma=1):
    """Draw one abundance value from a Gaussian in log space.

    A value is sampled from a normal distribution with mean `mu` and
    standard deviation `sigma`, then exponentiated, so the result is always
    positive.

    Args:
        mu: mean of the Gaussian in log space.
        sigma: standard deviation of the Gaussian in log space.

    Returns:
        exp(x) as a float, where x is the Gaussian sample.
    """
    log_abundance = random.gauss(mu, sigma)
    return math.exp(log_abundance)
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
63 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
64 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
65 def main(argv): |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
66 ## we use ArgumentParser, which requires 2.7 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
67 if sys.version_info < (2, 7): |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
68 raise "This script requires python 2.7 or greater" |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
69 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
70 ## add weight filtering functionality if BioPython is available |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
71 try: |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
72 from Bio.SeqUtils.ProtParam import ProteinAnalysis |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
73 has_biopython = 1 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
74 except : |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
75 has_biopython = 0 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
76 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
77 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
78 parser = argparse.ArgumentParser(description='Add abundance to FASTA files.') |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
79 parser.add_argument('infile', type=argparse.FileType('r'), help='Input FASTA file') |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
80 parser.add_argument('outfile', type=argparse.FileType('w'), help='Output FASTA file') |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
81 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
82 parser.add_argument('--mu', dest='mu', action='store', default=3, help='mean of gaussian in log space') |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
83 parser.add_argument('--sigma', dest='sigma', action='store', default=1, help='sd of gaussian in log space') |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
84 parser.add_argument('--sample', dest='sample', action='store', default=0, help='Number of entries to keep (for sampling a bigger FASTA file)') |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
85 parser.add_argument('--random', dest='random', action='store_true', help='Randomly shuffle entries before sampling (only if --sample is given). If not given, the first \'X\' samples are used.') |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
86 if (has_biopython): |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
87 parser.add_argument('--weight_low', dest='weight_low', action='store', default=0, help='minimum molecular weight of protein') |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
88 parser.add_argument('--weight_up', dest='weight_up', action='store', default=0, help='Maximum molecular weight of protein (use 0 for unlimited)') |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
89 else: |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
90 print ("Warning: protein weight filtering not supported, as BioPython module is not installed.") |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
91 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
92 ## argument parsing |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
93 args = parser.parse_args() |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
94 fileobj = args.infile |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
95 fileoutobj = args.outfile |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
96 sample_size = int(args.sample) |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
97 sample_random = bool(args.random) |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
98 if (has_biopython): |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
99 weight_low = float(args.weight_low) |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
100 weight_up = float(args.weight_up) |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
101 if (weight_up <= 0): weight_up = sys.float_info.max |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
102 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
103 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
104 ## list of final entries |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
105 fasta_entries = [] |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
106 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
107 for entry in nextEntry(fileobj): |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
108 header = entry.header |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
109 ## check if it contains 'intensity'? |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
110 rep = re.compile(r"\[# *(.*) *#\]") |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
111 m = rep.search(header) |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
112 header_new = "" |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
113 other = [] |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
114 if (m): |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
115 header_new = header.replace(m.group(0), "") ## delete meta |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
116 for element in m.group(1).split(','): |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
117 #print "element:", element |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
118 if (element.find("intensity") == -1): |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
119 other.append(element) |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
120 else: |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
121 header_new = header ## nothing to replace |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
122 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
123 ## create new metainfo array |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
124 i = "intensity=" + str(sampleAbundance(float(args.mu), float(args.sigma))) |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
125 other.append(i) |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
126 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
127 entry.header = header_new.rstrip() + "[# " + (", ").join(other) + " #]" |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
128 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
129 if (has_biopython): |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
130 sequence = "".join(entry.sequence.split("\n")) |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
131 ## |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
132 ## BioPython does not like some AA letters - they need replacement |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
133 ## |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
134 ## replace "U" (Selenocysteine) with "C" (Cysteine) |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
135 sequence = sequence.replace("U","C") |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
136 ## replace "X" (unknown) with "P" (Proline) [arbitrary choice - but weight of 115 is very close to averagine] |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
137 sequence = sequence.replace("X","P") |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
138 ## replace "B" (Asparagine or aspartic acid) with "N" (Asparagine) |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
139 sequence = sequence.replace("B","N") |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
140 ## replace "Z" (Glutamine or glutamic acid) with "Q" (Glutamine) |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
141 sequence = sequence.replace("Z","Q") |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
142 ## replace "Z" (Glutamine or glutamic acid) with "Q" (Glutamine) |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
143 sequence = sequence.replace("Z","Q") |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
144 ## replace "J" (Leucine or Isoleucine) with "L" (Leucine) |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
145 sequence = sequence.replace("J","L") |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
146 analysed_seq = ProteinAnalysis(sequence) |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
147 weight = analysed_seq.molecular_weight() |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
148 if (not(weight_low <= weight and weight <= weight_up)): |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
149 continue |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
150 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
151 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
152 fasta_entries.append(entry.header + "\n" + entry.sequence) |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
153 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
154 ## only read to sample size (the rest is thrown away anyway) |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
155 if (sample_size > 0 and not(sample_random)): |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
156 if (len(fasta_entries) >= sample_size): |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
157 break |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
158 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
159 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
160 ## select subset (if required) |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
161 if (sample_size > 0): |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
162 indices = range(0,len(fasta_entries)) |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
163 ## random sampling only makes sense if we take a subset |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
164 if (sample_random and sample_size < len(fasta_entries)): |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
165 random.shuffle(indices) |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
166 indices = [indices[i] for i in range(0,sample_size)] |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
167 fasta_entries = [fasta_entries[i] for i in indices] |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
168 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
169 ## write to file |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
170 for entry in fasta_entries: |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
171 fileoutobj.write(entry) |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
172 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
173 print ("Generated " + str(len(fasta_entries)) + " protein sequences") |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
174 |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
175 if __name__ == "__main__": |
|
6daaa75ccb99
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit 6e7368b7f178fbd1f08c28eea1b538add6943a65-dirty"
galaxyp
parents:
diff
changeset
|
176 main(sys.argv) |
