Mercurial > repos > galaxyp > meta_proteome_analyzer
annotate meta_proteome_analyzer.xml @ 2:eeb17bb3d332 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/meta_proteome_analyzer commit 69cfb20d049ddeccef197865cc07eac5363ea8ea
| author | galaxyp |
|---|---|
| date | Sat, 04 Mar 2017 06:59:41 -0500 |
| parents | b41e6d379c5f |
| children | 763d5a3cd2b9 |
| rev | line source |
|---|---|
| 0 | 1 <tool id="meta_proteome_analyzer" name="MetaProteomeAnalyzer" version="1.4.1"> |
| 2 <description> | |
| 3 functional and taxonomic characterization of proteins | |
| 4 </description> | |
| 5 <requirements> | |
| 6 <requirement type="package" version="1.4.1">mpa-portable</requirement> | |
| 7 </requirements> | |
| 8 <command> | |
| 9 <![CDATA[ | |
| 10 #set $temp_stderr = "mpa_stderr" | |
| 11 | |
|
2
eeb17bb3d332
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/meta_proteome_analyzer commit 69cfb20d049ddeccef197865cc07eac5363ea8ea
galaxyp
parents:
1
diff
changeset
|
12 cwd=`pwd` && |
|
eeb17bb3d332
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/meta_proteome_analyzer commit 69cfb20d049ddeccef197865cc07eac5363ea8ea
galaxyp
parents:
1
diff
changeset
|
13 mkdir -p output_dir && |
| 0 | 14 ## copy mpa conf dir to working dir |
|
2
eeb17bb3d332
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/meta_proteome_analyzer commit 69cfb20d049ddeccef197865cc07eac5363ea8ea
galaxyp
parents:
1
diff
changeset
|
15 jar_dir=`mpa-portable -get_jar_dir` && |
|
eeb17bb3d332
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/meta_proteome_analyzer commit 69cfb20d049ddeccef197865cc07eac5363ea8ea
galaxyp
parents:
1
diff
changeset
|
16 cp -R \$jar_dir/conf . && |
| 0 | 17 |
| 18 ## echo the search engines to run | |
| 1 | 19 #set $search_engines = str($search_engines_options.engines).split(',') |
|
2
eeb17bb3d332
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/meta_proteome_analyzer commit 69cfb20d049ddeccef197865cc07eac5363ea8ea
galaxyp
parents:
1
diff
changeset
|
20 echo "$search_engines_options.engines" && |
|
eeb17bb3d332
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/meta_proteome_analyzer commit 69cfb20d049ddeccef197865cc07eac5363ea8ea
galaxyp
parents:
1
diff
changeset
|
21 echo "DB: ${input_database.display_name} sequences: ${input_database.metadata.sequences}" && |
| 0 | 22 |
| 23 #for $mgf in $peak_lists: | |
| 24 #set $input_name = $mgf.display_name.split('/')[-1].replace(".mgf", "") + ".mgf" | |
|
2
eeb17bb3d332
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/meta_proteome_analyzer commit 69cfb20d049ddeccef197865cc07eac5363ea8ea
galaxyp
parents:
1
diff
changeset
|
25 ln -s -f '${mgf}' '${input_name}' && |
| 0 | 26 #set $encoded_id = $__app__.security.encode_id($mgf.id) |
|
2
eeb17bb3d332
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/meta_proteome_analyzer commit 69cfb20d049ddeccef197865cc07eac5363ea8ea
galaxyp
parents:
1
diff
changeset
|
27 echo "Spectrums:${mgf.display_name}(API:${encoded_id}) " && |
| 0 | 28 #end for |
|
2
eeb17bb3d332
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/meta_proteome_analyzer commit 69cfb20d049ddeccef197865cc07eac5363ea8ea
galaxyp
parents:
1
diff
changeset
|
29 cp "${input_database}" input_database.fasta && |
| 0 | 30 |
| 31 ###################### | |
| 32 ## MPA ## | |
| 33 ###################### | |
| 34 (mpa-portable de.mpa.cli.CmdLineInterface -Djava.awt.headless=true -Xmx2048m | |
| 35 -spectrum_files \$cwd | |
| 36 -database input_database.fasta | |
| 37 -missed_cleav $missed_cleavages | |
| 38 -prec_tol ${precursor_options.prec_tol}${precursor_options.prec_tol_units} | |
| 1 | 39 -frag_tol ${precursor_options.frag_tol}Da |
| 40 -xtandem #if 'X!Tandem' in $search_engines then 1 else 0# | |
| 41 -comet #if 'Comet' in $search_engines then 1 else 0# | |
| 42 -msgf #if 'MSGF' in $search_engines then 1 else 0# | |
| 0 | 43 -output_folder output_dir |
| 44 -threads "\${GALAXY_SLOTS:-12}" | |
| 45 2> $temp_stderr) && | |
|
2
eeb17bb3d332
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/meta_proteome_analyzer commit 69cfb20d049ddeccef197865cc07eac5363ea8ea
galaxyp
parents:
1
diff
changeset
|
46 mv ./output_dir/*_metaproteins.csv metaproteins.csv && |
|
eeb17bb3d332
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/meta_proteome_analyzer commit 69cfb20d049ddeccef197865cc07eac5363ea8ea
galaxyp
parents:
1
diff
changeset
|
47 mv ./output_dir/*_metaprotein_taxa.csv metaprotein_taxa.csv && |
|
eeb17bb3d332
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/meta_proteome_analyzer commit 69cfb20d049ddeccef197865cc07eac5363ea8ea
galaxyp
parents:
1
diff
changeset
|
48 mv ./output_dir/*_peptides.csv peptides.csv && |
|
eeb17bb3d332
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/meta_proteome_analyzer commit 69cfb20d049ddeccef197865cc07eac5363ea8ea
galaxyp
parents:
1
diff
changeset
|
49 mv ./output_dir/*_proteins.csv proteins.csv && |
|
eeb17bb3d332
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/meta_proteome_analyzer commit 69cfb20d049ddeccef197865cc07eac5363ea8ea
galaxyp
parents:
1
diff
changeset
|
50 mv ./output_dir/*_psms.csv psms.csv && |
|
eeb17bb3d332
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/meta_proteome_analyzer commit 69cfb20d049ddeccef197865cc07eac5363ea8ea
galaxyp
parents:
1
diff
changeset
|
51 mv ./output_dir/*_spectrum_ids.csv spectrum_ids.csv && |
|
eeb17bb3d332
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/meta_proteome_analyzer commit 69cfb20d049ddeccef197865cc07eac5363ea8ea
galaxyp
parents:
1
diff
changeset
|
52 exit_code_for_galaxy=\$? && |
|
eeb17bb3d332
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/meta_proteome_analyzer commit 69cfb20d049ddeccef197865cc07eac5363ea8ea
galaxyp
parents:
1
diff
changeset
|
53 cat $temp_stderr 2>&1 && |
| 0 | 54 (exit \$exit_code_for_galaxy) |
| 55 | |
| 56 ]]> | |
| 57 </command> | |
| 58 <inputs> | |
| 59 <param format="fasta" name="input_database" type="data" label="Protein Database" | |
| 60 help="Select FASTA database from history"/> | |
| 61 <param name="peak_lists" format="mgf" type="data" multiple="true" label="Input Peak Lists (mgf)" | |
| 62 help="Select appropriate MGF dataset(s) from history" /> | |
| 63 <param name="missed_cleavages" type="integer" value="2" label="Maximum Missed Cleavages" | |
| 64 help="Allow peptides to contain up to this many missed enzyme cleavage sites."/> | |
| 65 <section name="precursor_options" expanded="false" title="Precursor Options"> | |
| 66 <param name="prec_tol_units" type="select" label="Precursor Ion Tolerance Units" | |
| 67 help="Select based on instrument used, as different machines provide different quality of spectra. ppm is a standard for most precursor ions"> | |
| 68 <option value="ppm">Parts per million (ppm)</option> | |
| 69 <option value="Da">Daltons</option> | |
| 70 </param> | |
| 71 <param name="prec_tol" type="float" value="10" label="Precursor Ion Tolerance" | |
| 72 help="Provide error value for precursor ion, based on instrument used. 10 ppm recommended for Orbitrap instrument"/> | |
| 73 <param name="frag_tol" type="float" value="0.5" label="Fragment Tolerance (Daltons)" | |
| 74 help="Provide error value for fragment ions, based on instrument used"/> | |
| 75 </section> | |
| 76 <!-- Search Engine Selection --> | |
| 77 <section name="search_engines_options" expanded="false" title="Search Engine Options"> | |
| 78 <param name="engines" type="select" display="checkboxes" multiple="True" label="DB-Search Engines"> | |
| 79 <help>Comet and Tide shouldn't both be selected since they use a similar algorithm.</help> | |
| 80 <option value="X!Tandem" selected="True">X!Tandem</option> | |
| 81 <option value="MSGF">MS-GF+</option> | |
| 82 <option value="Comet">Comet</option> | |
| 83 </param> | |
| 84 </section> | |
| 85 | |
| 1 | 86 <!-- |
| 87 -generate_metaproteins Turn meta-protein generation (aka. protein grouping) on or off (1: on, 0: off, default is '1'). | |
| 88 -peptide_rule The peptide rule chosen for meta-protein generation (-1: off, 0: share-one-peptide, 1: shared-peptide-subset, default is '0'). | |
| 89 -cluster_rule The sequence cluster rule chosen for meta-protein generation (-1: off, 0: UniRef100, 1: UniRef90, 2: UniRef50, default is '-1'). | |
| 90 -taxonomy_rule The taxonomy rule chosen for meta-protein generation (-1: off, 0: on superkingdom or lower, 1: on kingdom or lower, 2: on phylum or lower, 3: on class or lower, 4: on order or lower, 5: on family or lower, 6: on genus or lower, 7: on species or lower, 8: on subspecies, default is '-1'). | |
| 91 -iterative_search Turn iterative (aka. two-step) searching on or off (1: on, 0: off, default is '0'). | |
| 92 -fdr_threshold The applied FDR threshold for filtering the results (default is 0.05 == 5% FDR). | |
| 93 --> | |
| 94 | |
| 95 | |
| 0 | 96 </inputs> |
| 97 <outputs> | |
| 98 <data format="tabular" name="output_proteins" from_work_dir="proteins.csv" label="${tool.name} on ${on_string}: proteins"> | |
| 99 <actions> | |
| 100 <action name="comment_lines" type="metadata" default="1" /> | |
| 101 <action name="column_names" type="metadata" default="Protein_No,Protein_Accession,Protein_Description,Protein_Taxonomy,Sequence_Coverage,Peptide_Count,NSAF,emPAI,Spectral_Count,Isoelectric_Point,Molecular_Weight,Protein_Sequence,Peptides" /> | |
| 102 </actions> | |
| 103 </data> | |
| 104 <data format="tabular" name="output_peptides" from_work_dir="peptides.csv" label="${tool.name} on ${on_string}: peptides"> | |
| 105 <actions> | |
| 106 <action name="comment_lines" type="metadata" default="1" /> | |
| 107 <action name="column_names" type="metadata" default="Peptide_Num,Protein_Accessions,Peptide_Sequence,Protein_Count,Spectral_Count,Taxonomic_Group,Taxonomic_Rank,NCBI_Taxonomy_ID" /> | |
| 108 </actions> | |
| 109 </data> | |
| 110 <data format="tabular" name="output_PSMs" from_work_dir="psms.csv" label="${tool.name} on ${on_string}: PSMs"> | |
| 111 <actions> | |
| 112 <action name="comment_lines" type="metadata" default="1" /> | |
| 113 <action name="column_names" type="metadata" default="PSM_Num,Protein_Accessions,Peptide_Sequence,Spectrum_Title,Charge,Search_Engine,q-value,Score" /> | |
| 114 </actions> | |
| 115 </data> | |
| 116 <data format="tabular" name="output_spectrum_ids" from_work_dir="spectrum_ids.csv" label="${tool.name} on ${on_string}: spectrum_ids"> | |
| 117 <actions> | |
| 118 <action name="comment_lines" type="metadata" default="1" /> | |
| 119 <action name="column_names" type="metadata" default="Spectrum_Number,Spectrum_ID,Spectrum_Title,Peptides,Protein_Accessions" /> | |
| 120 </actions> | |
| 121 </data> | |
| 122 <data format="tabular" name="output_metaproteins" from_work_dir="metaproteins.csv" label="${tool.name} on ${on_string}: metaproteins"> | |
| 123 <actions> | |
| 124 <action name="comment_lines" type="metadata" default="1" /> | |
| 125 <action name="column_names" type="metadata" default="Meta-Protein_Num,Meta-Protein_Accession,Meta-Protein_Description,Meta-Protein_Taxonomy,Meta-Protein_UniRef100,Meta-Protein_UniRef90,Meta-Protein_UniRef50,Meta-Protein_KO,Meta-Protein_EC,Peptide_Count,Spectral_Count,Proteins,Peptides" /> | |
| 126 </actions> | |
| 127 </data> | |
| 128 <data format="tabular" name="output_metaprotein_taxa" from_work_dir="metaprotein_taxa.csv" label="${tool.name} on ${on_string}: metaprotein_taxa"> | |
| 129 <actions> | |
| 130 <action name="comment_lines" type="metadata" default="1" /> | |
| 131 <action name="column_names" type="metadata" default="Unclassified,Superkingdom,Kingdom,Phylum,Class,Order,Family,Genus,Species,Subspecies,Num_Peptides,Spectral_Count" /> | |
| 132 </actions> | |
| 133 </data> | |
| 134 </outputs> | |
| 135 <tests> | |
| 1 | 136 <test> |
| 137 <param name="peak_lists" value="Test416Ebendorf.mgf" ftype="mgf"/> | |
| 138 <param name="input_database" value="searchdb.fa" ftype="fasta"/> | |
| 139 <param name="missed_cleavages" value="2"/> | |
| 140 <param name="prec_tol_units" value="ppm"/> | |
| 141 <param name="prec_tol" value="10"/> | |
| 142 <param name="frag_tol" value="0.5"/> | |
| 143 <param name="engines" value="X!Tandem,MSGF,Comet"/> | |
| 144 <output name="output_PSMs"> | |
| 145 <assert_contents> | |
| 146 <has_text text="A2SPK1" /> | |
| 147 </assert_contents> | |
| 148 </output> | |
| 149 </test> | |
| 0 | 150 </tests> |
| 151 <help> | |
| 152 **What it does** | |
| 153 | |
| 1 | 154 ======= |
| 0 | 155 |
| 1 | 156 MetaProteomeAnalyzer (MPA) performs identification of proteins and in-depth analysis of metaproteomics (and also proteomics) data. The MPA software currently supports the database search engines Comet, MS-GF+ and X!Tandem taking MGF spectrum files as input data. User-provided FASTA databases (preferably downloaded from UniProtKB) are formatted automatically. |
| 157 | |
| 158 https://github.com/compomics/meta-proteome-analyzer | |
| 0 | 159 |
| 160 ---- | |
| 161 | |
| 1 | 162 Outputs |
| 0 | 163 ======= |
| 164 | |
| 1 | 165 MPA generates 6 tabular outputs: |
| 0 | 166 |
| 1 | 167 * psms |
| 168 * peptides | |
| 169 * proteins | |
| 170 * spectrum_ids | |
| 171 * metaproteins | |
|
2
eeb17bb3d332
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/meta_proteome_analyzer commit 69cfb20d049ddeccef197865cc07eac5363ea8ea
galaxyp
parents:
1
diff
changeset
|
172 * metaprotein_taxa |
| 0 | 173 |
| 174 | |
| 175 ------ | |
| 176 | |
| 177 </help> | |
| 178 <citations> | |
| 179 <citation type="doi">10.1021/pr501246w</citation> | |
| 180 </citations> | |
| 181 </tool> |
