diff meta_proteome_analyzer.xml @ 1:b41e6d379c5f draft

Uploaded
author galaxyp
date Fri, 03 Mar 2017 14:57:59 -0500
parents c0abf055f6f1
children eeb17bb3d332
line wrap: on
line diff
--- a/meta_proteome_analyzer.xml	Fri Mar 03 11:44:29 2017 -0500
+++ b/meta_proteome_analyzer.xml	Fri Mar 03 14:57:59 2017 -0500
@@ -13,9 +13,10 @@
         mkdir -p output_dir;
         ## copy mpa conf dir to working dir
         jar_dir=`mpa-portable -get_jar_dir`;
-        cp -R jar_dir/conf .;
+        cp -R \$jar_dir/conf .;
 
         ## echo the search engines to run
+        #set $search_engines = str($search_engines_options.engines).split(',')
         echo "$search_engines_options.engines";
         echo "DB: ${input_database.display_name} sequences: ${input_database.metadata.sequences}";
 
@@ -36,10 +37,10 @@
             -database input_database.fasta
             -missed_cleav $missed_cleavages
             -prec_tol ${precursor_options.prec_tol}${precursor_options.prec_tol_units}
-            -frag_tol ${precursor_options.frag_tol}
-            -xtandem #if 'X!Tandem' in $search_engines_options.engines 1 else 0#
-            -comet #if 'Comet' in $search_engines_options.engines 1 else 0#
-            -msgf #if 'MSGF' in $search_engines_options.engines 1 else 0#
+            -frag_tol ${precursor_options.frag_tol}Da
+            -xtandem #if 'X!Tandem' in $search_engines then 1 else 0#
+            -comet #if 'Comet' in $search_engines then 1 else 0#
+            -msgf #if 'MSGF' in $search_engines then 1 else 0#
             -output_folder output_dir
             -threads "\${GALAXY_SLOTS:-12}"
          2> $temp_stderr) &&
@@ -83,6 +84,16 @@
             </param>
         </section>
 
+<!--
+-generate_metaproteins  Turn meta-protein generation (aka. protein grouping) on or off (1: on, 0: off, default is '1').
+-peptide_rule           The peptide rule chosen for meta-protein generation (-1: off, 0: share-one-peptide, 1: shared-peptide-subset, default is '0').
+-cluster_rule           The sequence cluster rule chosen for meta-protein generation (-1: off, 0: UniRef100, 1: UniRef90, 2: UniRef50, default is '-1').
+-taxonomy_rule          The taxonomy rule chosen for meta-protein generation (-1: off, 0: on superkingdom or lower, 1: on kingdom or lower, 2: on phylum or lower, 3: on class or lower, 4: on order or lower, 5: on family or lower, 6: on genus or lower, 7: on species or lower, 8: on subspecies, default is '-1').
+-iterative_search       Turn iterative (aka. two-step) searching on or off (1: on, 0: off, default is '0').
+-fdr_threshold          The applied FDR threshold for filtering the results (default is 0.05 == 5% FDR).
+-->
+
+
     </inputs>
     <outputs>
         <data format="tabular" name="output_proteins" from_work_dir="proteins.csv" label="${tool.name} on ${on_string}: proteins">
@@ -123,163 +134,47 @@
         </data>
     </outputs>
     <tests>
+        <test><!-- Runs all three search engines on one MGF peak list and checks the PSM output for the text A2SPK1. -->
+            <param name="peak_lists" value="Test416Ebendorf.mgf" ftype="mgf"/>
+            <param name="input_database" value="searchdb.fa" ftype="fasta"/>
+            <param name="missed_cleavages" value="2"/>
+            <param name="prec_tol_units" value="ppm"/>
+            <param name="prec_tol" value="10"/>
+            <param name="frag_tol" value="0.5"/>
+            <param name="engines" value="X!Tandem,MSGF,Comet"/>
+            <output name="output_PSMs">
+                <assert_contents>
+                    <has_text text="A2SPK1" />
+                </assert_contents>
+            </output>
+        </test>
     </tests>
     <help>
 **What it does**
 
-Runs multiple search engines (X! Tandem, OMSSA and MS-GF+) on any number of MGF peak lists using the SearchGUI application and combines the results.
+=======
 
-http://compomics.github.io/projects/peptide-shaker.html
-http://compomics.github.io/projects/searchgui.html
+MetaProteomeAnalyzer (MPA) performs identification of proteins and in-depth analysis of metaproteomics (and also proteomics) data. The MPA software currently supports the database search engines Comet, MS-GF+ and X!Tandem, and takes MGF spectrum files as input data. User-provided FASTA databases (preferably downloaded from UniProtKB) are formatted automatically.
+
+https://github.com/compomics/meta-proteome-analyzer
 
 ----
 
-Reports
+Outputs
 =======
 
-
-PSM Report
-----------
-
-* Protein(s):                Protein(s) to which the peptide can be attached
-* Sequence:                  Sequence of the peptide
-* Variable Modifications:   The variable modifications
-* D-score:	                D-score for variable PTM localization
-* probabilistic PTM score:	The probabilistic score (e.g. A-score or PhosphoRS) used for variable PTM localization.
-* Localization Confidence:	The confidence in variable PTM localization.
-* Fixed Modifications:	    The fixed modifications.
-* Spectrum File:	The spectrum file.
-* Spectrum Title:	The title of the spectrum.
-* Spectrum Scan Number:	The spectrum scan number.
-* RT:	Retention time
-* m/z:	Measured m/z
-* Measured Charge:	The charge as given in the spectrum file.
-* Identification Charge:	The charge as inferred by the search engine.
-* Theoretical Mass:	The theoretical mass of the peptide.
-* Isotope Number:	The isotope number targetted by the instrument.
-* Precursor m/z Error:	The precursor m/z matching error.
-* Score:	Score of the retained peptide as a combination of the algorithm scores (used to rank PSMs).
-* Confidence:	Confidence in percent associated to the retained PSM.
-* Decoy:	Indicates whether the peptide is a decoy (1: yes, 0: no).
-* Validation: Indicates the validation level of the protein group.
-
-
-Protein Report
---------------
-
-* Main Accession:	Main accession of the protein group.
-* Description:	Description of the protein designed by the main accession.
-* Gene Name:	The gene names of the Ensembl gene ID associated to the main accession.
-* Chromosome:	The chromosome of the Ensembl gene ID associated to the main accession.
-* PI:	Protein Inference status of the protein group.
-* Secondary Accessions:	Other accessions in the protein group (alphabetical order).
-* Protein Group:	The complete protein group (alphabetical order).
-* #Peptides:	Total number of peptides.
-* #Validated Peptides:	Number of validated peptides.
-* #Unique:	Total number of peptides unique to this protein group.
-* #PSMs:	Number of PSMs
-* #Validated PSMs:	Number of validated PSMs
-* Coverage (%):	Sequence coverage in percent of the protein designed by the main accession.
-* Possible Coverage (%):	Possible sequence coverage in percent of the protein designed by the main accession according to the search settings.
-* MW (kDa):	Molecular Weight.
-* Spectrum Counting NSAF: 	Normalized Spectrum Abundance Factor (NSAF)
-* Spectrum Counting emPAI:	exponentially modified Protein Abundance Index (emPAI)
-* Confident Modification Sites: Number of Confident Modification Sites	List of the sites where a variable modification was confidently localized.
-* Other Modification Sites: Number of other Modification Sites	List of the non*confident sites where a variable modification was localized.
-* Score:	Score of the protein group.
-* Confidence:	Confidence in percent associated to the protein group.
-* Decoy:	Indicates whether the protein group is a decoy (1: yes, 0: no).
-* Validation:	Indicates the validation level of the protein group.
-
-
-Peptide Report
---------------
-
+MPA generates 6 tabular outputs:
 
-* Protein(s):	Protein(s) to which this peptide can be attached.
-* AAs Before:	The amino-acids before the sequence.
-* Sequence:	Sequence of the peptide.
-* AAs After:	The amino-acids after the sequence.
-* Modified Sequence:	The peptide sequence annotated with variable modifications.
-* Variable Modifications:	The variable modifications.
-* Localization Confidence:	The confidence in PTMs localization.
-* Fixed Modifications:	The fixed modifications.
-* #Validated PSMs:	Number of validated PSMs.
-* #PSMs:	Number of PSMs.
-* Score:	Score of the peptide.
-* Confidence:	Confidence in percent associated to the peptide.
-* Decoy:	Indicates whether the peptide is a decoy (1: yes, 0: no).
-* Validation:	Indicates the validation level of the protein group.
-
-
-Hierachical Report
-------------------
-
-* Main Accession:	Main accession of the protein group.
-* Description:	Description of the protein designed by the main accession.
-* PI:	Protein Inference status of the protein group.
-* Secondary Accessions:	Other accessions in the protein group (alphabetical order).
-* Protein Group:	The complete protein group (alphabetical order).
-* #Peptides:	Total number of peptides.
-* #Validated Peptides:	Number of validated peptides.
-* #Unique:	Total number of peptides unique to this protein group.
-* #PSMs:	Number of PSMs
-* #Validated PSMs:	Number of validated PSMs
-* Coverage (%):	Sequence coverage in percent of the protein designed by the main accession.
-* Possible Coverage (%):	Possible sequence coverage in percent of the protein designed by the main accession according to the search settings.
-* MW (kDa):	Molecular Weight.
-* Spectrum Counting NSAF: 	Normalized Spectrum Abundance Factor (NSAF)
-* Spectrum Counting emPAI:	exponentially modified Protein Abundance Index (emPAI)
-* Confident Modification Sites: # Confident Modification Sites	List of the sites where a variable modification was confidently localized.
-* Other Modification Sites: # Other Modification Sites	List of the non-confident sites where a variable modification was localized.
-* Score:	Score of the protein group.
-* Confidence:	Confidence in percent associated to the protein group.
-* Decoy:	Indicates whether the protein group is a decoy (1: yes, 0: no).
-* Validation:	Indicates the validation level of the protein group.
-* Protein(s):	Protein(s) to which this peptide can be attached.
-* AAs Before:	The amino-acids before the sequence.
-* Sequence:	Sequence of the peptide.
-* AAs After:	The amino-acids after the sequence.
-* Variable Modifications:	The variable modifications.
-* Localization Confidence:	The confidence in PTMs localization.
-* Fixed Modifications:	The fixed modifications.
-* #Validated PSMs:	Number of validated PSMs.
-* #PSMs:	Number of PSMs.
-* Score:	Score of the peptide.
-* Confidence:	Confidence in percent associated to the peptide.
-* Decoy:	Indicates whether the peptide is a decoy (1: yes, 0: no).
-* Validation:	Indicates the validation level of the protein group.
-* Protein(s):	Protein(s) to which the peptide can be attached.
-* Sequence:	Sequence of the peptide.
-* Modified Sequence:	The peptide sequence annotated with variable modifications.
-* Variable Modifications:	The variable modifications.
-* D-score:	D-score for variable PTM localization.
-* probabilistic PTM score:	The probabilistic score (e.g. A-score or PhosphoRS) used for variable PTM localization.
-* Localization Confidence:	The confidence in variable PTM localization.
-* Fixed Modifications:	The fixed modifications.
-* Spectrum File:	The spectrum file.
-* Spectrum Title:	The title of the spectrum.
-* Spectrum Scan Number:	The spectrum scan number.
-* RT:	Retention time
-* m/z:	Measured m/z
-* Measured Charge:	The charge as given in the spectrum file.
-* Identification Charge:	The charge as inferred by the search engine.
-* Theoretical Mass:	The theoretical mass of the peptide.
-* Isotope Number:	The isotope number targetted by the instrument.
-* Precursor m/z Error:	The precursor m/z matching error.
-* Score:	Score of the retained peptide as a combination of the algorithm scores (used to rank PSMs).
-* Confidence:	Confidence in percent associated to the retained PSM.
-* Decoy:	Indicates whether the peptide is a decoy (1: yes, 0: no).
-* Validation:	Indicates the validation level of the protein group.
+* psms
+* peptides
+* proteins
+* spectrum_ids
+* metaproteins
+* metaprotein_taxa
 
 
 ------
 
-**Citation**
-
-To cite the underlying tools (PeptideShaker and SearchGUI) please refer to the list of papers at http://compomics.github.io
-
-If you use this tool in Galaxy, please cite Chilton J, Ira Cooke, Bjoern Gruening et al.
     </help>
     <citations>
         <citation type="doi">10.1021/pr501246w</citation>