Mercurial > repos > galaxyp > diann

diff diann.xml @ 2:65134626eb2b draft
planemo upload for repository https://github.com/vdemichev/DiaNN commit 005bb8d9fa17c3f28b960d99b23947841e06d90a
author: galaxyp
date: Mon, 31 Mar 2025 19:30:22 +0000
parents: c9228a392c74
children: a8f461b7d193
--- a/diann.xml	Mon Oct 02 14:15:30 2023 +0000
+++ b/diann.xml	Mon Mar 31 19:30:22 2025 +0000
@@ -1,12 +1,15 @@
-<tool id="diann" name="DIA-NN" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+<tool id="diann" name="DIA-NN" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.2">
     <description>is a software for DIA/SWATH data processing</description>
     <macros>
         <token name="@TOOL_VERSION@">1.8.1</token>
-        <token name="@VERSION_SUFFIX@">1</token>
+        <token name="@VERSION_SUFFIX@">4</token>
     </macros>
     <requirements>
-        <container type="docker">biocontainers/diann:v@TOOL_VERSION@_cv1</container>
+        <container type="docker">biocontainers/diann:@TOOL_VERSION@_cv2</container>
     </requirements>
+    <stdio>
+        <regex source="stdout" match="invalid raw MS data format" level="fatal" description="One of the inputs could not be loaded"/>
+    </stdio>
     <command detect_errors="exit_code"><![CDATA[
         mkdir ./input_data &&
         mkdir ./tmp &&
@@ -17,10 +20,11 @@
             ## if brukertdf.d.tar, extract to ./input_data and append "./input_data/" to infiles_str
             #if $infile.is_of_type("brukertdf.d.tar"):
                 tar -xf '$infile' -C ./input_data &&
-                #set $infiles_str += '--f ./input_data/' + str($infile.element_identifier[:-4]) + ' '
+                #set $infiles_str += '--f ./input_data/' + str($infile.element_identifier[:-4]) + ' '
             #else
-                ln -s '$infile' './input_data/$infile.element_identifier' &&
-                #set $infiles_str += '--f ./input_data/' + str($infile.element_identifier)
+                #set name = './input_data/' + $infile.element_identifier + '.' + $infile.ext
+                ln -s '$infile' '$name' &&
+                #set $infiles_str += '--f ' + $name + ' '
             #end if
         #end for
 
@@ -29,13 +33,17 @@
             #set $fasta_file_str += '--fasta ' + str($fasta_file) + ' '
         #end for
 
+        #if $input.spectral_lib_options.lib
+            ln -s '$input.spectral_lib_options.lib' './input_data/report-lib.predicted.speclib' &&
+        #end if
+
         diann
             #if $input.f != 'None'
             '$infiles_str'
             #end if
             --dir ./
             #if $input.spectral_lib_options.lib
-            --lib '$input.spectral_lib_options.lib'
+            --lib './input_data/report-lib.predicted.speclib'
             #else
             --lib
             #end if
@@ -60,12 +68,12 @@
             $input.spectral_lib_options.out_measured_rt
             $input.spectral_lib_options.predictor
             $input.spectral_lib_options.reannotate
-            #if $input.spectral_lib_options.ref
-            --ref '$input.spectral_lib_options.ref'
-            #end if
+            ##if $input.spectral_lib_options.ref
+            ##--ref '$input.spectral_lib_options.ref'
+            ###end if
 
             ## FASTA database options
-            #if $fasta_file_str != 'None'
+            #if $fasta_file_str != '--fasta None '
             '$fasta_file_str'
             #end if
             #if $input.fasta_db_options.fasta_filter
@@ -84,7 +92,7 @@
             #if $algo_options.mass_acc_ms1
             --mass-acc-ms1 '$algo_options.mass_acc_ms1'
             #end if
-            $algo_options.quick_mass_acc
+            ##$algo_options.quick_mass_acc
             $algo_options.reanalyse
             $algo_options.mbr_fix_settings
             $algo_options.relaxed_prot_inf
@@ -135,9 +143,9 @@
 
             ## Channel options
             ## ToDo: no_decoy_channel as condition?
-            #if $channel_options.channels
-            --channels '$channel_options.channels'
-            #end if
+            ##if $channel_options.channels
+            ##--channels '$channel_options.channels'
+            ###end if
             #if $channel_options.decoy_channel
             --decoy-channel '$channel_options.decoy_channel'
             #end if
@@ -191,14 +199,14 @@
             $other_options.full_unimod
             $other_options.gen_fr_restriction
             $other_options.global_mass_cal
-            $other_options.il_eq
+            ##$other_options.il_eq
             $other_options.individual_mass_acc
             $other_options.individual_reports
             $other_options.individual_windows
             $other_options.no_isotopes
             $other_options.regular_swath
             $other_options.scanning_swath
-            $other_options.semi
+            ##$other_options.semi
             $other_options.species_genes
             $other_options.tims_skip_errors
 
@@ -234,22 +242,22 @@
     <inputs>
         <!-- not used: cfg, convert, dir, ext, no_quant_files, out, out_lib, out_lib_copy, prefix, temp, threads, use-quant -->
         <section name="input" expanded="true" title="Input files">
-            <param name="f" type="data" format="mzml,dia,wiff,thermo.raw,brukertdf.d.tar" multiple="true" optional="true" label="Input file" help="Specify a run to be analysed"/>
+            <param name="f" type="data" format="mzml,wiff,thermo.raw,brukertdf.d.tar" multiple="true" optional="true" label="Input file" help="Specify a run to be analysed"/>
             <!-- ToDo lib: The use of multiple lib commands (experimental) allows to load multiple libraries in .tsv format -->
             <section name="spectral_lib_options" title="Spectral library">
                 <param name="gen_spec_lib" type="boolean" truevalue="--gen-spec-lib" falsevalue="" checked="false" label="Generate a spectral library" help="Instructs DIA-NN to generate a spectral library"/>
                 <param name="predictor" type="boolean" truevalue="--predictor" falsevalue="" checked="false" label="Perform deep learning-based prediction of spectra, retention times and ion mobility values" help="Instructs DIA-NN to perform deep learning-based prediction of spectra, retention times and ion mobility values"/>
-                <param name="lib" type="data" format="csv,tsv,xls,txt,binary,speclib,sptxt,msp" optional="true" label="Spectral library" help="Specify a spectral library"/>
+<!--                Removed speclib, sptxt and msp from the accepted input formats; sptxt and msp are experimental features-->
+                <param name="lib" type="data" format="csv,tsv,xls,txt,binary" optional="true" label="Spectral library" help="Specify a spectral library"/>
                 <param name="library_headers" type="text" optional="true" label="Library headers" help="Specifies column names in the spectral library to be used, in the order described in Spectral library formats [name 1],[name 2],.... Use '*' instead of the column name if DIA-NN already recognizes its name"/>
                 <param name="no_lib_filter" type="boolean" truevalue="--no-lib-filter" falsevalue="" checked="false" label="Use the input library 'as is'" help="The input library will be used 'as is' without discarding fragments that might be harmful for the analysis; use with caution"/>
                 <param name="learn_lib" type="data" format="tsv" optional="true" label="Training library" help="Specifies a 'training library' for the legacy predictor"/>
                 <param name="out_measured_rt" type="boolean" truevalue="--out-measured-rt" falsevalue="" checked="false" label="Save raw empirical retention times in the spectral library" help="Instructs DIA-NN to save raw empirical retention times in the spectral library being generated, instead of saving RTs aligned to a particular scale"/>
                 <param name="reannotate" type="boolean" truevalue="--reannotate" falsevalue="" checked="false" label="Reannotate the spectral library with protein information from the FASTA database" help="Reannotate the spectral library with protein information from the FASTA database, using the specified digest specificity"/>
-                <param name="ref" type="text" optional="true" label="Reference run" help="(Experimental) Specify a special (small) spectral library which will be used exclusively for calibration - this function can speed up calibration in library-free searches"/>
+<!--                <param name="ref" type="text" optional="true" label="Reference run" help="(Experimental) Specify a special (small) spectral library which will be used exclusively for calibration - this function can speed up calibration in library-free searches"/>-->
             </section>
             <section name="fasta_db_options" title="FASTA database">
-                <!-- ToDo fasta: use multiple fasta commands to specify multiple databases -->
-                <param name="fasta" type="data" format="fasta" multiple="true" label="Sequence database" help="Specify a sequence database in FASTA format"/>
+                <param name="fasta" type="data" format="fasta" optional="true" multiple="true" label="Sequence database" help="Specify a sequence database in FASTA format"/>
                 <param name="fasta_filter" type="data" format="txt" optional="true" label="Fasta filter" help="Only consider peptides matching the stripped sequences specified in the text file provided, when processing a sequence database"/>
                 <param name="fasta_search" type="boolean" truevalue="--fasta-search" falsevalue="" checked="false" label="Perform in silico digest of the sequence database" help="Instructs DIA-NN to perform an in silico digest of the sequence database"/>
             </section>
@@ -259,7 +267,7 @@
             <param name="mass_acc" type="float" min="0" optional="true" label="MS2 mass accuracy" help="Sets the MS2 mass accuracy to N ppm"/>
             <param name="mass_acc_cal" type="float" min="0" optional="true" label="Mass accuracy during calibration phase" help="Sets the mass accuracy used during the calibration phase of the search to N ppm"/>
             <param name="mass_acc_ms1" type="float" min="0" optional="true" label="MS1 mass accuracy" help="Sets the MS1 mass accuracy to N ppm"/>
-            <param name="quick_mass_acc" type="boolean" truevalue="--quick-mass-acc" falsevalue="" checked="false" label="(Experimental) Use a fast heuristical algorithm for MS2 mass accuracy" help="When choosing the MS2 mass accuracy setting automatically, DIA-NN will use a fast heuristical algorithm instead of IDs number optimisation"/>
+<!--            <param name="quick_mass_acc" type="boolean" truevalue="&#45;&#45;quick-mass-acc" falsevalue="" checked="false" label="(Experimental) Use a fast heuristical algorithm for MS2 mass accuracy" help="When choosing the MS2 mass accuracy setting automatically, DIA-NN will use a fast heuristical algorithm instead of IDs number optimisation"/>-->
             <!-- ToDo mbr_fix_settings: what does 'Unrelated runs' mean? -->
             <param name="reanalyse" type="boolean" truevalue="--reanalyse" falsevalue="" checked="false" label="Enable MBR" help="Enables MBR"/>
             <param name="mbr_fix_settings" type="boolean" truevalue="--mbr-fix-settings" falsevalue="" checked="false" label="Use the same settings for all runs during the second MBR pass" help="When using the 'Unrelated runs' option in combination with MBR, the same settings will be used to process all runs during the second MBR pass"/>
@@ -286,7 +294,7 @@
             <param name="cross_run_norm" type="select" label="Cross-run normalisation" help="Normalisation strategy. --global-norm/Global: Instructs DIA-NN to use simple global normalisation instead of RT-dependent normalisation. RT-dependent: Instructs DIA-NN to use RT-dependent normalisation. --sig-norm/RT and signal-dep. (experimental): Instructs DIA-NN to use RT and signal dependent normalisation (experimental). --no-norm/Off: disables cross-run normalisation. Normalised quantities reported along with the raw quantities. Default:  RT-dependent">
                 <option value="--global-norm">Global</option>
                 <option value="" selected="True">RT-dependent</option>
-                <option value="--sig-norm">RT and signal-dep. (experimental)</option>
+<!--                <option value="&#45;&#45;sig-norm">RT and signal-dep. (experimental)</option>-->
                 <option value="--no-norm">Off</option>
             </param>
             <param name="lib_gen_strategy" type="select" label="Library generationm strategy" help="Enables an intelligent algorithm which determines how to extract spectra, when creating a spectral library from DIA data. This is highly recommended and should almost always be enabled">
@@ -360,7 +368,7 @@
             <param name="target_fr" type="integer" min="0" optional="true" label="Number of fragment ions for spectral library" help="Fragment ions beyond this number will only be included in the spectral library being created (from DIA data) if they have high-quality chromatograms. Default value is 6"/>
         </section>
         <section name="channel_options" title="Channel">
-            <param name="channels" type="text" optional="true" label="(Experimental) List multiplexing channels" help="Lists multiplexing channels, wherein each channel declaration has the form [channel] = [label group],[channel name],[sites],[mass1:mass2:...], wherein [sites] has the same syntax as for --var-mod and if N sites are listed, N masses are listed at the end of the channel declaration. Examples: '--channels SILAC,L,KR,0:0; SILAC,H,KR,8.014199:10.008269' - declares standard light/heavy SILAC labels, '--channels mTRAQ,0,nK,0:0; mTRAQ,4,nK,4.0070994:4.0070994;mTRAQ,8,nK,8.0141988132:8.0141988132' - declares mTRAQ. The spectral library will be automatically split into multiple channels, for precursors bearing the [label group] modification. To add the latter to a label-free spectral library, can use --lib-fixed-mod, e.g. --fixed-mod SILAC,0.0,KR,label --lib-fixed-mod SILAC. The --channels command must be used in conjunction with --peak-translation."/>
+<!--            <param name="channels" type="text" optional="true" label="(Experimental) List multiplexing channels" help="Lists multiplexing channels, wherein each channel declaration has the form [channel] = [label group],[channel name],[sites],[mass1:mass2:...], wherein [sites] has the same syntax as for &#45;&#45;var-mod and if N sites are listed, N masses are listed at the end of the channel declaration. Examples: '&#45;&#45;channels SILAC,L,KR,0:0; SILAC,H,KR,8.014199:10.008269' - declares standard light/heavy SILAC labels, '&#45;&#45;channels mTRAQ,0,nK,0:0; mTRAQ,4,nK,4.0070994:4.0070994;mTRAQ,8,nK,8.0141988132:8.0141988132' - declares mTRAQ. The spectral library will be automatically split into multiple channels, for precursors bearing the [label group] modification. To add the latter to a label-free spectral library, can use &#45;&#45;lib-fixed-mod, e.g. &#45;&#45;fixed-mod SILAC,0.0,KR,label &#45;&#45;lib-fixed-mod SILAC. The &#45;&#45;channels command must be used in conjunction with &#45;&#45;peak-translation."/>-->
             <param name="decoy_channel" type="text" optional="true" label="Decoy channel" help="Specifies the decoy channel masses (same syntax as for --channels)"/>
             <param name="no_decoy_channel" type="boolean" truevalue="--no-decoy-channel" falsevalue="" checked="false" label="Disable the use of a decoy channel" help="Disables the use of a decoy channel for channel q-value calculation"/>
         </section>
@@ -381,7 +389,7 @@
             <param name="full_unimod" type="boolean" truevalue="--full-unimod" falsevalue="" checked="false" label="Full UniMod modification database" help="Loads the complete UniMod modification database and disables the automatic conversion of modification names to the UniMod format"/>
             <param name="gen_fr_restriction" type="boolean" truevalue="--gen-fr-restriction" falsevalue="" checked="false" label="Generate fragment exclusion information" help="Annotates the library with fragment exclusion information, based on the runs being analysed (fragments least affected by interferences are selected for quantification, why the rest are excluded)"/>
             <param name="global_mass_cal" type="boolean" truevalue="--global-mass-cal" falsevalue="" checked="false" label="Disable RT-dependent mass calibration" help="Disables RT-dependent mass calibration"/>
-            <param name="il_eq" type="boolean" truevalue="--il-eq" falsevalue="" checked="false" label="(Experimental) Isoleucine and leucine equivalent" help="When using the 'Reannotate' function, peptides will be matched to proteins while considering isoleucine and leucine equivalent"/>
+<!--            <param name="il_eq" type="boolean" truevalue="&#45;&#45;il-eq" falsevalue="" checked="false" label="(Experimental) Isoleucine and leucine equivalent" help="When using the 'Reannotate' function, peptides will be matched to proteins while considering isoleucine and leucine equivalent"/>-->
             <!-- ToDo individual_mass_acc: what does automatic mean? -->
             <param name="individual_mass_acc" type="boolean" truevalue="--individual-mass-acc" falsevalue="" checked="false" label="Independent mass accuracies" help="Mass accuracies, if set to automatic, will be determined independently for different runs"/>
             <param name="individual_reports" type="boolean" truevalue="--individual-reports" falsevalue="" checked="false" label="Individual output reports" help="A separate output report will be created for each run"/>
@@ -390,7 +398,7 @@
             <param name="no_isotopes" type="boolean" truevalue="--no-isotopes" falsevalue="" checked="false" label="Do not extract chromatograms for heavy isotopologues" help="Do not extract chromatograms for heavy isotopologues"/>
             <param name="regular_swath" type="boolean" truevalue="--regular-swath" falsevalue="" checked="false" label="Analyse all runs as regular runs" help="All runs will be analysed as if they were not Scanning SWATH runs"/>
             <param name="scanning_swath" type="boolean" truevalue="--scanning-swath" falsevalue="" checked="false" label="Analyse all runs as Scanning SWATH runs" help="All runs will be analysed as if they were Scanning SWATH runs"/>
-            <param name="semi" type="boolean" truevalue="--semi" falsevalue="" checked="false" label="(Experimental) Match a peptide to a protein with one specific and one non-specific cut" help="When using the 'Reannotate' function, a peptide will be matched to a protein also if it could be obtained with one specific and one non-specific cut (at either of the termini)"/>
+<!--            <param name="semi" type="boolean" truevalue="&#45;&#45;semi" falsevalue="" checked="false" label="(Experimental) Match a peptide to a protein with one specific and one non-specific cut" help="When using the 'Reannotate' function, a peptide will be matched to a protein also if it could be obtained with one specific and one non-specific cut (at either of the termini)"/>-->
             <param name="species_genes" type="boolean" truevalue="--species-genes" falsevalue="" checked="false" label="Add the organism identifier to the gene names" help="Instructs DIA-NN to add the organism identifier to the gene names - useful for distinguishing genes from different species, when analysing mixed samples. Works with UniProt sequence databases."/>
             <param name="tims_skip_errors" type="boolean" truevalue="--tims-skip-errors" falsevalue="" checked="false" label="Ignore errors when loading dia-PASEF data" help="DIA-NN will ignore errors when loading dia-PASEF data"/>
         </section>
@@ -426,7 +434,7 @@
         <!-- test for default run -->
         <test expect_num_outputs="4">
             <section name="input">
-                <param name="f" value="small-peakpicking-cwt-allMS.mzML" />
+                <param name="f" value="small-peakpicking-cwt-allMS" />
                 <section name="spectral_lib_options">
                     <param name="gen_spec_lib" value="True"/>
                     <param name="predictor" value="True"/>
@@ -452,7 +460,25 @@
         <!-- test for multiple fastas -->
         <test expect_num_outputs="1">
             <section name="input">
-                <param name="f" value="small-peakpicking-cwt-allMS.mzML" />
+                <param name="f" value="small-peakpicking-cwt-allMS" />
+                <section name="fasta_db_options">
+                    <param name="fasta" value="bsa.fasta,bsa2.fasta"/>
+                    <param name="fasta_search" value="True"/>
+                </section>
+            </section>
+            <output name="output_report" file="report.tsv">
+                <assert_contents>
+                    <has_text text="PG.Normalised"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- test for spec library -->
+        <test expect_num_outputs="1">
+            <section name="input">
+                <param name="f" value="small-peakpicking-cwt-allMS" />
+                <section name="spectral_lib_options">
+                    <param name="lib" value="report-lib.predicted.speclib"/>
+                </section>
                 <section name="fasta_db_options">
                     <param name="fasta" value="bsa.fasta,bsa2.fasta"/>
                     <param name="fasta_search" value="True"/>
@@ -465,29 +491,29 @@
             </output>
         </test>
         <!-- test for Bruker data -->
-<!--        <test expect_num_outputs="2">-->
-<!--            <section name="input">-->
-<!--                <param name="f" value="ThyroglobMRM000003.d.tar" />-->
-<!--                <section name="spectral_lib_options">-->
-<!--                    <param name="gen_spec_lib" value="True"/>-->
-<!--                    <param name="predictor" value="True"/>-->
-<!--                </section>-->
-<!--                <section name="fasta_db_options">-->
-<!--                    <param name="fasta" value="bsa.fasta"/>-->
-<!--                    <param name="fasta_search" value="True"/>-->
-<!--                </section>-->
-<!--            </section>-->
-<!--            <output name="output_report" file="bruker-report.tsv">-->
-<!--                <assert_contents>-->
-<!--                    <has_text text="PG.Normalised"/>-->
-<!--                </assert_contents>-->
-<!--            </output>-->
-<!--            <output name="output_report_lib" file="bruker-report-lib.tsv">-->
-<!--                <assert_contents>-->
-<!--                    <has_text text="PrecursorMz"/>-->
-<!--                </assert_contents>-->
-<!--            </output>-->
-<!--        </test>-->
+       <!-- <test expect_num_outputs="2">
+           <section name="input">
+               <param name="f" value="ThyroglobMRM000003.d.tar" />
+               <section name="spectral_lib_options">
+                   <param name="gen_spec_lib" value="True"/>
+                   <param name="predictor" value="True"/>
+               </section>
+               <section name="fasta_db_options">
+                   <param name="fasta" value="bsa.fasta"/>
+                   <param name="fasta_search" value="True"/>
+               </section>
+           </section>
+           <output name="output_report" file="bruker-report.tsv">
+               <assert_contents>
+                   <has_text text="PG.Normalised"/>
+               </assert_contents>
+           </output>
+           <output name="output_report_lib" file="bruker-report-lib.tsv">
+               <assert_contents>
+                   <has_text text="PrecursorMz"/>
+               </assert_contents>
+           </output>
+        </test> -->
     </tests>
     <help>
         <![CDATA[
@@ -517,7 +543,7 @@
 
             *Spectral library formats*
 
-            DIA-NN supports comma-separated (.csv) or tab-separated (.tsv, .xls or .txt), .speclib (compact format used by DIA-NN), .sptxt (SpectraST, experimental) and .msp (NIST, experimental) library files. Important: the library must not contain non-fragmented precursor ions as 'fragments': each fragment ion must actually be produced by the peptide backbone fragmentation.
+            DIA-NN supports comma-separated (.csv) or tab-separated (.tsv, .xls or .txt), .speclib (compact format used by DIA-NN), .sptxt (SpectraST, experimental [not in Galaxy]) and .msp (NIST, experimental [not in Galaxy]) library files. Important: the library must not contain non-fragmented precursor ions as 'fragments': each fragment ion must actually be produced by the peptide backbone fragmentation.
 
             *Library-free search*
author	galaxyp
date	Mon, 31 Mar 2025 19:30:22 +0000
parents	c9228a392c74
children	a8f461b7d193