Mercurial > repos > bgruening > flexynesis

diff flexynesis.xml @ 5:466b593fd87e draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 1afbaf45449e25238935e222f983da62392c067a
author: bgruening
date: Fri, 04 Jul 2025 14:57:40 +0000
parents: 1a5f8cedda43
children: 33816f44fc7d
--- a/flexynesis.xml	Mon Jun 30 17:19:10 2025 +0000
+++ b/flexynesis.xml	Fri Jul 04 14:57:40 2025 +0000
@@ -11,17 +11,26 @@
     <command detect_errors="exit_code"><![CDATA[
         @CHECK_NON_COMMERCIAL_USE@
         mkdir -p input/test input/train output &&
-        ln -s '$train_clin' input/train/clin.csv &&
-        ln -s '$test_clin' input/test/clin.csv &&
+        ln -s '$train_clin' input/train/clin.tabular &&
+        python '$__tool_directory__/convert.py' input/train/clin.tabular input/train/clin.csv &&
+        ## same link-then-convert step for the test split; the script path is quoted so a tool directory containing spaces still works
+        ln -s '$test_clin' input/test/clin.tabular &&
+        python '$__tool_directory__/convert.py' input/test/clin.tabular input/test/clin.csv &&
 
         #if str($assay_main) != '':
             #set $name = str($assay_main.replace(" ", "_"))
-            ln -s '$train_omics_main' input/train/${name}.csv &&
-            ln -s '$test_omics_main' input/test/${name}.csv &&
+            ln -s '$train_omics_main' input/train/${name}.tabular &&
+            python '$__tool_directory__/convert.py' input/train/${name}.tabular input/train/${name}.csv &&
+
+            ln -s '$test_omics_main' input/test/${name}.tabular &&
+            python '$__tool_directory__/convert.py' input/test/${name}.tabular input/test/${name}.csv &&
+
             #set $data_names = [$name]
         #else
-            ln -s '$train_omics_main' input/train/main.csv &&
-            ln -s '$test_omics_main' input/test/main.csv &&
+            ln -s '$train_omics_main' input/train/main.tabular &&
+            python '$__tool_directory__/convert.py' input/train/main.tabular input/train/main.csv &&
+            ln -s '$test_omics_main' input/test/main.tabular &&
+            python '$__tool_directory__/convert.py' input/test/main.tabular input/test/main.csv &&
             #set $data_names = ['main']
         #end if
         #if str($training_type.model) == 'cm_train':
@@ -38,8 +47,12 @@
                 #if str($element.assay) != '':
                     #set $i = str($element.assay.replace(" ", "_"))
                 #end if
-                ln -s '${element.train_omics}' input/train/omics_${i}.csv &&
-                ln -s '${element.test_omics}' input/test/omics_${i}.csv &&
+                ln -s '${element.train_omics}' input/train/omics_${i}.tabular &&
+                python '$__tool_directory__/convert.py' input/train/omics_${i}.tabular input/train/omics_${i}.csv &&
+
+                ln -s '${element.test_omics}' input/test/omics_${i}.tabular &&
+                python '$__tool_directory__/convert.py' input/test/omics_${i}.tabular input/test/omics_${i}.csv &&
+
                 $data_names.append("omics_" + str($i))
                 #if str($training_type.model) == 'cm_train':
                     #if str($element.layer) == 'input':
@@ -50,6 +63,40 @@
                 #end if
             #end if
         #end for
+        ## supervised training only: resolve the selected column indices to names (index_to_name.py is expected to print the name) and keep them in shell variables
+        #if str($training_type.model) == 's_train':
+            #if len($targets) > 0:
+                target_variables="" &&
+                #for $i, $element in enumerate($targets)
+                    target_name=`python '$__tool_directory__/index_to_name.py' input/train/clin.csv $element.target_variables` &&
+                    #if $i == 0:
+                        target_variables="\$target_name" &&
+                    #else:
+                        target_variables="\$target_variables,\$target_name" &&
+                    #end if
+                #end for
+                echo "Target variables: \$target_variables" &&
+            #end if
+            ## survival variables are resolved only when both the event and the time column are selected
+            #if str($surv_event_var) != 'None' and str($surv_time_var) != 'None':
+                surv_event_var=`python '$__tool_directory__/index_to_name.py' input/train/clin.csv $surv_event_var` &&
+                echo "Survival event variable: \$surv_event_var" &&
+                surv_time_var=`python '$__tool_directory__/index_to_name.py' input/train/clin.csv $surv_time_var` &&
+                echo "Survival time variable: \$surv_time_var" &&
+            #end if
+            ## target value for plots: the branches cover every case in which the plot commands pass --target_value (targets given, or both survival columns given)
+            #if str($surv_event_var) != 'None' and str($surv_time_var) != 'None' and len($targets) > 0:
+                target_value="\$target_variables,\$surv_event_var" &&
+                echo "Target value for plots: \$target_value" &&
+            #else if len($targets) > 0:
+                target_value="\$target_variables" &&
+                echo "Target value for plots: \$target_value" &&
+            #else if str($surv_event_var) != 'None' and str($surv_time_var) != 'None':
+                target_value="\$surv_event_var" &&
+                echo "Target value for plots: \$target_value" &&
+            #end if
+        #end if
+
         flexynesis
             --data_path input
             --outdir output
@@ -60,12 +107,12 @@
                 --string_node_name $string_node_name
             #end if
             #if str($training_type.model) == 's_train':
-                #if str($target_variables) != '':
-                    --target_variables $target_variables
+                #if len($targets) > 0:
+                    --target_variables \$target_variables
                 #end if
-                #if str($surv_event_var) != '':
-                    --surv_event_var $surv_event_var
-                    --surv_time_var $surv_time_var
+                #if str($surv_event_var) != 'None' and str($surv_time_var) != 'None':
+                    --surv_event_var \$surv_event_var
+                    --surv_time_var \$surv_time_var
                 #end if
             #end if
             #if str($training_type.model) == 'cm_train':
@@ -90,13 +137,26 @@
             $evaluate_baseline_performance
             --feature_importance_method $feature_importance_method
             \${GALAXY_FLEXYNESIS_EXTRA_ARGUMENTS}
+
+        ## convert flexynesis output files to tabular format; the loop runs in a subshell that exits non-zero on the first failed conversion, since a for-loop otherwise reports only its last command's status
+        && ( for file in output/*.csv; do
+            if [ -f "\$file" ]; then
+                basename="\${file%.csv}";
+                python '$__tool_directory__/convert.py' "\$file" "\${basename}.tabular" || exit 1;
+                echo "Converted \$file to \${basename}.tabular";
+            fi
+        done )
         #if str($plot.plot_embeddings_conditional.plot_embeddings) == 'yes':
                 && python $__tool_directory__/flexynesis_plot.py
                     --plot_type dimred
-                    --embeddings output/job.embeddings_test.csv
-                    --labels output/job.predicted_labels.csv
+                    --embeddings output/job.embeddings_test.tabular
+                    --labels output/job.predicted_labels.tabular
                     --method $plot.plot_embeddings_conditional.method
-                    --target_variables $target_variables
+                    #if str($training_type.model) == 's_train':
+                        #if len($targets) > 0 or (str($surv_event_var) != 'None' and str($surv_time_var) != 'None'):
+                            --target_value \$target_value
+                        #end if
+                    #end if
                     --output_dir plots
                     --format $plot.plot_embeddings_conditional.format
                     --dpi $plot.plot_embeddings_conditional.dpi
@@ -108,10 +168,10 @@
             #else
                 && python $__tool_directory__/flexynesis_plot.py
                     --plot_type kaplan_meier
-                    --labels output/job.predicted_labels.csv
+                    --labels output/job.predicted_labels.tabular
                     --survival_data input/test/clin.csv
-                    --surv_event_var $surv_event_var
-                    --surv_time_var $surv_time_var
+                    --surv_event_var \$surv_event_var
+                    --surv_time_var \$surv_time_var
                     --event_value $plot.plot_km_conditional.event_value
                     --output_dir plots
                     --format $plot.plot_km_conditional.format
@@ -136,8 +196,8 @@
                         --omics_train input/train/main.csv
                         --omics_test input/test/main.csv
                     #end if
-                    --surv_time_var $surv_time_var
-                    --surv_event_var $surv_event_var
+                    --surv_time_var \$surv_time_var
+                    --surv_event_var \$surv_event_var
                     #if str($plot.plot_cox_conditional.clinical_variables) != '':
                         --clinical_variables $str($plot.plot_cox_conditional.clinical_variables)
                     #end if
@@ -158,11 +218,11 @@
         #if str($plot.plot_scatter_conditional.plot_scatter) == 'yes':
             && python $__tool_directory__/flexynesis_plot.py
                 --plot_type scatter
-                --labels output/job.predicted_labels.csv
-                #if str($surv_event_var) != '':
-                    --target_value $target_variables,$surv_event_var
-                #else
-                    --target_value $target_variables
+                --labels output/job.predicted_labels.tabular
+                #if str($training_type.model) == 's_train':
+                    #if len($targets) > 0 or (str($surv_event_var) != 'None' and str($surv_time_var) != 'None'):
+                        --target_value \$target_value
+                    #end if
                 #end if
                 --output_dir plots
                 --format $plot.plot_scatter_conditional.format
@@ -171,11 +231,11 @@
         #if str($plot.plot_concordance_conditional.plot_concordance) == 'yes':
             && python $__tool_directory__/flexynesis_plot.py
                 --plot_type concordance_heatmap
-                --labels output/job.predicted_labels.csv
-                #if str($surv_event_var) != '':
-                    --target_value $target_variables,$surv_event_var
-                #else
-                    --target_value $target_variables
+                --labels output/job.predicted_labels.tabular
+                #if str($training_type.model) == 's_train':
+                    #if len($targets) > 0 or (str($surv_event_var) != 'None' and str($surv_time_var) != 'None'):
+                        --target_value \$target_value
+                    #end if
                 #end if
                 --output_dir plots
                 --format $plot.plot_concordance_conditional.format
@@ -184,11 +244,11 @@
         #if str($plot.plot_pr_curves_conditional.plot_pr_curves) == 'yes':
             && python $__tool_directory__/flexynesis_plot.py
                 --plot_type pr_curve
-                --labels output/job.predicted_labels.csv
-                #if str($surv_event_var) != '':
-                    --target_value $target_variables,$surv_event_var
-                #else
-                    --target_value $target_variables
+                --labels output/job.predicted_labels.tabular
+                #if str($training_type.model) == 's_train':
+                    #if len($targets) > 0 or (str($surv_event_var) != 'None' and str($surv_time_var) != 'None'):
+                        --target_value \$target_value
+                    #end if
                 #end if
                 --output_dir plots
                 --format $plot.plot_pr_curves_conditional.format
@@ -197,11 +257,11 @@
         #if str($plot.plot_roc_curves_conditional.plot_roc_curves) == 'yes':
             && python $__tool_directory__/flexynesis_plot.py
                 --plot_type roc_curve
-                --labels output/job.predicted_labels.csv
-                #if str($surv_event_var) != '':
-                    --target_value $target_variables,$surv_event_var
-                #else
-                    --target_value $target_variables
+                --labels output/job.predicted_labels.tabular
+                #if str($training_type.model) == 's_train':
+                    #if len($targets) > 0 or (str($surv_event_var) != 'None' and str($surv_time_var) != 'None'):
+                        --target_value \$target_value
+                    #end if
                 #end if
                 --output_dir plots
                 --format $plot.plot_roc_curves_conditional.format
@@ -210,11 +270,11 @@
         #if str($plot.plot_boxplot_conditional.plot_boxplot) == 'yes':
             && python $__tool_directory__/flexynesis_plot.py
                 --plot_type box_plot
-                --labels output/job.predicted_labels.csv
-                #if str($surv_event_var) != '':
-                    --target_value $target_variables,$surv_event_var
-                #else
-                    --target_value $target_variables
+                --labels output/job.predicted_labels.tabular
+                #if str($training_type.model) == 's_train':
+                    #if len($targets) > 0 or (str($surv_event_var) != 'None' and str($surv_time_var) != 'None'):
+                        --target_value \$target_value
+                    #end if
                 #end if
                 --output_dir plots
                 --format $plot.plot_boxplot_conditional.format
@@ -268,15 +328,11 @@
                     <when value="RandomSurvivalForest"/>
                     <when value="XGBoost"/>
                 </conditional>
-                <param argument="--target_variables" type="text" label="Target variables" help="Which variables in 'clin.csv' to use for predictions, comma-separated if multiple.">
-                    <expand macro="sanitizer_printable"/>
-                </param>
-                <param argument="--surv_event_var" type="text" label="Survival event" help="Which column in 'clin.csv' to use as event/status indicator for survival modeling.">
-                    <expand macro="sanitizer_printable"/>
-                </param>
-                <param argument="--surv_time_var" type="text" label="Survival time" help="Which column in 'clin.csv' to use as time/duration indicator for survival modeling.">
-                    <expand macro="sanitizer_printable"/>
-                </param>
+                <repeat name="targets" min="0" title="Target variables">
+                    <param argument="--target_variables" type="data_column" data_ref="train_clin" use_header_names="true" optional="false" label="Column name in the train clinical data to use for predictions, multiple targets are allowed" help="Add one block per target variable. The first line of the train clinical data is used as the column header."/>
+                </repeat>
+                <param argument="--surv_event_var" type="data_column" data_ref="train_clin" use_header_names="true" optional="true" label="Column name in the train clinical data to use as survival event" help="Event/status indicator for survival modeling. Survival options are only used when both the event and the time column are selected."/>
+                <param argument="--surv_time_var" type="data_column" data_ref="train_clin" use_header_names="true" optional="true" label="Column name in the train clinical data to use as survival time" help="Time/duration indicator for survival modeling. Survival options are only used when both the event and the time column are selected."/>
                 <expand macro="advanced"/>
             </when>
             <when value="us_train">
@@ -428,7 +484,7 @@
     </inputs>
     <outputs>
         <collection name="results" type="list" label="${tool.name} on ${on_string}: results">
-            <discover_datasets pattern="(?P&lt;name&gt;.+)\.csv$" format="csv" directory="output"/>
+            <discover_datasets pattern="(?P&lt;name&gt;.+)\.tabular$" format="tabular" directory="output"/>
         </collection>
         <collection name="plots" type="list" label="${tool.name} on ${on_string}: embeddings plots">
             <discover_datasets pattern="__name_and_ext__" directory="plots"/>
@@ -441,20 +497,22 @@
             <param name="non_commercial_use" value="True"/>
             <conditional name="training_type">
                 <param name="model" value="s_train"/>
-                <param name="train_clin" value="train/clin" ftype="csv"/>
-                <param name="test_clin" value="test/clin" ftype="csv"/>
-                <param name="train_omics_main" value="train/gex" ftype="csv"/>
-                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="train_clin" value="train/clin" ftype="tabular"/>
+                <param name="test_clin" value="test/clin" ftype="tabular"/>
+                <param name="train_omics_main" value="train/gex" ftype="tabular"/>
+                <param name="test_omics_main" value="test/gex" ftype="tabular"/>
                 <param name="assay_main" value="bar"/>
                 <repeat name="omics">
-                    <param name="train_omics" value="train/cnv" ftype="csv"/>
-                    <param name="test_omics" value="test/cnv" ftype="csv"/>
+                    <param name="train_omics" value="train/cnv" ftype="tabular"/>
+                    <param name="test_omics" value="test/cnv" ftype="tabular"/>
                     <param name="assay" value="foo"/>
                 </repeat>
                 <conditional name="model_class">
                     <param name="model_class" value="DirectPred"/>
                 </conditional>
-                <param name="target_variables" value="Erlotinib"/>
+                <repeat name="targets">
+                    <param name="target_variables" value="3"/>
+                </repeat>
                 <section name="advanced">
                     <param name="hpo_iter" value="1"/>
                 </section>
@@ -472,15 +530,15 @@
                 </element>
                 <element name="job.feature_importance.GradientShap">
                     <assert_contents>
-                        <has_text_matching expression="Erlotinib,0,,bar,A2M,"/>
-                        <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/>
+                        <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/>
+                        <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/>
                         <has_text_matching expression="GradientShap"/>
                     </assert_contents>
                 </element>
                 <element name="job.feature_importance.IntegratedGradients">
                     <assert_contents>
-                        <has_text_matching expression="Erlotinib,0,,bar,A2M,"/>
-                        <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/>
+                        <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/>
+                        <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/>
                         <has_text_matching expression="IntegratedGradients"/>
                     </assert_contents>
                 </element>
@@ -496,15 +554,15 @@
                 </element>
                 <element name="job.predicted_labels">
                     <assert_contents>
-                        <has_text_matching expression="source_dataset:A-704,Erlotinib,"/>
-                        <has_text_matching expression="target_dataset:KMRC-20,Erlotinib,"/>
+                        <has_text_matching expression="source_dataset:A-704\tErlotinib\t"/>
+                        <has_text_matching expression="target_dataset:KMRC-20\tErlotinib\t"/>
                     </assert_contents>
                 </element>
                 <element name="job.stats">
                     <assert_contents>
-                        <has_text_matching expression="DirectPred,Erlotinib,numerical,mse,"/>
-                        <has_text_matching expression="DirectPred,Erlotinib,numerical,r2,"/>
-                        <has_text_matching expression="DirectPred,Erlotinib,numerical,pearson_corr,"/>
+                        <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tmse\t"/>
+                        <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tr2\t"/>
+                        <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tpearson_corr\t"/>
                     </assert_contents>
                 </element>
             </output_collection>
@@ -514,15 +572,17 @@
             <param name="non_commercial_use" value="True"/>
             <conditional name="training_type">
                 <param name="model" value="s_train"/>
-                <param name="train_clin" value="train/clin" ftype="csv"/>
-                <param name="test_clin" value="test/clin" ftype="csv"/>
-                <param name="train_omics_main" value="train/gex" ftype="csv"/>
-                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="train_clin" value="train/clin" ftype="tabular"/>
+                <param name="test_clin" value="test/clin" ftype="tabular"/>
+                <param name="train_omics_main" value="train/gex" ftype="tabular"/>
+                <param name="test_omics_main" value="test/gex" ftype="tabular"/>
                 <param name="assay_main" value="bar"/>
                 <conditional name="model_class">
                     <param name="model_class" value="DirectPred"/>
                 </conditional>
-                <param name="target_variables" value="Erlotinib"/>
+                <repeat name="targets">
+                    <param name="target_variables" value="3"/>
+                </repeat>
                 <section name="advanced">
                     <param name="hpo_iter" value="1"/>
                 </section>
@@ -540,15 +600,15 @@
                 </element>
                 <element name="job.feature_importance.GradientShap">
                     <assert_contents>
-                        <has_text_matching expression="Erlotinib,0,,bar,A2M,"/>
-                        <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/>
+                        <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/>
+                        <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/>
                         <has_text_matching expression="GradientShap"/>
                     </assert_contents>
                 </element>
                 <element name="job.feature_importance.IntegratedGradients">
                     <assert_contents>
-                        <has_text_matching expression="Erlotinib,0,,bar,A2M,"/>
-                        <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/>
+                        <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/>
+                        <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/>
                         <has_text_matching expression="IntegratedGradients"/>
                     </assert_contents>
                 </element>
@@ -559,15 +619,15 @@
                 </element>
                 <element name="job.predicted_labels">
                     <assert_contents>
-                        <has_text_matching expression="source_dataset:A-704,Erlotinib,"/>
-                        <has_text_matching expression="target_dataset:KMRC-20,Erlotinib,"/>
+                        <has_text_matching expression="source_dataset:A-704\tErlotinib\t"/>
+                        <has_text_matching expression="target_dataset:KMRC-20\tErlotinib\t"/>
                     </assert_contents>
                 </element>
                 <element name="job.stats">
                     <assert_contents>
-                        <has_text_matching expression="DirectPred,Erlotinib,numerical,mse,"/>
-                        <has_text_matching expression="DirectPred,Erlotinib,numerical,r2,"/>
-                        <has_text_matching expression="DirectPred,Erlotinib,numerical,pearson_corr,"/>
+                        <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tmse\t"/>
+                        <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tr2\t"/>
+                        <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tpearson_corr\t"/>
                     </assert_contents>
                 </element>
             </output_collection>
@@ -577,20 +637,22 @@
             <param name="non_commercial_use" value="True"/>
             <conditional name="training_type">
                 <param name="model" value="s_train"/>
-                <param name="train_clin" value="train/clin" ftype="csv"/>
-                <param name="test_clin" value="test/clin" ftype="csv"/>
-                <param name="train_omics_main" value="train/gex" ftype="csv"/>
-                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="train_clin" value="train/clin" ftype="tabular"/>
+                <param name="test_clin" value="test/clin" ftype="tabular"/>
+                <param name="train_omics_main" value="train/gex" ftype="tabular"/>
+                <param name="test_omics_main" value="test/gex" ftype="tabular"/>
                 <param name="assay_main" value="bar"/>
                 <repeat name="omics">
-                    <param name="train_omics" value="train/cnv" ftype="csv"/>
-                    <param name="test_omics" value="test/cnv" ftype="csv"/>
+                    <param name="train_omics" value="train/cnv" ftype="tabular"/>
+                    <param name="test_omics" value="test/cnv" ftype="tabular"/>
                     <param name="assay" value="foo"/>
                 </repeat>
                 <conditional name="model_class">
                     <param name="model_class" value="DirectPred"/>
                 </conditional>
-                <param name="target_variables" value="Irinotecan"/>
+                <repeat name="targets">
+                    <param name="target_variables" value="4"/>
+                </repeat>
                 <section name="advanced">
                     <param name="hpo_iter" value="1"/>
                 </section>
@@ -608,15 +670,15 @@
                 </element>
                 <element name="job.feature_importance.GradientShap">
                     <assert_contents>
-                        <has_text_matching expression="Irinotecan,0,,bar,A2M,"/>
-                        <has_text_matching expression="Irinotecan,0,,bar,ABCC4,"/>
+                        <has_text_matching expression="Irinotecan\t0\t\tbar\tA2M\t"/>
+                        <has_text_matching expression="Irinotecan\t0\t\tbar\tABCC4\t"/>
                         <has_text_matching expression="GradientShap"/>
                     </assert_contents>
                 </element>
                 <element name="job.feature_importance.IntegratedGradients">
                     <assert_contents>
-                        <has_text_matching expression="Irinotecan,0,,bar,A2M,"/>
-                        <has_text_matching expression="Irinotecan,0,,bar,ABCC4,"/>
+                        <has_text_matching expression="Irinotecan\t0\t\tbar\tA2M\t"/>
+                        <has_text_matching expression="Irinotecan\t0\t\tbar\tABCC4\t"/>
                         <has_text_matching expression="IntegratedGradients"/>
                     </assert_contents>
                 </element>
@@ -632,15 +694,15 @@
                 </element>
                 <element name="job.predicted_labels">
                     <assert_contents>
-                        <has_text_matching expression="source_dataset:A-704,Irinotecan,"/>
-                        <has_text_matching expression="target_dataset:KMRC-20,Irinotecan,"/>
+                        <has_text_matching expression="source_dataset:A-704\tIrinotecan\t"/>
+                        <has_text_matching expression="target_dataset:KMRC-20\tIrinotecan\t"/>
                     </assert_contents>
                 </element>
                 <element name="job.stats">
                     <assert_contents>
-                        <has_text_matching expression="DirectPred,Irinotecan,numerical,mse,"/>
-                        <has_text_matching expression="DirectPred,Irinotecan,numerical,r2,"/>
-                        <has_text_matching expression="DirectPred,Irinotecan,numerical,pearson_corr,"/>
+                        <has_text_matching expression="DirectPred\tIrinotecan\tnumerical\tmse\t"/>
+                        <has_text_matching expression="DirectPred\tIrinotecan\tnumerical\tr2\t"/>
+                        <has_text_matching expression="DirectPred\tIrinotecan\tnumerical\tpearson_corr\t"/>
                     </assert_contents>
                 </element>
             </output_collection>
@@ -650,14 +712,14 @@
             <param name="non_commercial_use" value="True"/>
             <conditional name="training_type">
                 <param name="model" value="us_train"/>
-                <param name="train_clin" value="train/clin" ftype="csv"/>
-                <param name="test_clin" value="test/clin" ftype="csv"/>
-                <param name="train_omics_main" value="train/gex" ftype="csv"/>
-                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="train_clin" value="train/clin" ftype="tabular"/>
+                <param name="test_clin" value="test/clin" ftype="tabular"/>
+                <param name="train_omics_main" value="train/gex" ftype="tabular"/>
+                <param name="test_omics_main" value="test/gex" ftype="tabular"/>
                 <param name="assay_main" value="bar"/>
                 <repeat name="omics">
-                    <param name="train_omics" value="train/cnv" ftype="csv"/>
-                    <param name="test_omics" value="test/cnv" ftype="csv"/>
+                    <param name="train_omics" value="train/cnv" ftype="tabular"/>
+                    <param name="test_omics" value="test/cnv" ftype="tabular"/>
                     <param name="assay" value="foo"/>
                 </repeat>
                 <param name="model_class" value="supervised_vae"/>
@@ -693,15 +755,15 @@
             <param name="non_commercial_use" value="True"/>
             <conditional name="training_type">
                 <param name="model" value="cm_train"/>
-                <param name="train_clin" value="train/clin" ftype="csv"/>
-                <param name="test_clin" value="test/clin" ftype="csv"/>
-                <param name="train_omics_main" value="train/gex" ftype="csv"/>
-                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="train_clin" value="train/clin" ftype="tabular"/>
+                <param name="test_clin" value="test/clin" ftype="tabular"/>
+                <param name="train_omics_main" value="train/gex" ftype="tabular"/>
+                <param name="test_omics_main" value="test/gex" ftype="tabular"/>
                 <param name="assay_main" value="bar"/>
                 <param name="layer_main" value="input"/>
                 <repeat name="omics">
-                    <param name="train_omics" value="train/cnv" ftype="csv"/>
-                    <param name="test_omics" value="test/cnv" ftype="csv"/>
+                    <param name="train_omics" value="train/cnv" ftype="tabular"/>
+                    <param name="test_omics" value="test/cnv" ftype="tabular"/>
                     <param name="assay" value="foo"/>
                     <param name="layer" value="output"/>
                 </repeat>
@@ -748,14 +810,14 @@
             <param name="non_commercial_use" value="True"/>
             <conditional name="training_type">
                 <param name="model" value="s_train"/>
-                <param name="train_clin" value="train/clin" ftype="csv"/>
-                <param name="test_clin" value="test/clin" ftype="csv"/>
-                <param name="train_omics_main" value="train/gex" ftype="csv"/>
-                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="train_clin" value="train/clin" ftype="tabular"/>
+                <param name="test_clin" value="test/clin" ftype="tabular"/>
+                <param name="train_omics_main" value="train/gex" ftype="tabular"/>
+                <param name="test_omics_main" value="test/gex" ftype="tabular"/>
                 <param name="assay_main" value="bar"/>
                 <repeat name="omics">
-                    <param name="train_omics" value="train/cnv" ftype="csv"/>
-                    <param name="test_omics" value="test/cnv" ftype="csv"/>
+                    <param name="train_omics" value="train/cnv" ftype="tabular"/>
+                    <param name="test_omics" value="test/cnv" ftype="tabular"/>
                     <param name="assay" value="foo"/>
                 </repeat>
                 <conditional name="model_class">
@@ -764,7 +826,9 @@
                     <param name="string_organism" value="9606"/>
                     <param name="string_node_name" value="gene_name"/>
                 </conditional>
-                <param name="target_variables" value="Erlotinib"/>
+                <repeat name="targets">
+                    <param name="target_variables" value="3"/>
+                </repeat>
                 <section name="advanced">
                     <param name="hpo_iter" value="1"/>
                 </section>
@@ -782,15 +846,15 @@
                 </element>
                 <element name="job.feature_importance.GradientShap">
                     <assert_contents>
-                        <has_text_matching expression="Erlotinib,0,,bar,A2M,"/>
-                        <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/>
+                        <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/>
+                        <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/>
                         <has_text_matching expression="GradientShap"/>
                     </assert_contents>
                 </element>
                 <element name="job.feature_importance.IntegratedGradients">
                     <assert_contents>
-                        <has_text_matching expression="Erlotinib,0,,bar,A2M,"/>
-                        <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/>
+                        <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/>
+                        <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/>
                         <has_text_matching expression="IntegratedGradients"/>
                     </assert_contents>
                 </element>
@@ -806,15 +870,15 @@
                 </element>
                 <element name="job.predicted_labels">
                     <assert_contents>
-                        <has_text_matching expression="source_dataset:A-704,Erlotinib,"/>
-                        <has_text_matching expression="target_dataset:KMRC-20,Erlotinib,"/>
+                        <has_text_matching expression="source_dataset:A-704\tErlotinib\t"/>
+                        <has_text_matching expression="target_dataset:KMRC-20\tErlotinib\t"/>
                     </assert_contents>
                 </element>
                 <element name="job.stats">
                     <assert_contents>
-                        <has_text_matching expression="GNN,Erlotinib,numerical,mse,"/>
-                        <has_text_matching expression="GNN,Erlotinib,numerical,r2,"/>
-                        <has_text_matching expression="GNN,Erlotinib,numerical,pearson_corr,"/>
+                        <has_text_matching expression="GNN\tErlotinib\tnumerical\tmse\t"/>
+                        <has_text_matching expression="GNN\tErlotinib\tnumerical\tr2\t"/>
+                        <has_text_matching expression="GNN\tErlotinib\tnumerical\tpearson_corr\t"/>
                     </assert_contents>
                 </element>
             </output_collection>
@@ -824,14 +888,14 @@
             <param name="non_commercial_use" value="True"/>
             <conditional name="training_type">
                 <param name="model" value="us_train"/>
-                <param name="train_clin" value="train/clin" ftype="csv"/>
-                <param name="test_clin" value="test/clin" ftype="csv"/>
-                <param name="train_omics_main" value="train/gex" ftype="csv"/>
-                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="train_clin" value="train/clin" ftype="tabular"/>
+                <param name="test_clin" value="test/clin" ftype="tabular"/>
+                <param name="train_omics_main" value="train/gex" ftype="tabular"/>
+                <param name="test_omics_main" value="test/gex" ftype="tabular"/>
                 <param name="assay_main" value="b ar"/>
                 <repeat name="omics">
-                    <param name="train_omics" value="train/cnv" ftype="csv"/>
-                    <param name="test_omics" value="test/cnv" ftype="csv"/>
+                    <param name="train_omics" value="train/cnv" ftype="tabular"/>
+                    <param name="test_omics" value="test/cnv" ftype="tabular"/>
                     <param name="assay" value="f oo"/>
                 </repeat>
                 <param name="model_class" value="supervised_vae"/>
@@ -867,20 +931,22 @@
             <param name="non_commercial_use" value="True"/>
             <conditional name="training_type">
                 <param name="model" value="s_train"/>
-                <param name="train_clin" value="train/clin" ftype="csv"/>
-                <param name="test_clin" value="test/clin" ftype="csv"/>
-                <param name="train_omics_main" value="train/gex" ftype="csv"/>
-                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="train_clin" value="train/clin" ftype="tabular"/>
+                <param name="test_clin" value="test/clin" ftype="tabular"/>
+                <param name="train_omics_main" value="train/gex" ftype="tabular"/>
+                <param name="test_omics_main" value="test/gex" ftype="tabular"/>
                 <param name="assay_main" value="bar"/>
                 <repeat name="omics">
-                    <param name="train_omics" value="train/cnv" ftype="csv"/>
-                    <param name="test_omics" value="test/cnv" ftype="csv"/>
+                    <param name="train_omics" value="train/cnv" ftype="tabular"/>
+                    <param name="test_omics" value="test/cnv" ftype="tabular"/>
                     <param name="assay" value="foo"/>
                 </repeat>
                 <conditional name="model_class">
                     <param name="model_class" value="XGBoost"/>
                 </conditional>
-                <param name="target_variables" value="Erlotinib"/>
+                <repeat name="targets">
+                    <param name="target_variables" value="3"/>
+                </repeat>
                 <section name="advanced">
                     <param name="hpo_iter" value="1"/>
                 </section>
@@ -888,9 +954,9 @@
             <output_collection name="results" type="list">
                 <element name="job.stats">
                     <assert_contents>
-                        <has_text_matching expression="XGBoostRegressor,Erlotinib,numerical,mse,"/>
-                        <has_text_matching expression="XGBoostRegressor,Erlotinib,numerical,r2,"/>
-                        <has_text_matching expression="XGBoostRegressor,Erlotinib,numerical,pearson_corr,"/>
+                        <has_text_matching expression="XGBoostRegressor\tErlotinib\tnumerical\tmse\t"/>
+                        <has_text_matching expression="XGBoostRegressor\tErlotinib\tnumerical\tr2\t"/>
+                        <has_text_matching expression="XGBoostRegressor\tErlotinib\tnumerical\tpearson_corr\t"/>
                     </assert_contents>
                 </element>
             </output_collection>
@@ -900,20 +966,22 @@
             <param name="non_commercial_use" value="True"/>
             <conditional name="training_type">
                 <param name="model" value="s_train"/>
-                <param name="train_clin" value="train/clin" ftype="csv"/>
-                <param name="test_clin" value="test/clin" ftype="csv"/>
-                <param name="train_omics_main" value="train/gex" ftype="csv"/>
-                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="train_clin" value="train/clin" ftype="tabular"/>
+                <param name="test_clin" value="test/clin" ftype="tabular"/>
+                <param name="train_omics_main" value="train/gex" ftype="tabular"/>
+                <param name="test_omics_main" value="test/gex" ftype="tabular"/>
                 <param name="assay_main" value="bar"/>
                 <repeat name="omics">
-                    <param name="train_omics" value="train/cnv" ftype="csv"/>
-                    <param name="test_omics" value="test/cnv" ftype="csv"/>
+                    <param name="train_omics" value="train/cnv" ftype="tabular"/>
+                    <param name="test_omics" value="test/cnv" ftype="tabular"/>
                     <param name="assay" value="foo"/>
                 </repeat>
                 <conditional name="model_class">
                     <param name="model_class" value="DirectPred"/>
                 </conditional>
-                <param name="target_variables" value="Erlotinib"/>
+                <repeat name="targets">
+                    <param name="target_variables" value="3"/>
+                </repeat>
                 <section name="advanced">
                     <param name="hpo_iter" value="1"/>
                     <param name="feature_importance_method" value="IntegratedGradients"/>
@@ -933,8 +1001,8 @@
                 </element>
                 <element name="job.feature_importance.IntegratedGradients">
                     <assert_contents>
-                        <has_text_matching expression="Erlotinib,0,,bar,A2M,"/>
-                        <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/>
+                        <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/>
+                        <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/>
                     </assert_contents>
                 </element>
                 <element name="job.feature_logs.bar">
@@ -949,15 +1017,15 @@
                 </element>
                 <element name="job.predicted_labels">
                     <assert_contents>
-                        <has_text_matching expression="source_dataset:A-704,Erlotinib,"/>
-                        <has_text_matching expression="target_dataset:KMRC-20,Erlotinib,"/>
+                        <has_text_matching expression="source_dataset:A-704\tErlotinib\t"/>
+                        <has_text_matching expression="target_dataset:KMRC-20\tErlotinib\t"/>
                     </assert_contents>
                 </element>
                 <element name="job.stats">
                     <assert_contents>
-                        <has_text_matching expression="DirectPred,Erlotinib,numerical,mse,"/>
-                        <has_text_matching expression="DirectPred,Erlotinib,numerical,r2,"/>
-                        <has_text_matching expression="DirectPred,Erlotinib,numerical,pearson_corr,"/>
+                        <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tmse\t"/>
+                        <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tr2\t"/>
+                        <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tpearson_corr\t"/>
                     </assert_contents>
                 </element>
             </output_collection>
@@ -967,20 +1035,22 @@
             <param name="non_commercial_use" value="True"/>
             <conditional name="training_type">
                 <param name="model" value="s_train"/>
-                <param name="train_clin" value="train/clin" ftype="csv"/>
-                <param name="test_clin" value="test/clin" ftype="csv"/>
-                <param name="train_omics_main" value="train/gex" ftype="csv"/>
-                <param name="test_omics_main" value="test/gex" ftype="csv"/>
+                <param name="train_clin" value="train/clin" ftype="tabular"/>
+                <param name="test_clin" value="test/clin" ftype="tabular"/>
+                <param name="train_omics_main" value="train/gex" ftype="tabular"/>
+                <param name="test_omics_main" value="test/gex" ftype="tabular"/>
                 <param name="assay_main" value="bar"/>
                 <repeat name="omics">
-                    <param name="train_omics" value="train/cnv" ftype="csv"/>
-                    <param name="test_omics" value="test/cnv" ftype="csv"/>
+                    <param name="train_omics" value="train/cnv" ftype="tabular"/>
+                    <param name="test_omics" value="test/cnv" ftype="tabular"/>
                     <param name="assay" value="foo"/>
                 </repeat>
                 <conditional name="model_class">
                     <param name="model_class" value="DirectPred"/>
                 </conditional>
-                <param name="target_variables" value="Erlotinib"/>
+                <repeat name="targets">
+                    <param name="target_variables" value="3"/>
+                </repeat>
                 <section name="advanced">
                     <param name="hpo_iter" value="1"/>
                 </section>
@@ -1006,15 +1076,15 @@
                 </element>
                 <element name="job.feature_importance.GradientShap">
                     <assert_contents>
-                        <has_text_matching expression="Erlotinib,0,,bar,A2M,"/>
-                        <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/>
+                        <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/>
+                        <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/>
                         <has_text_matching expression="GradientShap"/>
                     </assert_contents>
                 </element>
                 <element name="job.feature_importance.IntegratedGradients">
                     <assert_contents>
-                        <has_text_matching expression="Erlotinib,0,,bar,A2M,"/>
-                        <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/>
+                        <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/>
+                        <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/>
                         <has_text_matching expression="IntegratedGradients"/>
                     </assert_contents>
                 </element>
@@ -1030,15 +1100,15 @@
                 </element>
                 <element name="job.predicted_labels">
                     <assert_contents>
-                        <has_text_matching expression="source_dataset:A-704,Erlotinib,"/>
-                        <has_text_matching expression="target_dataset:KMRC-20,Erlotinib,"/>
+                        <has_text_matching expression="source_dataset:A-704\tErlotinib\t"/>
+                        <has_text_matching expression="target_dataset:KMRC-20\tErlotinib\t"/>
                     </assert_contents>
                 </element>
                 <element name="job.stats">
                     <assert_contents>
-                        <has_text_matching expression="DirectPred,Erlotinib,numerical,mse,"/>
-                        <has_text_matching expression="DirectPred,Erlotinib,numerical,r2,"/>
-                        <has_text_matching expression="DirectPred,Erlotinib,numerical,pearson_corr,"/>
+                        <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tmse\t"/>
+                        <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tr2\t"/>
+                        <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tpearson_corr\t"/>
                     </assert_contents>
                 </element>
             </output_collection>
@@ -1265,6 +1335,96 @@
                 </element>
             </output_collection>
         </test>
+        <!-- test 19: Supervised training with GEX and CNV data, DirectPred model, Erlotinib and Crizotinib targets -->
+        <test expect_num_outputs="1">
+            <param name="non_commercial_use" value="True"/>
+            <conditional name="training_type">
+                <param name="model" value="s_train"/>
+                <param name="train_clin" value="train/clin" ftype="tabular"/>
+                <param name="test_clin" value="test/clin" ftype="tabular"/>
+                <param name="train_omics_main" value="train/gex" ftype="tabular"/>
+                <param name="test_omics_main" value="test/gex" ftype="tabular"/>
+                <param name="assay_main" value="bar"/>
+                <repeat name="omics">
+                    <param name="train_omics" value="train/cnv" ftype="tabular"/>
+                    <param name="test_omics" value="test/cnv" ftype="tabular"/>
+                    <param name="assay" value="foo"/>
+                </repeat>
+                <conditional name="model_class">
+                    <param name="model_class" value="DirectPred"/>
+                </conditional>
+                <repeat name="targets">
+                    <param name="target_variables" value="3"/>
+                </repeat>
+                <repeat name="targets">
+                    <param name="target_variables" value="2"/>
+                </repeat>
+                <section name="advanced">
+                    <param name="hpo_iter" value="1"/>
+                </section>
+            </conditional>
+            <assert_stdout>
+                <has_text_matching expression="Target variables: Erlotinib,Crizotinib"/>
+            </assert_stdout>
+            <output_collection name="results" type="list">
+                <element name="job.embeddings_test">
+                    <assert_contents>
+                        <has_n_lines n="50"/>
+                    </assert_contents>
+                </element>
+                <element name="job.embeddings_train">
+                    <assert_contents>
+                        <has_n_lines n="50"/>
+                    </assert_contents>
+                </element>
+                <element name="job.feature_importance.GradientShap">
+                    <assert_contents>
+                        <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/>
+                        <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/>
+                        <has_text_matching expression="Crizotinib\t0\t\tbar\tA2M\t"/>
+                        <has_text_matching expression="Crizotinib\t0\t\tbar\tABCC4\t"/>
+                        <has_text_matching expression="GradientShap"/>
+                    </assert_contents>
+                </element>
+                <element name="job.feature_importance.IntegratedGradients">
+                    <assert_contents>
+                        <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/>
+                        <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/>
+                        <has_text_matching expression="Crizotinib\t0\t\tbar\tA2M\t"/>
+                        <has_text_matching expression="Crizotinib\t0\t\tbar\tABCC4\t"/>
+                        <has_text_matching expression="IntegratedGradients"/>
+                    </assert_contents>
+                </element>
+                <element name="job.feature_logs.bar">
+                    <assert_contents>
+                        <has_n_lines n="25"/>
+                    </assert_contents>
+                </element>
+                <element name="job.feature_logs.omics_foo">
+                    <assert_contents>
+                        <has_n_lines n="25"/>
+                    </assert_contents>
+                </element>
+                <element name="job.predicted_labels">
+                    <assert_contents>
+                        <has_text_matching expression="source_dataset:A-704\tErlotinib\t"/>
+                        <has_text_matching expression="source_dataset:A-704\tCrizotinib\t"/>
+                        <has_text_matching expression="target_dataset:KMRC-20\tErlotinib\t"/>
+                        <has_text_matching expression="target_dataset:KMRC-20\tCrizotinib\t"/>
+                    </assert_contents>
+                </element>
+                <element name="job.stats">
+                    <assert_contents>
+                        <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tmse\t"/>
+                        <has_text_matching expression="DirectPred\tCrizotinib\tnumerical\tmse\t"/>
+                        <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tr2\t"/>
+                        <has_text_matching expression="DirectPred\tCrizotinib\tnumerical\tr2\t"/>
+                        <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tpearson_corr\t"/>
+                        <has_text_matching expression="DirectPred\tCrizotinib\tnumerical\tpearson_corr\t"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
     </tests>
     <help><![CDATA[
 @COMMON_HELP@
@@ -1276,9 +1436,9 @@
 
 **Input Files**
 
-**clin.csv**
+**clin.tabular**
 
-clin.csv contains the sample metadata. The first column contains unique sample identifiers. The other columns contain sample-associated clinical variables. NA values are allowed in the clinical variables.
+clin.tabular contains the sample metadata. The first column contains unique sample identifiers. The other columns contain sample-associated clinical variables. NA values are allowed in the clinical variables.
 
 The format might look like so:
 
@@ -1296,9 +1456,9 @@
 
 .
 
-**omics.csv**
+**omics.tabular**
 
-The first column of the feature tables must be unique feature identifiers (e.g. gene names). The column names must be sample identifiers that should overlap with those in the clin.csv. They don't have to be completely identical or in the same order. Samples from the clin.csv that are not represented in the omics table will be dropped.
+The first column of the feature tables must contain unique feature identifiers (e.g. gene names). The column names must be sample identifiers that overlap with those in clin.tabular. They don't have to be completely identical or in the same order. Samples from clin.tabular that are not represented in the omics table will be dropped.
 
 The format might look like so:
 
@@ -1320,7 +1480,7 @@
 
 **Concordance between train/test splits:**
 
-The corresponding omics files in train/test splits must contain overlapping feature names (they don't have to be identical or in the same order). The clin.csv files in train/test must contain matching clinical variables.
+The corresponding omics files in train/test splits must contain overlapping feature names (they don't have to be identical or in the same order). The clin.tabular files in train/test must contain matching clinical variables.
 
 -----
 
@@ -1328,7 +1488,7 @@
 
 **Minimum requirements**
 
-* clin.csv and omics.csv files for training and testing
+* clin.tabular and omics.tabular files for training and testing
 * Selection of a tool/model
 * One target variable which can be numerical or categorical for regression/classification tasks.
author	bgruening
date	Fri, 04 Jul 2025 14:57:40 +0000
parents	1a5f8cedda43
children	33816f44fc7d