Mercurial > repos > bgruening > flexynesis
diff flexynesis.xml @ 5:466b593fd87e draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 1afbaf45449e25238935e222f983da62392c067a
| author | bgruening |
|---|---|
| date | Fri, 04 Jul 2025 14:57:40 +0000 |
| parents | 1a5f8cedda43 |
| children | 33816f44fc7d |
line wrap: on
line diff
--- a/flexynesis.xml Mon Jun 30 17:19:10 2025 +0000 +++ b/flexynesis.xml Fri Jul 04 14:57:40 2025 +0000 @@ -11,17 +11,26 @@ <command detect_errors="exit_code"><![CDATA[ @CHECK_NON_COMMERCIAL_USE@ mkdir -p input/test input/train output && - ln -s '$train_clin' input/train/clin.csv && - ln -s '$test_clin' input/test/clin.csv && + ln -s '$train_clin' input/train/clin.tabular && + python $__tool_directory__/convert.py input/train/clin.tabular input/train/clin.csv && + + ln -s '$test_clin' input/test/clin.tabular && + python $__tool_directory__/convert.py input/test/clin.tabular input/test/clin.csv && #if str($assay_main) != '': #set $name = str($assay_main.replace(" ", "_")) - ln -s '$train_omics_main' input/train/${name}.csv && - ln -s '$test_omics_main' input/test/${name}.csv && + ln -s '$train_omics_main' input/train/${name}.tabular && + python '$__tool_directory__/convert.py' input/train/${name}.tabular input/train/${name}.csv && + + ln -s '$test_omics_main' input/test/${name}.tabular && + python '$__tool_directory__/convert.py' input/test/${name}.tabular input/test/${name}.csv && + #set $data_names = [$name] #else - ln -s '$train_omics_main' input/train/main.csv && - ln -s '$test_omics_main' input/test/main.csv && + ln -s '$train_omics_main' input/train/main.tabular && + python '$__tool_directory__/convert.py' input/train/main.tabular input/train/main.csv && + ln -s '$test_omics_main' input/test/main.tabular && + python '$__tool_directory__/convert.py' input/test/main.tabular input/test/main.csv && #set $data_names = ['main'] #end if #if str($training_type.model) == 'cm_train': @@ -38,8 +47,12 @@ #if str($element.assay) != '': #set $i = str($element.assay.replace(" ", "_")) #end if - ln -s '${element.train_omics}' input/train/omics_${i}.csv && - ln -s '${element.test_omics}' input/test/omics_${i}.csv && + ln -s '${element.train_omics}' input/train/omics_${i}.tabular && + python '$__tool_directory__/convert.py' input/train/omics_${i}.tabular input/train/omics_${i}.csv && + + ln -s '${element.test_omics}' input/test/omics_${i}.tabular && + python '$__tool_directory__/convert.py' input/test/omics_${i}.tabular input/test/omics_${i}.csv && + $data_names.append("omics_" + str($i)) #if str($training_type.model) == 'cm_train': #if str($element.layer) == 'input': @@ -50,6 +63,40 @@ #end if #end if #end for + ## set target variables + #if str($training_type.model) == 's_train': + #if len($targets) > 0: + target_variables="" && + #for $i, $element in enumerate($targets) + target_name=`$__tool_directory__/index_to_name.py input/train/clin.csv $element.target_variables` && + #if $i == 0: + target_variables="\$target_name" && + #else: + target_variables="\$target_variables,\$target_name" && + #end if + #end for + echo "Target variables: \$target_variables" && + #end if + ## set survival variables + #if str($surv_event_var) != 'None' and str($surv_time_var) != 'None': + surv_event_var=`$__tool_directory__/index_to_name.py input/train/clin.csv $surv_event_var` && + echo "Survival event variable: \$surv_event_var" && + surv_time_var=`$__tool_directory__/index_to_name.py input/train/clin.csv $surv_time_var` && + echo "Survival time variable: \$surv_time_var" && + #end if + ## set target value for plots + #if str($surv_event_var) != 'None' and str($surv_time_var) != 'None' and len($targets) > 0: + target_value="\$target_variables,\$surv_event_var" && + echo "Target value for plots: \$target_value" && + #else if str($surv_event_var) == 'None' and str($surv_time_var) == 'None' and len($targets) > 0: + target_value="\$target_variables" && + echo "Target value for plots: \$target_value" && + #else if str($surv_event_var) != 'None' and str($surv_time_var) != 'None' and len($targets) == 0: + target_value="\$surv_event_var" && + echo "Target value for plots: \$target_value" && + #end if + #end if + flexynesis --data_path input --outdir output @@ -60,12 +107,12 @@ --string_node_name $string_node_name #end if #if str($training_type.model) == 's_train': - #if str($target_variables) != '': - --target_variables $target_variables + #if len($targets) > 0: + --target_variables \$target_variables #end if - #if str($surv_event_var) != '': - --surv_event_var $surv_event_var - --surv_time_var $surv_time_var + #if str($surv_event_var) != 'None' and str($surv_time_var) != 'None': + --surv_event_var \$surv_event_var + --surv_time_var \$surv_time_var #end if #end if #if str($training_type.model) == 'cm_train': @@ -90,13 +137,26 @@ $evaluate_baseline_performance --feature_importance_method $feature_importance_method \${GALAXY_FLEXYNESIS_EXTRA_ARGUMENTS} + + ## convert flexynesis output files to tabular format + && for file in output/*.csv; do + if [ -f "\$file" ]; then + basename="\${file%.csv}"; + python $__tool_directory__/convert.py "\$file" "\${basename}.tabular"; + echo "Converted \$file to \${basename}.tabular"; + fi + done #if str($plot.plot_embeddings_conditional.plot_embeddings) == 'yes': && python $__tool_directory__/flexynesis_plot.py --plot_type dimred - --embeddings output/job.embeddings_test.csv - --labels output/job.predicted_labels.csv + --embeddings output/job.embeddings_test.tabular + --labels output/job.predicted_labels.tabular --method $plot.plot_embeddings_conditional.method - --target_variables $target_variables + #if str($training_type.model) == 's_train': + #if len($targets) > 0 or (str($surv_event_var) != 'None' and str($surv_time_var) != 'None'): + --target_value \$target_value + #end if + #end if --output_dir plots --format $plot.plot_embeddings_conditional.format --dpi $plot.plot_embeddings_conditional.dpi @@ -108,10 +168,10 @@ #else && python $__tool_directory__/flexynesis_plot.py --plot_type kaplan_meier - --labels output/job.predicted_labels.csv + --labels output/job.predicted_labels.tabular --survival_data input/test/clin.csv - --surv_event_var $surv_event_var - --surv_time_var $surv_time_var + --surv_event_var \$surv_event_var + --surv_time_var \$surv_time_var --event_value $plot.plot_km_conditional.event_value --output_dir plots --format $plot.plot_km_conditional.format @@ -136,8 +196,8 @@ --omics_train input/train/main.csv --omics_test input/test/main.csv #end if - --surv_time_var $surv_time_var - --surv_event_var $surv_event_var + --surv_time_var \$surv_time_var + --surv_event_var \$surv_event_var #if str($plot.plot_cox_conditional.clinical_variables) != '': --clinical_variables $str($plot.plot_cox_conditional.clinical_variables) #end if @@ -158,11 +218,11 @@ #if str($plot.plot_scatter_conditional.plot_scatter) == 'yes': && python $__tool_directory__/flexynesis_plot.py --plot_type scatter - --labels output/job.predicted_labels.csv - #if str($surv_event_var) != '': - --target_value $target_variables,$surv_event_var - #else - --target_value $target_variables + --labels output/job.predicted_labels.tabular + #if str($training_type.model) == 's_train': + #if len($targets) > 0 or (str($surv_event_var) != 'None' and str($surv_time_var) != 'None'): + --target_value \$target_value + #end if #end if --output_dir plots --format $plot.plot_scatter_conditional.format @@ -171,11 +231,11 @@ #if str($plot.plot_concordance_conditional.plot_concordance) == 'yes': && python $__tool_directory__/flexynesis_plot.py --plot_type concordance_heatmap - --labels output/job.predicted_labels.csv - #if str($surv_event_var) != '': - --target_value $target_variables,$surv_event_var - #else - --target_value $target_variables + --labels output/job.predicted_labels.tabular + #if str($training_type.model) == 's_train': + #if len($targets) > 0 or (str($surv_event_var) != 'None' and str($surv_time_var) != 'None'): + --target_value \$target_value + #end if #end if --output_dir plots --format $plot.plot_concordance_conditional.format @@ -184,11 +244,11 @@ #if str($plot.plot_pr_curves_conditional.plot_pr_curves) == 'yes': && python $__tool_directory__/flexynesis_plot.py --plot_type pr_curve - --labels output/job.predicted_labels.csv - #if str($surv_event_var) != '': - --target_value $target_variables,$surv_event_var - #else - --target_value $target_variables + --labels output/job.predicted_labels.tabular + #if str($training_type.model) == 's_train': + #if len($targets) > 0 or (str($surv_event_var) != 'None' and str($surv_time_var) != 'None'): + --target_value \$target_value + #end if #end if --output_dir plots --format $plot.plot_pr_curves_conditional.format @@ -197,11 +257,11 @@ #if str($plot.plot_roc_curves_conditional.plot_roc_curves) == 'yes': && python $__tool_directory__/flexynesis_plot.py --plot_type roc_curve - --labels output/job.predicted_labels.csv - #if str($surv_event_var) != '': - --target_value $target_variables,$surv_event_var - #else - --target_value $target_variables + --labels output/job.predicted_labels.tabular + #if str($training_type.model) == 's_train': + #if len($targets) > 0 or (str($surv_event_var) != 'None' and str($surv_time_var) != 'None'): + --target_value \$target_value + #end if #end if --output_dir plots --format $plot.plot_roc_curves_conditional.format @@ -210,11 +270,11 @@ #if str($plot.plot_boxplot_conditional.plot_boxplot) == 'yes': && python $__tool_directory__/flexynesis_plot.py --plot_type box_plot - --labels output/job.predicted_labels.csv - #if str($surv_event_var) != '': - --target_value $target_variables,$surv_event_var - #else - --target_value $target_variables + --labels output/job.predicted_labels.tabular + #if str($training_type.model) == 's_train': + #if len($targets) > 0 or (str($surv_event_var) != 'None' and str($surv_time_var) != 'None'): + --target_value \$target_value + #end if #end if --output_dir plots --format $plot.plot_boxplot_conditional.format @@ -268,15 +328,11 @@ <when value="RandomSurvivalForest"/> <when value="XGBoost"/> </conditional> - <param argument="--target_variables" type="text" label="Target variables" help="Which variables in 'clin.csv' to use for predictions, comma-separated if multiple."> - <expand macro="sanitizer_printable"/> - </param> - <param argument="--surv_event_var" type="text" label="Survival event" help="Which column in 'clin.csv' to use as event/status indicator for survival modeling."> - <expand macro="sanitizer_printable"/> - </param> - <param argument="--surv_time_var" type="text" label="Survival time" help="Which column in 'clin.csv' to use as time/duration indicator for survival modeling."> - <expand macro="sanitizer_printable"/> - </param> + <repeat name="targets" min="0" title="Target variables"> + <param argument="--target_variables" type="data_column" data_ref="train_clin" optional="false" label="Column name in the train clinical data to use for predictions, multiple targets are allowed"/> + </repeat> + <param argument="--surv_event_var" type="data_column" data_ref="train_clin" optional="true" label="Column name in the train clinical data to use as survival event"/> + <param argument="--surv_time_var" type="data_column" data_ref="train_clin" optional="true" label="Column name in the train clinical data to use as survival time"/> <expand macro="advanced"/> </when> <when value="us_train"> @@ -428,7 +484,7 @@ </inputs> <outputs> <collection name="results" type="list" label="${tool.name} on ${on_string}: results"> - <discover_datasets pattern="(?P<name>.+)\.csv$" format="csv" directory="output"/> + <discover_datasets pattern="(?P<name>.+)\.tabular$" format="tabular" directory="output"/> </collection> <collection name="plots" type="list" label="${tool.name} on ${on_string}: embeddings plots"> <discover_datasets pattern="__name_and_ext__" directory="plots"/> @@ -441,20 +497,22 @@ <param name="non_commercial_use" value="True"/> <conditional name="training_type"> <param name="model" value="s_train"/> - <param name="train_clin" value="train/clin" ftype="csv"/> - <param name="test_clin" value="test/clin" ftype="csv"/> - <param name="train_omics_main" value="train/gex" ftype="csv"/> - <param name="test_omics_main" value="test/gex" ftype="csv"/> + <param name="train_clin" value="train/clin" ftype="tabular"/> + <param name="test_clin" value="test/clin" ftype="tabular"/> + <param name="train_omics_main" value="train/gex" ftype="tabular"/> + <param name="test_omics_main" value="test/gex" ftype="tabular"/> <param name="assay_main" value="bar"/> <repeat name="omics"> - <param name="train_omics" value="train/cnv" ftype="csv"/> - <param name="test_omics" value="test/cnv" ftype="csv"/> + <param name="train_omics" value="train/cnv" ftype="tabular"/> + <param name="test_omics" value="test/cnv" ftype="tabular"/> <param name="assay" value="foo"/> </repeat> <conditional name="model_class"> <param name="model_class" value="DirectPred"/> </conditional> - <param name="target_variables" value="Erlotinib"/> + <repeat name="targets"> + <param name="target_variables" value="3"/> + </repeat> <section name="advanced"> <param name="hpo_iter" value="1"/> </section> @@ -472,15 +530,15 @@ </element> <element name="job.feature_importance.GradientShap"> <assert_contents> - <has_text_matching expression="Erlotinib,0,,bar,A2M,"/> - <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/> + <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/> + <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/> <has_text_matching expression="GradientShap"/> </assert_contents> </element> <element name="job.feature_importance.IntegratedGradients"> <assert_contents> - <has_text_matching expression="Erlotinib,0,,bar,A2M,"/> - <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/> + <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/> + <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/> <has_text_matching expression="IntegratedGradients"/> </assert_contents> </element> @@ -496,15 +554,15 @@ </element> <element name="job.predicted_labels"> <assert_contents> - <has_text_matching expression="source_dataset:A-704,Erlotinib,"/> - <has_text_matching expression="target_dataset:KMRC-20,Erlotinib,"/> + <has_text_matching expression="source_dataset:A-704\tErlotinib\t"/> + <has_text_matching expression="target_dataset:KMRC-20\tErlotinib\t"/> </assert_contents> </element> <element name="job.stats"> <assert_contents> - <has_text_matching expression="DirectPred,Erlotinib,numerical,mse,"/> - <has_text_matching expression="DirectPred,Erlotinib,numerical,r2,"/> - <has_text_matching expression="DirectPred,Erlotinib,numerical,pearson_corr,"/> + <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tmse\t"/> + <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tr2\t"/> + <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tpearson_corr\t"/> </assert_contents> </element> </output_collection> @@ -514,15 +572,17 @@ <param name="non_commercial_use" value="True"/> <conditional name="training_type"> <param name="model" value="s_train"/> - <param name="train_clin" value="train/clin" ftype="csv"/> - <param name="test_clin" value="test/clin" ftype="csv"/> - <param name="train_omics_main" value="train/gex" ftype="csv"/> - <param name="test_omics_main" value="test/gex" ftype="csv"/> + <param name="train_clin" value="train/clin" ftype="tabular"/> + <param name="test_clin" value="test/clin" ftype="tabular"/> + <param name="train_omics_main" value="train/gex" ftype="tabular"/> + <param name="test_omics_main" value="test/gex" ftype="tabular"/> <param name="assay_main" value="bar"/> <conditional name="model_class"> <param name="model_class" value="DirectPred"/> </conditional> - <param name="target_variables" value="Erlotinib"/> + <repeat name="targets"> + <param name="target_variables" value="3"/> + </repeat> <section name="advanced"> <param name="hpo_iter" value="1"/> </section> @@ -540,15 +600,15 @@ </element> <element name="job.feature_importance.GradientShap"> <assert_contents> - <has_text_matching expression="Erlotinib,0,,bar,A2M,"/> - <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/> + <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/> + <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/> <has_text_matching expression="GradientShap"/> </assert_contents> </element> <element name="job.feature_importance.IntegratedGradients"> <assert_contents> - <has_text_matching expression="Erlotinib,0,,bar,A2M,"/> - <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/> + <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/> + <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/> <has_text_matching expression="IntegratedGradients"/> </assert_contents> </element> @@ -559,15 +619,15 @@ </element> <element name="job.predicted_labels"> <assert_contents> - <has_text_matching expression="source_dataset:A-704,Erlotinib,"/> - <has_text_matching expression="target_dataset:KMRC-20,Erlotinib,"/> + <has_text_matching expression="source_dataset:A-704\tErlotinib\t"/> + <has_text_matching expression="target_dataset:KMRC-20\tErlotinib\t"/> </assert_contents> </element> <element name="job.stats"> <assert_contents> - <has_text_matching expression="DirectPred,Erlotinib,numerical,mse,"/> - <has_text_matching expression="DirectPred,Erlotinib,numerical,r2,"/> - <has_text_matching expression="DirectPred,Erlotinib,numerical,pearson_corr,"/> + <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tmse\t"/> + <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tr2\t"/> + <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tpearson_corr\t"/> </assert_contents> </element> </output_collection> @@ -577,20 +637,22 @@ <param name="non_commercial_use" value="True"/> <conditional name="training_type"> <param name="model" value="s_train"/> - <param name="train_clin" value="train/clin" ftype="csv"/> - <param name="test_clin" value="test/clin" ftype="csv"/> - <param name="train_omics_main" value="train/gex" ftype="csv"/> - <param name="test_omics_main" value="test/gex" ftype="csv"/> + <param name="train_clin" value="train/clin" ftype="tabular"/> + <param name="test_clin" value="test/clin" ftype="tabular"/> + <param name="train_omics_main" value="train/gex" ftype="tabular"/> + <param name="test_omics_main" value="test/gex" ftype="tabular"/> <param name="assay_main" value="bar"/> <repeat name="omics"> - <param name="train_omics" value="train/cnv" ftype="csv"/> - <param name="test_omics" value="test/cnv" ftype="csv"/> + <param name="train_omics" value="train/cnv" ftype="tabular"/> + <param name="test_omics" value="test/cnv" ftype="tabular"/> <param name="assay" value="foo"/> </repeat> <conditional name="model_class"> <param name="model_class" value="DirectPred"/> </conditional> - <param name="target_variables" value="Irinotecan"/> + <repeat name="targets"> + <param name="target_variables" value="4"/> + </repeat> <section name="advanced"> <param name="hpo_iter" value="1"/> </section> @@ -608,15 +670,15 @@ </element> <element name="job.feature_importance.GradientShap"> <assert_contents> - <has_text_matching expression="Irinotecan,0,,bar,A2M,"/> - <has_text_matching expression="Irinotecan,0,,bar,ABCC4,"/> + <has_text_matching expression="Irinotecan\t0\t\tbar\tA2M\t"/> + <has_text_matching expression="Irinotecan\t0\t\tbar\tABCC4\t"/> <has_text_matching expression="GradientShap"/> </assert_contents> </element> <element name="job.feature_importance.IntegratedGradients"> <assert_contents> - <has_text_matching expression="Irinotecan,0,,bar,A2M,"/> - <has_text_matching expression="Irinotecan,0,,bar,ABCC4,"/> + <has_text_matching expression="Irinotecan\t0\t\tbar\tA2M\t"/> + <has_text_matching expression="Irinotecan\t0\t\tbar\tABCC4\t"/> <has_text_matching expression="IntegratedGradients"/> </assert_contents> </element> @@ -632,15 +694,15 @@ </element> <element name="job.predicted_labels"> <assert_contents> - <has_text_matching expression="source_dataset:A-704,Irinotecan,"/> - <has_text_matching expression="target_dataset:KMRC-20,Irinotecan,"/> + <has_text_matching expression="source_dataset:A-704\tIrinotecan\t"/> + <has_text_matching expression="target_dataset:KMRC-20\tIrinotecan\t"/> </assert_contents> </element> <element name="job.stats"> <assert_contents> - <has_text_matching expression="DirectPred,Irinotecan,numerical,mse,"/> - <has_text_matching expression="DirectPred,Irinotecan,numerical,r2,"/> - <has_text_matching expression="DirectPred,Irinotecan,numerical,pearson_corr,"/> + <has_text_matching expression="DirectPred\tIrinotecan\tnumerical\tmse\t"/> + <has_text_matching expression="DirectPred\tIrinotecan\tnumerical\tr2\t"/> + <has_text_matching expression="DirectPred\tIrinotecan\tnumerical\tpearson_corr\t"/> </assert_contents> </element> </output_collection> @@ -650,14 +712,14 @@ <param name="non_commercial_use" value="True"/> <conditional name="training_type"> <param name="model" value="us_train"/> - <param name="train_clin" value="train/clin" ftype="csv"/> - <param name="test_clin" value="test/clin" ftype="csv"/> - <param name="train_omics_main" value="train/gex" ftype="csv"/> - <param name="test_omics_main" value="test/gex" ftype="csv"/> + <param name="train_clin" value="train/clin" ftype="tabular"/> + <param name="test_clin" value="test/clin" ftype="tabular"/> + <param name="train_omics_main" value="train/gex" ftype="tabular"/> + <param name="test_omics_main" value="test/gex" ftype="tabular"/> <param name="assay_main" value="bar"/> <repeat name="omics"> - <param name="train_omics" value="train/cnv" ftype="csv"/> - <param name="test_omics" value="test/cnv" ftype="csv"/> + <param name="train_omics" value="train/cnv" ftype="tabular"/> + <param name="test_omics" value="test/cnv" ftype="tabular"/> <param name="assay" value="foo"/> </repeat> <param name="model_class" value="supervised_vae"/> @@ -693,15 +755,15 @@ <param name="non_commercial_use" value="True"/> <conditional name="training_type"> <param name="model" value="cm_train"/> - <param name="train_clin" value="train/clin" ftype="csv"/> - <param name="test_clin" value="test/clin" ftype="csv"/> - <param name="train_omics_main" value="train/gex" ftype="csv"/> - <param name="test_omics_main" value="test/gex" ftype="csv"/> + <param name="train_clin" value="train/clin" ftype="tabular"/> + <param name="test_clin" value="test/clin" ftype="tabular"/> + <param name="train_omics_main" value="train/gex" ftype="tabular"/> + <param name="test_omics_main" value="test/gex" ftype="tabular"/> <param name="assay_main" value="bar"/> <param name="layer_main" value="input"/> <repeat name="omics"> - <param name="train_omics" value="train/cnv" ftype="csv"/> - <param name="test_omics" value="test/cnv" ftype="csv"/> + <param name="train_omics" value="train/cnv" ftype="tabular"/> + <param name="test_omics" value="test/cnv" ftype="tabular"/> <param name="assay" value="foo"/> <param name="layer" value="output"/> </repeat> @@ -748,14 +810,14 @@ <param name="non_commercial_use" value="True"/> <conditional name="training_type"> <param name="model" value="s_train"/> - <param name="train_clin" value="train/clin" ftype="csv"/> - <param name="test_clin" value="test/clin" ftype="csv"/> - <param name="train_omics_main" value="train/gex" ftype="csv"/> - <param name="test_omics_main" value="test/gex" ftype="csv"/> + <param name="train_clin" value="train/clin" ftype="tabular"/> + <param name="test_clin" value="test/clin" ftype="tabular"/> + <param name="train_omics_main" value="train/gex" ftype="tabular"/> + <param name="test_omics_main" value="test/gex" ftype="tabular"/> <param name="assay_main" value="bar"/> <repeat name="omics"> - <param name="train_omics" value="train/cnv" ftype="csv"/> - <param name="test_omics" value="test/cnv" ftype="csv"/> + <param name="train_omics" value="train/cnv" ftype="tabular"/> + <param name="test_omics" value="test/cnv" ftype="tabular"/> <param name="assay" value="foo"/> </repeat> <conditional name="model_class"> @@ -764,7 +826,9 @@ <param name="string_organism" value="9606"/> <param name="string_node_name" value="gene_name"/> </conditional> - <param name="target_variables" value="Erlotinib"/> + <repeat name="targets"> + <param name="target_variables" value="3"/> + </repeat> <section name="advanced"> <param name="hpo_iter" value="1"/> </section> @@ -782,15 +846,15 @@ </element> <element name="job.feature_importance.GradientShap"> <assert_contents> - <has_text_matching expression="Erlotinib,0,,bar,A2M,"/> - <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/> + <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/> + <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/> <has_text_matching expression="GradientShap"/> </assert_contents> </element> <element name="job.feature_importance.IntegratedGradients"> <assert_contents> - <has_text_matching expression="Erlotinib,0,,bar,A2M,"/> - <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/> + <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/> + <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/> <has_text_matching expression="IntegratedGradients"/> </assert_contents> </element> @@ -806,15 +870,15 @@ </element> <element name="job.predicted_labels"> <assert_contents> - <has_text_matching expression="source_dataset:A-704,Erlotinib,"/> - <has_text_matching expression="target_dataset:KMRC-20,Erlotinib,"/> + <has_text_matching expression="source_dataset:A-704\tErlotinib\t"/> + <has_text_matching expression="target_dataset:KMRC-20\tErlotinib\t"/> </assert_contents> </element> <element name="job.stats"> <assert_contents> - <has_text_matching expression="GNN,Erlotinib,numerical,mse,"/> - <has_text_matching expression="GNN,Erlotinib,numerical,r2,"/> - <has_text_matching expression="GNN,Erlotinib,numerical,pearson_corr,"/> + <has_text_matching expression="GNN\tErlotinib\tnumerical\tmse\t"/> + <has_text_matching expression="GNN\tErlotinib\tnumerical\tr2\t"/> + <has_text_matching expression="GNN\tErlotinib\tnumerical\tpearson_corr\t"/> </assert_contents> </element> </output_collection> @@ -824,14 +888,14 @@ <param name="non_commercial_use" value="True"/> <conditional name="training_type"> <param name="model" value="us_train"/> - <param name="train_clin" value="train/clin" ftype="csv"/> - <param name="test_clin" value="test/clin" ftype="csv"/> - <param name="train_omics_main" value="train/gex" ftype="csv"/> - <param name="test_omics_main" value="test/gex" ftype="csv"/> + <param name="train_clin" value="train/clin" ftype="tabular"/> + <param name="test_clin" value="test/clin" ftype="tabular"/> + <param name="train_omics_main" value="train/gex" ftype="tabular"/> + <param name="test_omics_main" value="test/gex" ftype="tabular"/> <param name="assay_main" value="b ar"/> <repeat name="omics"> - <param name="train_omics" value="train/cnv" ftype="csv"/> - <param name="test_omics" value="test/cnv" ftype="csv"/> + <param name="train_omics" value="train/cnv" ftype="tabular"/> + <param name="test_omics" value="test/cnv" ftype="tabular"/> <param name="assay" value="f oo"/> </repeat> <param name="model_class" value="supervised_vae"/> @@ -867,20 +931,22 @@ <param name="non_commercial_use" value="True"/> <conditional name="training_type"> <param name="model" value="s_train"/> - <param name="train_clin" value="train/clin" ftype="csv"/> - <param name="test_clin" value="test/clin" ftype="csv"/> - <param name="train_omics_main" value="train/gex" ftype="csv"/> - <param name="test_omics_main" value="test/gex" ftype="csv"/> + <param name="train_clin" value="train/clin" ftype="tabular"/> + <param name="test_clin" value="test/clin" ftype="tabular"/> + <param name="train_omics_main" value="train/gex" ftype="tabular"/> + <param name="test_omics_main" value="test/gex" ftype="tabular"/> <param name="assay_main" value="bar"/> <repeat name="omics"> - <param name="train_omics" value="train/cnv" ftype="csv"/> - <param name="test_omics" value="test/cnv" ftype="csv"/> + <param name="train_omics" value="train/cnv" ftype="tabular"/> + <param name="test_omics" value="test/cnv" ftype="tabular"/> <param name="assay" value="foo"/> </repeat> <conditional name="model_class"> <param name="model_class" value="XGBoost"/> </conditional> - <param name="target_variables" value="Erlotinib"/> + <repeat name="targets"> + <param name="target_variables" value="3"/> + </repeat> <section name="advanced"> <param name="hpo_iter" value="1"/> </section> @@ -888,9 +954,9 @@ <output_collection name="results" type="list"> <element name="job.stats"> <assert_contents> - <has_text_matching expression="XGBoostRegressor,Erlotinib,numerical,mse,"/> - <has_text_matching expression="XGBoostRegressor,Erlotinib,numerical,r2,"/> - <has_text_matching expression="XGBoostRegressor,Erlotinib,numerical,pearson_corr,"/> + <has_text_matching expression="XGBoostRegressor\tErlotinib\tnumerical\tmse\t"/> + <has_text_matching expression="XGBoostRegressor\tErlotinib\tnumerical\tr2\t"/> + <has_text_matching expression="XGBoostRegressor\tErlotinib\tnumerical\tpearson_corr\t"/> </assert_contents> </element> </output_collection> @@ -900,20 +966,22 @@ <param name="non_commercial_use" value="True"/> <conditional name="training_type"> <param name="model" value="s_train"/> - <param name="train_clin" value="train/clin" ftype="csv"/> - <param name="test_clin" value="test/clin" ftype="csv"/> - <param name="train_omics_main" value="train/gex" ftype="csv"/> - <param name="test_omics_main" value="test/gex" ftype="csv"/> + <param name="train_clin" value="train/clin" ftype="tabular"/> + <param name="test_clin" value="test/clin" ftype="tabular"/> + <param name="train_omics_main" value="train/gex" ftype="tabular"/> + <param name="test_omics_main" value="test/gex" ftype="tabular"/> <param name="assay_main" value="bar"/> <repeat name="omics"> - <param name="train_omics" value="train/cnv" ftype="csv"/> - <param name="test_omics" value="test/cnv" ftype="csv"/> + <param name="train_omics" value="train/cnv" ftype="tabular"/> + <param name="test_omics" value="test/cnv" ftype="tabular"/> <param name="assay" value="foo"/> </repeat> <conditional name="model_class"> <param name="model_class" value="DirectPred"/> </conditional> - <param name="target_variables" value="Erlotinib"/> + <repeat name="targets"> + <param name="target_variables" value="3"/> + </repeat> <section name="advanced"> <param name="hpo_iter" value="1"/> <param name="feature_importance_method" value="IntegratedGradients"/> @@ -933,8 +1001,8 @@ </element> <element name="job.feature_importance.IntegratedGradients"> <assert_contents> - <has_text_matching expression="Erlotinib,0,,bar,A2M,"/> - <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/> + <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/> + <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/> </assert_contents> </element> <element name="job.feature_logs.bar"> @@ -949,15 +1017,15 @@ </element> <element name="job.predicted_labels"> <assert_contents> - <has_text_matching expression="source_dataset:A-704,Erlotinib,"/> - <has_text_matching expression="target_dataset:KMRC-20,Erlotinib,"/> + <has_text_matching expression="source_dataset:A-704\tErlotinib\t"/> + <has_text_matching expression="target_dataset:KMRC-20\tErlotinib\t"/> </assert_contents> </element> <element name="job.stats"> <assert_contents> - <has_text_matching expression="DirectPred,Erlotinib,numerical,mse,"/> - <has_text_matching expression="DirectPred,Erlotinib,numerical,r2,"/> - <has_text_matching expression="DirectPred,Erlotinib,numerical,pearson_corr,"/> + <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tmse\t"/> + <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tr2\t"/> + <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tpearson_corr\t"/> </assert_contents> </element> </output_collection> @@ -967,20 +1035,22 @@ <param name="non_commercial_use" value="True"/> <conditional name="training_type"> <param name="model" value="s_train"/> - <param name="train_clin" value="train/clin" ftype="csv"/> - <param name="test_clin" value="test/clin" ftype="csv"/> - <param name="train_omics_main" value="train/gex" ftype="csv"/> - <param name="test_omics_main" value="test/gex" ftype="csv"/> + <param name="train_clin" value="train/clin" ftype="tabular"/> + <param name="test_clin" value="test/clin" ftype="tabular"/> + <param name="train_omics_main" value="train/gex" ftype="tabular"/> + <param name="test_omics_main" value="test/gex" ftype="tabular"/> <param name="assay_main" value="bar"/> <repeat name="omics"> - <param name="train_omics" value="train/cnv" ftype="csv"/> - <param name="test_omics" value="test/cnv" ftype="csv"/> + <param name="train_omics" value="train/cnv" ftype="tabular"/> + <param name="test_omics" value="test/cnv" ftype="tabular"/> <param name="assay" value="foo"/> </repeat> <conditional name="model_class"> <param name="model_class" value="DirectPred"/> </conditional> - <param name="target_variables" value="Erlotinib"/> + <repeat name="targets"> + <param name="target_variables" value="3"/> + </repeat> <section name="advanced"> <param name="hpo_iter" value="1"/> </section> @@ -1006,15 +1076,15 @@ </element> <element name="job.feature_importance.GradientShap"> <assert_contents> - <has_text_matching expression="Erlotinib,0,,bar,A2M,"/> - <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/> + <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/> + <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/> <has_text_matching expression="GradientShap"/> </assert_contents> </element> <element name="job.feature_importance.IntegratedGradients"> <assert_contents> - <has_text_matching expression="Erlotinib,0,,bar,A2M,"/> - <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/> + <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/> + <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/> <has_text_matching expression="IntegratedGradients"/> </assert_contents> </element> @@ -1030,15 +1100,15 @@ </element> <element name="job.predicted_labels"> <assert_contents> - <has_text_matching expression="source_dataset:A-704,Erlotinib,"/> - <has_text_matching expression="target_dataset:KMRC-20,Erlotinib,"/> + <has_text_matching expression="source_dataset:A-704\tErlotinib\t"/> + <has_text_matching expression="target_dataset:KMRC-20\tErlotinib\t"/> </assert_contents> </element> <element name="job.stats"> <assert_contents> - <has_text_matching expression="DirectPred,Erlotinib,numerical,mse,"/> - <has_text_matching expression="DirectPred,Erlotinib,numerical,r2,"/> - <has_text_matching expression="DirectPred,Erlotinib,numerical,pearson_corr,"/> + <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tmse\t"/> + <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tr2\t"/> + <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tpearson_corr\t"/> </assert_contents> </element> </output_collection> @@ -1265,6 +1335,96 @@ </element> </output_collection> </test> + <!-- test 19: Supervised training with GEX and CNV data, DirectPred model, Erlotinib and Crizotinib targets --> + <test expect_num_outputs="1"> + <param name="non_commercial_use" value="True"/> + <conditional name="training_type"> + <param name="model" value="s_train"/> + <param name="train_clin" value="train/clin" ftype="tabular"/> + <param name="test_clin" value="test/clin" ftype="tabular"/> + <param name="train_omics_main" value="train/gex" ftype="tabular"/> + <param name="test_omics_main" value="test/gex" ftype="tabular"/> + <param name="assay_main" value="bar"/> + <repeat name="omics"> + <param name="train_omics" value="train/cnv" ftype="tabular"/> + <param name="test_omics" value="test/cnv" ftype="tabular"/> + <param name="assay" value="foo"/> + </repeat> + <conditional name="model_class"> + <param name="model_class" value="DirectPred"/> + </conditional> + <repeat name="targets"> + <param name="target_variables" value="3"/> + </repeat> + <repeat name="targets"> + <param name="target_variables" value="2"/> + </repeat> + <section name="advanced"> + <param name="hpo_iter" value="1"/> + </section> + </conditional> + <assert_stdout> + <has_text_matching expression="Target variables: Erlotinib,Crizotinib"/> + </assert_stdout> + <output_collection name="results" type="list"> + <element name="job.embeddings_test"> + <assert_contents> + <has_n_lines n="50"/> + </assert_contents> + </element> + <element name="job.embeddings_train"> + <assert_contents> + <has_n_lines n="50"/> + </assert_contents> + </element> + <element name="job.feature_importance.GradientShap"> + <assert_contents> + <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/> + <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/> + <has_text_matching expression="Crizotinib\t0\t\tbar\tA2M\t"/> + <has_text_matching expression="Crizotinib\t0\t\tbar\tABCC4\t"/> + <has_text_matching expression="GradientShap"/> + </assert_contents> + </element> + <element name="job.feature_importance.IntegratedGradients"> + <assert_contents> + <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/> + <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/> + <has_text_matching expression="Crizotinib\t0\t\tbar\tA2M\t"/> + <has_text_matching expression="Crizotinib\t0\t\tbar\tABCC4\t"/> + <has_text_matching expression="IntegratedGradients"/> + </assert_contents> + </element> + <element name="job.feature_logs.bar"> + <assert_contents> + <has_n_lines n="25"/> + </assert_contents> + </element> + <element name="job.feature_logs.omics_foo"> + <assert_contents> + <has_n_lines n="25"/> + </assert_contents> + </element> + <element name="job.predicted_labels"> + <assert_contents> + <has_text_matching expression="source_dataset:A-704\tErlotinib\t"/> + <has_text_matching expression="source_dataset:A-704\tCrizotinib\t"/> + <has_text_matching expression="target_dataset:KMRC-20\tErlotinib\t"/> + <has_text_matching expression="target_dataset:KMRC-20\tCrizotinib\t"/> + </assert_contents> + </element> + <element name="job.stats"> + <assert_contents> + <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tmse\t"/> + <has_text_matching expression="DirectPred\tCrizotinib\tnumerical\tmse\t"/> + <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tr2\t"/> + <has_text_matching expression="DirectPred\tCrizotinib\tnumerical\tr2\t"/> + <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tpearson_corr\t"/> + <has_text_matching expression="DirectPred\tCrizotinib\tnumerical\tpearson_corr\t"/> + </assert_contents> + </element> + </output_collection> + </test> </tests> <help><