comparison flexynesis.xml @ 5:466b593fd87e draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 1afbaf45449e25238935e222f983da62392c067a
author bgruening
date Fri, 04 Jul 2025 14:57:40 +0000
parents 1a5f8cedda43
children 33816f44fc7d
comparison
equal deleted inserted replaced
4:1a5f8cedda43 5:466b593fd87e
9 <include path="flexynesis_plot.py" /> 9 <include path="flexynesis_plot.py" />
10 </required_files> 10 </required_files>
11 <command detect_errors="exit_code"><![CDATA[ 11 <command detect_errors="exit_code"><![CDATA[
12 @CHECK_NON_COMMERCIAL_USE@ 12 @CHECK_NON_COMMERCIAL_USE@
13 mkdir -p input/test input/train output && 13 mkdir -p input/test input/train output &&
14 ln -s '$train_clin' input/train/clin.csv && 14 ln -s '$train_clin' input/train/clin.tabular &&
15 ln -s '$test_clin' input/test/clin.csv && 15 python $__tool_directory__/convert.py input/train/clin.tabular input/train/clin.csv &&
16
17 ln -s '$test_clin' input/test/clin.tabular &&
18 python $__tool_directory__/convert.py input/test/clin.tabular input/test/clin.csv &&
16 19
17 #if str($assay_main) != '': 20 #if str($assay_main) != '':
18 #set $name = str($assay_main.replace(" ", "_")) 21 #set $name = str($assay_main.replace(" ", "_"))
19 ln -s '$train_omics_main' input/train/${name}.csv && 22 ln -s '$train_omics_main' input/train/${name}.tabular &&
20 ln -s '$test_omics_main' input/test/${name}.csv && 23 python '$__tool_directory__/convert.py' input/train/${name}.tabular input/train/${name}.csv &&
24
25 ln -s '$test_omics_main' input/test/${name}.tabular &&
26 python '$__tool_directory__/convert.py' input/test/${name}.tabular input/test/${name}.csv &&
27
21 #set $data_names = [$name] 28 #set $data_names = [$name]
22 #else 29 #else
23 ln -s '$train_omics_main' input/train/main.csv && 30 ln -s '$train_omics_main' input/train/main.tabular &&
24 ln -s '$test_omics_main' input/test/main.csv && 31 python '$__tool_directory__/convert.py' input/train/main.tabular input/train/main.csv &&
32 ln -s '$test_omics_main' input/test/main.tabular &&
33 python '$__tool_directory__/convert.py' input/test/main.tabular input/test/main.csv &&
25 #set $data_names = ['main'] 34 #set $data_names = ['main']
26 #end if 35 #end if
27 #if str($training_type.model) == 'cm_train': 36 #if str($training_type.model) == 'cm_train':
28 #if str($layer_main) == 'input': 37 #if str($layer_main) == 'input':
29 #set $input_layers = $data_names 38 #set $input_layers = $data_names
36 #for $i, $element in enumerate($omics) 45 #for $i, $element in enumerate($omics)
37 #if str($element.train_omics) != 'None' and str($element.test_omics) != 'None': 46 #if str($element.train_omics) != 'None' and str($element.test_omics) != 'None':
38 #if str($element.assay) != '': 47 #if str($element.assay) != '':
39 #set $i = str($element.assay.replace(" ", "_")) 48 #set $i = str($element.assay.replace(" ", "_"))
40 #end if 49 #end if
41 ln -s '${element.train_omics}' input/train/omics_${i}.csv && 50 ln -s '${element.train_omics}' input/train/omics_${i}.tabular &&
42 ln -s '${element.test_omics}' input/test/omics_${i}.csv && 51 python '$__tool_directory__/convert.py' input/train/omics_${i}.tabular input/train/omics_${i}.csv &&
52
53 ln -s '${element.test_omics}' input/test/omics_${i}.tabular &&
54 python '$__tool_directory__/convert.py' input/test/omics_${i}.tabular input/test/omics_${i}.csv &&
55
43 $data_names.append("omics_" + str($i)) 56 $data_names.append("omics_" + str($i))
44 #if str($training_type.model) == 'cm_train': 57 #if str($training_type.model) == 'cm_train':
45 #if str($element.layer) == 'input': 58 #if str($element.layer) == 'input':
46 $input_layers.append("omics_" + str($i)) 59 $input_layers.append("omics_" + str($i))
47 #else 60 #else
48 $output_layers.append("omics_" + str($i)) 61 $output_layers.append("omics_" + str($i))
49 #end if 62 #end if
50 #end if 63 #end if
51 #end if 64 #end if
52 #end for 65 #end for
66 ## set target variables
67 #if str($training_type.model) == 's_train':
68 #if len($targets) > 0:
69 target_variables="" &&
70 #for $i, $element in enumerate($targets)
71 target_name=`$__tool_directory__/index_to_name.py input/train/clin.csv $element.target_variables` &&
72 #if $i == 0:
73 target_variables="\$target_name" &&
74 #else:
75 target_variables="\$target_variables,\$target_name" &&
76 #end if
77 #end for
78 echo "Target variables: \$target_variables" &&
79 #end if
80 ## set survival variables
81 #if str($surv_event_var) != 'None' and str($surv_time_var) != 'None':
82 surv_event_var=`$__tool_directory__/index_to_name.py input/train/clin.csv $surv_event_var` &&
83 echo "Survival event variable: \$surv_event_var" &&
84 surv_time_var=`$__tool_directory__/index_to_name.py input/train/clin.csv $surv_time_var` &&
85 echo "Survival time variable: \$surv_time_var" &&
86 #end if
87 ## set target value for plots
88 #if str($surv_event_var) != 'None' and str($surv_time_var) != 'None' and len($targets) > 0:
89 target_value="\$target_variables,\$surv_event_var" &&
90 echo "Target value for plots: \$target_value" &&
91 #else if str($surv_event_var) == 'None' and str($surv_time_var) == 'None' and len($targets) > 0:
92 target_value="\$target_variables" &&
93 echo "Target value for plots: \$target_value" &&
94 #else if str($surv_event_var) != 'None' and str($surv_time_var) != 'None' and len($targets) == 0:
95 target_value="\$surv_event_var" &&
96 echo "Target value for plots: \$target_value" &&
97 #end if
98 #end if
99
53 flexynesis 100 flexynesis
54 --data_path input 101 --data_path input
55 --outdir output 102 --outdir output
56 --model_class $model_class 103 --model_class $model_class
57 #if str($model_class) == 'GNN': 104 #if str($model_class) == 'GNN':
58 --gnn_conv_type $gnn_conv_type 105 --gnn_conv_type $gnn_conv_type
59 --string_organism $string_organism 106 --string_organism $string_organism
60 --string_node_name $string_node_name 107 --string_node_name $string_node_name
61 #end if 108 #end if
62 #if str($training_type.model) == 's_train': 109 #if str($training_type.model) == 's_train':
63 #if str($target_variables) != '': 110 #if len($targets) > 0:
64 --target_variables $target_variables 111 --target_variables \$target_variables
65 #end if 112 #end if
66 #if str($surv_event_var) != '': 113 #if str($surv_event_var) != 'None' and str($surv_time_var) != 'None':
67 --surv_event_var $surv_event_var 114 --surv_event_var \$surv_event_var
68 --surv_time_var $surv_time_var 115 --surv_time_var \$surv_time_var
69 #end if 116 #end if
70 #end if 117 #end if
71 #if str($training_type.model) == 'cm_train': 118 #if str($training_type.model) == 'cm_train':
72 --input_layers $str(",".join($input_layers)) 119 --input_layers $str(",".join($input_layers))
73 --output_layers $str(",".join($output_layers)) 120 --output_layers $str(",".join($output_layers))
88 $use_loss_weighting 135 $use_loss_weighting
89 $use_cv 136 $use_cv
90 $evaluate_baseline_performance 137 $evaluate_baseline_performance
91 --feature_importance_method $feature_importance_method 138 --feature_importance_method $feature_importance_method
92 \${GALAXY_FLEXYNESIS_EXTRA_ARGUMENTS} 139 \${GALAXY_FLEXYNESIS_EXTRA_ARGUMENTS}
140
141 ## convert flexynesis output files to tabular format
142 && for file in output/*.csv; do
143 if [ -f "\$file" ]; then
144 basename="\${file%.csv}";
145 python $__tool_directory__/convert.py "\$file" "\${basename}.tabular";
146 echo "Converted \$file to \${basename}.tabular";
147 fi
148 done
93 #if str($plot.plot_embeddings_conditional.plot_embeddings) == 'yes': 149 #if str($plot.plot_embeddings_conditional.plot_embeddings) == 'yes':
94 && python $__tool_directory__/flexynesis_plot.py 150 && python $__tool_directory__/flexynesis_plot.py
95 --plot_type dimred 151 --plot_type dimred
96 --embeddings output/job.embeddings_test.csv 152 --embeddings output/job.embeddings_test.tabular
97 --labels output/job.predicted_labels.csv 153 --labels output/job.predicted_labels.tabular
98 --method $plot.plot_embeddings_conditional.method 154 --method $plot.plot_embeddings_conditional.method
99 --target_variables $target_variables 155 #if str($training_type.model) == 's_train':
156 #if len($targets) > 0 or (str($surv_event_var) != 'None' and str($surv_time_var) != 'None'):
157 --target_value \$target_value
158 #end if
159 #end if
100 --output_dir plots 160 --output_dir plots
101 --format $plot.plot_embeddings_conditional.format 161 --format $plot.plot_embeddings_conditional.format
102 --dpi $plot.plot_embeddings_conditional.dpi 162 --dpi $plot.plot_embeddings_conditional.dpi
103 #end if 163 #end if
104 #if str($plot.plot_km_conditional.plot_km) == 'yes': 164 #if str($plot.plot_km_conditional.plot_km) == 'yes':
106 && echo "Survival event and time variables must be specified for Kaplan-Meier plots." 166 && echo "Survival event and time variables must be specified for Kaplan-Meier plots."
107 && exit 1 167 && exit 1
108 #else 168 #else
109 && python $__tool_directory__/flexynesis_plot.py 169 && python $__tool_directory__/flexynesis_plot.py
110 --plot_type kaplan_meier 170 --plot_type kaplan_meier
111 --labels output/job.predicted_labels.csv 171 --labels output/job.predicted_labels.tabular
112 --survival_data input/test/clin.csv 172 --survival_data input/test/clin.csv
113 --surv_event_var $surv_event_var 173 --surv_event_var \$surv_event_var
114 --surv_time_var $surv_time_var 174 --surv_time_var \$surv_time_var
115 --event_value $plot.plot_km_conditional.event_value 175 --event_value $plot.plot_km_conditional.event_value
116 --output_dir plots 176 --output_dir plots
117 --format $plot.plot_km_conditional.format 177 --format $plot.plot_km_conditional.format
118 --dpi $plot.plot_km_conditional.dpi 178 --dpi $plot.plot_km_conditional.dpi
119 #end if 179 #end if
134 --omics_test input/test/${name}.csv 194 --omics_test input/test/${name}.csv
135 #else 195 #else
136 --omics_train input/train/main.csv 196 --omics_train input/train/main.csv
137 --omics_test input/test/main.csv 197 --omics_test input/test/main.csv
138 #end if 198 #end if
139 --surv_time_var $surv_time_var 199 --surv_time_var \$surv_time_var
140 --surv_event_var $surv_event_var 200 --surv_event_var \$surv_event_var
141 #if str($plot.plot_cox_conditional.clinical_variables) != '': 201 #if str($plot.plot_cox_conditional.clinical_variables) != '':
142 --clinical_variables $str($plot.plot_cox_conditional.clinical_variables) 202 --clinical_variables $str($plot.plot_cox_conditional.clinical_variables)
143 #end if 203 #end if
144 --top_features $plot.plot_cox_conditional.top_features 204 --top_features $plot.plot_cox_conditional.top_features
145 --event_value $plot.plot_cox_conditional.event_value 205 --event_value $plot.plot_cox_conditional.event_value
156 #end if 216 #end if
157 #end if 217 #end if
158 #if str($plot.plot_scatter_conditional.plot_scatter) == 'yes': 218 #if str($plot.plot_scatter_conditional.plot_scatter) == 'yes':
159 && python $__tool_directory__/flexynesis_plot.py 219 && python $__tool_directory__/flexynesis_plot.py
160 --plot_type scatter 220 --plot_type scatter
161 --labels output/job.predicted_labels.csv 221 --labels output/job.predicted_labels.tabular
162 #if str($surv_event_var) != '': 222 #if str($training_type.model) == 's_train':
163 --target_value $target_variables,$surv_event_var 223 #if len($targets) > 0 or (str($surv_event_var) != 'None' and str($surv_time_var) != 'None'):
164 #else 224 --target_value \$target_value
165 --target_value $target_variables 225 #end if
166 #end if 226 #end if
167 --output_dir plots 227 --output_dir plots
168 --format $plot.plot_scatter_conditional.format 228 --format $plot.plot_scatter_conditional.format
169 --dpi $plot.plot_scatter_conditional.dpi 229 --dpi $plot.plot_scatter_conditional.dpi
170 #end if 230 #end if
171 #if str($plot.plot_concordance_conditional.plot_concordance) == 'yes': 231 #if str($plot.plot_concordance_conditional.plot_concordance) == 'yes':
172 && python $__tool_directory__/flexynesis_plot.py 232 && python $__tool_directory__/flexynesis_plot.py
173 --plot_type concordance_heatmap 233 --plot_type concordance_heatmap
174 --labels output/job.predicted_labels.csv 234 --labels output/job.predicted_labels.tabular
175 #if str($surv_event_var) != '': 235 #if str($training_type.model) == 's_train':
176 --target_value $target_variables,$surv_event_var 236 #if len($targets) > 0 or (str($surv_event_var) != 'None' and str($surv_time_var) != 'None'):
177 #else 237 --target_value \$target_value
178 --target_value $target_variables 238 #end if
179 #end if 239 #end if
180 --output_dir plots 240 --output_dir plots
181 --format $plot.plot_concordance_conditional.format 241 --format $plot.plot_concordance_conditional.format
182 --dpi $plot.plot_concordance_conditional.dpi 242 --dpi $plot.plot_concordance_conditional.dpi
183 #end if 243 #end if
184 #if str($plot.plot_pr_curves_conditional.plot_pr_curves) == 'yes': 244 #if str($plot.plot_pr_curves_conditional.plot_pr_curves) == 'yes':
185 && python $__tool_directory__/flexynesis_plot.py 245 && python $__tool_directory__/flexynesis_plot.py
186 --plot_type pr_curve 246 --plot_type pr_curve
187 --labels output/job.predicted_labels.csv 247 --labels output/job.predicted_labels.tabular
188 #if str($surv_event_var) != '': 248 #if str($training_type.model) == 's_train':
189 --target_value $target_variables,$surv_event_var 249 #if len($targets) > 0 or (str($surv_event_var) != 'None' and str($surv_time_var) != 'None'):
190 #else 250 --target_value \$target_value
191 --target_value $target_variables 251 #end if
192 #end if 252 #end if
193 --output_dir plots 253 --output_dir plots
194 --format $plot.plot_pr_curves_conditional.format 254 --format $plot.plot_pr_curves_conditional.format
195 --dpi $plot.plot_pr_curves_conditional.dpi 255 --dpi $plot.plot_pr_curves_conditional.dpi
196 #end if 256 #end if
197 #if str($plot.plot_roc_curves_conditional.plot_roc_curves) == 'yes': 257 #if str($plot.plot_roc_curves_conditional.plot_roc_curves) == 'yes':
198 && python $__tool_directory__/flexynesis_plot.py 258 && python $__tool_directory__/flexynesis_plot.py
199 --plot_type roc_curve 259 --plot_type roc_curve
200 --labels output/job.predicted_labels.csv 260 --labels output/job.predicted_labels.tabular
201 #if str($surv_event_var) != '': 261 #if str($training_type.model) == 's_train':
202 --target_value $target_variables,$surv_event_var 262 #if len($targets) > 0 or (str($surv_event_var) != 'None' and str($surv_time_var) != 'None'):
203 #else 263 --target_value \$target_value
204 --target_value $target_variables 264 #end if
205 #end if 265 #end if
206 --output_dir plots 266 --output_dir plots
207 --format $plot.plot_roc_curves_conditional.format 267 --format $plot.plot_roc_curves_conditional.format
208 --dpi $plot.plot_roc_curves_conditional.dpi 268 --dpi $plot.plot_roc_curves_conditional.dpi
209 #end if 269 #end if
210 #if str($plot.plot_boxplot_conditional.plot_boxplot) == 'yes': 270 #if str($plot.plot_boxplot_conditional.plot_boxplot) == 'yes':
211 && python $__tool_directory__/flexynesis_plot.py 271 && python $__tool_directory__/flexynesis_plot.py
212 --plot_type box_plot 272 --plot_type box_plot
213 --labels output/job.predicted_labels.csv 273 --labels output/job.predicted_labels.tabular
214 #if str($surv_event_var) != '': 274 #if str($training_type.model) == 's_train':
215 --target_value $target_variables,$surv_event_var 275 #if len($targets) > 0 or (str($surv_event_var) != 'None' and str($surv_time_var) != 'None'):
216 #else 276 --target_value \$target_value
217 --target_value $target_variables 277 #end if
218 #end if 278 #end if
219 --output_dir plots 279 --output_dir plots
220 --format $plot.plot_boxplot_conditional.format 280 --format $plot.plot_boxplot_conditional.format
221 --dpi $plot.plot_boxplot_conditional.dpi 281 --dpi $plot.plot_boxplot_conditional.dpi
222 #end if 282 #end if
266 <when value="RandomForest"/> 326 <when value="RandomForest"/>
267 <when value="SVM"/> 327 <when value="SVM"/>
268 <when value="RandomSurvivalForest"/> 328 <when value="RandomSurvivalForest"/>
269 <when value="XGBoost"/> 329 <when value="XGBoost"/>
270 </conditional> 330 </conditional>
271 <param argument="--target_variables" type="text" label="Target variables" help="Which variables in 'clin.csv' to use for predictions, comma-separated if multiple."> 331 <repeat name="targets" min="0" title="Target variables">
272 <expand macro="sanitizer_printable"/> 332 <param argument="--target_variables" type="data_column" data_ref="train_clin" optional="false" label="Column name in the train clinical data to use for predictions, multiple targets are allowed"/>
273 </param> 333 </repeat>
274 <param argument="--surv_event_var" type="text" label="Survival event" help="Which column in 'clin.csv' to use as event/status indicator for survival modeling."> 334 <param argument="--surv_event_var" type="data_column" data_ref="train_clin" optional="true" label="Column name in the train clinical data to use as survival event"/>
275 <expand macro="sanitizer_printable"/> 335 <param argument="--surv_time_var" type="data_column" data_ref="train_clin" optional="true" label="Column name in the train clinical data to use as survival time"/>
276 </param>
277 <param argument="--surv_time_var" type="text" label="Survival time" help="Which column in 'clin.csv' to use as time/duration indicator for survival modeling.">
278 <expand macro="sanitizer_printable"/>
279 </param>
280 <expand macro="advanced"/> 336 <expand macro="advanced"/>
281 </when> 337 </when>
282 <when value="us_train"> 338 <when value="us_train">
283 <expand macro="main_inputs"/> 339 <expand macro="main_inputs"/>
284 <repeat name="omics" min="0" title="Multiple omics layers?"> 340 <repeat name="omics" min="0" title="Multiple omics layers?">
426 </conditional> 482 </conditional>
427 </section> 483 </section>
428 </inputs> 484 </inputs>
429 <outputs> 485 <outputs>
430 <collection name="results" type="list" label="${tool.name} on ${on_string}: results"> 486 <collection name="results" type="list" label="${tool.name} on ${on_string}: results">
431 <discover_datasets pattern="(?P&lt;name&gt;.+)\.csv$" format="csv" directory="output"/> 487 <discover_datasets pattern="(?P&lt;name&gt;.+)\.tabular$" format="tabular" directory="output"/>
432 </collection> 488 </collection>
433 <collection name="plots" type="list" label="${tool.name} on ${on_string}: embeddings plots"> 489 <collection name="plots" type="list" label="${tool.name} on ${on_string}: embeddings plots">
434 <discover_datasets pattern="__name_and_ext__" directory="plots"/> 490 <discover_datasets pattern="__name_and_ext__" directory="plots"/>
435 <filter>plot['plot_embeddings_conditional']['plot_embeddings'] == 'yes' or plot['plot_km_conditional']['plot_km'] == 'yes' or plot['plot_cox_conditional']['plot_cox'] == 'yes' or plot['plot_scatter_conditional']['plot_scatter'] == 'yes' or plot['plot_concordance_conditional']['plot_concordance'] == 'yes' or plot['plot_pr_curves_conditional']['plot_pr_curves'] == 'yes' or plot['plot_roc_curves_conditional']['plot_roc_curves'] == 'yes' or plot['plot_boxplot_conditional']['plot_boxplot'] == 'yes'</filter> 491 <filter>plot['plot_embeddings_conditional']['plot_embeddings'] == 'yes' or plot['plot_km_conditional']['plot_km'] == 'yes' or plot['plot_cox_conditional']['plot_cox'] == 'yes' or plot['plot_scatter_conditional']['plot_scatter'] == 'yes' or plot['plot_concordance_conditional']['plot_concordance'] == 'yes' or plot['plot_pr_curves_conditional']['plot_pr_curves'] == 'yes' or plot['plot_roc_curves_conditional']['plot_roc_curves'] == 'yes' or plot['plot_boxplot_conditional']['plot_boxplot'] == 'yes'</filter>
436 </collection> 492 </collection>
439 <!-- test 1: Supervised training with GEX and CNV data, DirectPred model, Erlotinib target --> 495 <!-- test 1: Supervised training with GEX and CNV data, DirectPred model, Erlotinib target -->
440 <test expect_num_outputs="1"> 496 <test expect_num_outputs="1">
441 <param name="non_commercial_use" value="True"/> 497 <param name="non_commercial_use" value="True"/>
442 <conditional name="training_type"> 498 <conditional name="training_type">
443 <param name="model" value="s_train"/> 499 <param name="model" value="s_train"/>
444 <param name="train_clin" value="train/clin" ftype="csv"/> 500 <param name="train_clin" value="train/clin" ftype="tabular"/>
445 <param name="test_clin" value="test/clin" ftype="csv"/> 501 <param name="test_clin" value="test/clin" ftype="tabular"/>
446 <param name="train_omics_main" value="train/gex" ftype="csv"/> 502 <param name="train_omics_main" value="train/gex" ftype="tabular"/>
447 <param name="test_omics_main" value="test/gex" ftype="csv"/> 503 <param name="test_omics_main" value="test/gex" ftype="tabular"/>
448 <param name="assay_main" value="bar"/> 504 <param name="assay_main" value="bar"/>
449 <repeat name="omics"> 505 <repeat name="omics">
450 <param name="train_omics" value="train/cnv" ftype="csv"/> 506 <param name="train_omics" value="train/cnv" ftype="tabular"/>
451 <param name="test_omics" value="test/cnv" ftype="csv"/> 507 <param name="test_omics" value="test/cnv" ftype="tabular"/>
452 <param name="assay" value="foo"/> 508 <param name="assay" value="foo"/>
453 </repeat> 509 </repeat>
454 <conditional name="model_class"> 510 <conditional name="model_class">
455 <param name="model_class" value="DirectPred"/> 511 <param name="model_class" value="DirectPred"/>
456 </conditional> 512 </conditional>
457 <param name="target_variables" value="Erlotinib"/> 513 <repeat name="targets">
514 <param name="target_variables" value="3"/>
515 </repeat>
458 <section name="advanced"> 516 <section name="advanced">
459 <param name="hpo_iter" value="1"/> 517 <param name="hpo_iter" value="1"/>
460 </section> 518 </section>
461 </conditional> 519 </conditional>
462 <output_collection name="results" type="list"> 520 <output_collection name="results" type="list">
470 <has_n_lines n="50"/> 528 <has_n_lines n="50"/>
471 </assert_contents> 529 </assert_contents>
472 </element> 530 </element>
473 <element name="job.feature_importance.GradientShap"> 531 <element name="job.feature_importance.GradientShap">
474 <assert_contents> 532 <assert_contents>
475 <has_text_matching expression="Erlotinib,0,,bar,A2M,"/> 533 <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/>
476 <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/> 534 <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/>
477 <has_text_matching expression="GradientShap"/> 535 <has_text_matching expression="GradientShap"/>
478 </assert_contents> 536 </assert_contents>
479 </element> 537 </element>
480 <element name="job.feature_importance.IntegratedGradients"> 538 <element name="job.feature_importance.IntegratedGradients">
481 <assert_contents> 539 <assert_contents>
482 <has_text_matching expression="Erlotinib,0,,bar,A2M,"/> 540 <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/>
483 <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/> 541 <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/>
484 <has_text_matching expression="IntegratedGradients"/> 542 <has_text_matching expression="IntegratedGradients"/>
485 </assert_contents> 543 </assert_contents>
486 </element> 544 </element>
487 <element name="job.feature_logs.bar"> 545 <element name="job.feature_logs.bar">
488 <assert_contents> 546 <assert_contents>
494 <has_n_lines n="25"/> 552 <has_n_lines n="25"/>
495 </assert_contents> 553 </assert_contents>
496 </element> 554 </element>
497 <element name="job.predicted_labels"> 555 <element name="job.predicted_labels">
498 <assert_contents> 556 <assert_contents>
499 <has_text_matching expression="source_dataset:A-704,Erlotinib,"/> 557 <has_text_matching expression="source_dataset:A-704\tErlotinib\t"/>
500 <has_text_matching expression="target_dataset:KMRC-20,Erlotinib,"/> 558 <has_text_matching expression="target_dataset:KMRC-20\tErlotinib\t"/>
501 </assert_contents> 559 </assert_contents>
502 </element> 560 </element>
503 <element name="job.stats"> 561 <element name="job.stats">
504 <assert_contents> 562 <assert_contents>
505 <has_text_matching expression="DirectPred,Erlotinib,numerical,mse,"/> 563 <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tmse\t"/>
506 <has_text_matching expression="DirectPred,Erlotinib,numerical,r2,"/> 564 <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tr2\t"/>
507 <has_text_matching expression="DirectPred,Erlotinib,numerical,pearson_corr,"/> 565 <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tpearson_corr\t"/>
508 </assert_contents> 566 </assert_contents>
509 </element> 567 </element>
510 </output_collection> 568 </output_collection>
511 </test> 569 </test>
512 <!-- test 2: Supervised training with GEX data, DirectPred model, Erlotinib target --> 570 <!-- test 2: Supervised training with GEX data, DirectPred model, Erlotinib target -->
513 <test expect_num_outputs="1"> 571 <test expect_num_outputs="1">
514 <param name="non_commercial_use" value="True"/> 572 <param name="non_commercial_use" value="True"/>
515 <conditional name="training_type"> 573 <conditional name="training_type">
516 <param name="model" value="s_train"/> 574 <param name="model" value="s_train"/>
517 <param name="train_clin" value="train/clin" ftype="csv"/> 575 <param name="train_clin" value="train/clin" ftype="tabular"/>
518 <param name="test_clin" value="test/clin" ftype="csv"/> 576 <param name="test_clin" value="test/clin" ftype="tabular"/>
519 <param name="train_omics_main" value="train/gex" ftype="csv"/> 577 <param name="train_omics_main" value="train/gex" ftype="tabular"/>
520 <param name="test_omics_main" value="test/gex" ftype="csv"/> 578 <param name="test_omics_main" value="test/gex" ftype="tabular"/>
521 <param name="assay_main" value="bar"/> 579 <param name="assay_main" value="bar"/>
522 <conditional name="model_class"> 580 <conditional name="model_class">
523 <param name="model_class" value="DirectPred"/> 581 <param name="model_class" value="DirectPred"/>
524 </conditional> 582 </conditional>
525 <param name="target_variables" value="Erlotinib"/> 583 <repeat name="targets">
584 <param name="target_variables" value="3"/>
585 </repeat>
526 <section name="advanced"> 586 <section name="advanced">
527 <param name="hpo_iter" value="1"/> 587 <param name="hpo_iter" value="1"/>
528 </section> 588 </section>
529 </conditional> 589 </conditional>
530 <output_collection name="results" type="list"> 590 <output_collection name="results" type="list">
538 <has_n_lines n="50"/> 598 <has_n_lines n="50"/>
539 </assert_contents> 599 </assert_contents>
540 </element> 600 </element>
541 <element name="job.feature_importance.GradientShap"> 601 <element name="job.feature_importance.GradientShap">
542 <assert_contents> 602 <assert_contents>
543 <has_text_matching expression="Erlotinib,0,,bar,A2M,"/> 603 <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/>
544 <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/> 604 <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/>
545 <has_text_matching expression="GradientShap"/> 605 <has_text_matching expression="GradientShap"/>
546 </assert_contents> 606 </assert_contents>
547 </element> 607 </element>
548 <element name="job.feature_importance.IntegratedGradients"> 608 <element name="job.feature_importance.IntegratedGradients">
549 <assert_contents> 609 <assert_contents>
550 <has_text_matching expression="Erlotinib,0,,bar,A2M,"/> 610 <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/>
551 <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/> 611 <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/>
552 <has_text_matching expression="IntegratedGradients"/> 612 <has_text_matching expression="IntegratedGradients"/>
553 </assert_contents> 613 </assert_contents>
554 </element> 614 </element>
555 <element name="job.feature_logs.bar"> 615 <element name="job.feature_logs.bar">
556 <assert_contents> 616 <assert_contents>
557 <has_n_lines n="25"/> 617 <has_n_lines n="25"/>
558 </assert_contents> 618 </assert_contents>
559 </element> 619 </element>
560 <element name="job.predicted_labels"> 620 <element name="job.predicted_labels">
561 <assert_contents> 621 <assert_contents>
562 <has_text_matching expression="source_dataset:A-704,Erlotinib,"/> 622 <has_text_matching expression="source_dataset:A-704\tErlotinib\t"/>
563 <has_text_matching expression="target_dataset:KMRC-20,Erlotinib,"/> 623 <has_text_matching expression="target_dataset:KMRC-20\tErlotinib\t"/>
564 </assert_contents> 624 </assert_contents>
565 </element> 625 </element>
566 <element name="job.stats"> 626 <element name="job.stats">
567 <assert_contents> 627 <assert_contents>
568 <has_text_matching expression="DirectPred,Erlotinib,numerical,mse,"/> 628 <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tmse\t"/>
569 <has_text_matching expression="DirectPred,Erlotinib,numerical,r2,"/> 629 <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tr2\t"/>
570 <has_text_matching expression="DirectPred,Erlotinib,numerical,pearson_corr,"/> 630 <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tpearson_corr\t"/>
571 </assert_contents> 631 </assert_contents>
572 </element> 632 </element>
573 </output_collection> 633 </output_collection>
574 </test> 634 </test>
575 <!-- test 3: Supervised training with GEX and CNV data, DirectPred model, Irinotecan target --> 635 <!-- test 3: Supervised training with GEX and CNV data, DirectPred model, Irinotecan target -->
576 <test expect_num_outputs="1"> 636 <test expect_num_outputs="1">
577 <param name="non_commercial_use" value="True"/> 637 <param name="non_commercial_use" value="True"/>
578 <conditional name="training_type"> 638 <conditional name="training_type">
579 <param name="model" value="s_train"/> 639 <param name="model" value="s_train"/>
580 <param name="train_clin" value="train/clin" ftype="csv"/> 640 <param name="train_clin" value="train/clin" ftype="tabular"/>
581 <param name="test_clin" value="test/clin" ftype="csv"/> 641 <param name="test_clin" value="test/clin" ftype="tabular"/>
582 <param name="train_omics_main" value="train/gex" ftype="csv"/> 642 <param name="train_omics_main" value="train/gex" ftype="tabular"/>
583 <param name="test_omics_main" value="test/gex" ftype="csv"/> 643 <param name="test_omics_main" value="test/gex" ftype="tabular"/>
584 <param name="assay_main" value="bar"/> 644 <param name="assay_main" value="bar"/>
585 <repeat name="omics"> 645 <repeat name="omics">
586 <param name="train_omics" value="train/cnv" ftype="csv"/> 646 <param name="train_omics" value="train/cnv" ftype="tabular"/>
587 <param name="test_omics" value="test/cnv" ftype="csv"/> 647 <param name="test_omics" value="test/cnv" ftype="tabular"/>
588 <param name="assay" value="foo"/> 648 <param name="assay" value="foo"/>
589 </repeat> 649 </repeat>
590 <conditional name="model_class"> 650 <conditional name="model_class">
591 <param name="model_class" value="DirectPred"/> 651 <param name="model_class" value="DirectPred"/>
592 </conditional> 652 </conditional>
593 <param name="target_variables" value="Irinotecan"/> 653 <repeat name="targets">
654 <param name="target_variables" value="4"/>
655 </repeat>
594 <section name="advanced"> 656 <section name="advanced">
595 <param name="hpo_iter" value="1"/> 657 <param name="hpo_iter" value="1"/>
596 </section> 658 </section>
597 </conditional> 659 </conditional>
598 <output_collection name="results" type="list"> 660 <output_collection name="results" type="list">
606 <has_n_lines n="50"/> 668 <has_n_lines n="50"/>
607 </assert_contents> 669 </assert_contents>
608 </element> 670 </element>
609 <element name="job.feature_importance.GradientShap"> 671 <element name="job.feature_importance.GradientShap">
610 <assert_contents> 672 <assert_contents>
611 <has_text_matching expression="Irinotecan,0,,bar,A2M,"/> 673 <has_text_matching expression="Irinotecan\t0\t\tbar\tA2M\t"/>
612 <has_text_matching expression="Irinotecan,0,,bar,ABCC4,"/> 674 <has_text_matching expression="Irinotecan\t0\t\tbar\tABCC4\t"/>
613 <has_text_matching expression="GradientShap"/> 675 <has_text_matching expression="GradientShap"/>
614 </assert_contents> 676 </assert_contents>
615 </element> 677 </element>
616 <element name="job.feature_importance.IntegratedGradients"> 678 <element name="job.feature_importance.IntegratedGradients">
617 <assert_contents> 679 <assert_contents>
618 <has_text_matching expression="Irinotecan,0,,bar,A2M,"/> 680 <has_text_matching expression="Irinotecan\t0\t\tbar\tA2M\t"/>
619 <has_text_matching expression="Irinotecan,0,,bar,ABCC4,"/> 681 <has_text_matching expression="Irinotecan\t0\t\tbar\tABCC4\t"/>
620 <has_text_matching expression="IntegratedGradients"/> 682 <has_text_matching expression="IntegratedGradients"/>
621 </assert_contents> 683 </assert_contents>
622 </element> 684 </element>
623 <element name="job.feature_logs.bar"> 685 <element name="job.feature_logs.bar">
624 <assert_contents> 686 <assert_contents>
630 <has_n_lines n="25"/> 692 <has_n_lines n="25"/>
631 </assert_contents> 693 </assert_contents>
632 </element> 694 </element>
633 <element name="job.predicted_labels"> 695 <element name="job.predicted_labels">
634 <assert_contents> 696 <assert_contents>
635 <has_text_matching expression="source_dataset:A-704,Irinotecan,"/> 697 <has_text_matching expression="source_dataset:A-704\tIrinotecan\t"/>
636 <has_text_matching expression="target_dataset:KMRC-20,Irinotecan,"/> 698 <has_text_matching expression="target_dataset:KMRC-20\tIrinotecan\t"/>
637 </assert_contents> 699 </assert_contents>
638 </element> 700 </element>
639 <element name="job.stats"> 701 <element name="job.stats">
640 <assert_contents> 702 <assert_contents>
641 <has_text_matching expression="DirectPred,Irinotecan,numerical,mse,"/> 703 <has_text_matching expression="DirectPred\tIrinotecan\tnumerical\tmse\t"/>
642 <has_text_matching expression="DirectPred,Irinotecan,numerical,r2,"/> 704 <has_text_matching expression="DirectPred\tIrinotecan\tnumerical\tr2\t"/>
643 <has_text_matching expression="DirectPred,Irinotecan,numerical,pearson_corr,"/> 705 <has_text_matching expression="DirectPred\tIrinotecan\tnumerical\tpearson_corr\t"/>
644 </assert_contents> 706 </assert_contents>
645 </element> 707 </element>
646 </output_collection> 708 </output_collection>
647 </test> 709 </test>
648 <!-- test 4: Un-supervised training with GEX and CNV data, supervised_vae model --> 710 <!-- test 4: Un-supervised training with GEX and CNV data, supervised_vae model -->
649 <test expect_num_outputs="1"> 711 <test expect_num_outputs="1">
650 <param name="non_commercial_use" value="True"/> 712 <param name="non_commercial_use" value="True"/>
651 <conditional name="training_type"> 713 <conditional name="training_type">
652 <param name="model" value="us_train"/> 714 <param name="model" value="us_train"/>
653 <param name="train_clin" value="train/clin" ftype="csv"/> 715 <param name="train_clin" value="train/clin" ftype="tabular"/>
654 <param name="test_clin" value="test/clin" ftype="csv"/> 716 <param name="test_clin" value="test/clin" ftype="tabular"/>
655 <param name="train_omics_main" value="train/gex" ftype="csv"/> 717 <param name="train_omics_main" value="train/gex" ftype="tabular"/>
656 <param name="test_omics_main" value="test/gex" ftype="csv"/> 718 <param name="test_omics_main" value="test/gex" ftype="tabular"/>
657 <param name="assay_main" value="bar"/> 719 <param name="assay_main" value="bar"/>
658 <repeat name="omics"> 720 <repeat name="omics">
659 <param name="train_omics" value="train/cnv" ftype="csv"/> 721 <param name="train_omics" value="train/cnv" ftype="tabular"/>
660 <param name="test_omics" value="test/cnv" ftype="csv"/> 722 <param name="test_omics" value="test/cnv" ftype="tabular"/>
661 <param name="assay" value="foo"/> 723 <param name="assay" value="foo"/>
662 </repeat> 724 </repeat>
663 <param name="model_class" value="supervised_vae"/> 725 <param name="model_class" value="supervised_vae"/>
664 <section name="advanced"> 726 <section name="advanced">
665 <param name="hpo_iter" value="1"/> 727 <param name="hpo_iter" value="1"/>
691 <!-- test 5: Cross-modality training with GEX and CNV data, CrossModalPred model --> 753 <!-- test 5: Cross-modality training with GEX and CNV data, CrossModalPred model -->
692 <test expect_num_outputs="1"> 754 <test expect_num_outputs="1">
693 <param name="non_commercial_use" value="True"/> 755 <param name="non_commercial_use" value="True"/>
694 <conditional name="training_type"> 756 <conditional name="training_type">
695 <param name="model" value="cm_train"/> 757 <param name="model" value="cm_train"/>
696 <param name="train_clin" value="train/clin" ftype="csv"/> 758 <param name="train_clin" value="train/clin" ftype="tabular"/>
697 <param name="test_clin" value="test/clin" ftype="csv"/> 759 <param name="test_clin" value="test/clin" ftype="tabular"/>
698 <param name="train_omics_main" value="train/gex" ftype="csv"/> 760 <param name="train_omics_main" value="train/gex" ftype="tabular"/>
699 <param name="test_omics_main" value="test/gex" ftype="csv"/> 761 <param name="test_omics_main" value="test/gex" ftype="tabular"/>
700 <param name="assay_main" value="bar"/> 762 <param name="assay_main" value="bar"/>
701 <param name="layer_main" value="input"/> 763 <param name="layer_main" value="input"/>
702 <repeat name="omics"> 764 <repeat name="omics">
703 <param name="train_omics" value="train/cnv" ftype="csv"/> 765 <param name="train_omics" value="train/cnv" ftype="tabular"/>
704 <param name="test_omics" value="test/cnv" ftype="csv"/> 766 <param name="test_omics" value="test/cnv" ftype="tabular"/>
705 <param name="assay" value="foo"/> 767 <param name="assay" value="foo"/>
706 <param name="layer" value="output"/> 768 <param name="layer" value="output"/>
707 </repeat> 769 </repeat>
708 <param name="model_class" value="CrossModalPred"/> 770 <param name="model_class" value="CrossModalPred"/>
709 <section name="advanced"> 771 <section name="advanced">
746 <!-- test 6: Supervised training with GEX and CNV data, GNN model, Erlotinib target --> 808 <!-- test 6: Supervised training with GEX and CNV data, GNN model, Erlotinib target -->
747 <test expect_num_outputs="1"> 809 <test expect_num_outputs="1">
748 <param name="non_commercial_use" value="True"/> 810 <param name="non_commercial_use" value="True"/>
749 <conditional name="training_type"> 811 <conditional name="training_type">
750 <param name="model" value="s_train"/> 812 <param name="model" value="s_train"/>
751 <param name="train_clin" value="train/clin" ftype="csv"/> 813 <param name="train_clin" value="train/clin" ftype="tabular"/>
752 <param name="test_clin" value="test/clin" ftype="csv"/> 814 <param name="test_clin" value="test/clin" ftype="tabular"/>
753 <param name="train_omics_main" value="train/gex" ftype="csv"/> 815 <param name="train_omics_main" value="train/gex" ftype="tabular"/>
754 <param name="test_omics_main" value="test/gex" ftype="csv"/> 816 <param name="test_omics_main" value="test/gex" ftype="tabular"/>
755 <param name="assay_main" value="bar"/> 817 <param name="assay_main" value="bar"/>
756 <repeat name="omics"> 818 <repeat name="omics">
757 <param name="train_omics" value="train/cnv" ftype="csv"/> 819 <param name="train_omics" value="train/cnv" ftype="tabular"/>
758 <param name="test_omics" value="test/cnv" ftype="csv"/> 820 <param name="test_omics" value="test/cnv" ftype="tabular"/>
759 <param name="assay" value="foo"/> 821 <param name="assay" value="foo"/>
760 </repeat> 822 </repeat>
761 <conditional name="model_class"> 823 <conditional name="model_class">
762 <param name="model_class" value="GNN"/> 824 <param name="model_class" value="GNN"/>
763 <param name="gnn_conv_type" value="GC"/> 825 <param name="gnn_conv_type" value="GC"/>
764 <param name="string_organism" value="9606"/> 826 <param name="string_organism" value="9606"/>
765 <param name="string_node_name" value="gene_name"/> 827 <param name="string_node_name" value="gene_name"/>
766 </conditional> 828 </conditional>
767 <param name="target_variables" value="Erlotinib"/> 829 <repeat name="targets">
830 <param name="target_variables" value="3"/>
831 </repeat>
768 <section name="advanced"> 832 <section name="advanced">
769 <param name="hpo_iter" value="1"/> 833 <param name="hpo_iter" value="1"/>
770 </section> 834 </section>
771 </conditional> 835 </conditional>
772 <output_collection name="results" type="list"> 836 <output_collection name="results" type="list">
780 <has_n_lines n="50"/> 844 <has_n_lines n="50"/>
781 </assert_contents> 845 </assert_contents>
782 </element> 846 </element>
783 <element name="job.feature_importance.GradientShap"> 847 <element name="job.feature_importance.GradientShap">
784 <assert_contents> 848 <assert_contents>
785 <has_text_matching expression="Erlotinib,0,,bar,A2M,"/> 849 <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/>
786 <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/> 850 <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/>
787 <has_text_matching expression="GradientShap"/> 851 <has_text_matching expression="GradientShap"/>
788 </assert_contents> 852 </assert_contents>
789 </element> 853 </element>
790 <element name="job.feature_importance.IntegratedGradients"> 854 <element name="job.feature_importance.IntegratedGradients">
791 <assert_contents> 855 <assert_contents>
792 <has_text_matching expression="Erlotinib,0,,bar,A2M,"/> 856 <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/>
793 <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/> 857 <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/>
794 <has_text_matching expression="IntegratedGradients"/> 858 <has_text_matching expression="IntegratedGradients"/>
795 </assert_contents> 859 </assert_contents>
796 </element> 860 </element>
797 <element name="job.feature_logs.bar"> 861 <element name="job.feature_logs.bar">
798 <assert_contents> 862 <assert_contents>
804 <has_n_lines n="25"/> 868 <has_n_lines n="25"/>
805 </assert_contents> 869 </assert_contents>
806 </element> 870 </element>
807 <element name="job.predicted_labels"> 871 <element name="job.predicted_labels">
808 <assert_contents> 872 <assert_contents>
809 <has_text_matching expression="source_dataset:A-704,Erlotinib,"/> 873 <has_text_matching expression="source_dataset:A-704\tErlotinib\t"/>
810 <has_text_matching expression="target_dataset:KMRC-20,Erlotinib,"/> 874 <has_text_matching expression="target_dataset:KMRC-20\tErlotinib\t"/>
811 </assert_contents> 875 </assert_contents>
812 </element> 876 </element>
813 <element name="job.stats"> 877 <element name="job.stats">
814 <assert_contents> 878 <assert_contents>
815 <has_text_matching expression="GNN,Erlotinib,numerical,mse,"/> 879 <has_text_matching expression="GNN\tErlotinib\tnumerical\tmse\t"/>
816 <has_text_matching expression="GNN,Erlotinib,numerical,r2,"/> 880 <has_text_matching expression="GNN\tErlotinib\tnumerical\tr2\t"/>
817 <has_text_matching expression="GNN,Erlotinib,numerical,pearson_corr,"/> 881 <has_text_matching expression="GNN\tErlotinib\tnumerical\tpearson_corr\t"/>
818 </assert_contents> 882 </assert_contents>
819 </element> 883 </element>
820 </output_collection> 884 </output_collection>
821 </test> 885 </test>
822 <!-- test 7: test 4 + checking whitespace in assay name --> 886 <!-- test 7: test 4 + checking whitespace in assay name -->
823 <test expect_num_outputs="1"> 887 <test expect_num_outputs="1">
824 <param name="non_commercial_use" value="True"/> 888 <param name="non_commercial_use" value="True"/>
825 <conditional name="training_type"> 889 <conditional name="training_type">
826 <param name="model" value="us_train"/> 890 <param name="model" value="us_train"/>
827 <param name="train_clin" value="train/clin" ftype="csv"/> 891 <param name="train_clin" value="train/clin" ftype="tabular"/>
828 <param name="test_clin" value="test/clin" ftype="csv"/> 892 <param name="test_clin" value="test/clin" ftype="tabular"/>
829 <param name="train_omics_main" value="train/gex" ftype="csv"/> 893 <param name="train_omics_main" value="train/gex" ftype="tabular"/>
830 <param name="test_omics_main" value="test/gex" ftype="csv"/> 894 <param name="test_omics_main" value="test/gex" ftype="tabular"/>
831 <param name="assay_main" value="b ar"/> 895 <param name="assay_main" value="b ar"/>
832 <repeat name="omics"> 896 <repeat name="omics">
833 <param name="train_omics" value="train/cnv" ftype="csv"/> 897 <param name="train_omics" value="train/cnv" ftype="tabular"/>
834 <param name="test_omics" value="test/cnv" ftype="csv"/> 898 <param name="test_omics" value="test/cnv" ftype="tabular"/>
835 <param name="assay" value="f oo"/> 899 <param name="assay" value="f oo"/>
836 </repeat> 900 </repeat>
837 <param name="model_class" value="supervised_vae"/> 901 <param name="model_class" value="supervised_vae"/>
838 <section name="advanced"> 902 <section name="advanced">
839 <param name="hpo_iter" value="1"/> 903 <param name="hpo_iter" value="1"/>
865 <!-- test 8: Supervised training with GEX and CNV data, XGBoost model, Erlotinib target --> 929 <!-- test 8: Supervised training with GEX and CNV data, XGBoost model, Erlotinib target -->
866 <test expect_num_outputs="1"> 930 <test expect_num_outputs="1">
867 <param name="non_commercial_use" value="True"/> 931 <param name="non_commercial_use" value="True"/>
868 <conditional name="training_type"> 932 <conditional name="training_type">
869 <param name="model" value="s_train"/> 933 <param name="model" value="s_train"/>
870 <param name="train_clin" value="train/clin" ftype="csv"/> 934 <param name="train_clin" value="train/clin" ftype="tabular"/>
871 <param name="test_clin" value="test/clin" ftype="csv"/> 935 <param name="test_clin" value="test/clin" ftype="tabular"/>
872 <param name="train_omics_main" value="train/gex" ftype="csv"/> 936 <param name="train_omics_main" value="train/gex" ftype="tabular"/>
873 <param name="test_omics_main" value="test/gex" ftype="csv"/> 937 <param name="test_omics_main" value="test/gex" ftype="tabular"/>
874 <param name="assay_main" value="bar"/> 938 <param name="assay_main" value="bar"/>
875 <repeat name="omics"> 939 <repeat name="omics">
876 <param name="train_omics" value="train/cnv" ftype="csv"/> 940 <param name="train_omics" value="train/cnv" ftype="tabular"/>
877 <param name="test_omics" value="test/cnv" ftype="csv"/> 941 <param name="test_omics" value="test/cnv" ftype="tabular"/>
878 <param name="assay" value="foo"/> 942 <param name="assay" value="foo"/>
879 </repeat> 943 </repeat>
880 <conditional name="model_class"> 944 <conditional name="model_class">
881 <param name="model_class" value="XGBoost"/> 945 <param name="model_class" value="XGBoost"/>
882 </conditional> 946 </conditional>
883 <param name="target_variables" value="Erlotinib"/> 947 <repeat name="targets">
948 <param name="target_variables" value="3"/>
949 </repeat>
884 <section name="advanced"> 950 <section name="advanced">
885 <param name="hpo_iter" value="1"/> 951 <param name="hpo_iter" value="1"/>
886 </section> 952 </section>
887 </conditional> 953 </conditional>
888 <output_collection name="results" type="list"> 954 <output_collection name="results" type="list">
889 <element name="job.stats"> 955 <element name="job.stats">
890 <assert_contents> 956 <assert_contents>
891 <has_text_matching expression="XGBoostRegressor,Erlotinib,numerical,mse,"/> 957 <has_text_matching expression="XGBoostRegressor\tErlotinib\tnumerical\tmse\t"/>
892 <has_text_matching expression="XGBoostRegressor,Erlotinib,numerical,r2,"/> 958 <has_text_matching expression="XGBoostRegressor\tErlotinib\tnumerical\tr2\t"/>
893 <has_text_matching expression="XGBoostRegressor,Erlotinib,numerical,pearson_corr,"/> 959 <has_text_matching expression="XGBoostRegressor\tErlotinib\tnumerical\tpearson_corr\t"/>
894 </assert_contents> 960 </assert_contents>
895 </element> 961 </element>
896 </output_collection> 962 </output_collection>
897 </test> 963 </test>
898 <!-- test 9: test 1 + checking IntegratedGradients method --> 964 <!-- test 9: test 1 + checking IntegratedGradients method -->
899 <test expect_num_outputs="1"> 965 <test expect_num_outputs="1">
900 <param name="non_commercial_use" value="True"/> 966 <param name="non_commercial_use" value="True"/>
901 <conditional name="training_type"> 967 <conditional name="training_type">
902 <param name="model" value="s_train"/> 968 <param name="model" value="s_train"/>
903 <param name="train_clin" value="train/clin" ftype="csv"/> 969 <param name="train_clin" value="train/clin" ftype="tabular"/>
904 <param name="test_clin" value="test/clin" ftype="csv"/> 970 <param name="test_clin" value="test/clin" ftype="tabular"/>
905 <param name="train_omics_main" value="train/gex" ftype="csv"/> 971 <param name="train_omics_main" value="train/gex" ftype="tabular"/>
906 <param name="test_omics_main" value="test/gex" ftype="csv"/> 972 <param name="test_omics_main" value="test/gex" ftype="tabular"/>
907 <param name="assay_main" value="bar"/> 973 <param name="assay_main" value="bar"/>
908 <repeat name="omics"> 974 <repeat name="omics">
909 <param name="train_omics" value="train/cnv" ftype="csv"/> 975 <param name="train_omics" value="train/cnv" ftype="tabular"/>
910 <param name="test_omics" value="test/cnv" ftype="csv"/> 976 <param name="test_omics" value="test/cnv" ftype="tabular"/>
911 <param name="assay" value="foo"/> 977 <param name="assay" value="foo"/>
912 </repeat> 978 </repeat>
913 <conditional name="model_class"> 979 <conditional name="model_class">
914 <param name="model_class" value="DirectPred"/> 980 <param name="model_class" value="DirectPred"/>
915 </conditional> 981 </conditional>
916 <param name="target_variables" value="Erlotinib"/> 982 <repeat name="targets">
983 <param name="target_variables" value="3"/>
984 </repeat>
917 <section name="advanced"> 985 <section name="advanced">
918 <param name="hpo_iter" value="1"/> 986 <param name="hpo_iter" value="1"/>
919 <param name="feature_importance_method" value="IntegratedGradients"/> 987 <param name="feature_importance_method" value="IntegratedGradients"/>
920 <param name="val_size" value="0.2"/> 988 <param name="val_size" value="0.2"/>
921 </section> 989 </section>
931 <has_n_lines n="50"/> 999 <has_n_lines n="50"/>
932 </assert_contents> 1000 </assert_contents>
933 </element> 1001 </element>
934 <element name="job.feature_importance.IntegratedGradients"> 1002 <element name="job.feature_importance.IntegratedGradients">
935 <assert_contents> 1003 <assert_contents>
936 <has_text_matching expression="Erlotinib,0,,bar,A2M,"/> 1004 <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/>
937 <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/> 1005 <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/>
938 </assert_contents> 1006 </assert_contents>
939 </element> 1007 </element>
940 <element name="job.feature_logs.bar"> 1008 <element name="job.feature_logs.bar">
941 <assert_contents> 1009 <assert_contents>
942 <has_n_lines n="25"/> 1010 <has_n_lines n="25"/>
947 <has_n_lines n="25"/> 1015 <has_n_lines n="25"/>
948 </assert_contents> 1016 </assert_contents>
949 </element> 1017 </element>
950 <element name="job.predicted_labels"> 1018 <element name="job.predicted_labels">
951 <assert_contents> 1019 <assert_contents>
952 <has_text_matching expression="source_dataset:A-704,Erlotinib,"/> 1020 <has_text_matching expression="source_dataset:A-704\tErlotinib\t"/>
953 <has_text_matching expression="target_dataset:KMRC-20,Erlotinib,"/> 1021 <has_text_matching expression="target_dataset:KMRC-20\tErlotinib\t"/>
954 </assert_contents> 1022 </assert_contents>
955 </element> 1023 </element>
956 <element name="job.stats"> 1024 <element name="job.stats">
957 <assert_contents> 1025 <assert_contents>
958 <has_text_matching expression="DirectPred,Erlotinib,numerical,mse,"/> 1026 <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tmse\t"/>
959 <has_text_matching expression="DirectPred,Erlotinib,numerical,r2,"/> 1027 <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tr2\t"/>
960 <has_text_matching expression="DirectPred,Erlotinib,numerical,pearson_corr,"/> 1028 <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tpearson_corr\t"/>
961 </assert_contents> 1029 </assert_contents>
962 </element> 1030 </element>
963 </output_collection> 1031 </output_collection>
964 </test> 1032 </test>
965 <!-- test 10: dimred plot --> 1033 <!-- test 10: dimred plot -->
966 <test expect_num_outputs="2"> 1034 <test expect_num_outputs="2">
967 <param name="non_commercial_use" value="True"/> 1035 <param name="non_commercial_use" value="True"/>
968 <conditional name="training_type"> 1036 <conditional name="training_type">
969 <param name="model" value="s_train"/> 1037 <param name="model" value="s_train"/>
970 <param name="train_clin" value="train/clin" ftype="csv"/> 1038 <param name="train_clin" value="train/clin" ftype="tabular"/>
971 <param name="test_clin" value="test/clin" ftype="csv"/> 1039 <param name="test_clin" value="test/clin" ftype="tabular"/>
972 <param name="train_omics_main" value="train/gex" ftype="csv"/> 1040 <param name="train_omics_main" value="train/gex" ftype="tabular"/>
973 <param name="test_omics_main" value="test/gex" ftype="csv"/> 1041 <param name="test_omics_main" value="test/gex" ftype="tabular"/>
974 <param name="assay_main" value="bar"/> 1042 <param name="assay_main" value="bar"/>
975 <repeat name="omics"> 1043 <repeat name="omics">
976 <param name="train_omics" value="train/cnv" ftype="csv"/> 1044 <param name="train_omics" value="train/cnv" ftype="tabular"/>
977 <param name="test_omics" value="test/cnv" ftype="csv"/> 1045 <param name="test_omics" value="test/cnv" ftype="tabular"/>
978 <param name="assay" value="foo"/> 1046 <param name="assay" value="foo"/>
979 </repeat> 1047 </repeat>
980 <conditional name="model_class"> 1048 <conditional name="model_class">
981 <param name="model_class" value="DirectPred"/> 1049 <param name="model_class" value="DirectPred"/>
982 </conditional> 1050 </conditional>
983 <param name="target_variables" value="Erlotinib"/> 1051 <repeat name="targets">
1052 <param name="target_variables" value="3"/>
1053 </repeat>
984 <section name="advanced"> 1054 <section name="advanced">
985 <param name="hpo_iter" value="1"/> 1055 <param name="hpo_iter" value="1"/>
986 </section> 1056 </section>
987 </conditional> 1057 </conditional>
988 <section name="plot"> 1058 <section name="plot">
1004 <has_n_lines n="50"/> 1074 <has_n_lines n="50"/>
1005 </assert_contents> 1075 </assert_contents>
1006 </element> 1076 </element>
1007 <element name="job.feature_importance.GradientShap"> 1077 <element name="job.feature_importance.GradientShap">
1008 <assert_contents> 1078 <assert_contents>
1009 <has_text_matching expression="Erlotinib,0,,bar,A2M,"/> 1079 <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/>
1010 <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/> 1080 <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/>
1011 <has_text_matching expression="GradientShap"/> 1081 <has_text_matching expression="GradientShap"/>
1012 </assert_contents> 1082 </assert_contents>
1013 </element> 1083 </element>
1014 <element name="job.feature_importance.IntegratedGradients"> 1084 <element name="job.feature_importance.IntegratedGradients">
1015 <assert_contents> 1085 <assert_contents>
1016 <has_text_matching expression="Erlotinib,0,,bar,A2M,"/> 1086 <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/>
1017 <has_text_matching expression="Erlotinib,0,,bar,ABCC4,"/> 1087 <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/>
1018 <has_text_matching expression="IntegratedGradients"/> 1088 <has_text_matching expression="IntegratedGradients"/>
1019 </assert_contents> 1089 </assert_contents>
1020 </element> 1090 </element>
1021 <element name="job.feature_logs.bar"> 1091 <element name="job.feature_logs.bar">
1022 <assert_contents> 1092 <assert_contents>
1028 <has_n_lines n="25"/> 1098 <has_n_lines n="25"/>
1029 </assert_contents> 1099 </assert_contents>
1030 </element> 1100 </element>
1031 <element name="job.predicted_labels"> 1101 <element name="job.predicted_labels">
1032 <assert_contents> 1102 <assert_contents>
1033 <has_text_matching expression="source_dataset:A-704,Erlotinib,"/> 1103 <has_text_matching expression="source_dataset:A-704\tErlotinib\t"/>
1034 <has_text_matching expression="target_dataset:KMRC-20,Erlotinib,"/> 1104 <has_text_matching expression="target_dataset:KMRC-20\tErlotinib\t"/>
1035 </assert_contents> 1105 </assert_contents>
1036 </element> 1106 </element>
1037 <element name="job.stats"> 1107 <element name="job.stats">
1038 <assert_contents> 1108 <assert_contents>
1039 <has_text_matching expression="DirectPred,Erlotinib,numerical,mse,"/> 1109 <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tmse\t"/>
1040 <has_text_matching expression="DirectPred,Erlotinib,numerical,r2,"/> 1110 <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tr2\t"/>
1041 <has_text_matching expression="DirectPred,Erlotinib,numerical,pearson_corr,"/> 1111 <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tpearson_corr\t"/>
1042 </assert_contents> 1112 </assert_contents>
1043 </element> 1113 </element>
1044 </output_collection> 1114 </output_collection>
1045 <output_collection name="plots" type="list"> 1115 <output_collection name="plots" type="list">
1046 <element name="job.embeddings_test_pca_Erlotinib_known"> 1116 <element name="job.embeddings_test_pca_Erlotinib_known">
1263 <has_image_width width="1941" delta="10"/> 1333 <has_image_width width="1941" delta="10"/>
1264 </assert_contents> 1334 </assert_contents>
1265 </element> 1335 </element>
1266 </output_collection> 1336 </output_collection>
1267 </test> 1337 </test>
1338 <!-- test 19: Supervised training with GEX and CNV data, DirectPred model, Erlotinib and Crizotinib targets -->
1339 <test expect_num_outputs="1">
1340 <param name="non_commercial_use" value="True"/>
1341 <conditional name="training_type">
1342 <param name="model" value="s_train"/>
1343 <param name="train_clin" value="train/clin" ftype="tabular"/>
1344 <param name="test_clin" value="test/clin" ftype="tabular"/>
1345 <param name="train_omics_main" value="train/gex" ftype="tabular"/>
1346 <param name="test_omics_main" value="test/gex" ftype="tabular"/>
1347 <param name="assay_main" value="bar"/>
1348 <repeat name="omics">
1349 <param name="train_omics" value="train/cnv" ftype="tabular"/>
1350 <param name="test_omics" value="test/cnv" ftype="tabular"/>
1351 <param name="assay" value="foo"/>
1352 </repeat>
1353 <conditional name="model_class">
1354 <param name="model_class" value="DirectPred"/>
1355 </conditional>
1356 <repeat name="targets">
1357 <param name="target_variables" value="3"/>
1358 </repeat>
1359 <repeat name="targets">
1360 <param name="target_variables" value="2"/>
1361 </repeat>
1362 <section name="advanced">
1363 <param name="hpo_iter" value="1"/>
1364 </section>
1365 </conditional>
1366 <assert_stdout>
1367 <has_text_matching expression="Target variables: Erlotinib,Crizotinib"/>
1368 </assert_stdout>
1369 <output_collection name="results" type="list">
1370 <element name="job.embeddings_test">
1371 <assert_contents>
1372 <has_n_lines n="50"/>
1373 </assert_contents>
1374 </element>
1375 <element name="job.embeddings_train">
1376 <assert_contents>
1377 <has_n_lines n="50"/>
1378 </assert_contents>
1379 </element>
1380 <element name="job.feature_importance.GradientShap">
1381 <assert_contents>
1382 <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/>
1383 <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/>
1384 <has_text_matching expression="Crizotinib\t0\t\tbar\tA2M\t"/>
1385 <has_text_matching expression="Crizotinib\t0\t\tbar\tABCC4\t"/>
1386 <has_text_matching expression="GradientShap"/>
1387 </assert_contents>
1388 </element>
1389 <element name="job.feature_importance.IntegratedGradients">
1390 <assert_contents>
1391 <has_text_matching expression="Erlotinib\t0\t\tbar\tA2M\t"/>
1392 <has_text_matching expression="Erlotinib\t0\t\tbar\tABCC4\t"/>
1393 <has_text_matching expression="Crizotinib\t0\t\tbar\tA2M\t"/>
1394 <has_text_matching expression="Crizotinib\t0\t\tbar\tABCC4\t"/>
1395 <has_text_matching expression="IntegratedGradients"/>
1396 </assert_contents>
1397 </element>
1398 <element name="job.feature_logs.bar">
1399 <assert_contents>
1400 <has_n_lines n="25"/>
1401 </assert_contents>
1402 </element>
1403 <element name="job.feature_logs.omics_foo">
1404 <assert_contents>
1405 <has_n_lines n="25"/>
1406 </assert_contents>
1407 </element>
1408 <element name="job.predicted_labels">
1409 <assert_contents>
1410 <has_text_matching expression="source_dataset:A-704\tErlotinib\t"/>
1411 <has_text_matching expression="source_dataset:A-704\tCrizotinib\t"/>
1412 <has_text_matching expression="target_dataset:KMRC-20\tErlotinib\t"/>
1413 <has_text_matching expression="target_dataset:KMRC-20\tCrizotinib\t"/>
1414 </assert_contents>
1415 </element>
1416 <element name="job.stats">
1417 <assert_contents>
1418 <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tmse\t"/>
1419 <has_text_matching expression="DirectPred\tCrizotinib\tnumerical\tmse\t"/>
1420 <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tr2\t"/>
1421 <has_text_matching expression="DirectPred\tCrizotinib\tnumerical\tr2\t"/>
1422 <has_text_matching expression="DirectPred\tErlotinib\tnumerical\tpearson_corr\t"/>
1423 <has_text_matching expression="DirectPred\tCrizotinib\tnumerical\tpearson_corr\t"/>
1424 </assert_contents>
1425 </element>
1426 </output_collection>
1427 </test>
1268 </tests> 1428 </tests>
1269 <help><![CDATA[ 1429 <help><![CDATA[
1270 @COMMON_HELP@ 1430 @COMMON_HELP@
1271 1431
1272 .. image:: https://raw.githubusercontent.com/BIMSBbioinfo/flexynesis/c4634d97f84e51f569dcfdab2caf42c9be453ef6/img/graphical_abstract.jpg 1432 .. image:: https://raw.githubusercontent.com/BIMSBbioinfo/flexynesis/c4634d97f84e51f569dcfdab2caf42c9be453ef6/img/graphical_abstract.jpg
1274 1434
1275 ----- 1435 -----
1276 1436
1277 **Input Files** 1437 **Input Files**
1278 1438
1279 **clin.csv** 1439 **clin.tabular**
1280 1440
1281 clin.csv contains the sample metadata. The first column contains unique sample identifiers. The other columns contain sample-associated clinical variables. NA values are allowed in the clinical variables. 1441 clin.tabular contains the sample metadata. The first column contains unique sample identifiers. The other columns contain sample-associated clinical variables. NA values are allowed in the clinical variables.
1282 1442
1283 The format might look like so: 1443 The format might look like so:
1284 1444
1285 ======== === === === 1445 ======== === === ===
1286 , v1 v2 ... 1446 , v1 v2 ...
1294 ... ... ... ... 1454 ... ... ... ...
1295 ======== === === === 1455 ======== === === ===
1296 1456
1297 . 1457 .
1298 1458
1299 **omics.csv** 1459 **omics.tabular**
1300 1460
1301 The first column of the feature tables must be unique feature identifiers (e.g. gene names). The column names must be sample identifiers that should overlap with those in the clin.csv. They don't have to be completely identical or in the same order. Samples from the clin.csv that are not represented in the omics table will be dropped. 1461 The first column of the feature tables must be unique feature identifiers (e.g. gene names). The column names must be sample identifiers that should overlap with those in the clin.tabular. They don't have to be completely identical or in the same order. Samples from the clin.tabular that are not represented in the omics table will be dropped.
1302 1462
1303 The format might look like so: 1463 The format might look like so:
1304 1464
1305 ===== ======= ======= ======= ======= 1465 ===== ======= ======= ======= =======
1306 , sample1 sample2 sample3 ... 1466 , sample1 sample2 sample3 ...
1318 1478
1319 .. class:: infomark 1479 .. class:: infomark
1320 1480
1321 **Concordance between train/test splits:** 1481 **Concordance between train/test splits:**
1322 1482
1323 The corresponding omics files in train/test splits must contain overlapping feature names (they don't have to be identical or in the same order). The clin.csv files in train/test must contain matching clinical variables. 1483 The corresponding omics files in train/test splits must contain overlapping feature names (they don't have to be identical or in the same order). The clin.tabular files in train/test must contain matching clinical variables.
1324 1484
1325 ----- 1485 -----
1326 1486
1327 **Supervised Training** 1487 **Supervised Training**
1328 1488
1329 **Minimum requirements** 1489 **Minimum requirements**
1330 1490
1331 * clin.csv and omics.csv files for training and testing 1491 * clin.tabular and omics.tabular files for training and testing
1332 * Selection of a tool/model 1492 * Selection of a tool/model
1333 * One target variable which can be numerical or categorical for regression/classification tasks. 1493 * One target variable which can be numerical or categorical for regression/classification tasks.
1334 1494
1335 Flexynesis supports both single-task and multi-task training. We can provide one or more target variables and, optionally, survival variables as input, and Flexynesis will build the appropriate model architecture. If the selected variable is numerical, a multilayer perceptron (MLP) with MSE loss will be used. If a categorical variable is provided, an MLP with cross-entropy loss will be utilized. If survival variables are provided, an MLP with Cox proportional hazards loss will be attached to the model. 1495 Flexynesis supports both single-task and multi-task training. We can provide one or more target variables and, optionally, survival variables as input, and Flexynesis will build the appropriate model architecture. If the selected variable is numerical, a multilayer perceptron (MLP) with MSE loss will be used. If a categorical variable is provided, an MLP with cross-entropy loss will be utilized. If survival variables are provided, an MLP with Cox proportional hazards loss will be attached to the model.
1336 1496