diff flexynesis_utils.xml @ 2:e5ecfffcfe45 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 1afbaf45449e25238935e222f983da62392c067a
author bgruening
date Fri, 04 Jul 2025 14:57:15 +0000
parents 433a5f3f68a1
children f413f828ef30
line wrap: on
line diff
--- a/flexynesis_utils.xml	Mon Jun 30 17:18:53 2025 +0000
+++ b/flexynesis_utils.xml	Fri Jul 04 14:57:15 2025 +0000
@@ -20,7 +20,7 @@
         python '$flexynesis_utils_config'
         #end if
         #if $utils_conditional.util == "split_data":
-            ln -s '$utils_conditional.clin' inputs/clin.csv &&
+            ln -s '$utils_conditional.clin' inputs/clin.tabular &&
             #set $omics_names = []
             #for $omics_file in $utils_conditional.omics:
                 ln -s '$omics_file' 'inputs/${omics_file.element_identifier}.${omics_file.ext}' &&
@@ -29,7 +29,7 @@
 
             python '$__tool_directory__/flexynesis_utils.py'
                 --util split
-                --clin inputs/clin.csv
+                --clin inputs/clin.tabular
                 --omics '$(",".join($omics_names))'
                 --split $utils_conditional.split
                 --out output
@@ -77,8 +77,8 @@
 cluster_df = pd.DataFrame(data=cluster_labels, index=X.index, columns=['louvain_cluster'])
 label_data = label_data.merge(cluster_df[['louvain_cluster']], left_index=True, right_index=True, how='left')
 
-output_path = f"output/clustered_labels.csv"
-label_data.to_csv(output_path, index=True)
+output_path = "output/clustered_labels.tabular"
+label_data.to_csv(output_path, sep="\t", index=True)
 
 #else if $utils_conditional.util == "get_optimal_clusters":
 label_data = load_omics('inputs/$utils_conditional.labels.element_identifier.$utils_conditional.labels.ext')
@@ -95,8 +95,8 @@
 cluster_df = pd.DataFrame(data=kmeans_cluster_labels, index=X.index, columns=['optimal_kmeans_cluster'])
 label_data = label_data.merge(cluster_df[['optimal_kmeans_cluster']], left_index=True, right_index=True, how='left')
 
-output_path = f"output/optimal_clusters_labels.csv"
-label_data.to_csv(output_path, index=True)
+output_path = "output/optimal_clusters_labels.tabular"
+label_data.to_csv(output_path, sep="\t", index=True)
 
 #else if $utils_conditional.util == "k_means_clustering":
 label_data = load_omics('inputs/$utils_conditional.labels.element_identifier.$utils_conditional.labels.ext')
@@ -110,8 +110,8 @@
 cluster_df = pd.DataFrame(data=cluster_labels, index=X.index, columns=['kmeans_cluster'])
 label_data = label_data.merge(cluster_df[['kmeans_cluster']], left_index=True, right_index=True, how='left')
 
-output_path = f"output/kmeans_labels.csv"
-label_data.to_csv(output_path, index=True)
+output_path = "output/kmeans_labels.tabular"
+label_data.to_csv(output_path, sep="\t", index=True)
 
 #else if $utils_conditional.util == "compute_ami_ari":
 label_data = load_omics('inputs/$utils_conditional.labels.element_identifier.$utils_conditional.labels.ext')
@@ -147,19 +147,19 @@
                 <option value="binarize">Binarize mutation data</option>
             </param>
             <when value="louvain_clustering">
-                <param argument="--X" type="data" format="tabular,csv" label="Matrix" help="Input matrix, (samples, features)"/>
+                <param argument="--X" type="data" format="tabular" label="Matrix" help="Input matrix, (samples, features)"/>
                 <expand macro="plots_common_input"/>
                 <param argument="--threshold" type="float" min="0" optional="true" label="Distance threshold to create an edge between two nodes"/>
                 <param argument="--k" type="integer" min="0" optional="true" label="Number of nearest neighbors to connect for each node"/>
             </when>
             <when value="get_optimal_clusters">
-                <param argument="--X" type="data" format="tabular,csv" label="Matrix" help="Input matrix, (samples, features)"/>
+                <param argument="--X" type="data" format="tabular" label="Matrix" help="Input matrix, (samples, features)"/>
                 <expand macro="plots_common_input"/>
                 <param argument="--min_k" type="integer" min="0" value="2" optional="false" label="Minimum number of clusters to try"/>
                 <param argument="--max_k" type="integer" min="0" value="10" optional="false" label="Maximum number of clusters to try"/>
             </when>
             <when value="k_means_clustering">
-                <param argument="--X" type="data" format="tabular,csv" label="Matrix" help="Input matrix, (samples, features)"/>
+                <param argument="--X" type="data" format="tabular" label="Matrix" help="Input matrix, (samples, features)"/>
                 <expand macro="plots_common_input"/>
                 <param argument="--k" type="integer" min="0" optional="true" label="The number of clusters to form"/>
             </when>
@@ -169,12 +169,12 @@
                     <param name="predicted_label" type="data_column" data_ref="labels" label="Column name in the labels file to use for the predicted labels"/>
             </when>
             <when value="split_data">
-                <param argument="--clin" type="data" format="csv" optional="false" label="Clinical data" help="Samples in rows"/>
-                <param argument="--omics" type="data" format="tabular,csv" optional="false" multiple="true" label="Omics data" help="samples in columns"/>
+                <param argument="--clin" type="data" format="tabular" optional="false" label="Clinical data" help="Samples in rows"/>
+                <param argument="--omics" type="data" format="tabular" optional="false" multiple="true" label="Omics data" help="samples in columns"/>
                 <param argument="--split" type="float" min="0" max="1" value="0.7" label="Training/Test split ratio" help="Proportion of data to use for training (e.g., 0.7 means 70% train, 30% test)"/>
             </when>
             <when value="binarize">
-                <param argument="--mutation" type="data" format="tabular,csv" label="Mutation data" help="Mutation data with both genes and samples in rows"/>
+                <param argument="--mutation" type="data" format="tabular" label="Mutation data" help="Mutation data with both genes and samples in rows"/>
                 <param argument="--gene_idx" type="data_column" data_ref="mutation" label="Column in the mutation file with genes"/>
                 <param argument="--sample_idx" type="data_column" data_ref="mutation" label="Column in the mutation file with samples"/>
             </when>
@@ -185,11 +185,11 @@
             <filter>utils_conditional['util'] != "split_data"</filter>
         </data>
         <collection name="train_out" type="list" label="${tool.name} on ${on_string}: train datasets">
-            <discover_datasets pattern="__name_and_ext__" format="csv" directory="output/train"/>
+            <discover_datasets pattern="__name_and_ext__" format="tabular" directory="output/train"/>
             <filter>utils_conditional['util'] == "split_data"</filter>
         </collection>
         <collection name="test_out" type="list" label="${tool.name} on ${on_string}: test datasets">
-            <discover_datasets pattern="__name_and_ext__" format="csv" directory="output/test"/>
+            <discover_datasets pattern="__name_and_ext__" format="tabular" directory="output/test"/>
             <filter>utils_conditional['util'] == "split_data"</filter>
         </collection>
     </outputs>
@@ -199,14 +199,14 @@
             <param name="non_commercial_use" value="True"/>
             <conditional name="utils_conditional">
                 <param name="util" value="louvain_clustering"/>
-                <param name="X" value="embeddings.csv"/>
-                <param name="labels" value="labels_pr.csv"/>
+                <param name="X" value="embeddings.tabular"/>
+                <param name="labels" value="labels_pr.tabular"/>
                 <param name="k" value="15"/>
             </conditional>
             <output name="util_out">
                 <assert_contents>
-                    <has_text text="sample_id,variable,class_label,probability,known_label,predicted_label,split,louvain_cluster"/>
-                    <has_text text="MB-4818,CLAUDIN_SUBTYPE,LumA,0.8582904,LumB,LumA,test,3.0"/>
+                    <has_text_matching expression="sample_id\tvariable\tclass_label\tprobability\tknown_label\tpredicted_label\tsplit\tlouvain_cluster"/>
+                    <has_text_matching expression="MB-4818\tCLAUDIN_SUBTYPE\tLumA\t0\.8582904\tLumB\tLumA\ttest\t3\.0"/>
                 </assert_contents>
             </output>
         </test>
@@ -215,8 +215,8 @@
             <param name="non_commercial_use" value="True"/>
             <conditional name="utils_conditional">
                 <param name="util" value="get_optimal_clusters"/>
-                <param name="X" value="embeddings.csv"/>
-                <param name="labels" value="labels_pr.csv"/>
+                <param name="X" value="embeddings.tabular"/>
+                <param name="labels" value="labels_pr.tabular"/>
                 <param name="min_k" value="2"/>
                 <param name="max_k" value="10"/>
             </conditional>
@@ -226,8 +226,8 @@
             </assert_stdout>
             <output name="util_out">
                 <assert_contents>
-                    <has_text text="sample_id,variable,class_label,probability,known_label,predicted_label,split,optimal_kmeans_cluster"/>
-                    <has_text text="MB-4818,CLAUDIN_SUBTYPE,LumA,0.8582904,LumB,LumA,test,0.0"/>
+                    <has_text_matching expression="sample_id\tvariable\tclass_label\tprobability\tknown_label\tpredicted_label\tsplit\toptimal_kmeans_cluster"/>
+                    <has_text_matching expression="MB-4818\tCLAUDIN_SUBTYPE\tLumA\t0\.8582904\tLumB\tLumA\ttest\t0\.0"/>
                 </assert_contents>
             </output>
         </test>
@@ -236,8 +236,8 @@
             <param name="non_commercial_use" value="True"/>
             <conditional name="utils_conditional">
                 <param name="util" value="k_means_clustering"/>
-                <param name="X" value="embeddings.csv"/>
-                <param name="labels" value="labels_pr.csv"/>
+                <param name="X" value="embeddings.tabular"/>
+                <param name="labels" value="labels_pr.tabular"/>
                 <param name="k" value="2"/>
             </conditional>
             <assert_stdout>
@@ -245,8 +245,8 @@
             </assert_stdout>
             <output name="util_out">
                 <assert_contents>
-                    <has_text text="sample_id,variable,class_label,probability,known_label,predicted_label,split,kmeans_cluster"/>
-                    <has_text text="MB-4818,CLAUDIN_SUBTYPE,LumA,0.8582904,LumB,LumA,test,0.0"/>
+                    <has_text_matching expression="sample_id\tvariable\tclass_label\tprobability\tknown_label\tpredicted_label\tsplit\tkmeans_cluster"/>
+                    <has_text_matching expression="MB-4818\tCLAUDIN_SUBTYPE\tLumA\t0\.8582904\tLumB\tLumA\ttest\t0\.0"/>
                 </assert_contents>
             </output>
         </test>
@@ -255,7 +255,7 @@
             <param name="non_commercial_use" value="True"/>
             <conditional name="utils_conditional">
                 <param name="util" value="compute_ami_ari"/>
-                <param name="labels" value="labels.csv"/>
+                <param name="labels" value="labels.tabular"/>
                 <param name="true_label" value="5"/>
                 <param name="predicted_label" value="6"/>
             </conditional>
@@ -333,8 +333,8 @@
                 <assert_contents>
                     <has_n_lines n="1611"/>
                     <has_text text="Hugo_Symbol"/>
-                    <has_text text="AADACL2,0.0,0.0"/>
-                    <has_text text="ABCB1,0.0,0.0,0.0,1.0"/>
+                    <has_text_matching expression="AADACL2\t0\.0\t0\.0"/>
+                    <has_text_matching expression="ABCB1\t0\.0\t0\.0\t0\.0\t1\.0"/>
                 </assert_contents>
             </output>
         </test>