diff flexynesis_utils.xml @ 2:e5ecfffcfe45 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/flexynesis commit 1afbaf45449e25238935e222f983da62392c067a
author   | bgruening
date     | Fri, 04 Jul 2025 14:57:15 +0000
parents  | 433a5f3f68a1
children | f413f828ef30
--- a/flexynesis_utils.xml	Mon Jun 30 17:18:53 2025 +0000
+++ b/flexynesis_utils.xml	Fri Jul 04 14:57:15 2025 +0000
@@ -20,7 +20,7 @@
         python '$flexynesis_utils_config'
     #end if
     #if $utils_conditional.util == "split_data":
-        ln -s '$utils_conditional.clin' inputs/clin.csv &&
+        ln -s '$utils_conditional.clin' inputs/clin.tabular &&
         #set $omics_names = []
         #for $omics_file in $utils_conditional.omics:
             ln -s '$omics_file' 'inputs/${omics_file.element_identifier}.${omics_file.ext}' &&
@@ -29,7 +29,7 @@
     python '$__tool_directory__/flexynesis_utils.py'
         --util split
-        --clin inputs/clin.csv
+        --clin inputs/clin.tabular
         --omics '$(",".join($omics_names))'
         --split $utils_conditional.split
         --out output
@@ -77,8 +77,8 @@
 cluster_df = pd.DataFrame(data=cluster_labels, index=X.index, columns=['louvain_cluster'])
 label_data = label_data.merge(cluster_df[['louvain_cluster']], left_index=True, right_index=True, how='left')
-output_path = f"output/clustered_labels.csv"
-label_data.to_csv(output_path, index=True)
+output_path = f"output/clustered_labels.tabular"
+label_data.to_csv(output_path, sep="\t", index=True)
 #else if $utils_conditional.util == "get_optimal_clusters":
 label_data = load_omics('inputs/$utils_conditional.labels.element_identifier.$utils_conditional.labels.ext')
@@ -95,8 +95,8 @@
 cluster_df = pd.DataFrame(data=kmeans_cluster_labels, index=X.index, columns=['optimal_kmeans_cluster'])
 label_data = label_data.merge(cluster_df[['optimal_kmeans_cluster']], left_index=True, right_index=True, how='left')
-output_path = f"output/optimal_clusters_labels.csv"
-label_data.to_csv(output_path, index=True)
+output_path = f"output/optimal_clusters_labels.tabular"
+label_data.to_csv(output_path, sep="\t", index=True)
 #else if $utils_conditional.util == "k_means_clustering":
 label_data = load_omics('inputs/$utils_conditional.labels.element_identifier.$utils_conditional.labels.ext')
@@ -110,8 +110,8 @@
 cluster_df = pd.DataFrame(data=cluster_labels, index=X.index, columns=['kmeans_cluster'])
 label_data = label_data.merge(cluster_df[['kmeans_cluster']], left_index=True, right_index=True, how='left')
-output_path = f"output/kmeans_labels.csv"
-label_data.to_csv(output_path, index=True)
+output_path = f"output/kmeans_labels.tabular"
+label_data.to_csv(output_path, sep="\t", index=True)
 #else if $utils_conditional.util == "compute_ami_ari":
 label_data = load_omics('inputs/$utils_conditional.labels.element_identifier.$utils_conditional.labels.ext')
@@ -147,19 +147,19 @@
             <option value="binarize">Binarize mutation data</option>
         </param>
         <when value="louvain_clustering">
-            <param argument="--X" type="data" format="tabular,csv" label="Matrix" help="Input matrix, (samples, features)"/>
+            <param argument="--X" type="data" format="tabular" label="Matrix" help="Input matrix, (samples, features)"/>
             <expand macro="plots_common_input"/>
             <param argument="--threshold" type="float" min="0" optional="true" label="Distance threshold to create an edge between two nodes"/>
             <param argument="--k" type="integer" min="0" optional="true" label="Number of nearest neighbors to connect for each node"/>
         </when>
         <when value="get_optimal_clusters">
-            <param argument="--X" type="data" format="tabular,csv" label="Matrix" help="Input matrix, (samples, features)"/>
+            <param argument="--X" type="data" format="tabular" label="Matrix" help="Input matrix, (samples, features)"/>
             <expand macro="plots_common_input"/>
             <param argument="--min_k" type="integer" min="0" value="2" optional="false" label="Minimum number of clusters to try"/>
             <param argument="--max_k" type="integer" min="0" value="10" optional="false" label="Maximum number of clusters to try"/>
         </when>
         <when value="k_means_clustering">
-            <param argument="--X" type="data" format="tabular,csv" label="Matrix" help="Input matrix, (samples, features)"/>
+            <param argument="--X" type="data" format="tabular" label="Matrix" help="Input matrix, (samples, features)"/>
             <expand macro="plots_common_input"/>
             <param argument="--k" type="integer" min="0" optional="true" label="The number of clusters to form"/>
         </when>
@@ -169,12 +169,12 @@
             <param name="predicted_label" type="data_column" data_ref="labels" label="Column name in the labels file to use for the predicted labels"/>
         </when>
         <when value="split_data">
-            <param argument="--clin" type="data" format="csv" optional="false" label="Clinical data" help="Samples in rows"/>
-            <param argument="--omics" type="data" format="tabular,csv" optional="false" multiple="true" label="Omics data" help="samples in columns"/>
+            <param argument="--clin" type="data" format="tabular" optional="false" label="Clinical data" help="Samples in rows"/>
+            <param argument="--omics" type="data" format="tabular" optional="false" multiple="true" label="Omics data" help="samples in columns"/>
             <param argument="--split" type="float" min="0" max="1" value="0.7" label="Training/Test split ratio" help="Proportion of data to use for training (e.g., 0.7 means 70% train, 30% test)"/>
         </when>
         <when value="binarize">
-            <param argument="--mutation" type="data" format="tabular,csv" label="Mutation data" help="Mutation data with both genes and samples in rows"/>
+            <param argument="--mutation" type="data" format="tabular" label="Mutation data" help="Mutation data with both genes and samples in rows"/>
             <param argument="--gene_idx" type="data_column" data_ref="mutation" label="Column in the mutation file with genes"/>
             <param argument="--sample_idx" type="data_column" data_ref="mutation" label="Column in the mutation file with samples"/>
         </when>
@@ -185,11 +185,11 @@
             <filter>utils_conditional['util'] != "split_data"</filter>
         </data>
         <collection name="train_out" type="list" label="${tool.name} on ${on_string}: train datasets">
-            <discover_datasets pattern="__name_and_ext__" format="csv" directory="output/train"/>
+            <discover_datasets pattern="__name_and_ext__" format="tabular" directory="output/train"/>
             <filter>utils_conditional['util'] == "split_data"</filter>
         </collection>
         <collection name="test_out" type="list" label="${tool.name} on ${on_string}: test datasets">
-            <discover_datasets pattern="__name_and_ext__" format="csv" directory="output/test"/>
+            <discover_datasets pattern="__name_and_ext__" format="tabular" directory="output/test"/>
             <filter>utils_conditional['util'] == "split_data"</filter>
         </collection>
     </outputs>
@@ -199,14 +199,14 @@
             <param name="non_commercial_use" value="True"/>
             <conditional name="utils_conditional">
                 <param name="util" value="louvain_clustering"/>
-                <param name="X" value="embeddings.csv"/>
-                <param name="labels" value="labels_pr.csv"/>
+                <param name="X" value="embeddings.tabular"/>
+                <param name="labels" value="labels_pr.tabular"/>
                 <param name="k" value="15"/>
             </conditional>
             <output name="util_out">
                 <assert_contents>
-                    <has_text text="sample_id,variable,class_label,probability,known_label,predicted_label,split,louvain_cluster"/>
-                    <has_text text="MB-4818,CLAUDIN_SUBTYPE,LumA,0.8582904,LumB,LumA,test,3.0"/>
+                    <has_text_matching expression="sample_id\tvariable\tclass_label\tprobability\tknown_label\tpredicted_label\tsplit\tlouvain_cluster"/>
+                    <has_text_matching expression="MB-4818\tCLAUDIN_SUBTYPE\tLumA\t0.8582904\tLumB\tLumA\ttest\t3.0"/>
                 </assert_contents>
             </output>
         </test>
@@ -215,8 +215,8 @@
             <param name="non_commercial_use" value="True"/>
             <conditional name="utils_conditional">
                 <param name="util" value="get_optimal_clusters"/>
-                <param name="X" value="embeddings.csv"/>
-                <param name="labels" value="labels_pr.csv"/>
+                <param name="X" value="embeddings.tabular"/>
+                <param name="labels" value="labels_pr.tabular"/>
                 <param name="min_k" value="2"/>
                 <param name="max_k" value="10"/>
             </conditional>
@@ -226,8 +226,8 @@
             </assert_stdout>
             <output name="util_out">
                 <assert_contents>
-                    <has_text text="sample_id,variable,class_label,probability,known_label,predicted_label,split,optimal_kmeans_cluster"/>
-                    <has_text text="MB-4818,CLAUDIN_SUBTYPE,LumA,0.8582904,LumB,LumA,test,0.0"/>
+                    <has_text_matching expression="sample_id\tvariable\tclass_label\tprobability\tknown_label\tpredicted_label\tsplit\toptimal_kmeans_cluster"/>
+                    <has_text_matching expression="MB-4818\tCLAUDIN_SUBTYPE\tLumA\t0.8582904\tLumB\tLumA\ttest\t0.0"/>
                 </assert_contents>
             </output>
         </test>
@@ -236,8 +236,8 @@
             <param name="non_commercial_use" value="True"/>
             <conditional name="utils_conditional">
                 <param name="util" value="k_means_clustering"/>
-                <param name="X" value="embeddings.csv"/>
-                <param name="labels" value="labels_pr.csv"/>
+                <param name="X" value="embeddings.tabular"/>
+                <param name="labels" value="labels_pr.tabular"/>
                 <param name="k" value="2"/>
             </conditional>
             <assert_stdout>
@@ -245,8 +245,8 @@
             </assert_stdout>
             <output name="util_out">
                 <assert_contents>
-                    <has_text text="sample_id,variable,class_label,probability,known_label,predicted_label,split,kmeans_cluster"/>
-                    <has_text text="MB-4818,CLAUDIN_SUBTYPE,LumA,0.8582904,LumB,LumA,test,0.0"/>
+                    <has_text_matching expression="sample_id\tvariable\tclass_label\tprobability\tknown_label\tpredicted_label\tsplit\tkmeans_cluster"/>
+                    <has_text_matching expression="MB-4818\tCLAUDIN_SUBTYPE\tLumA\t0.8582904\tLumB\tLumA\ttest\t0.0"/>
                 </assert_contents>
             </output>
         </test>
@@ -255,7 +255,7 @@
             <param name="non_commercial_use" value="True"/>
             <conditional name="utils_conditional">
                 <param name="util" value="compute_ami_ari"/>
-                <param name="labels" value="labels.csv"/>
+                <param name="labels" value="labels.tabular"/>
                 <param name="true_label" value="5"/>
                 <param name="predicted_label" value="6"/>
             </conditional>
@@ -333,8 +333,8 @@
                 <assert_contents>
                     <has_n_lines n="1611"/>
                     <has_text text="Hugo_Symbol"/>
-                    <has_text text="AADACL2,0.0,0.0"/>
-                    <has_text text="ABCB1,0.0,0.0,0.0,1.0"/>
+                    <has_text_matching expression="AADACL2\t0.0\t0.0"/>
+                    <has_text_matching expression="ABCB1\t0.0\t0.0\t0.0\t1.0"/>
                 </assert_contents>
             </output>
         </test>
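The recurring change in every utility branch is the same pattern: rename the output from *.csv to *.tabular and pass sep="\t" to pandas' to_csv, so the discovered datasets match the new format="tabular" declarations. A minimal sketch of that write path, assuming an illustrative label table (the second sample ID and the values are made up, not taken from the tool's test data):

    import pandas as pd

    # Illustrative stand-in for the merged label table; the real tool loads the
    # labels with flexynesis' load_omics helper and merges cluster assignments in.
    label_data = pd.DataFrame(
        {"known_label": ["LumB", "LumA"], "louvain_cluster": [3.0, 1.0]},
        index=pd.Index(["MB-4818", "MB-0002"], name="sample_id"),  # MB-0002 is hypothetical
    )

    # sep="\t" is the substantive change: the output becomes tab-separated,
    # matching the tool's format="tabular" outputs and the \t-based assertions.
    label_data.to_csv("clustered_labels.tabular", sep="\t", index=True)

This is also why the test assertions switch from has_text to has_text_matching: the expected rows now contain tab separators, which are expressed as \t in a regular expression rather than embedded literally in the XML attribute.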