Mercurial > repos > iuc > chopin2
changeset 1:b3d46e41520e draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/chopin2 commit 8e7dd5c6726574b0a0157d675c8e70e7a2c0cce0
| author | iuc |
|---|---|
| date | Tue, 18 Jun 2024 07:43:34 +0000 |
| parents | 41c41cf6b3de |
| children | |
| files | chopin2.xml macros.xml test-data/selection.txt test-data/summary.txt test-data/summary_1.txt test-data/summary_2.txt test-data/summary_3.txt |
| diffstat | 7 files changed, 86 insertions(+), 54 deletions(-) [+] |
line wrap: on
line diff
--- a/chopin2.xml Tue Jan 31 16:31:07 2023 +0000 +++ b/chopin2.xml Tue Jun 18 07:43:34 2024 +0000 @@ -44,13 +44,13 @@ <inputs> <param name="dataset" type="data" format="csv,tabular" label="Select a dataset" - help="Input dataset with features on columns and observations on rows. The first column must contain the observation IDs, while the last column must contain classes. The header line is also required." /> + help="Input dataset with features on columns and observations on rows. The first column must contain the observation IDs, while the last column must contain classes. The header line is also required."/> <param name="dimensionality" type="integer" value="10000" min="100" label="Vectors dimensionality" help="Size of hypervectors is usually 10,000 in vector-symbolic architectures. However, lower values could work with small datasets in terms of number of features and observations. Please note that you may require - to increase this number in case of datasets with a huge number of features." /> + to increase this number in case of datasets with a huge number of features."/> <param name="levels" type="integer" value="1000" min="2" label="Levels" @@ -59,13 +59,13 @@ <param name="retrain" type="integer" value="0" min="0" label="Model retraining iterations" - help="Maximum number of retraining iterations. Class hypervectors are retrained to minimize errors caused by noise." /> + help="Maximum number of retraining iterations. Class hypervectors are retrained to minimize errors caused by noise."/> <param name="folds" type="integer" value="2" min="2" label="Number of folds for cross-validation" help="This tool makes use of k-folds cross-validation to evaluate the accuracy of the hyperdimensional model. Make sure to choose a good number of folds for validating the classification model. Please note that higher number - of folds could significantly increase the running time." /> + of folds could significantly increase the running time."/> <conditional name="feature_selection"> <param name="enable_fs" type="select" @@ -76,20 +76,20 @@ <option value="true">Enabled</option> </param> - <when value="false" /> + <when value="false"/> <when value="true"> <param name="group_min" type="integer" value="1" min="1" label="Minimum number of selected features" - help="Tool will stop removing features if its number will reach this value." /> + help="Tool will stop removing features if its number will reach this value."/> <param name="accuracy_threshold" type="float" value="60.0" min="0.0" max="100.0" label="Accuracy threshold" - help="Stop the execution if the best accuracy reached for a group of features is lower than this value." /> + help="Stop the execution if the best accuracy reached for a group of features is lower than this value."/> <param name="accuracy_uncertainty_perc" type="float" value="5.0" min="0.0" max="100.0" label="Accuracy uncertainty percentage" - help="Consider non optimal solutions if model accuracy is greater than the best accuracy minus this percentage." /> + help="Consider non optimal solutions if model accuracy is greater than the best accuracy minus this percentage."/> </when> </conditional> </inputs> @@ -97,81 +97,81 @@ <outputs> <data format="tabular" name="summary" label="${tool.name} on ${on_string}: Summary" from_work_dir="summary.txt"> <actions> - <action name="column_names" type="metadata" default="Run ID,Group Size,Retraining,Accuracy,Excluded Feature" /> - <action name="column_types" type="metadata" default="str,int,int,float,str" /> - <action name="comment_lines" type="metadata" default="7" /> + <action name="column_names" type="metadata" default="Run ID,Group Size,Retraining,Accuracy,Excluded Feature"/> + <action name="column_types" type="metadata" default="str,int,int,float,str"/> + <action name="comment_lines" type="metadata" default="7"/> </actions> </data> <data format="tabular" name="selection" label="${tool.name} on ${on_string}: Selection" from_work_dir="selection.txt"> <filter>feature_selection["enable_fs"]</filter> <actions> - <action name="column_names" type="metadata" default="Selected Features:" /> - <action name="column_types" type="metadata" default="str" /> - <action name="comment_lines" type="metadata" default="3" /> + <action name="column_names" type="metadata" default="Selected Features:"/> + <action name="column_types" type="metadata" default="str"/> + <action name="comment_lines" type="metadata" default="3"/> </actions> </data> </outputs> <tests> - <test> - <param name="dataset" value="iris.csv" /> - <param name="dimensionality" value="1000" /> - <param name="levels" value="100" /> - <param name="retrain" value="10" /> - <param name="folds" value="5" /> + <test expect_num_outputs="2"> + <param name="dataset" value="iris.csv"/> + <param name="dimensionality" value="1000"/> + <param name="levels" value="100"/> + <param name="retrain" value="10"/> + <param name="folds" value="5"/> - <output name="summary" ftype="tabular" value="summary.txt"> + <output name="summary" ftype="tabular" value="summary_1.txt"> <assert_contents> <has_text_matching expression="# Run ID\tGroup Size\tRetraining\tAccuracy"/> - <has_text text="8f0e142ff27db7f8d2cc66cfcc05e27c" /> + <has_text text="8f0e142ff27db7f8d2cc66cfcc05e27c"/> </assert_contents> </output> </test> - <test> - <param name="dataset" value="iris.tabular" /> - <param name="dimensionality" value="1000" /> - <param name="levels" value="100" /> - <param name="retrain" value="10" /> - <param name="folds" value="5" /> + <test expect_num_outputs="2"> + <param name="dataset" value="iris.tabular"/> + <param name="dimensionality" value="1000"/> + <param name="levels" value="100"/> + <param name="retrain" value="10"/> + <param name="folds" value="5"/> - <output name="summary" ftype="tabular" value="summary.txt"> + <output name="summary" ftype="tabular" value="summary_2.txt"> <assert_contents> <has_text_matching expression="# Run ID\tGroup Size\tRetraining\tAccuracy"/> - <has_text text="8f0e142ff27db7f8d2cc66cfcc05e27c" /> + <has_text text="8f0e142ff27db7f8d2cc66cfcc05e27c"/> </assert_contents> </output> </test> - <test> - <param name="dataset" value="iris.csv" /> - <param name="dimensionality" value="1000" /> - <param name="levels" value="100" /> - <param name="retrain" value="10" /> - <param name="folds" value="5" /> + <test expect_num_outputs="2"> + <param name="dataset" value="iris.csv"/> + <param name="dimensionality" value="1000"/> + <param name="levels" value="100"/> + <param name="retrain" value="10"/> + <param name="folds" value="5"/> <conditional name="feature_selection"> - <param name="enable_fs" value="true" /> - <param name="group_min" value="1" /> - <param name="accuracy_threshold" value="60.0" /> - <param name="accuracy_uncertainty_perc" value="5.0" /> + <param name="enable_fs" value="true"/> + <param name="group_min" value="1"/> + <param name="accuracy_threshold" value="60.0"/> + <param name="accuracy_uncertainty_perc" value="5.0"/> </conditional> - <output name="summary" ftype="tabular" value="summary.txt"> + <output name="summary" ftype="tabular" value="summary_3.txt"> <assert_contents> - <has_text_matching expression="# Run ID\tGroup Size\tRetraining\tAccuracy" /> - <has_text text="8f0e142ff27db7f8d2cc66cfcc05e27c" /> + <has_text_matching expression="# Run ID\tGroup Size\tRetraining\tAccuracy"/> + <has_text text="8f0e142ff27db7f8d2cc66cfcc05e27c"/> </assert_contents> </output> <output name="selection" ftype="tabular" value="selection.txt"> <assert_contents> - <has_text text="# Selected Features:" /> - <has_text text="PetalLengthCm" /> - <has_text text="PetalWidthCm" /> - <has_text text="SepalLengthCm" /> - <has_text text="SepalWidthCm" /> + <has_text text="# Selected Features:"/> + <has_text text="PetalLengthCm"/> + <has_text text="PetalWidthCm"/> + <has_text text="SepalLengthCm"/> + <has_text text="SepalWidthCm"/> </assert_contents> </output> </test>
--- a/macros.xml Tue Jan 31 16:31:07 2023 +0000 +++ b/macros.xml Tue Jun 18 07:43:34 2024 +0000 @@ -1,6 +1,6 @@ <macros> - <token name="@TOOL_VERSION@">1.0.7</token> - <token name="@GALAXY_VERSION@">1</token> + <token name="@TOOL_VERSION@">1.0.9.post1</token> + <token name="@GALAXY_VERSION@">0</token> <token name="@PROFILE@">22.05</token> <xml name="creator"> @@ -22,4 +22,4 @@ <citation type="doi">10.3390/a13090233</citation> </citations> </xml> -</macros> \ No newline at end of file +</macros>
--- a/test-data/selection.txt Tue Jan 31 16:31:07 2023 +0000 +++ b/test-data/selection.txt Tue Jun 18 07:43:34 2024 +0000 @@ -1,5 +1,5 @@ -# Best group size: 4 -# Best accuracy: 43.33333333333333 +# Best group size: 3 +# Best accuracy: 96.67 # Selected Features: PetalLengthCm PetalWidthCm
--- a/test-data/summary.txt Tue Jan 31 16:31:07 2023 +0000 +++ b/test-data/summary.txt Tue Jun 18 07:43:34 2024 +0000 @@ -5,4 +5,8 @@ # Accuracy threshold (stop condition): 60.0 # Accuracy uncertainty (percentage): 5.0 # Run ID Group Size Retraining Accuracy Excluded Feature -8f0e142ff27db7f8d2cc66cfcc05e27c 4 2 43.33333333333333 +8f0e142ff27db7f8d2cc66cfcc05e27c 4 1 96.0 +1349db4e4ec1dcbf755cda656c5e6212 3 0 96.67 SepalWidthCm +7b74a2f91bb30738131e1ff11be19f6d 3 0 96.67 SepalLengthCm +68d7a605b5709982b8fe0bde7b301a02 3 1 95.33 PetalLengthCm +7be81c8a748b3f532bdfce123e1dd87e 3 1 94.0 PetalWidthCm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/summary_1.txt Tue Jun 18 07:43:34 2024 +0000 @@ -0,0 +1,8 @@ +# Dataset: iris +# Dimensionality: 1000 +# Number of levels: 100 +# Max retraining iterations: 10 +# Accuracy threshold (stop condition): 60.0 +# Accuracy uncertainty (percentage): 5.0 +# Run ID Group Size Retraining Accuracy Excluded Feature +8f0e142ff27db7f8d2cc66cfcc05e27c 4 1 96.0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/summary_2.txt Tue Jun 18 07:43:34 2024 +0000 @@ -0,0 +1,8 @@ +# Dataset: iris +# Dimensionality: 1000 +# Number of levels: 100 +# Max retraining iterations: 10 +# Accuracy threshold (stop condition): 60.0 +# Accuracy uncertainty (percentage): 5.0 +# Run ID Group Size Retraining Accuracy Excluded Feature +8f0e142ff27db7f8d2cc66cfcc05e27c 4 1 96.0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/summary_3.txt Tue Jun 18 07:43:34 2024 +0000 @@ -0,0 +1,12 @@ +# Dataset: iris +# Dimensionality: 1000 +# Number of levels: 100 +# Max retraining iterations: 10 +# Accuracy threshold (stop condition): 60.0 +# Accuracy uncertainty (percentage): 5.0 +# Run ID Group Size Retraining Accuracy Excluded Feature +8f0e142ff27db7f8d2cc66cfcc05e27c 4 1 96.0 +1349db4e4ec1dcbf755cda656c5e6212 3 0 96.67 SepalWidthCm +7b74a2f91bb30738131e1ff11be19f6d 3 0 96.67 SepalLengthCm +68d7a605b5709982b8fe0bde7b301a02 3 1 95.33 PetalLengthCm +7be81c8a748b3f532bdfce123e1dd87e 3 1 94.0 PetalWidthCm
