Mercurial > repos > iuc > deepmicro
comparison deepmicro.xml @ 0:969ca45fd5de draft
planemo upload for repository https://github.com/paulzierep/DeepMicro commit 1bbea291a9d77beafaeba83ab775d870ec24719e
| author | iuc |
|---|---|
| date | Tue, 02 May 2023 17:39:01 +0000 |
| parents | |
| children | 5fc85e6bdc19 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:969ca45fd5de |
|---|---|
| 1 <tool id="deepmicro" name="DeepMicro" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
| 2 <description> | |
| 3 Representation learning and classification framework | |
| 4 </description> | |
| 5 <macros> | |
| 6 <import>macros.xml</import> | |
| 7 </macros> | |
| 8 <expand macro="biotools" /> | |
| 9 <expand macro="requirements" /> | |
| 10 <expand macro="version" /> | |
| 11 <command detect_errors="exit_code"><![CDATA[ | |
| 12 mkdir data && | |
| 13 mkdir results && | |
| 14 ln -s '$features' data/features.csv && | |
| 15 ## Every DM.py call below ends in "&&", so the chain is closed by the final echo. | |
| 16 #if $mode.mode_type == "only_encoding": | |
| 17 ## Encoding only: one DM.py call per parameter set, run without labels and with --no_clf. | |
| 18 #for $params in $mode.parameter_set: | |
| 19 ## The --pca and --rp cases define no dm parameter in this wrapper, so -dm is only passed otherwise. | |
| 20 #if $params.rl_type.rl_type_choice == "--pca" or $params.rl_type.rl_type_choice == "--rp": | |
| 21 DM.py -r 1 -cd features.csv '$params.rl_type.rl_type_choice' --save_rep --no_clf -t \${GALAXY_SLOTS:-8} && | |
| 22 #else: | |
| 23 DM.py -r 1 -cd features.csv '$params.rl_type.rl_type_choice' -dm '$params.rl_type.dm' --save_rep --no_clf -t \${GALAXY_SLOTS:-8} && | |
| 24 #end if | |
| 25 #end for | |
| 26 | |
| 27 #else: | |
| 28 ## Encoding and classification: the class labels are linked next to the features. | |
| 29 ln -s '$mode.class_labels' data/labels.csv && | |
| 30 ## "no_rl" (Train on input) is a wrapper-only value with no dm parameter: pass neither a representation flag nor -dm. | |
| 31 #for $params in $mode.parameter_set: | |
| 32 #if $params.rl_type.rl_type_choice == "no_rl": | |
| 33 DM.py -r 1 -cd features.csv -cl labels.csv -m '$params.rl_type.classifier' -t \${GALAXY_SLOTS:-8} && | |
| 34 #elif $params.rl_type.rl_type_choice == "--pca" or $params.rl_type.rl_type_choice == "--rp": | |
| 35 DM.py -r 1 -cd features.csv -cl labels.csv '$params.rl_type.rl_type_choice' --save_rep -m '$params.rl_type.classifier' -t \${GALAXY_SLOTS:-8} && | |
| 36 #else: | |
| 37 DM.py -r 1 -cd features.csv -cl labels.csv '$params.rl_type.rl_type_choice' -dm '$params.rl_type.dm' --save_rep -m '$params.rl_type.classifier' -t \${GALAXY_SLOTS:-8} && | |
| 38 #end if | |
| 39 #end for | |
| 40 #end if | |
| 41 | |
| 42 echo Done ! | |
| 43 ]]> | |
| 44 </command> | |
| 45 <inputs> | |
| 46 <param argument="--features" type="data" format="tabular" label="Feature table" help="Dataset containing the features of samples"/> | |
| 47 <conditional name="mode"> <!-- encoding only, or encoding plus classifier cross validation --> | |
| 48 <param name="mode_type" type="select" label="Mode" help="The tool can either only create a latent | |
| 49 representation of the data or create a latent representation of the data and cross validate a classifier using that encoding."> | |
| 50 <option value="only_encoding">Create only encoding</option> | |
| 51 <option value="e_and_c">Create encoding and cross validate a classifier</option> | |
| 52 </param> | |
| 53 <when value="only_encoding"> | |
| 54 <repeat name="parameter_set" title="Parameter Set"> <!-- the command runs DM.py once per entry --> | |
| 55 <conditional name="rl_type"> | |
| 56 <param name="rl_type_choice" type="select" label="Representation learning type" help="The type of representation learning"> | |
| 57 <option value="--pca">PCA</option> | |
| 58 <option value="--rp">Random Projection</option> | |
| 59 <option value="--ae">Autoencoder or Deep Autoencoder</option> | |
| 60 <option value="--vae">Variational Autoencoder</option> | |
| 61 <option value="--cae">Convolutional Autoencoder</option> | |
| 62 </param> | |
| 63 <when value="--pca"/> | |
| 64 <when value="--rp"/> | |
| 65 <when value="--ae"> | |
| 66 <expand macro="dm"/> | |
| 67 </when> | |
| 68 <when value="--vae"> | |
| 69 <expand macro="dm"/> | |
| 70 </when> | |
| 71 <when value="--cae"> | |
| 72 <expand macro="dm"/> | |
| 73 </when> | |
| 74 </conditional> | |
| 75 </repeat> | |
| 76 </when> | |
| 77 <when value="e_and_c"> | |
| 78 <param argument="--class_labels" type="data" format="tabular" label="Class labels" help="Dataset containing the class labels corresponding to the features"/> | |
| 79 <repeat name="parameter_set" title="Parameter Set"> <!-- the command runs DM.py once per entry --> | |
| 80 <conditional name="rl_type"> | |
| 81 <param name="rl_type_choice" type="select" label="Representation learning type" help="The type of representation learning. `Train on input` trains the classifier on the input features without representation learning"> | |
| 82 <option value="--pca">PCA</option> | |
| 83 <option value="--rp">Random Projection</option> | |
| 84 <option value="--ae">Autoencoder or Deep Autoencoder</option> | |
| 85 <option value="--vae">Variational Autoencoder</option> | |
| 86 <option value="--cae">Convolutional Autoencoder</option> | |
| 87 <option value="no_rl">Train on input</option> | |
| 88 </param> | |
| 89 <when value="--pca"> | |
| 90 <expand macro="clfs"/> | |
| 91 </when> | |
| 92 <when value="--rp"> | |
| 93 <expand macro="clfs"/> | |
| 94 </when> | |
| 95 <when value="--ae"> | |
| 96 <expand macro="dm"/> | |
| 97 <expand macro="clfs"/> | |
| 98 </when> | |
| 99 <when value="--vae"> | |
| 100 <expand macro="dm"/> | |
| 101 <expand macro="clfs"/> | |
| 102 </when> | |
| 103 <when value="--cae"> | |
| 104 <expand macro="dm"/> | |
| 105 <expand macro="clfs"/> | |
| 106 </when> | |
| 107 <when value="no_rl"> <!-- classifier only: this case defines no dm parameter --> | |
| 108 <expand macro="clfs"/> | |
| 109 </when> | |
| 110 </conditional> | |
| 111 </repeat> | |
| 112 </when> | |
| 113 </conditional> | |
| 114 </inputs> | |
| 115 <outputs> | |
| 116 <data name="results" format="tabular" from_work_dir="./results/*_result.txt" label="${tool.name} on ${on_string}: Results"> | |
| 117 <!-- The result table only exists when a classifier is cross validated. --> | |
| 118 <filter>mode["mode_type"] == "e_and_c"</filter> | |
| 119 </data> | |
| 120 <collection name="encoded_features" type="list" label="Encoded Features"> | |
| 121 <!-- When classification is performed the tool only writes the encoded features of the training set, which is not very useful, so the | |
| 122 collection is limited to encoding-only runs. TODO: change the tool to export the encoded features of the complete dataset in that mode too. --> | |
| 123 <filter>mode["mode_type"] == "only_encoding"</filter> | |
| 124 <discover_datasets directory="results" pattern="(?P&lt;designation&gt;.*)_rep\.csv" format="tabular" visible="false" /> | |
| 125 </collection> | |
| 126 </outputs> | |
| 127 <tests> | |
| 128 | |
| 129 <!-- encoding only --> | |
| 130 <!-- test a single parameter set --> | |
| 131 | |
| 132 <test expect_num_outputs="1"> | |
| 133 <param name="mode_type" value="only_encoding" /> | |
| 134 <param name="features" value="UserDataExample.csv" /> | |
| 135 <param name="rl_type_choice" value="--ae" /> | |
| 136 <param name="dm" value="40" /> | |
| 137 <output_collection name="encoded_features" type="list"> | |
| 138 <!-- output is non-deterministic, so only the number of lines is asserted --> | |
| 139 <element name="AE[40]_features" ftype="tabular" > | |
| 140 <assert_contents> | |
| 141 <has_n_lines n="20"/> | |
| 142 <!-- <has_n_columns n="40" sep="," /> --> | |
| 143 </assert_contents> | |
| 144 </element> | |
| 145 </output_collection> | |
| 146 </test> | |
| 147 | |
| 148 <test expect_num_outputs="1"> | |
| 149 <param name="mode_type" value="only_encoding" /> | |
| 150 <param name="features" value="UserDataExample.csv" /> | |
| 151 <param name="rl_type_choice" value="--pca" /> | |
| 152 <output_collection name="encoded_features" type="list"> | |
| 153 <element name="PCA_features" ftype="tabular" > | |
| 154 <assert_contents> | |
| 155 <has_n_lines n="20"/> | |
| 156 <!-- <has_n_columns n="40" sep="," /> --> | |
| 157 </assert_contents> | |
| 158 </element> | |
| 159 </output_collection> | |
| 160 </test> | |
| 161 | |
| 162 <!-- test multiple parameter sets in one run --> | |
| 163 <test expect_num_outputs="1"> | |
| 164 <param name="features" value="UserDataExample.csv" /> | |
| 165 <conditional name="mode"> | |
| 166 <param name="mode_type" value="only_encoding" /> | |
| 167 | |
| 168 <repeat name="parameter_set"> | |
| 169 <conditional name="rl_type"> | |
| 170 <param name="rl_type_choice" value="--pca" /> | |
| 171 </conditional> | |
| 172 </repeat> | |
| 173 | |
| 174 <repeat name="parameter_set"> | |
| 175 <conditional name="rl_type"> | |
| 176 <param name="rl_type_choice" value="--ae" /> | |
| 177 <param name="dm" value="40" /> | |
| 178 </conditional> | |
| 179 </repeat> | |
| 180 | |
| 181 </conditional> | |
| 182 | |
| 183 <output_collection name="encoded_features" type="list"> | |
| 184 <element name="AE[40]_features" ftype="tabular" > | |
| 185 <assert_contents> | |
| 186 <has_n_lines n="20"/> | |
| 187 <!-- <has_n_columns n="40" sep="," /> --> | |
| 188 </assert_contents> | |
| 189 </element> | |
| 190 <element name="PCA_features" ftype="tabular" > | |
| 191 <assert_contents> | |
| 192 <has_n_lines n="20"/> | |
| 193 <!-- <has_n_columns n="40" sep="," /> --> | |
| 194 </assert_contents> | |
| 195 </element> | |
| 196 </output_collection> | |
| 197 | |
| 198 </test> | |
| 199 | |
| 200 <!-- encoding and classification --> | |
| 201 <!-- test a single parameter set --> | |
| 202 | |
| 203 <test expect_num_outputs="1"> | |
| 204 <param name="features" value="UserDataExample.csv" /> | |
| 205 <param name="mode_type" value="e_and_c" /> | |
| 206 <param name="class_labels" value="UserLabelExample.csv" /> | |
| 207 <param name="rl_type_choice" value="--vae" /> | |
| 208 <param name="dm" value="40" /> | |
| 209 <param name="classifier" value="rf" /> | |
| 210 <output ftype="tabular" name="results" > | |
| 211 <assert_contents> | |
| 212 <has_text text="VAE[40]_rf" /> | |
| 213 </assert_contents> | |
| 214 </output> | |
| 215 | |
| 216 </test> | |
| 217 | |
| 218 <!-- test multiple parameter sets in one run: each set must show up in the single result file --> | |
| 219 <test expect_num_outputs="1"> | |
| 220 <param name="features" value="UserDataExample.csv" /> | |
| 221 <conditional name="mode"> | |
| 222 <param name="mode_type" value="e_and_c" /> | |
| 223 <param name="class_labels" value="UserLabelExample.csv" /> | |
| 224 | |
| 225 <repeat name="parameter_set"> | |
| 226 <conditional name="rl_type"> | |
| 227 <param name="rl_type_choice" value="--cae" /> | |
| 228 <param name="dm" value="20" /> | |
| 229 <param name="classifier" value="rf" /> | |
| 230 </conditional> | |
| 231 </repeat> | |
| 232 | |
| 233 <repeat name="parameter_set"> | |
| 234 <conditional name="rl_type"> | |
| 235 <param name="rl_type_choice" value="--vae" /> | |
| 236 <param name="dm" value="40" /> | |
| 237 <param name="classifier" value="mlp" /> | |
| 238 </conditional> | |
| 239 </repeat> | |
| 240 | |
| 241 </conditional> | |
| 242 | |
| 243 <output ftype="tabular" name="results" > | |
| 244 <assert_contents> | |
| 245 <has_text text="CAE[20]_rf" /> | |
| 246 <has_text text="VAE[40]_mlp" /> | |
| 247 </assert_contents> | |
| 248 </output> | |
| 249 | |
| 250 </test> | |
| 251 | |
| 252 </tests> | |
| 253 <help> | |
| 254 <![CDATA[ | |
| 255 DeepMicro is a deep representation learning framework exploiting various autoencoders | |
| 256 to learn robust low-dimensional representations from high-dimensional data and training | |
| 257 classification models based on the learned representation. | |
| 258 | |
| 259 ====================================== | |
| 260 Option 1) Only representation learning | |
| 261 ====================================== | |
| 262 | |
| 263 The representation can be learned from the features alone; class labels are not required. | |
| 264 The wrapper allows exploring multiple parameter sets (i.e. different modes to | |
| 265 generate the features; please refer to the publication for details). For each | |
| 266 added parameter set the encoded features are generated. Those features can then be passed to subsequent ML tools, | |
| 267 such as `Ensemble methods for classification and regression` or `Split Dataset into training and test subsets`. | |
| 268 | |
| 269 ===================================================== | |
| 270 Option 2) Representation learning and classification | |
| 271 ===================================================== | |
| 272 | |
| 273 The tool itself can also evaluate the performance of the learned representation for different | |
| 274 classifiers internally using 5-fold cross-validation. The wrapper allows exploring multiple parameter sets and classifiers. | |
| 275 The result of each parameter set is stored as one line in the result file. If this option is chosen, the latent representation is not | |
| 276 exported as output. To create the latent representation of the complete feature set, run the tool again | |
| 277 with the same parameters using the `Only representation learning` option. | |
| 278 The header of the result file is: | |
| 279 | |
| 280 '{Encoding}_{classifier}, AUC, ACC, Recall, Precision, F1_score, time-end, runtime(sec), classfication time(sec), best hyper-parameter' | |
| 281 | |
| 282 The overall schema of the tool is shown in: | |
| 283 | |
| 284 .. image:: ML_Workflow.png | |
| 285 ]]> | |
| 286 </help> | |
| 287 <expand macro="citations" /> | |
| 288 <expand macro="creator" /> | |
| 289 </tool> |
