Mercurial > repos > tduigou > icfree_sampler

diff sampler.xml @ 0:a71884785dcc draft
planemo upload for repository https://github.com/brsynth/icfree-ml commit d87d4a4d7c894f2d884a71a2acc35f93bc5bbe4b
author: tduigou
date: Mon, 06 Mar 2023 12:51:20 +0000
children: 80f5b5db4bb0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sampler.xml	Mon Mar 06 12:51:20 2023 +0000
@@ -0,0 +1,86 @@
+<tool id="icfree_sampler" name="iCFree sampler" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="@LICENCE@">
+    <description>Generate data points using latin hypercube sampling (LHS)</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Ratios are stringified because str.join() rejects non-str items, and passed unquoted so each one is its own argument (assumes a multi-value option; confirm in icfree.sampler).
+        #set ratios = []
+        #for $x in $adv.rep
+            #silent ratios.append(str($x.ratio))
+        #end for
+        #set sratios = ' '.join($ratios)
+        python -m icfree.sampler
+            '$cfps_file'
+            #if len($ratios) > 0
+                --sampling-ratios ${sratios}
+            #end if
+            --nb-sampling-steps '$adv.nb_sampling_steps'
+            --nb-samples '$adv.nb_samples'
+            --seed '0'
+            --output-format 'tsv'
+            --output-folder .
+    ]]></command>
+    <inputs>
+        <param name="cfps_file" type="data" format="tabular" label="CFPS parameters and features" />
+        <section name="adv" title="Advanced Options" expanded="false"> <!-- each "argument" below is the sampler's command-line flag; Galaxy derives the parameter name from it (leading dashes stripped, remaining dashes turned into underscores) -->
+            <param argument="--nb-sampling-steps" type="integer" value="5" min="1" max="10" label="Number of values for all factors when performing the sampling" />
+            <param argument="--nb-samples" type="integer" value="99" min="1" max="198" label="Number of samples to generate" />
+            <repeat name="rep" title="Sampling ratios" min="0">
+                <param name="ratio" type="float" value="0.1" min="0.0" max="1.0" label="Ratio associated to each factor" />
+            </repeat>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="sampling" format="tabular"> <!-- NOTE(review): with pattern "__designation__" the discovered file's name is used as its designation (the test looks up "sampling.tsv"); no directory is set here, so presumably the job working directory that the command writes into is scanned; confirm -->
+            <discover_datasets pattern="__designation__" ext="tabular"/>
+        </data>
+    </outputs>
+    <tests>
+        <!-- test 1: default parameters; expects a 100-line sampling.tsv (presumably one header row plus the 99 default samples) whose header row lists the expected factor columns -->
+        <test>
+            <param name="cfps_file" value="converter_proCFPS_parameters.tsv" />
+            <output name="sampling" ftype="tabular">
+                <discovered_dataset designation="sampling.tsv" ftype="tabular">
+                    <assert_contents>
+                        <has_n_lines n="100"/>
+                        <has_line_matching expression="^Mg-glutamate\tk-glutamate\tCoA\t3-PGA\tNTP\tNAD\tFolinic acid\tSpermidine\ttRNA\tAmino acids\tCAMP\tExtract\tHEPES\tPEG\tPromoter\tRBS$"/>
+                    </assert_contents>
+                </discovered_dataset>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+Sampler
+=======
+
+This module generates a list of values for all parameters given in the input file.
+The values are generated using a Latin Hypercube Sampling (LHS) method (lhs function from the pyDOE package).
+The number of values to generate is set by the user, and the values are saved in a CSV or TSV file.
+
+It is important to note that the user can pass some values that they wish to combine. In this case, we are dealing with a discrete space, and because LHS works on a continuous space, the resulting sampling can contain duplicates.
+To avoid this, we have set some filters to select the appropriate sampling method:
+
+* If the resulting sampling contains duplicates, then we replace them with random samples.
+* Proceed with full random sampling.
+* Generate all the combinations.
+
+Input
+-----
+* **CFPS Parameters File**: The first column is the parameter (or factor) name. The second column is the maxValue of the parameter that will be used in the sampling. The third column is the concentration of the stock. The fourth column is the deadVolume of the parameter. This is used to calculate the volume of the parameter that will not be pipetted by the robot (because of viscosity). The fifth column is the specific ratios we want to have for this parameter. If nothing is defined, the ratios given in the program options are used. If a single number is given, it is used as a constant value.
+
+Advanced options:
+-----------------
+* **Number of sampling steps**: Number of values for all factors when performing the sampling (default: 5)
+* **Sampling ratios**: Ratios for all factors when performing the sampling
+* **Number of samples**: Number of samples to generate for all factors when performing the sampling (default: 99)
+
+Output
+------
+* a TSV file with the sampling values for each parameter
+    ]]></help>
+    <expand macro="creator"/>
+    <expand macro="citation"/>
+</tool>
author	tduigou
date	Mon, 06 Mar 2023 12:51:20 +0000
parents
children	80f5b5db4bb0