Mercurial > repos > bgruening > eden_toolbox

diff EDeN_feature.xml @ 2:a3edc97e056c draft
Uploaded
author: bgruening
date: Thu, 05 Sep 2013 11:40:29 -0400
parents: 64a1fb09b10d
children: e1fc8ecabba7
--- a/EDeN_feature.xml	Wed Sep 04 07:59:08 2013 -0400
+++ b/EDeN_feature.xml	Thu Sep 05 11:40:29 2013 -0400
@@ -14,10 +14,10 @@
         #set $temp_gspan = $temp_gspan.name
 
         #if $file_type_opts.file_type_opts_selector == 'sdf':
-            obabel -i sdf -o sdf $infile | python \$EDEN_SCRIPT_PATH/mol2gspan.py --infile - --outfile $temp_gspan
+            obabel -i sdf -o sdf $infile ---errorlevel 1 | python \$EDEN_SCRIPT_PATH/mol2gspan.py --infile - --outfile $temp_gspan
             #set $file_type = 'GRAPH'
         #elif $file_type_opts.file_type_opts_selector == 'smi':
-            obabel -i smi -o sdf $infile | python \$EDEN_SCRIPT_PATH/mol2gspan.py --infile - --outfile $temp_gspan
+            obabel -i smi -o sdf $infile ---errorlevel 1 | python \$EDEN_SCRIPT_PATH/mol2gspan.py --infile - --outfile $temp_gspan
             #set $file_type = 'GRAPH'
         #else:
             #set $temp_gspan = $infile
@@ -48,7 +48,7 @@
         $no_normalization
         $min_kernel
 
-        --kernel_type $kernel_type  ##NSPDK | WDK | PBK | USPK | DDK | NSDDK | ANSDDK | SK [NSPDK]
+        --kernel_type $kernel_type_opts.kernel_type  ##NSPDK | WDK | PBK | USPK | DDK | NSDDK | ANSDDK | SK [NSPDK]
         --graph_type $graph_type    ##DIRECTED | UNDIRECTED [UNDIRECTED]
 
         #if $file_type_opts.file_type_opts_selector == 'SEQUENCE':
@@ -60,9 +60,10 @@
 
         #end if
 
-        --tree_lambda $tree_lambda
-        --radius_two $radius_two
-
+        #if $kernel_type_opts.kernel_type in ['DDK','NSDDK','SK']:
+            --tree_lambda $kernel_type_opts.tree_lambda
+            --radius_two $kernel_type_opts.radius_two
+        #end if
 
         ### Adds rescaled features from nearest neighbors ###
 
@@ -87,6 +88,13 @@
 
 
     </command>
+    <!-- Mark the job as failed when the text "Error" shows up in the tool's output (source="both": stdout and stderr). -->
+    <stdio>
+        <regex match="Error"
+           source="both"
+           level="fatal"
+           description="An error occurred with your Job." />
+    </stdio>
     <inputs>
         <param format="smi,gspan,inchi,sdf,mol,mol2,txt" name="infile" type="data" label="Input file" 
             help="File can contain molecule data types (SMILES, InChI, SDF) or Graph datatypes (gSpan, sparse vector, sequence)."/>
@@ -113,85 +120,58 @@
             <when value="smi" />
         </conditional>
 
-        <param name="kernel_type" type="select" display="radio" label="Type of the Kernel">
-            <option value="NSPDK">NSPDK</option>
-            <option value="WDK">WDK</option>
-            <option value="PBK">PBK</option>
-            <option value="USPK">USPK</option>
-            <option value="DDK">DDK</option>
-            <option value="NSDDK">ANSDDK</option>
-            <option value="SK">SK [NSPDK]</option>
-        </param>
+        <!-- Kernel selection. The selector is named "kernel_type" so that $kernel_type_opts.kernel_type in the command template resolves. -->
+        <conditional name="kernel_type_opts">
+            <param name="kernel_type" type="select" label="Type of the Kernel">
+                <option value="NSPDK">NSPDK</option>
+                <option value="WDK">WDK</option>
+                <option value="PBK">PBK</option>
+                <option value="USPK">USPK</option>
+                <option value="DDK">DDK</option>
+                <option value="NSDDK">NSDDK</option>
+                <option value="SK">SK</option>
+            </param>
+            <when value="NSPDK" />
+            <when value="WDK" />
+            <when value="PBK" />
+            <when value="USPK" />
+            <!-- A <when> matches exactly one selector value, so DDK, NSDDK and SK each repeat the two extra parameters. -->
+            <when value="DDK">
+                <param name="radius_two" type="integer" value="2" label="Radius Two" help="">
+                    <validator type="in_range" min="1" />
+                </param>
+                <param name="tree_lambda" type="float" value="1.2" label="Tree lambda" help="">
+                    <validator type="in_range" min="0.0" />
+                </param>
+            </when>
+            <when value="NSDDK">
+                <param name="radius_two" type="integer" value="2" label="Radius Two" help="">
+                    <validator type="in_range" min="1" />
+                </param>
+                <param name="tree_lambda" type="float" value="1.2" label="Tree lambda" help="">
+                    <validator type="in_range" min="0.0" />
+                </param>
+            </when>
+            <when value="SK">
+                <param name="radius_two" type="integer" value="2" label="Radius Two" help="">
+                    <validator type="in_range" min="1" />
+                </param>
+                <param name="tree_lambda" type="float" value="1.2" label="Tree lambda" help="">
+                    <validator type="in_range" min="0.0" />
+                </param>
+            </when>
+        </conditional>
+
 
         <param name="graph_type" type="select" display="radio" label="Type of Graph">
             <option value="DIRECTED">directed</option>
             <option value="UNDIRECTED">undirected</option>
         </param>
 
-
-        <conditional name="smooth_opts">
-            <param name="smooth_opts_selector" type="select" label="Adds rescaled features from nearest neighbors (--smooth)">
-              <option value="non_smooth" selected="True">Disable smooth</option>
-              <option value="smooth">Enable smooth</option>
-            </param>
-            <when value="non_smooth" />
-            <when value="smooth">
-
-                <param name="smoother_param" type="float" value="0.95" label="Scaling features from neighbors"
-                    help="Features from neighbors are scaled by the kernel value to the power value assigned to this switch.">
-                    <validator type="in_range" min="0.0" />
-                </param>
-
-                <param name="no_minhash_cache" type="boolean" label="Deactivate minhash cache" truevalue="--no_minhash_cache" falsevalue="" checked="false" />
-                <param name="no_neighborhood_cache" type="boolean" label="Deactivate neighborhood cache" truevalue="-no_neighborhood_cache" falsevalue="" checked="false" />
-                <param name="shared_neighborhood" type="boolean" label="Activate shared neighborhood" truevalue="--shared_neighborhood" falsevalue="" checked="false" />
-
-                <param name="num_hash_functions" type="integer" value="400" label="Number of hash functions" help="">
-                    <validator type="in_range" min="1" />
-                </param>
-                <param name="num_repeat_hash_functions" type="integer" value="10" label="Number of repeats for each hash functions" help="">
-                    <validator type="in_range" min="1" />
-                </param>
-                <param name="max_size_bin" type="float" value="0.3" label="Maximum size of one bin" 
-                    help="Expressed as the maximum fraction of the datset size. When a bin contains references to more instances than this quantity, the bin is erased. The ratio is that this featrue is common to too many instances and it is therefore not informative. Morover the runtimes become non sub-linear if a significant fraction of the dataset size has to be checked.">
-                    <validator type="in_range" min="0.0" />
-                </param>
-                <param name="eccess_neighbour_size_factor" type="float" value="5.0" label="Access neighborhood size factor" 
-                    help="Expressed as a multiplicative factor w.r.t. the neighborhood size required. It means that the approximate neighborhood query stops at the X most frequent instances, where X = eccess_neighbor_size_factor * neighborhood size.">
-                    <validator type="in_range" min="0.0" />
-                </param>
-                <param name="num_nearest_neighbours" type="integer" value="10" label="Number of nearest neighbors" help="">
-                    <validator type="in_range" min="1" />
-                </param>
-
-                <param name="row_index" type="text" size="30" label="Row indieces of your input file that should be converted" 
-                    help="Specify a subset of your dataset by providing the row indieces that should be taken into account.">
-                    <sanitizer>
-                        <valid initial="string.digits">
-                            <add value="," />
-                            <add value="-" />
-                            <add value=" " />
-                        </valid>
-                    </sanitizer>
-                    <validator type="empty_field" message="You need to specify row indieces"/>
-                </param>
-                <param name="col_index" type="text" size="30" label="Column indieces of your input file that should be converted" 
-                    help="Specify a subset of your dataset by providing the column indieces that should be taken into account.">
-                    <sanitizer>
-                        <valid initial="string.digits">
-                            <add value="," />
-                            <add value="-" />
-                            <add value=" " />
-                        </valid>
-                    </sanitizer>
-                    <validator type="empty_field" message="You need to specify column indieces"/>
-                </param>
-
-            </when>
-        </conditional>
+        <expand macro="input_smooth_conditional" />
 
         <param name="no_normalization" type="boolean" label="Skip normalization" truevalue="--no_normalization" falsevalue="" checked="false" />
-        <param name="min_kernel" type="boolean" label="Use minimal kernel" truevalue="--min_kernel" falsevalue="" checked="false" />
+        <param name="min_kernel" type="boolean" label="Use min kernel" truevalue="--min_kernel" falsevalue="" checked="false" />
 
         <param name="hash_bit_size" type="integer" value="15" label="Bit size of the used hashing function" help="">
             <validator type="in_range" min="1" />
@@ -205,12 +167,7 @@
         <param name="vertex_degree_threshold" type="integer" value="7" label="Vertex degree threshold" help="">
             <validator type="in_range" min="1" />
         </param>
-        <param name="radius_two" type="integer" value="2" label="Radius Two" help="">
-            <validator type="in_range" min="1" />
-        </param>
-        <param name="tree_lambda" type="float" value="1.2" label="Tree lambda" help="">
-            <validator type="in_range" min="0.0" />
-        </param>
+
 
     </inputs>
     <configfiles>
author	bgruening
date	Thu, 05 Sep 2013 11:40:29 -0400
parents	64a1fb09b10d
children	e1fc8ecabba7