Mercurial > repos > bgruening > eden_toolbox

diff EDeN_feature.xml @ 7:59b3b6ce10bb draft
Uploaded
author: bgruening
date: Tue, 29 Oct 2013 11:07:49 -0400
parents: 7d49e315cb95
children: 5be8af51780d
--- a/EDeN_feature.xml	Thu Sep 05 12:52:45 2013 -0400
+++ b/EDeN_feature.xml	Tue Oct 29 11:07:49 2013 -0400
@@ -1,93 +1,42 @@
-<tool id="bg_eden_feature" name="EDeN Converter" version="0.1">
-    <description></description>
-    <requirements>
-        <requirement type="package" version="2.3.2">openbabel</requirement>
-        <requirement type="set_environment">EDEN_SCRIPT_PATH</requirement>
-    </requirements>
+<tool id="bg_eden_feature" name="EDeN Converters" version="0.1">
+    <description>to produce sparse vectors</description>
     <macros>
         <import>eden_macros.xml</import>
     </macros>
+    <expand macro="requirements" />
     <command>
-
-        ## pre-processing step if we have a molecule type we need to convert it to the gSpan format at first
-
-        #import tempfile, os
-        #set $temp_gspan = tempfile.NamedTemporaryFile( delete=False )
-        #silent $temp_gspan.close()
-        #set $temp_gspan = $temp_gspan.name
-
-        #if $file_type_opts.file_type_opts_selector == 'sdf':
-            obabel -i sdf -o sdf $infile ---errorlevel 1 | python \$EDEN_SCRIPT_PATH/mol2gspan.py --infile - --outfile $temp_gspan
-            #set $file_type = 'GRAPH'
-        #elif $file_type_opts.file_type_opts_selector == 'smi':
-            obabel -i smi -o sdf $infile ---errorlevel 1 | python \$EDEN_SCRIPT_PATH/mol2gspan.py --infile - --outfile $temp_gspan
-            #set $file_type = 'GRAPH'
-        #else:
-            #set $temp_gspan = $infile
-            #set $file_type = $file_type_opts.file_type_opts_selector
-        #end if
-        ;
+        tmp_dir=`mktemp -d -u`;
 
         EDeN --action FEATURE
 
-        --input_data_file_name $temp_gspan
+        --output_directory_path \$tmp_dir
+
+        --input_data_file_name $infile
         --model_file_name $outfile
 
-        ## if we have an molecule datatype the file_type is set to GRAPH, after convertion to the gSpan Graph format
-        --file_type $file_type
+        --file_type $file_type_opts.file_type_opts_selector
 
         --binary_file_type ## create a binary sparse vector as output
 
-
-        $no_normalization
-        $min_kernel
-
-        --hash_bit_size $hash_bit_size
-        --radius $radius
-        --distance $distance
-        --vertex_degree_threshold $vertex_degree_threshold
-
-        $no_normalization
-        $min_kernel
+        @normalization_kernel_hash_radius_dist_vertex@
 
         --kernel_type $kernel_type_opts.kernel_type_opts_selector
         --graph_type $graph_type
 
         #if $file_type_opts.file_type_opts_selector == 'SEQUENCE':
-
             --sequence_degree $sequence_degree
             $sequence_token
             $sequence_multi_line
             $sequence_pairwise_interaction
-
-        #end if
-
-        #if $kernel_type_opts.kernel_type_opts_selector in ['DDK','NSDDK','SK']:
-            --tree_lambda $kernel_type_opts.tree_lambda
-            --radius_two $kernel_type_opts.radius_two
         #end if
 
-        ### Adds rescaled features from nearest neighbors ###
-
-        #if $smooth_opts.smooth_opts_selector == 'smooth':
-            --smooth
-            --smooth_param $smooth_opts.smoother_param
+        @kernel_type_options@
 
-            --row_index_file_name $row_index_file_name
-            --col_index_file_name $col_index_file_name
-            --num_hash_functions $smooth_opts.num_hash_functions
-            --num_repeat_hash_functions $smooth_opts.num_repeat_hash_functions
-            --max_size_bin $smooth_opts.max_size_bin
-            --eccess_neighbour_size_factor $smooth_opts.eccess_neighbour_size_factor
-            --num_nearest_neighbours $smooth_opts.num_nearest_neighbours
-            $smooth_opts.shared_neighborhood
-            $smooth_opts.no_neighborhood_cache
-            $smooth_opts.no_minhash_cache
-        #end if
+        @input_smooth_conditional@
 
         ;
-        rm $temp_gspan
-
+        cp \$tmp_dir/feature $outfile;
+        rm \$tmp_dir -rf;
 
     </command>
     <stdio>
@@ -97,16 +46,14 @@
           description="An error occurred with your Job." />
     </stdio>
     <inputs>
-        <param format="smi,gspan,inchi,sdf,mol,mol2,txt" name="infile" type="data" label="Input file" 
-            help="File can contain molecule data types (SMILES, InChI, SDF) or Graph datatypes (gSpan, sparse vector, sequence)."/>
+        <param format="gspan,txt" name="infile" type="data" label="Input file" 
+            help="File can contain Graph datatypes (gSpan, sparse vector, sequence)."/>
 
         <conditional name="file_type_opts">
             <param name="file_type_opts_selector" type="select" label="Type of Input file">
                 <option value="GRAPH">Graph</option>
                 <option value="SPARSE_VECTOR">sparse vector</option>
                 <option value="SEQUENCE">Sequence</option>
-                <option value="sdf">SDF</option>
-                <option value="smi">SMILES</option>
             </param>
             <when value="GRAPH" />
             <when value="SPARSE_VECTOR" />
@@ -122,70 +69,13 @@
             <when value="smi" />
         </conditional>
 
-        <conditional name="kernel_type_opts">
-            <param name="kernel_type_opts_selector" type="select" label="Type of the Kernel">
-                <option value="NSPDK">NSPDK</option>
-                <option value="WDK">WDK</option>
-                <option value="PBK">PBK</option>
-                <option value="USPK">USPK</option>
-                <option value="DDK">DDK</option>
-                <option value="NSDDK">ANSDDK</option>
-                <option value="SK">SK [NSPDK]</option>
-            </param>
-            <when value="NSPDK" />
-            <when value="WDK" />
-            <when value="PBK" />
-            <when value="USPK" />
-            <when value="SK">
-                <param name="radius_two" type="integer" value="2" label="Radius Two" help="">
-                    <validator type="in_range" min="1" />
-                </param>
-                <param name="tree_lambda" type="float" value="1.2" label="Tree lambda" help="">
-                    <validator type="in_range" min="0.0" />
-                </param>
-            </when>
-            <when value="DDK">
-                <param name="radius_two" type="integer" value="2" label="Radius Two" help="">
-                    <validator type="in_range" min="1" />
-                </param>
-                <param name="tree_lambda" type="float" value="1.2" label="Tree lambda" help="">
-                    <validator type="in_range" min="0.0" />
-                </param>
-            </when>
-            <when value="NSDDK">
-                <param name="radius_two" type="integer" value="2" label="Radius Two" help="">
-                    <validator type="in_range" min="1" />
-                </param>
-                <param name="tree_lambda" type="float" value="1.2" label="Tree lambda" help="">
-                    <validator type="in_range" min="0.0" />
-                </param>
-            </when>
-        </conditional>
+        <expand macro="kernel_type_options" />
 
-
-        <param name="graph_type" type="select" display="radio" label="Type of Graph">
-            <option value="DIRECTED">directed</option>
-            <option value="UNDIRECTED">undirected</option>
-        </param>
+        <expand macro="graph_types" />
 
         <expand macro="input_smooth_conditional" />
 
-        <param name="no_normalization" type="boolean" label="Skip normalization" truevalue="--no_normalization" falsevalue="" checked="false" />
-        <param name="min_kernel" type="boolean" label="Use min kernel" truevalue="--min_kernel" falsevalue="" checked="false" />
-
-        <param name="hash_bit_size" type="integer" value="15" label="Bit size of the used hashing function" help="">
-            <validator type="in_range" min="1" />
-        </param>
-        <param name="radius" type="integer" value="2" label="Radius that defines a neighborhood" help="">
-            <validator type="in_range" min="1" />
-        </param>
-        <param name="distance" type="integer" value="5" label="Distance that defines a neighborhood" help="">
-            <validator type="in_range" min="1" />
-        </param>
-        <param name="vertex_degree_threshold" type="integer" value="7" label="Vertex degree threshold" help="">
-            <validator type="in_range" min="1" />
-        </param>
-
+        <expand macro="normalization_kernel_hash_radius_dist_vertex" />
 
     </inputs>
     <configfiles>
@@ -224,7 +114,7 @@
 </configfile>
     </configfiles>
     <outputs>
-        <data format="eden_sparse_vector" name="outfile" label="Sparse vector from ${on_string}"/>
+        <data format="sparsevector" name="outfile" label="Sparse Vector from ${on_string}"/>
     </outputs>
     <tests>
         <test>
@@ -242,7 +132,7 @@
 When the target information is 0, a self-training algorithm is used to impute a positive or negative class to the unsupervised instances.
 If the target information is imbalanced a minority class resampling technique is used to rebalance the training set.
 
-This tool is part of the EDeN (Explicit Decomposition with Neighborhoods) suite, developed by Fabrizio Costa.
+@references@
 
     </help>
 </tool>
author	bgruening
date	Tue, 29 Oct 2013 11:07:49 -0400
parents	7d49e315cb95
children	5be8af51780d