|
7
|
<tool id="bg_eden_feature" name="EDeN Converters" version="0.1">
    <!--
        Galaxy wrapper around the FEATURE action of the EDeN command line tool.
        It takes one input dataset and declares a single "sparsevector" output.
        The requirements, the @...@ command tokens and several input sections are
        provided by eden_macros.xml and are not visible in this file.
    -->
    <description>to produce sparse vectors</description>
    <macros>
        <import>eden_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!--
        Command template (Cheetah).
        * Dataset paths are single-quoted so that unusual characters in a path
          cannot split the argument.
        * The sequence options are declared inside the "file_type_opts"
          conditional, so they have to be addressed through that conditional
          (file_type_opts.sequence_degree and so on); a bare top-level reference
          is undefined when the template is filled.
        * The boolean sequence parameters expand to their command line flag when
          checked and to an empty string otherwise (see truevalue / falsevalue).
    -->
    <command>
        EDeN --action FEATURE

            --input_data_file_name '$infile'
            --model_file_name '$outfile'

            --file_type $file_type_opts.file_type_opts_selector

            --binary_file_type ## create a binary sparse vector as output

            @normalization_kernel_hash_radius_dist_vertex@

            --kernel_type $kernel_type_opts.kernel_type_opts_selector
            --graph_type $graph_type

            #if $file_type_opts.file_type_opts_selector == 'SEQUENCE':
                --sequence_degree $file_type_opts.sequence_degree
                $file_type_opts.sequence_token
                $file_type_opts.sequence_multi_line
                $file_type_opts.sequence_pairwise_interaction
            #end if

            @kernel_type_options@

            @input_smooth_conditional@

    </command>
    <!-- Any occurrence of "Error" on stdout or stderr marks the job as failed. -->
    <stdio>
        <regex match="Error"
               source="both"
               level="fatal"
               description="An error occurred with your Job." />
    </stdio>
    <inputs>
        <param format="gspan,txt" name="infile" type="data" label="Input file"
               help="File can contain Graph datatypes (gSpan, sparse vector, sequence)."/>

        <!-- Only the SEQUENCE file type carries additional options. -->
        <conditional name="file_type_opts">
            <param name="file_type_opts_selector" type="select" label="Type of Input file">
                <option value="GRAPH">Graph</option>
                <option value="SPARSE_VECTOR">sparse vector</option>
                <option value="SEQUENCE">Sequence</option>
                <option value="STRINGSEQ">String (can be any word like character sequence)</option>
            </param>
            <when value="GRAPH" />
            <when value="SPARSE_VECTOR" />
            <when value="SEQUENCE">
                <param name="sequence_degree" type="integer" value="1" label="Sequence degree" help="">
                    <validator type="in_range" min="1" />
                </param>
                <param name="sequence_token" type="boolean" label="Sequence token" truevalue="--sequence_token" falsevalue="" checked="false" />
                <param name="sequence_multi_line" type="boolean" label="Sequence is in multi-line notation" truevalue="--sequence_multi_line" falsevalue="" checked="false" />
                <param name="sequence_pairwise_interaction" type="boolean" label="Sequence pairwise interactions" truevalue="--sequence_pairwise_interaction" falsevalue="" checked="false" />
            </when>
            <when value="STRINGSEQ" />
        </conditional>

        <expand macro="kernel_type_options" />

        <expand macro="graph_types" />

        <expand macro="input_smooth_conditional" />

        <expand macro="normalization_kernel_hash_radius_dist_vertex" />

    </inputs>
    <configfiles>
        <!-- The strange indentation is necessary, otherwise we get line breaks or white space in our file -->
        <!--
            Both config files expand a comma separated index list such as "1,4-7"
            into one index per line, and are only filled when smoothing is selected.
            NOTE(review): range() excludes its upper bound, so "4-7" is written as
            4, 5, 6. Confirm that an exclusive end is what EDeN expects.
        -->
        <configfile name="row_index_file_name">#if $smooth_opts.smooth_opts_selector == 'smooth':
#for $element in str( $smooth_opts.row_index ).split(','):
#set $element = $element.strip().split('-')
#if len($element) == 2:
#for $index in range( int($element[0]), int($element[1]) ):
## the following writes the value at the beginning of each line
## #echo $index# inserts a line break automatically, but do not write it
## to the beginning of the line
#echo '%s\n' % $index
#end for
#else:
#echo '%s\n' % $element[0]
#end if
#end for
#end if
</configfile>
        <configfile name="col_index_file_name">#if $smooth_opts.smooth_opts_selector == 'smooth':
#for $element in str( $smooth_opts.col_index ).split(','):
#set $element = $element.strip().split('-')
#if len($element) == 2:
#for $index in range( int($element[0]), int($element[1]) ):
## the following writes the value at the beginning of each line
## #echo $index# inserts a line break automatically, but do not write it
## to the beginning of the line
#echo '%s\n' % $index
#end for
#else:
#echo '%s\n' % $element[0]
#end if
#end for
#end if
</configfile>
    </configfiles>
    <outputs>
        <data format="sparsevector" name="outfile" from_work_dir="feature" label="Sparse Vector from ${on_string}"/>
    </outputs>
    <!--
        NOTE(review): the input name "3_molceuls.sdf" looks like a misspelling of
        "3_molecules.sdf", and an SDF input with a gSpan expected output does not
        obviously match the formats declared above (gspan,txt in; sparsevector out).
        The names are left untouched here; check them against the test-data
        directory before changing them.
    -->
    <tests>
        <test>
            <param name="infile" value="3_molceuls.sdf" />
            <output name="outfile" file="3_molecules.gspan" />
        </test>
    </tests>
    <!--
        NOTE(review): the help text below describes inducing a linear model rather
        than converting input to sparse vectors; it may have been carried over from
        another wrapper. Confirm and rewrite if so.
    -->
    <help>

.. class:: infomark

**What it does**

The linear model is induced using the accelerated stochastic gradient descent technique by Léon Bottou and Yann LeCun.
When the target information is 0, a self-training algorithm is used to impute a positive or negative class to the unsupervised instances.
If the target information is imbalanced a minority class resampling technique is used to rebalance the training set.

@references@

    </help>
</tool>
|