annotate EDeN_feature.xml @ 11:bf63bd4cf462 draft default tip

Uploaded
author bgruening
date Thu, 15 May 2014 17:25:44 -0400
parents 5be8af51780d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7
59b3b6ce10bb Uploaded
bgruening
parents: 6
diff changeset
1 <tool id="bg_eden_feature" name="EDeN Converters" version="0.1">
59b3b6ce10bb Uploaded
bgruening
parents: 6
diff changeset
2 <description>to produce sparse vectors</description>
3
e1fc8ecabba7 Uploaded
bgruening
parents: 2
diff changeset
3 <macros>
e1fc8ecabba7 Uploaded
bgruening
parents: 2
diff changeset
4 <import>eden_macros.xml</import>
e1fc8ecabba7 Uploaded
bgruening
parents: 2
diff changeset
5 </macros>
7
59b3b6ce10bb Uploaded
bgruening
parents: 6
diff changeset
6 <expand macro="requirements" />
0
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
7 <command>
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
8 EDeN --action FEATURE
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
9
7
59b3b6ce10bb Uploaded
bgruening
parents: 6
diff changeset
10 --input_data_file_name $infile
0
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
11 --model_file_name $outfile
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
12
7
59b3b6ce10bb Uploaded
bgruening
parents: 6
diff changeset
13 --file_type $file_type_opts.file_type_opts_selector
0
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
14
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
15 --binary_file_type ## create a binary sparse vector as output
5
be1433b0833b Uploaded
bgruening
parents: 4
diff changeset
16
7
59b3b6ce10bb Uploaded
bgruening
parents: 6
diff changeset
17 @normalization_kernel_hash_radius_dist_vertex@
0
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
18
5
be1433b0833b Uploaded
bgruening
parents: 4
diff changeset
19 --kernel_type $kernel_type_opts.kernel_type_opts_selector
be1433b0833b Uploaded
bgruening
parents: 4
diff changeset
20 --graph_type $graph_type
0
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
21
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
22 #if $file_type_opts.file_type_opts_selector == 'SEQUENCE':
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
23 --sequence_degree $sequence_degree
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
24 $sequence_token
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
25 $sequence_multi_line
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
26 $sequence_pairwise_interaction
2
a3edc97e056c Uploaded
bgruening
parents: 1
diff changeset
27 #end if
0
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
28
7
59b3b6ce10bb Uploaded
bgruening
parents: 6
diff changeset
29 @kernel_type_options@
0
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
30
7
59b3b6ce10bb Uploaded
bgruening
parents: 6
diff changeset
31 @input_smooth_conditional@
0
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
32
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
33 </command>
2
a3edc97e056c Uploaded
bgruening
parents: 1
diff changeset
34 <stdio>
a3edc97e056c Uploaded
bgruening
parents: 1
diff changeset
35 <regex match="Error"
a3edc97e056c Uploaded
bgruening
parents: 1
diff changeset
36 source="both"
a3edc97e056c Uploaded
bgruening
parents: 1
diff changeset
37 level="fatal"
a3edc97e056c Uploaded
bgruening
parents: 1
diff changeset
38 description="An error occurred with your Job." />
a3edc97e056c Uploaded
bgruening
parents: 1
diff changeset
39 </stdio>
0
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
40 <inputs>
7
59b3b6ce10bb Uploaded
bgruening
parents: 6
diff changeset
41 <param format="gspan,txt" name="infile" type="data" label="Input file"
59b3b6ce10bb Uploaded
bgruening
parents: 6
diff changeset
42 help="File can contain Graph datatypes (gSpan, sparse vector, sequence)."/>
0
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
43
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
44 <conditional name="file_type_opts">
1
64a1fb09b10d Uploaded
bgruening
parents: 0
diff changeset
45 <param name="file_type_opts_selector" type="select" label="Type of Input file">
0
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
46 <option value="GRAPH">Graph</option>
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
47 <option value="SPARSE_VECTOR">Sparse vector</option>
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
48 <option value="SEQUENCE">Sequence</option>
9
5be8af51780d Uploaded
bgruening
parents: 7
diff changeset
49 <option value="STRINGSEQ">String (can be any word-like character sequence)</option>
0
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
50 </param>
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
51 <when value="GRAPH" />
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
52 <when value="SPARSE_VECTOR" />
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
53 <when value="SEQUENCE">
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
54 <param name="sequence_degree" type="integer" value="1" label="Sequence degree" help="">
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
55 <validator type="in_range" min="1" />
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
56 </param>
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
57 <param name="sequence_token" type="boolean" label="Sequence token" truevalue="--sequence_token" falsevalue="" checked="false" />
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
58 <param name="sequence_multi_line" type="boolean" label="Sequence is in multi-line notation" truevalue="--sequence_multi_line" falsevalue="" checked="false" />
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
59 <param name="sequence_pairwise_interaction" type="boolean" label="Sequence pairwise interactions" truevalue="--sequence_pairwise_interaction" falsevalue="" checked="false" />
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
60 </when>
9
5be8af51780d Uploaded
bgruening
parents: 7
diff changeset
61 <when value="STRINGSEQ" />
0
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
62 </conditional>
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
63
7
59b3b6ce10bb Uploaded
bgruening
parents: 6
diff changeset
64 <expand macro="kernel_type_options" />
2
a3edc97e056c Uploaded
bgruening
parents: 1
diff changeset
65
7
59b3b6ce10bb Uploaded
bgruening
parents: 6
diff changeset
66 <expand macro="graph_types" />
0
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
67
2
a3edc97e056c Uploaded
bgruening
parents: 1
diff changeset
68 <expand macro="input_smooth_conditional" />
0
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
69
7
59b3b6ce10bb Uploaded
bgruening
parents: 6
diff changeset
70 <expand macro="normalization_kernel_hash_radius_dist_vertex" />
0
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
71
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
72 </inputs>
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
73 <configfiles>
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
74 <!-- The strange indentation is necessary, otherwise we get line breaks or white space in our file -->
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
75 <configfile name="row_index_file_name">#if $smooth_opts.smooth_opts_selector == 'smooth':
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
76 #for $element in str( $smooth_opts.row_index ).split(','):
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
77 #set $element = $element.strip().split('-')
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
78 #if len($element) == 2:
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
79 #for $index in range( int($element[0]), int($element[1]) ):
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
80 ## the following writes the value at the beginning of each line
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
81 ## #echo $index# inserts a line break automatically, but do not write it
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
82 ## to the beginning of the line
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
83 #echo '%s\n' % $index
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
84 #end for
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
85 #else:
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
86 #echo '%s\n' % $element[0]
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
87 #end if
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
88 #end for
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
89 #end if
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
90 </configfile>
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
91 <configfile name="col_index_file_name">#if $smooth_opts.smooth_opts_selector == 'smooth':
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
92 #for $element in str( $smooth_opts.col_index ).split(','):
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
93 #set $element = $element.strip().split('-')
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
94 #if len($element) == 2:
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
95 #for $index in range( int($element[0]), int($element[1]) ):
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
96 ## the following writes the value at the beginning of each line
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
97 ## #echo $index# inserts a line break automatically, but do not write it
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
98 ## to the beginning of the line
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
99 #echo '%s\n' % $index
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
100 #end for
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
101 #else:
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
102 #echo '%s\n' % $element[0]
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
103 #end if
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
104 #end for
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
105 #end if
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
106 </configfile>
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
107 </configfiles>
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
108 <outputs>
9
5be8af51780d Uploaded
bgruening
parents: 7
diff changeset
109 <data format="sparsevector" name="outfile" from_work_dir="feature" label="Sparse Vector from ${on_string}"/>
0
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
110 </outputs>
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
111 <tests>
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
112 <test>
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
113 <param name="infile" value="3_molceuls.sdf" />
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
114 <output name="outfile" file="3_molecules.gspan" />
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
115 </test>
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
116 </tests>
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
117 <help>
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
118
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
119 .. class:: infomark
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
120
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
121 **What it does**
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
122
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
123 The linear model is induced using the accelerated stochastic gradient descent technique by Léon Bottou and Yann LeCun.
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
124 When the target information is 0, a self-training algorithm is used to impute a positive or negative class to the unsupervised instances.
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
125 If the target information is imbalanced, a minority class resampling technique is used to rebalance the training set.
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
126
7
59b3b6ce10bb Uploaded
bgruening
parents: 6
diff changeset
127 @references@
0
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
128
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
129 </help>
99091a5d5c84 Uploaded
bgruening
parents:
diff changeset
130 </tool>