annotate cmbuild.xml @ 13:ffa96097e5b2 draft

Uploaded
author bgruening
date Fri, 13 Feb 2015 03:12:04 -0500
parents c17ed5a530d2
children 2f8a8962ae22
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
12
c17ed5a530d2 Uploaded
bgruening
parents: 11
diff changeset
1 <tool id="infernal_cmbuild" name="Build covariance models" version="1.1.0.1">
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
2 <description>from sequence alignments (cmbuild)</description>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
3 <parallelism method="multi" split_inputs="alignment_infile" split_mode="to_size" split_size="10" shared_inputs="" merge_outputs="cmfile_outfile"></parallelism>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
4 <requirements>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
5 <requirement type="package">infernal</requirement>
12
c17ed5a530d2 Uploaded
bgruening
parents: 11
diff changeset
6 <requirement type="package" version="1.1">infernal</requirement>
13
ffa96097e5b2 Uploaded
bgruening
parents: 12
diff changeset
7 <requirement type="package" version="8.22">gnu_coreutils</requirement>
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
8 </requirements>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
9 <command>
13
ffa96097e5b2 Uploaded
bgruening
parents: 12
diff changeset
10 <![CDATA[
ffa96097e5b2 Uploaded
bgruening
parents: 12
diff changeset
11 cmbuild
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
12 #if $is_summery_output:
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
13 -o $summary_outfile
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
14 #end if
13
ffa96097e5b2 Uploaded
bgruening
parents: 12
diff changeset
15
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
16 ## too many outputs, is that one really needed?
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
17 ##-O $annotated_source_alignment_outfile
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
18
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
19 $model_construction_opts.model_construction_opts_selector
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
20 #if str($model_construction_opts.model_construction_opts_selector) == '--fast':
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
21 --symfrac $model_construction_opts.symfrac
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
22 #end if
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
23
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
24 $noss
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
25
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
26 $relative_weights_opts.relative_weights_opts_selector
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
27 #if str($relative_weights_opts.relative_weights_opts_selector) == '--wblosum':
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
28 --wid $relative_weights_opts.wid
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
29 #end if
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
30
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
31 $effective_opts.effective_opts_selector
11
5c6344f67ad0 Uploaded
bgruening
parents: 10
diff changeset
32 #if str($effective_opts.effective_opts_selector) == '--eent':
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
33 --ere $effective_opts.ere
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
34 --eminseq $effective_opts.eminseq
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
35 --ehmmre $effective_opts.ehmmre
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
36 --eset $effective_opts.eset
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
37 #end if
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
38
11
5c6344f67ad0 Uploaded
bgruening
parents: 10
diff changeset
39 #if str($refining_opts.refining_opts_selector) == '--refine':
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
40 #if $refining_opts.refine_output:
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
41 --refine $refined_multiple_alignment_output
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
42 #else:
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
43 --refine /dev/null
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
44 #end if
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
45
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
46 $refining_opts.l
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
47 $refining_opts.gibbs_opts.gibbs_opts_selector
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
48
11
5c6344f67ad0 Uploaded
bgruening
parents: 10
diff changeset
49 #if str($refining_opts.gibbs_opts.gibbs_opts_selector) == '--gibbs':
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
50 --seed $refining_opts.gibbs_opts.random_seed
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
51 #end if
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
52
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
53 $refining_opts.notrunc
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
54 $refining_opts.cyk
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
55 #end if
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
56
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
57 $cmfile_outfile
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
58 $alignment_infile
13
ffa96097e5b2 Uploaded
bgruening
parents: 12
diff changeset
59
ffa96097e5b2 Uploaded
bgruening
parents: 12
diff changeset
60 ]]>
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
61 </command>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
62 <inputs>
13
ffa96097e5b2 Uploaded
bgruening
parents: 12
diff changeset
63 <!-- Stockholm or SELEX
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
64 SELEX is defined in EMBOSS datatypes
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
65 -->
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
66 <param name="alignment_infile" type="data" format="stockholm,selex" label="Multiple sequence alignment"/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
67
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
68 <conditional name="model_construction_opts">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
69 <param name="model_construction_opts_selector" type="select" label="These options control how consensus columns are defined in an alignment" help="">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
70 <option value="--fast" selected="true">automatic (--fast)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
71 <option value="--hand">user defined (--hand)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
72 </param>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
73 <when value="--fast">
13
ffa96097e5b2 Uploaded
bgruening
parents: 12
diff changeset
74 <param name="symfrac" type="float" value="0.5" size="5"
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
75 label="Define the residue fraction threshold necessary to define a consensus (--symfrac)" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
76 </when>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
77 <when value="--hand"/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
78 </conditional>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
79
13
ffa96097e5b2 Uploaded
bgruening
parents: 12
diff changeset
80 <param name="noss" truevalue="--noss" falsevalue="" checked="False" type="boolean"
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
81 label="Ignore the secondary structure annotation, if any, in your multiple alignment file (--noss)" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
82
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
83 <conditional name="relative_weights_opts">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
84 <param name="relative_weights_opts_selector" type="select" label="Options controlling relative weights" help="">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
85 <option value="--wpb" selected="true">Henikoff (--wpb)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
86 <option value="--wgsc">Gerstein/Sonnhammer/Chothia (--wgsc)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
87 <option value="--wnone">no sequence weighting (--wnone)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
88 <option value="--wgiven">Sequence weight from given in input file (--wgiven)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
89 <option value="--wblosum">BLOSUM filtering algorithm (--wblosum)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
90 </param>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
91 <when value="--wpb"/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
92 <when value="--wgsc"/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
93 <when value="--wnone"/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
94 <when value="--wgiven"/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
95 <when value="--wblosum">
13
ffa96097e5b2 Uploaded
bgruening
parents: 12
diff changeset
96 <param name="wid" type="float" value="0.5" size="5"
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
97 label="Percent identity for clustering the alignment (--wid)" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
98 </when>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
99 </conditional>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
100
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
101
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
102 <conditional name="effective_opts">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
103 <param name="effective_opts_selector" type="select" label="Options controlling effective sequence number" help="">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
104 <option value="--eent" selected="true">entropy weighting strategy (--eent)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
105 <option value="--enone">Turn off the entropy weighting strategy (--enone)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
106 </param>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
107 <when value="--enone"/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
108 <when value="--eent">
13
ffa96097e5b2 Uploaded
bgruening
parents: 12
diff changeset
109 <param name="ere" type="float" value="0.59" size="5"
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
110 label="Set the target mean match state relative entropy (--ere)" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
111
13
ffa96097e5b2 Uploaded
bgruening
parents: 12
diff changeset
112 <param name="eminseq" type="integer" value="" size="5"
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
113 label="Define the minimum allowed effective sequence number (--eminseq)" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
114
13
ffa96097e5b2 Uploaded
bgruening
parents: 12
diff changeset
115 <param name="ehmmre" type="float" value="" size="5"
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
116 label="Set the target HMM mean match state relative entropy (--ehmmre)" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
117
13
ffa96097e5b2 Uploaded
bgruening
parents: 12
diff changeset
118 <param name="eset" type="integer" value="" size="5"
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
119 label="Set the effective sequence number for entropy weighting (--eset)" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
120 </when>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
121 </conditional>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
122
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
123
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
124 <conditional name="refining_opts">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
125 <param name="refining_opts_selector" type="select" label="Options for refining the input alignment" help="">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
126 <option value="" selected="true">No refinement</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
127 <option value="--refine">refine the input alignment</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
128 </param>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
129 <when value=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
130 <when value="--refine">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
131
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
132 <conditional name="gibbs_opts">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
133 <param name="gibbs_opts_selector" type="select" label="refinement mode" help="">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
134 <option value="" selected="true">expectation-maximization (EM)</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
135 <option value="--gibbs">Gibbs sampling</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
136 </param>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
137 <when value=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
138 <when value="--gibbs">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
139 <param name="random_seed" type="integer" value="0" min="0" label="Random seed (--seed)" help="" />
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
140 </when>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
141 </conditional>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
142
13
ffa96097e5b2 Uploaded
bgruening
parents: 12
diff changeset
143 <param name="l" truevalue="-l" falsevalue="" checked="False" type="boolean"
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
144 label="Turn on the local alignment algorithm" help="... which allows the alignment to span two or more subsequences if necessary"/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
145
13
ffa96097e5b2 Uploaded
bgruening
parents: 12
diff changeset
146 <param name="notrunc" truevalue="--notrunc" falsevalue="" checked="False" type="boolean"
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
147 label="Turn off the truncated alignment algorithm" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
148
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
149 <param name="cyk" type="select" label="Alignment algorithm used during refinement" help="">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
150 <option value="" selected="true">optimal accuracy algorithm</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
151 <option value="--cyk">align with the CYK algorithm</option>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
152 </param>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
153
13
ffa96097e5b2 Uploaded
bgruening
parents: 12
diff changeset
154 <param name="refine_output" truevalue="" falsevalue="" checked="False" type="boolean"
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
155 label="Output the refined alignment file as it is used to build the covariance model" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
156
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
157 </when>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
158 </conditional>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
159
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
160
13
ffa96097e5b2 Uploaded
bgruening
parents: 12
diff changeset
161 <param name="is_summery_output" truevalue="" falsevalue="" checked="False" type="boolean"
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
162 label="Output a summary file?" help=""/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
163
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
164 </inputs>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
165 <outputs>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
166
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
167 <data format="txt" name="summary_outfile" label="cmbuild summary on ${on_string}">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
168 <filter>is_summery_output is True</filter>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
169 </data>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
170 <!--<data format="stockholm" name="annotated_source_alignment_outfile" label="Annotated alignment from ${on_string}"/>-->
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
171 <data format="cm" name="cmfile_outfile" label="Covariance models from ${on_string}"/>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
172
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
173 <data format="stockholm" name="refined_multiple_alignment_output" label="refined alignment file of ${on_string}">
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
174 <filter>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
175 ((
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
176 refining_opts['refining_opts_selector'] == "--refine" and
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
177 refining_opts['refine_output'] is True
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
178 ))
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
179 </filter>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
180 </data>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
181
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
182 </outputs>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
183 <help>
13
ffa96097e5b2 Uploaded
bgruening
parents: 12
diff changeset
184 <![CDATA[
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
185
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
186
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
187 **What it does**
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
188
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
189 For each multiple sequence alignment build a covariance model.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
190 The alignment file must be in Stockholm or SELEX format, and must contain consensus secondary structure annotation.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
191 cmbuild uses the consensus structure to determine the architecture of the CM.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
192
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
193 In addition to writing CM(s) to CMFILE_OUT, cmbuild also outputs a single line for each model created to stdout. Each
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
194 line has the following fields: ”aln”: the index of the alignment used to build the CM; ”idx”: the index of the CM in the
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
195 CMFILE_OUT; ”name”: the name of the CM; ”nseq”: the number of sequences in the alignment used to build the CM;
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
196 ”eff nseq”: the effective number of sequences used to build the model; ”alen”: the length of the alignment used to build
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
197 the CM; ”clen”: the number of columns from the alignment defined as consensus (match) columns; ”bps”: the number
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
198 of basepairs in the CM; ”bifs”: the number of bifurcations in the CM; ”rel entropy: CM”: the total relative entropy of the
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
199 model divided by the number of consensus columns; ”rel entropy: HMM”: the total relative entropy of the model ignoring
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
200 secondary structure divided by the number of consensus columns. ”description”: description of the model/alignment.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
201
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
202
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
203 Options controlling model construction
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
204 --------------------------------------
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
205
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
206 These options control how consensus columns are defined in an alignment.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
207
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
208 * --fast Define consensus columns automatically as those that have a fraction >= symfrac of residues as opposed to gaps. (See below for the --symfrac option.) This is the default.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
209 * --hand Use reference coordinate annotation (#=GC RF line, in Stockholm) to determine which columns are consensus, and which are inserts. Any non-gap character indicates a consensus column. (For example, mark consensus columns with ”x”, and insert columns with ”.”.)
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
210 * --symfrac Define the residue fraction threshold necessary to define a consensus column when not using --hand. The default is 0.5. The symbol fraction in each column is calculated after taking relative sequence weighting into account. Setting this to 0.0 means that every alignment column will be assigned as consensus, which may be useful in some cases. Setting it to 1.0 means that only columns that include 0 gaps will be assigned as consensus.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
211 * --noss Ignore the secondary structure annotation, if any, in MSA-Infile and build a CM with zero basepairs. This model will be similar to a profile HMM and the cmsearch and cmscan programs will use HMM algorithms which are faster than CM ones for this model. Additionally, a zero basepair model need not be calibrated with cmcalibrate prior to running cmsearch with it. The --noss option must be used if there is no secondary structure annotation in MSA-Infile.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
212
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
213
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
214 Options controlling relative weights
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
215 ------------------------------------
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
216
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
217 cmbuild uses an ad hoc sequence weighting algorithm to downweight closely related sequences and upweight distantly
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
218 related ones. This has the effect of making models less biased by uneven phylogenetic representation. For example,
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
219 two identical sequences would typically each receive half the weight that one sequence would. These options control
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
220 which algorithm gets used.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
221
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
222 * --wpb Use the Henikoff position-based sequence weighting scheme [Henikoff and Henikoff, J. Mol. Biol. 243:574, 1994]. This is the default.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
223 * --wgsc Use the Gerstein/Sonnhammer/Chothia weighting algorithm [Gerstein et al, J. Mol. Biol. 235:1067, 1994].
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
224 * --wnone Turn sequence weighting off; e.g. explicitly set all sequence weights to 1.0.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
225 * --wgiven Use sequence weights as given in annotation in the input alignment file. If no weights were given, assume they are all 1.0. The default is to determine new sequence weights by the Gerstein/Sonnhammer/Chothia algorithm, ignoring any annotated weights.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
226 * --wblosum Use the BLOSUM filtering algorithm to weight the sequences, instead of the default GSC weighting. Cluster the sequences at a given percentage identity (see --wid); assign each cluster a total weight of 1.0, distributed equally amongst the members of that cluster.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
227 * --wid Controls the behavior of the --wblosum weighting option by setting the percent identity for clustering the alignment.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
228
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
229
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
230 Options controlling effective sequence number
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
231 ---------------------------------------------
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
232
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
233 After relative weights are determined, they are normalized to sum to a total effective sequence number, eff nseq. This
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
234 number may be the actual number of sequences in the alignment, but it is almost always smaller than that. The default
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
235 entropy weighting method (--eent) reduces the effective sequence number to reduce the information content (relative
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
236 entropy, or average expected score on true homologs) per consensus position. The target relative entropy is controlled
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
237 by a two-parameter function, where the two parameters are settable with --ere and --esigma.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
238
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
239 * --eent Use the entropy weighting strategy to determine the effective sequence number that gives a target mean match state relative entropy. This option is the default, and can be turned off with --enone. The default target mean match state relative entropy is 0.59 bits for models with at least 1 basepair and 0.38 bits for models with zero basepairs, but this can be changed with --ere. The default of 0.59 or 0.38 bits is automatically changed if the total relative entropy of the model (summed match state relative entropy) is less than a cutoff, which is 6.0 bits by default, but can be changed with the expert, undocumented --eX option. If you really want to play with that option, consult the source code.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
240 * --enone Turn off the entropy weighting strategy. The effective sequence number is just the number of sequences in the alignment.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
241 * --ere Set the target mean match state relative entropy. By default the target relative entropy per match position is 0.59 bits for models with at least 1 basepair and 0.38 for models with zero basepairs.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
242 * --eminseq Define the minimum allowed effective sequence number.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
243 * --ehmmre Set the target HMM mean match state relative entropy. Entropy for basepairing match states is calculated using marginalized basepair emission probabilities.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
244 * --eset Set the effective sequence number for entropy weighting.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
245
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
246
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
247
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
248 Options for refining the input alignment
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
249 ----------------------------------------
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
250
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
251 * --refine Attempt to refine the alignment before building the CM using expectation-maximization (EM). A CM is first built from the initial alignment as usual. Then, the sequences in the alignment are realigned optimally (with the HMM banded CYK algorithm, optimal means optimal given the bands) to the CM, and a new CM is built from the resulting alignment. The sequences are then realigned to the new CM, and a new CM is built from that alignment. This is continued until convergence, specifically when the alignments for two successive iterations are not significantly different (the summed bit scores of all the sequences in the alignment changes less than 1% between two successive iterations).
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
252 * -l Turn on the local alignment algorithm, which allows the alignment to span two or more subsequences if necessary (e.g. if the structures of the query model and target sequence are only partially shared), allowing certain large insertions and deletions in the structure to be penalized differently than normal indels. The default is to globally align the query model to the target sequences.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
253 * --gibbs Modifies the behavior of --refine so Gibbs sampling is used instead of EM. The difference is that during the alignment stage the alignment is not necessarily optimal; instead, an alignment (parsetree) for each sequence is sampled from the posterior distribution of alignments as determined by the Inside algorithm. Due to this sampling step --gibbs is non-deterministic, so different runs with the same alignment may yield different results. This is not true when --refine is used without the --gibbs option, in which case the final alignment and CM will always be the same. When --gibbs is enabled, the --seed "number" option can be used to seed the random number generator predictably, making the results reproducible. The goal of the --gibbs option is to help expert RNA alignment curators refine structural alignments by allowing them to observe alternative high scoring alignments.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
254 * --seed Seed the random number generator with an integer >= 0. This option can only be used in combination with --gibbs. If the given number is nonzero, stochastic sampling of alignments will be reproducible; the same command will give the same results. If the given number is 0, the random number generator is seeded arbitrarily, and stochastic samplings may vary from run to run of the same command. The default seed is 0.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
255 * --cyk With --refine, align with the CYK algorithm. By default the optimal accuracy algorithm is used. There is more information on this in the cmalign manual page.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
256 * --notrunc With --refine, turn off the truncated alignment algorithm. There is more information on this in the cmalign manual page.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
257
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
258
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
259 For further questions please refer to the Infernal Userguide_.
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
260
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
261 .. _Userguide: http://selab.janelia.org/software/infernal/Userguide.pdf
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
262
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
263
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
264 How do I cite Infernal?
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
265 -----------------------
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
266
13
ffa96097e5b2 Uploaded
bgruening
parents: 12
diff changeset
267 The recommended citation for using Infernal 1.1 is E. P. Nawrocki and S. R. Eddy, Infernal 1.1: 100-fold faster RNA homology searches, Bioinformatics 29:2933-2935 (2013).
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
268
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
269 **Galaxy Wrapper Author**::
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
270
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
271 * Bjoern Gruening, University of Freiburg
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
272
13
ffa96097e5b2 Uploaded
bgruening
parents: 12
diff changeset
273 ]]>
9
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
274 </help>
1aa4c036e41c Uploaded
bgruening
parents:
diff changeset
275 </tool>