comparison testing_cgatools-982e19c29ec0/cgatools/tools/cgatools_1.6/mkvcf.xml @ 0:ef23f9cd599b draft default tip

Uploaded
author devteam
date Thu, 27 Sep 2012 13:37:59 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:ef23f9cd599b
1 <tool id="cg_mkvcf" name="mkvcf(beta) 1.6" version="1.0.0">
2 <!--
3 This tool creates a GUI for the mkvcf function of cgatools from Complete Genomics, Inc.
4 written 7-31-2012 by bcrain@completegenomics.com
5 -->
6
7 <description>converts to vcf</description>
8
9 <command interpreter="perl">
10 <!--run wrapper script mkvcf_wrapper.pl: one input option is emitted per dataset of the 'files' repeat when data_source is 'in', otherwise a single input option carries the path typed by the user; the nocalls and calibration options are only emitted for sources that contain masterVar, the jct* junction options only for sources that contain SV (sources CNV and MEI alone get neither); the fields option is always emitted last-->
11 mkvcf_wrapper.pl
12 --reference $crr.fields.path
13 --output $output
14 --genomes $count.genomes
15 --source $count.sources.source
16 --datasource $count.sources.data_sources.data_source
17 #if $count.sources.data_sources.data_source=="in"
18 #for $m in $count.sources.data_sources.files
19 --input $m.input
20 #end for
21 #else
22 --input $count.sources.data_sources.input
23 #end if
24 #if $count.sources.source=="masterVar" or $count.sources.source=="masterVar,CNV"
25 $count.sources.nocalls
26 --calibration $count.sources.calibration
27 #else if $count.sources.source=="SV"
28 --jctscore $count.sources.jctscore
29 --jctside $count.sources.jctside
30 --jctdistance $count.sources.jctdistance
31 --jctlength $count.sources.jctlength
32 $count.sources.jctpriority
33 $count.sources.jcttumor
34 #else if $count.sources.source=="masterVar,CNV,SV" or $count.sources.source=="masterVar,CNV,SV,MEI"
35 $count.sources.nocalls
36 --calibration $count.sources.calibration
37 --jctscore $count.sources.jctscore
38 --jctside $count.sources.jctside
39 --jctdistance $count.sources.jctdistance
40 --jctlength $count.sources.jctlength
41 $count.sources.jctpriority
42 $count.sources.jcttumor
43 #end if
44 --fields $count.sources.fields
45 </command>
46
47 <outputs> <!--single vcf dataset; it is referenced as $output in the command template-->
48 <data name="output" label="${tool.name} output" format="vcf"></data>
49 </outputs>
50
51 <inputs>
52 <!--reference genome selector: the choices are read from the cg_crr_files data table-->
53 <param type="select" name="crr" label="Reference genome (.crr file)">
54 <options from_data_table="cg_crr_files"></options>
55 </param>
56
57 <!--select number of genomes - determines which input sources to show-->
58 <conditional name="count">
59 <param type="select" name="genomes" help="Note: multi-genome vcfs (2 or more genomes) can only be generated for format version 2.0 and up" label="Select the number of genomes to add to the vcf file">
60 <option selected="true" value="1">1 - allowed data sources are masterVar, CNV, SV, MEI</option>
61 <option value="2">2 - allowed data sources are masterVar, CNV, SV (format v2.x)</option>
62 <option value="3">3 or more - allowed data sources are masterVar, CNV (format v2.x)</option>
63 </param>
64
65 <when value="1">
66 <!--form field to select input sources-->
67 <conditional name="sources">
68 <param type="select" name="source" label="Data sources to be included for this genome">
69 <option selected="true" value="masterVar,CNV,SV,MEI">masterVar + CNV + SV + MEI</option>
70 <option value="masterVar">masterVar</option>
71 <option value="CNV">CNV</option>
72 <option value="SV">SV</option>
73 <option value="MEI">MEI</option>
74 </param>
75
76 <when value="masterVar,CNV,SV,MEI">
77 <!--conditional to select inputs: only data located outside Galaxy is offered when all four sources are combined-->
78 <conditional name="data_sources">
79 <param name="data_source" type="select" label="Where are the input files?">
80 <option value="out" selected="true">located outside Galaxy (data on server or mounted drive)</option>
81 </param>
82
83 <when value="out">
84 <!--form field to enter the path of the genome root directory-->
85 <param name="input" type="text" label="Genome root directory" size="200" help="Enter full path /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01).">
86 <validator type="empty_field" message="You must supply the genome root directory"/>
87 </param>
88 </when>
89 </conditional>
90
91 <!--form field to include no-calls: the selected option value is inserted as is into the command line-->
92 <param name="nocalls" type="select" label="Include no-calls?">
93 <option value="" selected="true">no</option>
94 <option value="--nocalls">yes</option>
95 </param>
96
97 <!--form field to enter calibration directory (no validator is attached to this field)-->
98 <param name="calibration" type="text" size="300" label="Directory calibration data (/path/calibration-root)" help="The directory containing calibration data. For example, there should exist a file calibration-root/0.0.0/metrics.tsv. Calibration data can be downloaded from ftp://ftp.completegenomics.com/ScoreCalibrationFiles/var-calibration-v2.tgz"/>
99
100 <!--form fields for the junction threshold options (integers, pre-filled with the defaults named in the labels)-->
101 <param name="jctscore" type="integer" value="10" label="Junction score thresholds (discordant mate pair count) (default 10)">
102 <validator type="empty_field" message="You must enter a value, for the default value enter 10" />
103 </param>
104 <param name="jctside" type="integer" value="70" label="Junction side length threshold (default 70)">
105 <validator type="empty_field" message="You must enter a value, for the default value enter 70" />
106 </param>
107 <param name="jctdistance" type="integer" value="200" label="Distance tolerance for junction compatibility (default 200)">
108 <validator type="empty_field" message="You must enter a value, for the default value enter 200" />
109 </param>
110 <param name="jctlength" type="integer" value="500" label="Length threshold for compatible junctions (default 500)">
111 <validator type="empty_field" message="You must enter a value, for the default value enter 500" />
112 </param>
113
114 <!--form field to select normal junction priority for vcf output-->
115 <param name="jctpriority" type="select" label="Use normal junction priority for vcf output?">
116 <option value="" selected="true">no</option>
117 <option value="--jctpriority">yes</option>
118 </param>
119
120 <!--form field to select high confidence junctions for tumors-->
121 <param name="jcttumor" type="select" label="Use high confidence junctions for tumors?">
122 <option value="" selected="true">no</option>
123 <option value="--jcttumor">yes</option>
124 </param>
125
126 <!--form field to select field names to include in vcf (multiple selection; 'all' is pre-selected)-->
127 <param name="fields" type="select" label="Field names to be included in vcf file" multiple="true" help="Select all field names (default) or a collection of individual field names.">
128 <option value="all" selected="true">-- all (default) --</option>
129 <option value="NS">NS - Number of samples</option>
130 <option value="AN">AN - Total number of alleles in called genotypes</option>
131 <option value="AC">AC - Allele count in genotypes</option>
132 <option value="CGA_XR">CGA_XR - External database reference</option>
133 <option value="CGA_FI">CGA_FI - Functional impact</option>
134 <option value="CGA_PFAM">CGA_PFAM - PFAM domain </option>
135 <option value="CGA_MIRB">CGA_MIRB - miRBaseId</option>
136 <option value="CGA_SDO">CGA_SDO - Depth of overlapping segmental duplications</option>
137 <option value="CGA_RPT">CGA_RPT - Overlapping repeatMasker annotations</option>
138 <option value="GT">GT - Genotype</option>
139 <option value="PS">PS - Phase set</option>
140 <option value="FT">FT - Sample genotype filters</option>
141 <option value="GL">GL - Genotype likelihoods</option>
142 <option value="CGA_CEHQ">CGA_CEHQ - Calibrated haplotype quality based on EAF assumption</option>
143 <option value="CGA_CEGL">CGA_CEGL - Genotype likelihoods based on CEHQ</option>
144 <option value="SS">SS - Somatic status</option>
145 <option value="HQ">HQ - Haplotype quality</option>
146 <option value="EHQ">EHQ - Haplotype quality based on EAF assumption</option>
147 <option value="GQ">GQ - Genotype quality</option>
148 <option value="DP">DP - Total read depth</option>
149 <option value="AD">AD - Allelic depths</option>
150 <option value="CGA_RDP">CGA_RDP - Read depth in reference</option>
151 <option value="CGA_ODP">CGA_ODP - Other total read depth: somatic comparison</option>
152 <option value="CGA_OAD">CGA_OAD - Other allelic depths: somatic comparison</option>
153 <option value="CGA_ORDP">CGA_ORDP - Other reference depth: somatic comparison </option>
154 <option value="CGA_SOMC">CGA_SOMC - Somatic Category</option>
155 <option value="CGA_SOMR">CGA_SOMR - Somatic Rank</option>
156 <option value="CGA_SOMS">CGA_SOMS - Somatic Score</option>
157 <option value="CGA_GP">CGA_GP - Normalized mean GC corrected coverage</option>
158 <option value="CGA_NP">CGA_NP - Normalized mean coverage for 2k window</option>
159 <option value="CGA_CP">CGA_CP - Diploid-model ploidy call for segment including this interval</option>
160 <option value="CGA_PS">CGA_PS - Diploid-model called ploidy score</option>
161 <option value="CGA_CT">CGA_CT - Diploid-model CNV type</option>
162 <option value="CGA_TS">CGA_TS - Diploid-model CNV type score</option>
163 <option value="CGA_CL">CGA_CL - Nondiploid-model called level</option>
164 <option value="CGA_LS">CGA_LS - Nondiploid-model called level score</option>
165 <option value="CGA_SCL">CGA_SCL - Nondiploid-model somatic called level</option>
166 <option value="CGA_SLS">CGA_SLS - Non-diploid-model somatic called level score</option>
167 <option value="CGA_LAF">CGA_LAF - Lesser Allele Fraction estimate, 100k window</option>
168 <option value="CGA_LLAF">CGA_LLAF - Lesser Allele Fraction lower bound, 100k window</option>
169 <option value="CGA_ULAF">CGA_ULAF - Lesser Allele Fraction upper bound, 100k window</option>
170 <option value="SVTYPE">SVTYPE - Type of structural variation</option>
171 <option value="CGA_BF">CGA_BF - Frequency in set of baseline genomes</option>
172 <option value="CGA_MEDEL">CGA_MEDEL - Mobile element deletion</option>
173 <option value="MATEID">MATEID - ID of mate breakend</option>
174 <option value="CGA_BNDG">CGA_BNDG - Transcript name and strand of genes containing breakend</option>
175 <option value="CGA_BNDGO">CGA_BNDGO - Transcript name and strand of genes containing mate breakend</option>
176 <option value="CGA_BNDP">CGA_BNDP - Precision of breakend</option>
177 <option value="CGA_BNDMPC">CGA_BNDMPC - Mate pair count supporting a breakend</option>
178 <option value="CGA_BNDPOS">CGA_BNDPOS - Position of breakend as detected in individual genome</option>
179 <option value="CGA_BNDDEF">CGA_BNDDEF - Breakend definition in individual genome</option>
180 <option value="CGA_IS">CGA_IS - Measure of confidence that there is a mobile element insertion</option>
181 <option value="CGA_IDC">CGA_IDC - Count of paired ends consistently indicating a mobile element insertion</option>
182 <option value="CGA_IDCL">CGA_IDCL - Count of paired ends indicating a mobile element insertion anchored 5&#39;</option>
183 <option value="CGA_IDCR">CGA_IDCR - Count of paired ends indicating a mobile element insertion anchored 3&#39;</option>
184 <option value="CGA_RDC">CGA_RDC - Count of paired ends supporting the presence of a reference allele</option>
185 <option value="CGA_NBET">CGA_NBET - Next-best estimate of type of MEI</option>
186 <option value="CGA_ETS">CGA_ETS - Measure of confidence that the ElementType (MEINFO:NAME) is correct</option>
187 <option value="CGA_KES">CGA_KES - Fraction of known MEI with at least as good an InsertionScore</option>
188 </param>
189 </when>
190
191 <when value="masterVar">
192 <!--conditional to select inputs: a dataset imported into Galaxy or a path outside Galaxy-->
193 <conditional name="data_sources">
194 <param name="data_source" type="select" label="Where is the input file?">
195 <option value="in" selected="true">imported into Galaxy</option>
196 <option value="out">located outside Galaxy (data on server or mounted drive)</option>
197 </param>
198
199 <when value="in">
200 <!--form field to select the masterVar dataset: the repeat is fixed to exactly one entry (min=1, max=1) and the dataset dbkey is checked against cg_crr_files.loc-->
201 <repeat name="files" title="MasterVar file" min="1" max="1">
202 <param name="input" type="data" format="cg_mastervar" label="Dataset">
203 <validator type="dataset_ok_validator" />
204 <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
205 metadata_name="dbkey" metadata_column="1"
206 message="cgatools is not currently available for this build."/>
207 </param>
208 </repeat>
209 </when>
210
211 <when value="out">
212 <!--form field to enter the path of the genome root directory or of the masterVar file-->
213 <param name="input" type="text" label="Genome root directory or masterVar file" size="200" help="Enter full path /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01), or /path/masterVarfile (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01/ASM/masterVarBeta-GS00000YYYY-ASM.tsv.bz2).">
214 <validator type="empty_field" message="You must supply the genome root directory or masterVar file"/>
215 </param>
216 </when>
217 </conditional>
218
219 <!--form field to include no-calls: the selected option value is inserted as is into the command line-->
220 <param name="nocalls" type="select" label="Include no-calls?">
221 <option value="" selected="true">no</option>
222 <option value="--nocalls">yes</option>
223 </param>
224
225 <!--form field to enter calibration directory (no validator is attached to this field)-->
226 <param name="calibration" type="text" size="300" label="Directory calibration data (/path/calibration-root)" help="The directory containing calibration data. For example, there should exist a file calibration-root/0.0.0/metrics.tsv. Calibration data can be downloaded from ftp://ftp.completegenomics.com/ScoreCalibrationFiles/var-calibration-v2.tgz"/>
227
228 <!--form field to select field names to include in vcf (multiple selection; 'all' is pre-selected)-->
229 <param name="fields" type="select" label="Field names to be included in vcf file" multiple="true" help="Select all field names (default) or a collection of individual field names.">
230 <option value="all" selected="true">-- all (default) --</option>
231 <option value="NS">NS - Number of samples</option>
232 <option value="AN">AN - Total number of alleles in called genotypes</option>
233 <option value="AC">AC - Allele count in genotypes</option>
234 <option value="CGA_XR">CGA_XR - External database reference</option>
235 <option value="CGA_FI">CGA_FI - Functional impact</option>
236 <option value="CGA_PFAM">CGA_PFAM - PFAM domain </option>
237 <option value="CGA_MIRB">CGA_MIRB - miRBaseId</option>
238 <option value="CGA_SDO">CGA_SDO - Depth of overlapping segmental duplications</option>
239 <option value="CGA_RPT">CGA_RPT - Overlapping repeatMasker annotations</option>
240 <option value="GT">GT - Genotype</option>
241 <option value="PS">PS - Phase set</option>
242 <option value="FT">FT - Sample genotype filters</option>
243 <option value="GL">GL - Genotype likelihoods</option>
244 <option value="CGA_CEHQ">CGA_CEHQ - Calibrated haplotype quality based on EAF assumption</option>
245 <option value="CGA_CEGL">CGA_CEGL - Genotype likelihoods based on CEHQ</option>
246 <option value="SS">SS - Somatic status</option>
247 <option value="HQ">HQ - Haplotype quality</option>
248 <option value="EHQ">EHQ - Haplotype quality based on EAF assumption</option>
249 <option value="GQ">GQ - Genotype quality</option>
250 <option value="DP">DP - Total read depth</option>
251 <option value="AD">AD - Allelic depths</option>
252 <option value="CGA_RDP">CGA_RDP - Read depth in reference</option>
253 <option value="CGA_ODP">CGA_ODP - Other total read depth: somatic comparison</option>
254 <option value="CGA_OAD">CGA_OAD - Other allelic depths: somatic comparison</option>
255 <option value="CGA_ORDP">CGA_ORDP - Other reference depth: somatic comparison </option>
256 <option value="CGA_SOMC">CGA_SOMC - Somatic Category</option>
257 <option value="CGA_SOMR">CGA_SOMR - Somatic Rank</option>
258 <option value="CGA_SOMS">CGA_SOMS - Somatic Score</option>
259 </param>
260 </when>
261
262 <when value="CNV">
263 <!--conditional to select inputs: only data located outside Galaxy is offered for CNV-->
264 <conditional name="data_sources">
265 <param name="data_source" type="select" label="Where are the input files?">
266 <option value="out" selected="true">located outside Galaxy (data on server or mounted drive)</option>
267 </param>
268
269 <when value="out">
270 <!--form field to enter the path of the genome root directory-->
271 <param name="input" type="text" label="Genome root directory" size="200" help="Enter full path /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01).">
272 <validator type="empty_field" message="You must supply the genome root directory"/>
273 </param>
274 </when>
275 </conditional>
276
277 <!--form field to select field names to include in vcf (multiple selection; 'all' is pre-selected)-->
278 <param name="fields" type="select" label="Field names to be included in vcf file" multiple="true" help="Select all field names (default) or a collection of individual field names.">
279 <option value="all" selected="true">-- all (default) --</option>
280 <option value="GT">GT - Genotype</option>
281 <option value="CGA_GP">CGA_GP - Normalized mean GC corrected coverage</option>
282 <option value="CGA_NP">CGA_NP - Normalized mean coverage for 2k window</option>
283 <option value="CGA_CP">CGA_CP - Diploid-model ploidy call for segment</option>
284 <option value="CGA_PS">CGA_PS - Diploid-model called ploidy score</option>
285 <option value="CGA_CT">CGA_CT - Diploid-model CNV type</option>
286 <option value="CGA_TS">CGA_TS - Diploid-model CNV type score</option>
287 <option value="CGA_CL">CGA_CL - Nondiploid-model called level</option>
288 <option value="CGA_LS">CGA_LS - Nondiploid-model called level score</option>
289 <option value="CGA_SCL">CGA_SCL - Nondiploid-model somatic called level</option>
290 <option value="CGA_SLS">CGA_SLS - Non-diploid-model somatic called level score</option>
291 <option value="CGA_LAF">CGA_LAF - Lesser Allele Fraction estimate, 100k window</option>
292 <option value="CGA_LLAF">CGA_LLAF - Lesser Allele Fraction lower bound, 100k window</option>
293 <option value="CGA_ULAF">CGA_ULAF - Lesser Allele Fraction upper bound, 100k window</option>
294 </param>
295 </when>
296
297 <when value="SV">
298 <!--conditional to select inputs: a dataset imported into Galaxy or a path outside Galaxy-->
299 <conditional name="data_sources">
300 <param name="data_source" type="select" label="Where are the input files?">
301 <option value="in" selected="true">imported into Galaxy</option>
302 <option value="out">located outside Galaxy (data on server or mounted drive)</option>
303 </param>
304
305 <when value="in">
306 <!--form field to select the SV dataset (tabular): the repeat is fixed to exactly one entry (min=1, max=1) and the dataset dbkey is checked against cg_crr_files.loc-->
307 <repeat name="files" title="SV file" min="1" max="1">
308 <param name="input" type="data" format="tabular" label="Dataset">
309 <validator type="dataset_ok_validator" />
310 <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
311 metadata_name="dbkey" metadata_column="1"
312 message="cgatools is not currently available for this build."/>
313 </param>
314 </repeat>
315 </when>
316
317 <when value="out">
318 <!--form field to enter the path of the genome root directory or of the SV file-->
319 <param name="input" type="text" label="Genome root directory or SV file" size="200" help="Enter full path /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01), or /path/SVfile (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01/ASM/SV/allJunctionsBeta-GS00000YYYY-ASM.tsv).">
320 <validator type="empty_field" message="You must supply the genome root directory or SV file"/>
321 </param>
322 </when>
323 </conditional>
324
325 <!--form fields for the junction threshold options (integers, pre-filled with the defaults named in the labels)-->
326 <param name="jctscore" type="integer" value="10" label="Junction score thresholds (discordant mate pair count) (default 10)">
327 <validator type="empty_field" message="You must enter a value, for the default value enter 10" />
328 </param>
329 <param name="jctside" type="integer" value="70" label="Junction side length threshold (default 70)">
330 <validator type="empty_field" message="You must enter a value, for the default value enter 70" />
331 </param>
332 <param name="jctdistance" type="integer" value="200" label="Distance tolerance for junction compatibility (default 200)">
333 <validator type="empty_field" message="You must enter a value, for the default value enter 200" />
334 </param>
335 <param name="jctlength" type="integer" value="500" label="Length threshold for compatible junctions (default 500)">
336 <validator type="empty_field" message="You must enter a value, for the default value enter 500" />
337 </param>
338
339 <!--form field to select normal junction priority for vcf output-->
340 <param name="jctpriority" type="select" label="Use normal junction priority for vcf output?">
341 <option value="" selected="true">no</option>
342 <option value="--jctpriority">yes</option>
343 </param>
344
345 <!--form field to select high confidence junctions for tumors-->
346 <param name="jcttumor" type="select" label="Use high confidence junctions for tumors?">
347 <option value="" selected="true">no</option>
348 <option value="--jcttumor">yes</option>
349 </param>
350
351 <!--form field to select field names to include in vcf (multiple selection; 'all' is pre-selected)-->
352 <param name="fields" type="select" label="Field names to be included in vcf file" multiple="true" help="Select all field names (default) or a collection of individual field names.">
353 <option value="all" selected="true">-- all (default) --</option>
354 <option value="GT">GT - Genotype</option>
355 <option value="FT">FT - Sample genotype filters</option>
356 <option value="NS">NS - Number of samples</option>
357 <option value="CGA_XR">CGA_XR - External database reference</option>
358 <option value="SVTYPE">SVTYPE - Type of structural variation</option>
359 <option value="CGA_BF">CGA_BF - Frequency in set of baseline genomes</option>
360 <option value="CGA_MEDEL">CGA_MEDEL - Mobile element deletion</option>
361 <option value="MATEID">MATEID - ID of mate breakend</option>
362 <option value="CGA_BNDG">CGA_BNDG - Transcript name and strand of genes containing breakend</option>
363 <option value="CGA_BNDGO">CGA_BNDGO - Transcript name and strand of genes containing mate breakend</option>
364 <option value="CGA_BNDP">CGA_BNDP - Precision of breakend</option>
365 <option value="CGA_BNDMPC">CGA_BNDMPC - Mate pair count supporting a breakend</option>
366 <option value="CGA_BNDPOS">CGA_BNDPOS - Position of breakend as detected in individual genome</option>
367 <option value="CGA_BNDDEF">CGA_BNDDEF - Breakend definition in individual genome</option>
368 </param>
369 </when>
370
371 <when value="MEI">
372 <!--conditional to select inputs: only data located outside Galaxy is offered for MEI-->
373 <conditional name="data_sources">
374 <param name="data_source" type="select" label="Where are the input files?">
375 <option value="out" selected="true">located outside Galaxy</option>
376 </param>
377
378 <when value="out">
379 <!--form field to enter the path of the genome root directory-->
380 <param name="input" type="text" label="Genome root directory" size="200" help="Enter full path /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01).">
381 <validator type="empty_field" message="You must supply the genome root directory"/>
382 </param>
383 </when>
384 </conditional>
385
386 <!--form field to select field names to include in vcf (multiple selection; 'all' is pre-selected)-->
387 <param name="fields" type="select" label="Field names to be included in vcf file" multiple="true" help="Select all field names (default) or a collection of individual field names.">
388 <option value="all" selected="true">-- all (default) --</option>
389 <option value="GT">GT - Genotype</option>
390 <option value="FT">FT - Sample genotype filters</option>
391 <option value="CGA_IS">CGA_IS - Measure of confidence that there is a mobile element insertion</option>
392 <option value="CGA_IDC">CGA_IDC - Count of paired ends consistently indicating a mobile element insertion</option>
393 <option value="CGA_IDCL">CGA_IDCL - Count of paired ends indicating a mobile element insertion, anchored 5&#39;</option>
394 <option value="CGA_IDCR">CGA_IDCR - Count of paired ends indicating a mobile element insertion, anchored 3&#39;</option>
395 <option value="CGA_RDC">CGA_RDC - Count of paired ends supporting the presence of a reference allele</option>
396 <option value="CGA_NBET">CGA_NBET - Next-best estimate of type of MEI</option>
397 <option value="CGA_ETS">CGA_ETS - Measure of confidence that the ElementType (MEINFO:NAME) is correct</option>
398 <option value="CGA_KES">CGA_KES - Fraction of known MEI with at least as good an InsertionScore</option>
399 </param>
400 </when>
401
402 </conditional>
403 </when>
404
405 <when value="2">
406 <!--form field to select input sources-->
407 <conditional name="sources">
408 <param type="select" name="source" label="Data sources to be included for each genome">
409 <option selected="true" value="masterVar,CNV,SV">masterVar + CNV + SV</option>
410 <option value="masterVar">masterVar</option>
411 <option value="CNV">CNV</option>
412 <option value="SV">SV</option>
413 </param>
414
415 <when value="masterVar,CNV,SV">
416 <!--conditional to select inputs: only data located outside Galaxy is offered when the three sources are combined-->
417 <conditional name="data_sources">
418 <param name="data_source" type="select" label="Where are the input files?">
419 <option value="out" selected="true">located outside Galaxy (data on server or mounted drive)</option>
420 </param>
421
422 <when value="out">
423 <!--form field to enter the path of a file that lists the genome root directories-->
424 <param name="input" type="text" label="File with list of genome root directories" size="200" help="Enter file name with full path (/path/file). This file should contain a list of genome root directory names, one per line in the format /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01). For normal/tumor comparisons list the baseline genome first.">
425 <validator type="empty_field" message="You must supply the list of genome root directories"/>
426 </param>
427 </when>
428 </conditional>
429
430 <!--form field to include no-calls: the selected option value is inserted as is into the command line-->
431 <param name="nocalls" type="select" label="Include no-calls?">
432 <option value="" selected="true">no</option>
433 <option value="--nocalls">yes</option>
434 </param>
435
436 <!--form field to enter calibration directory (no validator is attached to this field)-->
437 <param name="calibration" type="text" size="300" label="Directory calibration data (/path/calibration-root)" help="The directory containing calibration data. For example, there should exist a file calibration-root/0.0.0/metrics.tsv. Calibration data can be downloaded from ftp://ftp.completegenomics.com/ScoreCalibrationFiles/var-calibration-v2.tgz"/>
438
439 <!--form fields for the junction threshold options (integers, pre-filled with the defaults named in the labels)-->
440 <param name="jctscore" type="integer" value="10" label="Junction score thresholds (discordant mate pair count) (default 10)">
441 <validator type="empty_field" message="You must enter a value, for the default value enter 10" />
442 </param>
443 <param name="jctside" type="integer" value="70" label="Junction side length threshold (default 70)">
444 <validator type="empty_field" message="You must enter a value, for the default value enter 70" />
445 </param>
446 <param name="jctdistance" type="integer" value="200" label="Distance tolerance for junction compatibility (default 200)">
447 <validator type="empty_field" message="You must enter a value, for the default value enter 200" />
448 </param>
449 <param name="jctlength" type="integer" value="500" label="Length threshold for compatible junctions (default 500)">
450 <validator type="empty_field" message="You must enter a value, for the default value enter 500" />
451 </param>
452
453 <!--form field to select normal junction priority for vcf output-->
454 <param name="jctpriority" type="select" label="Use normal junction priority for vcf output?">
455 <option value="" selected="true">no</option>
456 <option value="--jctpriority">yes</option>
457 </param>
458
459 <!--form field to select high confidence junctions for tumors-->
460 <param name="jcttumor" type="select" label="Use high confidence junctions for tumors?">
461 <option value="" selected="true">no</option>
462 <option value="--jcttumor">yes</option>
463 </param>
464
465 <!--form field to select field names to include in vcf (multiple selection; 'all' is pre-selected)-->
466 <param name="fields" type="select" label="Field names to be included in vcf file" multiple="true" help="Select all field names (default) or a collection of individual field names.">
467 <option value="all" selected="true">-- all (default) --</option>
468 <option value="NS">NS - Number of samples</option>
469 <option value="AN">AN - Total number of alleles in called genotypes</option>
470 <option value="AC">AC - Allele count in genotypes</option>
471 <option value="CGA_XR">CGA_XR - External database reference</option>
472 <option value="CGA_FI">CGA_FI - Functional impact</option>
473 <option value="CGA_PFAM">CGA_PFAM - PFAM domain </option>
474 <option value="CGA_MIRB">CGA_MIRB - miRBaseId</option>
475 <option value="CGA_SDO">CGA_SDO - Depth of overlapping segmental duplications</option>
476 <option value="CGA_RPT">CGA_RPT - Overlapping repeatMasker annotations</option>
477 <option value="GT">GT - Genotype</option>
478 <option value="PS">PS - Phase set</option>
479 <option value="FT">FT - Sample genotype filters</option>
480 <option value="GL">GL - Genotype likelihoods</option>
481 <option value="CGA_CEHQ">CGA_CEHQ - Calibrated haplotype quality based on EAF assumption</option>
482 <option value="CGA_CEGL">CGA_CEGL - Genotype likelihoods based on CEHQ</option>
483 <option value="SS">SS - Somatic status</option>
484 <option value="HQ">HQ - Haplotype quality</option>
485 <option value="EHQ">EHQ - Haplotype quality based on EAF assumption</option>
486 <option value="GQ">GQ - Genotype quality</option>
487 <option value="DP">DP - Total read depth</option>
488 <option value="AD">AD - Allelic depths</option>
489 <option value="CGA_RDP">CGA_RDP - Read depth in reference</option>
490 <option value="CGA_ODP">CGA_ODP - Other total read depth: somatic comparison</option>
491 <option value="CGA_OAD">CGA_OAD - Other allelic depths: somatic comparison</option>
492 <option value="CGA_ORDP">CGA_ORDP - Other reference depth: somatic comparison </option>
493 <option value="CGA_SOMC">CGA_SOMC - Somatic Category</option>
494 <option value="CGA_SOMR">CGA_SOMR - Somatic Rank</option>
495 <option value="CGA_SOMS">CGA_SOMS - Somatic Score</option>
496 <option value="CGA_GP">CGA_GP - Normalized mean GC corrected coverage</option>
497 <option value="CGA_NP">CGA_NP - Normalized mean coverage for 2k window</option>
498 <option value="CGA_CP">CGA_CP - Diploid-model ploidy call for segment</option>
499 <option value="CGA_PS">CGA_PS - Diploid-model called ploidy score</option>
500 <option value="CGA_CT">CGA_CT - Diploid-model CNV type</option>
501 <option value="CGA_TS">CGA_TS - Diploid-model CNV type score</option>
502 <option value="CGA_CL">CGA_CL - Nondiploid-model called level</option>
503 <option value="CGA_LS">CGA_LS - Nondiploid-model called level score</option>
504 <option value="CGA_SCL">CGA_SCL - Nondiploid-model somatic called level</option>
505 <option value="CGA_SLS">CGA_SLS - Non-diploid-model somatic called level score</option>
506 <option value="CGA_LAF">CGA_LAF - Lesser Allele Fraction estimate, 100k window</option>
507 <option value="CGA_LLAF">CGA_LLAF - Lesser Allele Fraction lower bound, 100k window</option>
508 <option value="CGA_ULAF">CGA_ULAF - Lesser Allele Fraction upper bound, 100k window</option>
509 <option value="SVTYPE">SVTYPE - Type of structural variation</option>
510 <option value="CGA_BF">CGA_BF - Frequency in set of baseline genomes</option>
511 <option value="CGA_MEDEL">CGA_MEDEL - Mobile element deletion</option>
512 <option value="MATEID">MATEID - ID of mate breakend</option>
513 <option value="CGA_BNDG">CGA_BNDG - Transcript name and strand of genes containing breakend</option>
514 <option value="CGA_BNDGO">CGA_BNDGO - Transcript name and strand of genes containing mate breakend</option>
515 <option value="CGA_BNDP">CGA_BNDP - Precision of breakend</option>
516 <option value="CGA_BNDMPC">CGA_BNDMPC - Mate pair count supporting a breakend</option>
517 <option value="CGA_BNDPOS">CGA_BNDPOS - Position of breakend as detected in individual genome</option>
518 <option value="CGA_BNDDEF">CGA_BNDDEF - Breakend definition in individual genome</option>
519 </param>
520 </when>
521
522 <when value="masterVar">
523 <!--conditional to select inputs: datasets imported into Galaxy or a list file outside Galaxy-->
524 <conditional name="data_sources">
525 <param name="data_source" type="select" label="Where are the input files?">
526 <option value="in" selected="true">imported into Galaxy</option>
527 <option value="out">located outside Galaxy (data on server or mounted drive)</option>
528 </param>
529
530 <when value="in">
531 <!--form field to select the masterVar datasets: the repeat accepts 1 to 2 entries and each dataset dbkey is checked against cg_crr_files.loc. NOTE(review): this branch belongs to the 2 genome choice, yet min=1 lets a single dataset be submitted; confirm whether min should be 2-->
532 <repeat name="files" title="MasterVar file" min="1" max="2">
533 <param name="input" type="data" format="cg_mastervar" label="Dataset">
534 <validator type="dataset_ok_validator"/>
535 <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
536 metadata_name="dbkey" metadata_column="1"
537 message="cgatools is not currently available for this build."/>
538 </param>
539 </repeat>
540 </when>
541
542 <when value="out">
543 <!--form field to enter the path of a file that lists genome root directories or masterVar files-->
544 <param name="input" type="text" label="File with list of genome root directories or masterVar files" size="200" help="Enter file name with full path (/path/file). This file should contain a list of genome root directory names, one per line in the format /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01), or a list of masterVar files, one per line in the format /path/masterVarfile (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01/ASM/masterVarBeta-GS00000YYYY-ASM.tsv.bz2).">
545 <validator type="empty_field" message="You must supply the list of genome root directories or masterVar files"/>
546 </param>
547 </when>
548 </conditional>
549
550 <!--form field to select no-calls-->
551 <param name="nocalls" type="select" label="Include no-calls?">
552 <option value="" selected="true">no</option>
553 <option value="--nocalls">yes</option>
554 </param>
555
556 <!--form field to enter calibration directory-->
557 <param name="calibration" type="text" size="300" label="Directory calibration data (/path/calibration-root)" help="The directory containing calibration data. For example, there should exist a file calibration-root/0.0.0/metrics.tsv. Calibration data can be downloaded from ftp://ftp.completegenomics.com/ScoreCalibrationFiles/var-calibration-v2.tgz"/>
558
559 <!--form field to select field names to include in vcf-->
560 <param name="fields" type="select" label="Field names to be included in vcf file" multiple="true" help="Select all field names (default) or a collection of individual field names.">
561 <option value="all" selected="true">-- all (default) --</option>
562 <option value="NS">NS - Number of samples</option>
563 <option value="AN">AN - Total number of alleles in called genotypes</option>
564 <option value="AC">AC - Allele count in genotypes</option>
565 <option value="CGA_XR">CGA_XR - External database reference</option>
566 <option value="CGA_FI">CGA_FI - Functional impact</option>
567 <option value="CGA_PFAM">CGA_PFAM - PFAM domain </option>
568 <option value="CGA_MIRB">CGA_MIRB - miRBaseId</option>
569 <option value="CGA_SDO">CGA_SDO - Depth of overlapping segmental duplications</option>
570 <option value="CGA_RPT">CGA_RPT - Overlapping repeatMasker annotations</option>
571 <option value="GT">GT - Genotype</option>
572 <option value="PS">PS - Phase set</option>
573 <option value="FT">FT - Sample genotype filters</option>
574 <option value="GL">GL - Genotype likelihoods</option>
575 <option value="CGA_CEHQ">CGA_CEHQ - Calibrated haplotype quality based on EAF assumption</option>
576 <option value="CGA_CEGL">CGA_CEGL - Genotype likelihoods based on CEHQ</option>
577 <option value="SS">SS - Somatic status</option>
578 <option value="HQ">HQ - Haplotype quality</option>
579 <option value="EHQ">EHQ - Haplotype quality based on EAF assumption</option>
580 <option value="GQ">GQ - Genotype quality</option>
581 <option value="DP">DP - Total read depth</option>
582 <option value="AD">AD - Allelic depths</option>
583 <option value="CGA_RDP">CGA_RDP - Read depth in reference</option>
584 <option value="CGA_ODP">CGA_ODP - Other total read depth: somatic comparison</option>
585 <option value="CGA_OAD">CGA_OAD - Other allelic depths: somatic comparison</option>
586 <option value="CGA_ORDP">CGA_ORDP - Other reference depth: somatic comparison </option>
587 <option value="CGA_SOMC">CGA_SOMC - Somatic Category</option>
588 <option value="CGA_SOMR">CGA_SOMR - Somatic Rank</option>
589 <option value="CGA_SOMS">CGA_SOMS - Somatic Score</option>
590 </param>
591 </when>
592
593 <when value="CNV">
594 <!--CNV input location: only the "out" choice (list file on the server or a mounted drive) is offered for this source-->
595 <conditional name="data_sources">
596 <param name="data_source" type="select" label="Where are the input files?">
597 <option value="out" selected="true">located outside Galaxy (data on server or mounted drive)</option>
598 </param>
599
600 <when value="out">
601 <!--full path to a text file listing genome root directories, one per line; the empty_field validator rejects a blank entry-->
602 <param name="input" type="text" label="File with list of genome root directories" size="200" help="Enter file name with full path (/path/file). This file should contain a list of genome root directory names, one per line in the format /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01).">
603 <validator type="empty_field" message="You must supply the list of genome root directories"/>
604 </param>
605 </when>
606 </conditional>
607
608 <!--multi-select of the VCF fields offered for the CNV source; "all" is preselected-->
609 <param name="fields" type="select" label="Field names to be included in vcf file" multiple="true" help="Select all field names (default) or a collection of individual field names.">
610 <option value="all" selected="true">-- all (default) --</option>
611 <option value="GT">GT - Genotype</option>
612 <option value="CGA_GP">CGA_GP - Normalized mean GC corrected coverage</option>
613 <option value="CGA_NP">CGA_NP - Normalized mean coverage for 2k window</option>
614 <option value="CGA_CP">CGA_CP - Diploid-model ploidy call for segment</option>
615 <option value="CGA_PS">CGA_PS - Diploid-model called ploidy score</option>
616 <option value="CGA_CT">CGA_CT - Diploid-model CNV type</option>
617 <option value="CGA_TS">CGA_TS - Diploid-model CNV type score</option>
618 <option value="CGA_CL">CGA_CL - Nondiploid-model called level</option>
619 <option value="CGA_LS">CGA_LS - Nondiploid-model called level score</option>
620 <option value="CGA_SCL">CGA_SCL - Nondiploid-model somatic called level</option>
621 <option value="CGA_SLS">CGA_SLS - Non-diploid-model somatic called level score</option>
622 <option value="CGA_LAF">CGA_LAF - Lesser Allele Fraction estimate, 100k window</option>
623 <option value="CGA_LLAF">CGA_LLAF - Lesser Allele Fraction lower bound, 100k window</option>
624 <option value="CGA_ULAF">CGA_ULAF - Lesser Allele Fraction upper bound, 100k window</option>
625 </param>
626 </when>
627
628 <when value="SV">
629 <!--SV input location: datasets already imported into Galaxy ("in") or a list file on the server ("out"); the selected value is handed to the wrapper as the datasource argument-->
630 <conditional name="data_sources">
631 <param name="data_source" type="select" label="Where are the input files?">
632 <option value="in" selected="true">imported into Galaxy</option>
633 <option value="out">located outside Galaxy (data on server or mounted drive)</option>
634 </param>
635
636 <when value="in">
637 <!--one or two tabular SV datasets (min 1, max 2); each must pass dataset_ok_validator and its dbkey metadata is checked against cg_crr_files.loc-->
638 <repeat name="files" title="SV files" min="1" max="2">
639 <param name="input" type="data" format="tabular" label="Dataset">
640 <validator type="dataset_ok_validator" />
641 <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
642 metadata_name="dbkey" metadata_column="1"
643 message="cgatools is not currently available for this build."/>
644 </param>
645 </repeat>
646 </when>
647
648 <when value="out">
649 <!--full path to a text file listing genome root directories or SV files, one per line; the empty_field validator rejects a blank entry-->
650 <param name="input" type="text" label="File with list of genome root directories or SV files" size="200" help="Enter file name with full path (/path/file). This file should contain a list of genome root directory names, one per line in the format /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01), or a list of SV files, one per line in the format /path/SVfile (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01/ASM/SV/allJunctionsBeta-GS00000YYYY-ASM.tsv).">
651 <validator type="empty_field" message="You must supply the list of genome root directories or SV files"/>
652 </param>
653 </when>
654 </conditional>
655
656 <!--junction thresholds: four integer fields prefilled with the defaults named in their labels (10, 70, 200, 500); each rejects a blank entry-->
657 <param name="jctscore" type="integer" value="10" label="Junction score thresholds (discordant mate pair count) (default 10)">
658 <validator type="empty_field" message="You must enter a value, for the default value enter 10" />
659 </param>
660 <param name="jctside" type="integer" value="70" label="Junction side length threshold (default 70)">
661 <validator type="empty_field" message="You must enter a value, for the default value enter 70" />
662 </param>
663 <param name="jctdistance" type="integer" value="200" label="Distance tolerance for junction compatibility (default 200)">
664 <validator type="empty_field" message="You must enter a value, for the default value enter 200" />
665 </param>
666 <param name="jctlength" type="integer" value="500" label="Length threshold for compatible junctions (default 500)">
667 <validator type="empty_field" message="You must enter a value, for the default value enter 500" />
668 </param>
669
670 <!--normal junction priority toggle: "no" contributes an empty string to the command line, "yes" contributes the jctpriority flag-->
671 <param name="jctpriority" type="select" label="Use normal junction priority for vcf output?">
672 <option value="" selected="true">no</option>
673 <option value="--jctpriority">yes</option>
674 </param>
675
676 <!--high confidence tumor junction toggle: "no" contributes an empty string to the command line, "yes" contributes the jcttumor flag-->
677 <param name="jcttumor" type="select" label="Use high confidence junctions for tumors?">
678 <option value="" selected="true">no</option>
679 <option value="--jcttumor">yes</option>
680 </param>
681
682 <!--multi-select of the VCF fields offered for the SV source; "all" is preselected-->
683 <param name="fields" type="select" label="Field names to be included in vcf file" multiple="true" help="Select all field names (default) or a collection of individual field names.">
684 <option value="all" selected="true">-- all (default) --</option>
685 <option value="GT">GT - Genotype</option>
686 <option value="FT">FT - Sample genotype filters</option>
687 <option value="SVTYPE">SVTYPE - Type of structural variation</option>
688 <option value="CGA_BF">CGA_BF - Frequency in set of baseline genomes</option>
689 <option value="CGA_MEDEL">CGA_MEDEL - Mobile element deletion</option>
690 <option value="MATEID">MATEID - ID of mate breakend</option>
691 <option value="CGA_BNDG">CGA_BNDG - Transcript name and strand of genes containing breakend</option>
692 <option value="CGA_BNDGO">CGA_BNDGO - Transcript name and strand of genes containing mate breakend</option>
693 <option value="CGA_BNDP">CGA_BNDP - Precision of breakend</option>
694 <option value="CGA_BNDMPC">CGA_BNDMPC - Mate pair count supporting a breakend</option>
695 <option value="CGA_BNDPOS">CGA_BNDPOS - Position of breakend as detected in individual genome</option>
696 <option value="CGA_BNDDEF">CGA_BNDDEF - Breakend definition in individual genome</option>
697 </param>
698 </when>
699 </conditional>
700 </when>
701
702 <when value="3">
703 <!--case "3" of the enclosing conditional: choose the source combination (masterVar + CNV is preselected; masterVar alone or CNV alone are the alternatives)-->
704 <conditional name="sources">
705 <param name="source" type="select" label="Data sources to be included for each genome">
706 <option value="masterVar,CNV" selected="true">masterVar + CNV</option>
707 <option value="masterVar">masterVar</option>
708 <option value="CNV">CNV</option>
709 </param>
710
711 <when value="masterVar,CNV">
712 <!--masterVar + CNV input location: only the "out" choice (list file on the server or a mounted drive) is offered for this combination-->
713 <conditional name="data_sources">
714 <param name="data_source" type="select" label="Where are the input files?">
715 <option value="out" selected="true">located outside Galaxy (data on server or mounted drive)</option>
716 </param>
717
718 <when value="out">
719 <!--full path to a text file listing genome root directories, one per line; the empty_field validator rejects a blank entry-->
720 <param name="input" type="text" label="File with list of genome root directories" size="200" help="Enter file name with full path (/path/file). This file should contain a list of genome root directory names, one per line in the format /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01).">
721 <validator type="empty_field" message="You must supply the list of genome root directories"/>
722 </param>
723 </when>
724 </conditional>
725
726 <!--no-call toggle: "no" contributes an empty string to the command line, "yes" contributes the nocalls flag-->
727 <param name="nocalls" type="select" label="Include no-calls?">
728 <option value="" selected="true">no</option>
729 <option value="--nocalls">yes</option>
730 </param>
731
732 <!--calibration data directory; free text with no validator attached. NOTE(review): the command block always emits the calibration argument for this source, confirm the wrapper tolerates an empty value-->
733 <param name="calibration" type="text" size="300" label="Directory calibration data (/path/calibration-root)" help="The directory containing calibration data. For example, there should exist a file calibration-root/0.0.0/metrics.tsv. Calibration data can be downloaded from ftp://ftp.completegenomics.com/ScoreCalibrationFiles/var-calibration-v2.tgz"/>
734
735 <!--multi-select of the VCF fields offered for masterVar + CNV (the masterVar options followed by the CNV options); "all" is preselected-->
736 <param name="fields" type="select" label="Field names to be included in vcf file" multiple="true" help="Select all field names (default) or a collection of individual field names.">
737 <option value="all" selected="true">-- all (default) --</option>
738 <option value="NS">NS - Number of samples</option>
739 <option value="AN">AN - Total number of alleles in called genotypes</option>
740 <option value="AC">AC - Allele count in genotypes</option>
741 <option value="CGA_XR">CGA_XR - External database reference</option>
742 <option value="CGA_FI">CGA_FI - Functional impact</option>
743 <option value="CGA_PFAM">CGA_PFAM - PFAM domain </option>
744 <option value="CGA_MIRB">CGA_MIRB - miRBaseId</option>
745 <option value="CGA_SDO">CGA_SDO - Depth of overlapping segmental duplications</option>
746 <option value="CGA_RPT">CGA_RPT - Overlapping repeatMasker annotations</option>
747 <option value="GT">GT - Genotype</option>
748 <option value="PS">PS - Phase set</option>
749 <option value="FT">FT - Sample genotype filters</option>
750 <option value="GL">GL - Genotype likelihoods</option>
751 <option value="CGA_CEHQ">CGA_CEHQ - Calibrated haplotype quality based on EAF assumption</option>
752 <option value="CGA_CEGL">CGA_CEGL - Genotype likelihoods based on CEHQ</option>
753 <option value="SS">SS - Somatic status</option>
754 <option value="HQ">HQ - Haplotype quality</option>
755 <option value="EHQ">EHQ - Haplotype quality based on EAF assumption</option>
756 <option value="GQ">GQ - Genotype quality</option>
757 <option value="DP">DP - Total read depth</option>
758 <option value="AD">AD - Allelic depths</option>
759 <option value="CGA_RDP">CGA_RDP - Read depth in reference</option>
760 <option value="CGA_ODP">CGA_ODP - Other total read depth: somatic comparison</option>
761 <option value="CGA_OAD">CGA_OAD - Other allelic depths: somatic comparison</option>
762 <option value="CGA_ORDP">CGA_ORDP - Other reference depth: somatic comparison </option>
763 <option value="CGA_SOMC">CGA_SOMC - Somatic Category</option>
764 <option value="CGA_SOMR">CGA_SOMR - Somatic Rank</option>
765 <option value="CGA_SOMS">CGA_SOMS - Somatic Score</option>
766 <option value="CGA_GP">CGA_GP - Normalized mean GC corrected coverage</option>
767 <option value="CGA_NP">CGA_NP - Normalized mean coverage for 2k window</option>
768 <option value="CGA_CP">CGA_CP - Diploid-model ploidy call for segment</option>
769 <option value="CGA_PS">CGA_PS - Diploid-model called ploidy score</option>
770 <option value="CGA_CT">CGA_CT - Diploid-model CNV type</option>
771 <option value="CGA_TS">CGA_TS - Diploid-model CNV type score</option>
772 <option value="CGA_CL">CGA_CL - Nondiploid-model called level</option>
773 <option value="CGA_LS">CGA_LS - Nondiploid-model called level score</option>
774 <option value="CGA_SCL">CGA_SCL - Nondiploid-model somatic called level</option>
775 <option value="CGA_SLS">CGA_SLS - Non-diploid-model somatic called level score</option>
776 <option value="CGA_LAF">CGA_LAF - Lesser Allele Fraction estimate, 100k window</option>
777 <option value="CGA_LLAF">CGA_LLAF - Lesser Allele Fraction lower bound, 100k window</option>
778 <option value="CGA_ULAF">CGA_ULAF - Lesser Allele Fraction upper bound, 100k window</option>
779 </param>
780 </when>
781
782 <when value="masterVar">
783 <!--masterVar input location: datasets already imported into Galaxy ("in") or a list file on the server ("out"); the selected value is handed to the wrapper as the datasource argument-->
784 <conditional name="data_sources">
785 <param name="data_source" type="select" label="Where are the input files?">
786 <option value="in" selected="true">imported into Galaxy</option>
787 <option value="out">located outside Galaxy (data on server or mounted drive)</option>
788 </param>
789
790 <when value="in">
791 <!--one or more cg_mastervar datasets (min 1, no max set); each must pass dataset_ok_validator and its dbkey metadata is checked against cg_crr_files.loc-->
792 <repeat name="files" title="MasterVar files" min="1">
793 <param name="input" type="data" format="cg_mastervar" label="Dataset">
794 <validator type="dataset_ok_validator" />
795 <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
796 metadata_name="dbkey" metadata_column="1"
797 message="cgatools is not currently available for this build."/>
798 </param>
799 </repeat>
800 </when>
801
802 <when value="out">
803 <!--full path to a text file listing genome root directories or masterVar files, one per line; the empty_field validator rejects a blank entry-->
804 <param name="input" type="text" label="File with list of genome root directories or masterVar files" size="200" help="Enter file name with full path (/path/file). This file should contain a list of genome root directory names, one per line in the format /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01), or a list of masterVar files, one per line in the format /path/masterVarfile (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01/ASM/masterVarBeta-GS00000YYYY-ASM.tsv.bz2).">
805 <validator type="empty_field" message="You must supply the list of genome root directories or masterVar files"/>
806 </param>
807 </when>
808 </conditional>
809
810 <!--no-call toggle: "no" contributes an empty string to the command line, "yes" contributes the nocalls flag-->
811 <param name="nocalls" type="select" label="Include no-calls?">
812 <option value="" selected="true">no</option>
813 <option value="--nocalls">yes</option>
814 </param>
815
816 <!--calibration data directory; free text with no validator attached. NOTE(review): the command block always emits the calibration argument for this source, confirm the wrapper tolerates an empty value-->
817 <param name="calibration" type="text" size="300" label="Directory calibration data (/path/calibration-root)" help="The directory containing calibration data. For example, there should exist a file calibration-root/0.0.0/metrics.tsv. Calibration data can be downloaded from ftp://ftp.completegenomics.com/ScoreCalibrationFiles/var-calibration-v2.tgz"/>
818
819 <!--multi-select of the VCF fields offered for the masterVar source; "all" is preselected-->
820 <param name="fields" type="select" label="Field names to be included in vcf file" multiple="true" help="Select all field names (default) or a collection of individual field names.">
821 <option value="all" selected="true">-- all (default) --</option>
822 <option value="NS">NS - Number of samples</option>
823 <option value="AN">AN - Total number of alleles in called genotypes</option>
824 <option value="AC">AC - Allele count in genotypes</option>
825 <option value="CGA_XR">CGA_XR - External database reference</option>
826 <option value="CGA_FI">CGA_FI - Functional impact</option>
827 <option value="CGA_PFAM">CGA_PFAM - PFAM domain </option>
828 <option value="CGA_MIRB">CGA_MIRB - miRBaseId</option>
829 <option value="CGA_SDO">CGA_SDO - Depth of overlapping segmental duplications</option>
830 <option value="CGA_RPT">CGA_RPT - Overlapping repeatMasker annotations</option>
831 <option value="GT">GT - Genotype</option>
832 <option value="PS">PS - Phase set</option>
833 <option value="FT">FT - Sample genotype filters</option>
834 <option value="GL">GL - Genotype likelihoods</option>
835 <option value="CGA_CEHQ">CGA_CEHQ - Calibrated haplotype quality based on EAF assumption</option>
836 <option value="CGA_CEGL">CGA_CEGL - Genotype likelihoods based on CEHQ</option>
837 <option value="SS">SS - Somatic status</option>
838 <option value="HQ">HQ - Haplotype quality</option>
839 <option value="EHQ">EHQ - Haplotype quality based on EAF assumption</option>
840 <option value="GQ">GQ - Genotype quality</option>
841 <option value="DP">DP - Total read depth</option>
842 <option value="AD">AD - Allelic depths</option>
843 <option value="CGA_RDP">CGA_RDP - Read depth in reference</option>
844 <option value="CGA_ODP">CGA_ODP - Other total read depth: somatic comparison</option>
845 <option value="CGA_OAD">CGA_OAD - Other allelic depths: somatic comparison</option>
846 <option value="CGA_ORDP">CGA_ORDP - Other reference depth: somatic comparison </option>
847 <option value="CGA_SOMC">CGA_SOMC - Somatic Category</option>
848 <option value="CGA_SOMR">CGA_SOMR - Somatic Rank</option>
849 <option value="CGA_SOMS">CGA_SOMS - Somatic Score</option>
850 </param>
851 </when>
852
853 <when value="CNV">
854 <!--CNV input location: only the "out" choice (list file on the server or a mounted drive) is offered for this source-->
855 <conditional name="data_sources">
856 <param name="data_source" type="select" label="Where are the input files?">
857 <option value="out" selected="true">located outside Galaxy (data on server or mounted drive)</option>
858 </param>
859
860 <when value="out">
861 <!--full path to a text file listing genome root directories, one per line; the empty_field validator rejects a blank entry-->
862 <param name="input" type="text" label="File with list of genome root directories" size="200" help="Enter file name with full path (/path/file). This file should contain a list of genome root directory names, one per line in the format /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01).">
863 <validator type="empty_field" message="You must supply the list of genome root directories"/>
864 </param>
865 </when>
866 </conditional>
867
868 <!--multi-select of the VCF fields offered for the CNV source; "all" is preselected-->
869 <param name="fields" type="select" label="Field names to be included in vcf file" multiple="true" help="Select all field names (default) or a collection of individual field names.">
870 <option value="all" selected="true">-- all (default) --</option>
871 <option value="GT">GT - Genotype</option>
872 <option value="CGA_GP">CGA_GP - Normalized mean GC corrected coverage</option>
873 <option value="CGA_NP">CGA_NP - Normalized mean coverage for 2k window</option>
874 <option value="CGA_CP">CGA_CP - Diploid-model ploidy call for segment</option>
875 <option value="CGA_PS">CGA_PS - Diploid-model called ploidy score</option>
876 <option value="CGA_CT">CGA_CT - Diploid-model CNV type</option>
877 <option value="CGA_TS">CGA_TS - Diploid-model CNV type score</option>
878 <option value="CGA_CL">CGA_CL - Nondiploid-model called level</option>
879 <option value="CGA_LS">CGA_LS - Nondiploid-model called level score</option>
880 <option value="CGA_SCL">CGA_SCL - Nondiploid-model somatic called level</option>
881 <option value="CGA_SLS">CGA_SLS - Non-diploid-model somatic called level score</option>
882 <option value="CGA_LAF">CGA_LAF - Lesser Allele Fraction estimate, 100k window</option>
883 <option value="CGA_LLAF">CGA_LLAF - Lesser Allele Fraction lower bound, 100k window</option>
884 <option value="CGA_ULAF">CGA_ULAF - Lesser Allele Fraction upper bound, 100k window</option>
885 </param>
886 </when>
887 </conditional>
888 </when>
889 </conditional>
890 </inputs>
891
892 <help>
893
894 **What it does**
895
896 This tool uses cgatools mkvcf to convert Complete Genomics masterVar files, including CNV, SV and/or MEI data, to VCF format.
897
898 **cgatools 1.6.0 Documentation**
899
900 User guide: http://cgatools.sourceforge.net/docs/1.6.0/cgatools-user-guide.pdf
901
902 Release notes: http://cgatools.sourceforge.net/docs/1.6.0/cgatools-release-notes.pdf
903
904 **Command line reference**::
905
906 COMMAND NAME
907 mkvcf - Converts var file(s) or masterVar file(s) to VCF.
908
909 DESCRIPTION
910 Converts var file(s) or masterVar file(s) to VCF.
911
912 OPTIONS
913 -h [ --help ]
914 Print this help message.
915
916 --beta
917 This is a beta command. To run this command, you must pass the --beta
918 flag.
919
920 --reference arg
921 The reference crr file.
922
923 --output arg (=STDOUT)
924 The output file (may be omitted for stdout).
925
926 --field-names arg (=GT,PS,NS,AN,AC,SS,FT,CGA_XR,CGA_FI,GQ,HQ,EHQ,CGA_CEHQ,GL,
927 CGA_CEGL,DP,AD,CGA_RDP,CGA_ODP,CGA_OAD,CGA_ORDP,CGA_PFAM,CGA_MIRB,CGA_RPT,
928 CGA_SDO,CGA_SOMC,CGA_SOMR,CGA_SOMS,CGA_GP,CGA_NP,CGA_CP,CGA_PS,CGA_CT,
929 CGA_TS,CGA_CL,CGA_LS,CGA_SCL,CGA_SLS,CGA_LAF,CGA_LLAF,CGA_ULAF,CGA_IS,
930 CGA_IDC,CGA_IDCL,CGA_IDCR,CGA_RDC,CGA_NBET,CGA_ETS,CGA_KES,CGA_BF,
931 CGA_MEDEL,MATEID,SVTYPE,CGA_BNDG,CGA_BNDGO,CGA_BNDMPC,CGA_BNDPOS,CGA_BNDDEF,
932 CGA_BNDP)
933 Comma-separated list of field names. By default, all fields are
934 included, but you may override this option to ensure only a subset of
935 the fields is included in the VCF output. For a description of each
936 field, see the cgatools user guide.
937
938 --source-names arg (=masterVar,CNV,SV,MEI)
939 Comma-separated list of source names. The following source names are
940 available:
941 masterVar - Includes records extracted from the masterVar file.
942 CNV - Includes CNV-related records.
943 SV - Includes records derived from junctions files.
944 MEI - Includes records describing mobile element insertions.
945 Some of these source types are only available for more recent pipeline
946 versions, and some of these source types do not support multi-genome
947 VCFs. For more information about which source types are available for
948 which versions of the Complete Genomics pipeline software, see the
949 cgatools user guide.
950
951 --genome-root arg
952 For each genome to include in the VCF, the genome root directory, for
953 example /data/GS00118-DNA_A01; this directory is expected to contain
954 the ASM and LIB subdirectories, for example. You must supply this
955 option for each genome in the VCF, unless you are using
956 --source-names=masterVar and you have specified the --master-var option
957 for each genome in the VCF.
958
959 --master-var arg
960 For each genome to include in the VCF, the masterVar file. If
961 genome-roots parameter is given, this parameter defaults to the
962 masterVar in the given genome-root.
963
964 --include-no-calls
965 Small variants VCF records include loci that have no
966 reference-inconsistent calls.
967
968 --calibration-root arg
969 The directory containing calibration data. For example, there should
970 exist a file calibration-root/version0.0.0/metrics.tsv. This option is only
971 required if CGA_CEHQ or CGA_CEGL are included in the --field-names
972 parameter.
973
974 --junction-file arg
975 For each genome to include in the VCF, the junctions file. If
976 genome-roots parameter is given, this parameter defaults to the
977 respective junctions file in the export directory.
978
979 --junction-score-threshold arg (=10)
980 Junction score thresholds (discordant mate pair count).
981
982 --junction-side-length-threshold arg (=70)
983 Junction side length threshold.
984
985 --junction-distance-tolerance arg (=200)
986 Distance tolerance for junction compatibility.
987
988 --junction-length-threshold arg (=500)
989 Length threshold for compatible junctions.
990
991 --junction-normal-priority
992 Normal junction priority for vcf output.
993
994 --junction-tumor-hc
995 use high confidence junctions for tumors.
996
997
998 SUPPORTED FORMAT_VERSION
999 0.3 or later
1000 </help>
1001 </tool>