0
|
1 <tool id="cg_mkvcf" name="mkvcf(beta) 1.6" version="1.0.0">
|
|
<!--
This tool creates a GUI for the mkvcf function of cgatools from Complete Genomics, Inc.
written 7-31-2012 by bcrain@completegenomics.com
-->
|
|
6
|
|
7 <description>converts to vcf</description>
|
|
8
|
|
<!--
  Command line for the Perl wrapper script mkvcf_wrapper.pl.

  Option groups emitted by the template:
    * always: reference, output, genomes, source, datasource, fields
    * input: one per entry of the "files" repeat when data_source is "in",
      otherwise the single free-text path
    * no-calls flag and calibration directory: sources masterVar and
      masterVar,CNV
    * junction (jct*) thresholds and flags: source SV
    * both of the above groups: sources masterVar,CNV,SV and
      masterVar,CNV,SV,MEI

  Path-like and free-text values are single-quoted so that a value containing
  spaces stays one shell argument, and so that an empty calibration field is
  passed as an empty argument instead of letting the next option slide into
  its place. The nocalls, jctpriority and jcttumor selects expand either to a
  complete flag or to the empty string, so they must stay unquoted.

  This comment sits in front of the command element (not inside it) so that
  the template is one uninterrupted text node.
  NOTE(review): how the wrapper treats an empty calibration argument is not
  visible here; confirm against mkvcf_wrapper.pl.
-->
<command interpreter="perl">
    mkvcf_wrapper.pl
    --reference '$crr.fields.path'
    --output '$output'
    --genomes $count.genomes
    --source $count.sources.source
    --datasource $count.sources.data_sources.data_source
    #if $count.sources.data_sources.data_source=="in"
        #for $m in $count.sources.data_sources.files
            --input '$m.input'
        #end for
    #else
        --input '$count.sources.data_sources.input'
    #end if
    #if $count.sources.source=="masterVar" or $count.sources.source=="masterVar,CNV"
        $count.sources.nocalls
        --calibration '$count.sources.calibration'
    #else if $count.sources.source=="SV"
        --jctscore $count.sources.jctscore
        --jctside $count.sources.jctside
        --jctdistance $count.sources.jctdistance
        --jctlength $count.sources.jctlength
        $count.sources.jctpriority
        $count.sources.jcttumor
    #else if $count.sources.source=="masterVar,CNV,SV" or $count.sources.source=="masterVar,CNV,SV,MEI"
        $count.sources.nocalls
        --calibration '$count.sources.calibration'
        --jctscore $count.sources.jctscore
        --jctside $count.sources.jctside
        --jctdistance $count.sources.jctdistance
        --jctlength $count.sources.jctlength
        $count.sources.jctpriority
        $count.sources.jcttumor
    #end if
    --fields $count.sources.fields
</command>
|
|
46
|
|
<!-- One result dataset in vcf format; the command template refers to it as $output -->
<outputs>
    <data name="output"
          format="vcf"
          label="${tool.name} output"/>
</outputs>
|
|
50
|
|
51 <inputs>
|
|
<!-- Reference genome selector: the choices are read from the cg_crr_files data table -->
<param name="crr"
       type="select"
       label="Reference genome (.crr file)">
    <options from_data_table="cg_crr_files"/>
</param>
|
|
56
|
|
57 <!--select number of genomes - determines which input sources to show-->
|
|
58 <conditional name="count">
|
|
<!-- Test parameter of the "count" conditional: its value picks the matching when-branch -->
<param name="genomes"
       type="select"
       label="Select the number of genomes to add to the vcf file"
       help="Note: multi-genome vcfs (2 or more genomes) can only be generated for format version 2.0 and up">
    <option value="1" selected="true">1 - allowed data sources are masterVar, CNV, SV, MEI</option>
    <option value="2">2 - allowed data sources are masterVar, CNV, SV (format v2.x)</option>
    <option value="3">3 or more - allowed data sources are masterVar, CNV (format v2.x)</option>
</param>
|
|
64
|
|
65 <when value="1">
|
|
66 <!--form field to select input sources-->
|
|
67 <conditional name="sources">
|
|
<!-- Test parameter of the "sources" conditional for a single genome; each value has its own when-branch -->
<param name="source"
       type="select"
       label="Data sources to be included for this genome">
    <option value="masterVar,CNV,SV,MEI" selected="true">masterVar + CNV + SV + MEI</option>
    <option value="masterVar">masterVar</option>
    <option value="CNV">CNV</option>
    <option value="SV">SV</option>
    <option value="MEI">MEI</option>
</param>
|
|
75
|
|
<when value="masterVar,CNV,SV,MEI">
    <!-- Input location: "out" is the only choice offered for this source combination -->
    <conditional name="data_sources">
        <param name="data_source" type="select" label="Where are the input files?">
            <option value="out" selected="true">located outside Galaxy (data on server or mounted drive)</option>
        </param>

        <when value="out">
            <!-- Free-text path of the genome root directory; an empty value is rejected -->
            <param name="input" type="text" size="200"
                   label="Genome root directory"
                   help="Enter full path /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01).">
                <validator type="empty_field" message="You must supply the genome root directory"/>
            </param>
        </when>
    </conditional>

    <!-- No-calls switch: the selected value (a flag or nothing) is placed on the command line as is -->
    <param name="nocalls" type="select" label="Include no-calls?">
        <option value="" selected="true">no</option>
        <option value="--nocalls">yes</option>
    </param>

    <!-- Free-text path of the calibration root directory (no validator attached) -->
    <param name="calibration" type="text" size="300"
           label="Directory calibration data (/path/calibration-root)"
           help="The directory containing calibration data. For example, there should exist a file calibration-root/0.0.0/metrics.tsv. Calibration data can be downloaded from ftp://ftp.completegenomics.com/ScoreCalibrationFiles/var-calibration-v2.tgz"/>

    <!-- Junction thresholds: four integers, each pre-filled with the default named in its label -->
    <param name="jctscore" type="integer" value="10"
           label="Junction score thresholds (discordant mate pair count) (default 10)">
        <validator type="empty_field" message="You must enter a value, for the default value enter 10"/>
    </param>
    <param name="jctside" type="integer" value="70"
           label="Junction side length threshold (default 70)">
        <validator type="empty_field" message="You must enter a value, for the default value enter 70"/>
    </param>
    <param name="jctdistance" type="integer" value="200"
           label="Distance tolerance for junction compatibility (default 200)">
        <validator type="empty_field" message="You must enter a value, for the default value enter 200"/>
    </param>
    <param name="jctlength" type="integer" value="500"
           label="Length threshold for compatible junctions (default 500)">
        <validator type="empty_field" message="You must enter a value, for the default value enter 500"/>
    </param>

    <!-- Switch for normal junction priority in the vcf output (flag or nothing) -->
    <param name="jctpriority" type="select" label="Use normal junction priority for vcf output?">
        <option value="" selected="true">no</option>
        <option value="--jctpriority">yes</option>
    </param>

    <!-- Switch for high confidence junctions in tumors (flag or nothing) -->
    <param name="jcttumor" type="select" label="Use high confidence junctions for tumors?">
        <option value="" selected="true">no</option>
        <option value="--jcttumor">yes</option>
    </param>

    <!-- Multi-select of vcf field names; "all" is pre-selected -->
    <param name="fields" type="select" multiple="true"
           label="Field names to be included in vcf file"
           help="Select all field names (default) or a collection of individual field names.">
        <option value="all" selected="true">-- all (default) --</option>
        <option value="NS">NS - Number of samples</option>
        <option value="AN">AN - Total number of alleles in called genotypes</option>
        <option value="AC">AC - Allele count in genotypes</option>
        <option value="CGA_XR">CGA_XR - External database reference</option>
        <option value="CGA_FI">CGA_FI - Functional impact</option>
        <option value="CGA_PFAM">CGA_PFAM - PFAM domain </option>
        <option value="CGA_MIRB">CGA_MIRB - miRBaseId</option>
        <option value="CGA_SDO">CGA_SDO - Depth of overlapping segmental duplications</option>
        <option value="CGA_RPT">CGA_RPT - Overlapping repeatMasker annotations</option>
        <option value="GT">GT - Genotype</option>
        <option value="PS">PS - Phase set</option>
        <option value="FT">FT - Sample genotype filters</option>
        <option value="GL">GL - Genotype likelihoods</option>
        <option value="CGA_CEHQ">CGA_CEHQ - Calibrated haplotype quality based on EAF assumption</option>
        <option value="CGA_CEGL">CGA_CEGL - Genotype likelihoods based on CEHQ</option>
        <option value="SS">SS - Somatic status</option>
        <option value="HQ">HQ - Haplotype quality</option>
        <option value="EHQ">EHQ - Haplotype quality based on EAF assumption</option>
        <option value="GQ">GQ - Genotype quality</option>
        <option value="DP">DP - Total read depth</option>
        <option value="AD">AD - Allelic depths</option>
        <option value="CGA_RDP">CGA_RDP - Read depth in reference</option>
        <option value="CGA_ODP">CGA_ODP - Other total read depth: somatic comparison</option>
        <option value="CGA_OAD">CGA_OAD - Other allelic depths: somatic comparison</option>
        <option value="CGA_ORDP">CGA_ORDP - Other reference depth: somatic comparison </option>
        <option value="CGA_SOMC">CGA_SOMC - Somatic Category</option>
        <option value="CGA_SOMR">CGA_SOMR - Somatic Rank</option>
        <option value="CGA_SOMS">CGA_SOMS - Somatic Score</option>
        <option value="CGA_GP">CGA_GP - Normalized mean GC corrected coverage</option>
        <option value="CGA_NP">CGA_NP - Normalized mean coverage for 2k window</option>
        <option value="CGA_CP">CGA_CP - Diploid-model ploidy call for segment including this interval</option>
        <option value="CGA_PS">CGA_PS - Diploid-model called ploidy score</option>
        <option value="CGA_CT">CGA_CT - Diploid-model CNV type</option>
        <option value="CGA_TS">CGA_TS - Diploid-model CNV type score</option>
        <option value="CGA_CL">CGA_CL - Nondiploid-model called level</option>
        <option value="CGA_LS">CGA_LS - Nondiploid-model called level score</option>
        <option value="CGA_SCL">CGA_SCL - Nondiploid-model somatic called level</option>
        <option value="CGA_SLS">CGA_SLS - Non-diploid-model somatic called level score</option>
        <option value="CGA_LAF">CGA_LAF - Lesser Allele Fraction estimate, 100k window</option>
        <option value="CGA_LLAF">CGA_LLAF - Lesser Allele Fraction lower bound, 100k window</option>
        <option value="CGA_ULAF">CGA_ULAF - Lesser Allele Fraction upper bound, 100k window</option>
        <option value="SVTYPE">SVTYPE - Type of structural variation</option>
        <option value="CGA_BF">CGA_BF - Frequency in set of baseline genomes</option>
        <option value="CGA_MEDEL">CGA_MEDEL - Mobile element deletion</option>
        <option value="MATEID">MATEID - ID of mate breakend</option>
        <option value="CGA_BNDG">CGA_BNDG - Transcript name and strand of genes containing breakend</option>
        <option value="CGA_BNDGO">CGA_BNDGO - Transcript name and strand of genes containing mate breakend</option>
        <option value="CGA_BNDP">CGA_BNDP - Precision of breakend</option>
        <option value="CGA_BNDMPC">CGA_BNDMPC - Mate pair count supporting a breakend</option>
        <option value="CGA_BNDPOS">CGA_BNDPOS - Position of breakend as detected in individual genome</option>
        <option value="CGA_BNDDEF">CGA_BNDDEF - Breakend definition in individual genome</option>
        <option value="CGA_IS">CGA_IS - Measure of confidence that there is a mobile element insertion</option>
        <option value="CGA_IDC">CGA_IDC - Count of paired ends consistently indicating a mobile element insertion</option>
        <option value="CGA_IDCL">CGA_IDCL - Count of paired ends indicating a mobile element insertion anchored 5'</option>
        <option value="CGA_IDCR">CGA_IDCR - Count of paired ends indicating a mobile element insertion anchored 3'</option>
        <option value="CGA_RDC">CGA_RDC - Count of paired ends supporting the presence of a reference allele</option>
        <option value="CGA_NBET">CGA_NBET - Next-best estimate of type of MEI</option>
        <option value="CGA_ETS">CGA_ETS - Measure of confidence that the ElementType (MEINFO:NAME) is correct</option>
        <option value="CGA_KES">CGA_KES - Fraction of known MEI with at least as good an InsertionScore</option>
    </param>
</when>
|
|
190
|
|
<when value="masterVar">
    <!-- Input location: a dataset from the Galaxy history ("in") or a path typed by the user ("out") -->
    <conditional name="data_sources">
        <param name="data_source" type="select" label="Where is the input file?">
            <option value="in" selected="true">imported into Galaxy</option>
            <option value="out">located outside Galaxy (data on server or mounted drive)</option>
        </param>

        <when value="in">
            <!-- Repeat limited to exactly one entry (min = max = 1) holding a cg_mastervar dataset -->
            <repeat name="files" title="MasterVar file" min="1" max="1">
                <param name="input" type="data" format="cg_mastervar" label="Dataset">
                    <validator type="dataset_ok_validator"/>
                    <!-- The dataset's dbkey has to appear in column 1 of cg_crr_files.loc -->
                    <validator type="dataset_metadata_in_file"
                               filename="cg_crr_files.loc"
                               metadata_name="dbkey"
                               metadata_column="1"
                               message="cgatools is not currently available for this build."/>
                </param>
            </repeat>
        </when>

        <when value="out">
            <!-- Free-text path to a genome root directory or a masterVar file; an empty value is rejected -->
            <param name="input" type="text" size="200"
                   label="Genome root directory or masterVar file"
                   help="Enter full path /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01), or /path/masterVarfile (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01/ASM/masterVarBeta-GS00000YYYY-ASM.tsv.bz2).">
                <validator type="empty_field" message="You must supply the genome root directory or masterVar file"/>
            </param>
        </when>
    </conditional>

    <!-- No-calls switch: the selected value (a flag or nothing) is placed on the command line as is -->
    <param name="nocalls" type="select" label="Include no-calls?">
        <option value="" selected="true">no</option>
        <option value="--nocalls">yes</option>
    </param>

    <!-- Free-text path of the calibration root directory (no validator attached) -->
    <param name="calibration" type="text" size="300"
           label="Directory calibration data (/path/calibration-root)"
           help="The directory containing calibration data. For example, there should exist a file calibration-root/0.0.0/metrics.tsv. Calibration data can be downloaded from ftp://ftp.completegenomics.com/ScoreCalibrationFiles/var-calibration-v2.tgz"/>

    <!-- Multi-select of vcf field names; "all" is pre-selected -->
    <param name="fields" type="select" multiple="true"
           label="Field names to be included in vcf file"
           help="Select all field names (default) or a collection of individual field names.">
        <option value="all" selected="true">-- all (default) --</option>
        <option value="NS">NS - Number of samples</option>
        <option value="AN">AN - Total number of alleles in called genotypes</option>
        <option value="AC">AC - Allele count in genotypes</option>
        <option value="CGA_XR">CGA_XR - External database reference</option>
        <option value="CGA_FI">CGA_FI - Functional impact</option>
        <option value="CGA_PFAM">CGA_PFAM - PFAM domain </option>
        <option value="CGA_MIRB">CGA_MIRB - miRBaseId</option>
        <option value="CGA_SDO">CGA_SDO - Depth of overlapping segmental duplications</option>
        <option value="CGA_RPT">CGA_RPT - Overlapping repeatMasker annotations</option>
        <option value="GT">GT - Genotype</option>
        <option value="PS">PS - Phase set</option>
        <option value="FT">FT - Sample genotype filters</option>
        <option value="GL">GL - Genotype likelihoods</option>
        <option value="CGA_CEHQ">CGA_CEHQ - Calibrated haplotype quality based on EAF assumption</option>
        <option value="CGA_CEGL">CGA_CEGL - Genotype likelihoods based on CEHQ</option>
        <option value="SS">SS - Somatic status</option>
        <option value="HQ">HQ - Haplotype quality</option>
        <option value="EHQ">EHQ - Haplotype quality based on EAF assumption</option>
        <option value="GQ">GQ - Genotype quality</option>
        <option value="DP">DP - Total read depth</option>
        <option value="AD">AD - Allelic depths</option>
        <option value="CGA_RDP">CGA_RDP - Read depth in reference</option>
        <option value="CGA_ODP">CGA_ODP - Other total read depth: somatic comparison</option>
        <option value="CGA_OAD">CGA_OAD - Other allelic depths: somatic comparison</option>
        <option value="CGA_ORDP">CGA_ORDP - Other reference depth: somatic comparison </option>
        <option value="CGA_SOMC">CGA_SOMC - Somatic Category</option>
        <option value="CGA_SOMR">CGA_SOMR - Somatic Rank</option>
        <option value="CGA_SOMS">CGA_SOMS - Somatic Score</option>
    </param>
</when>
|
|
261
|
|
<when value="CNV">
    <!-- Input location: "out" is the only choice offered for CNV data -->
    <conditional name="data_sources">
        <param name="data_source" type="select" label="Where are the input files?">
            <option value="out" selected="true">located outside Galaxy (data on server or mounted drive)</option>
        </param>

        <when value="out">
            <!-- Free-text path of the genome root directory; an empty value is rejected -->
            <param name="input" type="text" size="200"
                   label="Genome root directory"
                   help="Enter full path /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01).">
                <validator type="empty_field" message="You must supply the genome root directory"/>
            </param>
        </when>
    </conditional>

    <!-- Multi-select of vcf field names offered for CNV data; "all" is pre-selected -->
    <param name="fields" type="select" multiple="true"
           label="Field names to be included in vcf file"
           help="Select all field names (default) or a collection of individual field names.">
        <option value="all" selected="true">-- all (default) --</option>
        <option value="GT">GT - Genotype</option>
        <option value="CGA_GP">CGA_GP - Normalized mean GC corrected coverage</option>
        <option value="CGA_NP">CGA_NP - Normalized mean coverage for 2k window</option>
        <option value="CGA_CP">CGA_CP - Diploid-model ploidy call for segment</option>
        <option value="CGA_PS">CGA_PS - Diploid-model called ploidy score</option>
        <option value="CGA_CT">CGA_CT - Diploid-model CNV type</option>
        <option value="CGA_TS">CGA_TS - Diploid-model CNV type score</option>
        <option value="CGA_CL">CGA_CL - Nondiploid-model called level</option>
        <option value="CGA_LS">CGA_LS - Nondiploid-model called level score</option>
        <option value="CGA_SCL">CGA_SCL - Nondiploid-model somatic called level</option>
        <option value="CGA_SLS">CGA_SLS - Non-diploid-model somatic called level score</option>
        <option value="CGA_LAF">CGA_LAF - Lesser Allele Fraction estimate, 100k window</option>
        <option value="CGA_LLAF">CGA_LLAF - Lesser Allele Fraction lower bound, 100k window</option>
        <option value="CGA_ULAF">CGA_ULAF - Lesser Allele Fraction upper bound, 100k window</option>
    </param>
</when>
|
|
296
|
|
<when value="SV">
    <!-- Input location: a dataset from the Galaxy history ("in") or a path typed by the user ("out") -->
    <conditional name="data_sources">
        <param name="data_source" type="select" label="Where are the input files?">
            <option value="in" selected="true">imported into Galaxy</option>
            <option value="out">located outside Galaxy (data on server or mounted drive)</option>
        </param>

        <when value="in">
            <!-- Repeat limited to exactly one entry (min = max = 1) holding a tabular dataset -->
            <repeat name="files" title="SV file" min="1" max="1">
                <param name="input" type="data" format="tabular" label="Dataset">
                    <validator type="dataset_ok_validator"/>
                    <!-- The dataset's dbkey has to appear in column 1 of cg_crr_files.loc -->
                    <validator type="dataset_metadata_in_file"
                               filename="cg_crr_files.loc"
                               metadata_name="dbkey"
                               metadata_column="1"
                               message="cgatools is not currently available for this build."/>
                </param>
            </repeat>
        </when>

        <when value="out">
            <!-- Free-text path to a genome root directory or an SV file; an empty value is rejected -->
            <param name="input" type="text" size="200"
                   label="Genome root directory or SV file"
                   help="Enter full path /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01), or /path/SVfile (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01/ASM/SV/allJunctionsBeta-GS00000YYYY-ASM.tsv).">
                <validator type="empty_field" message="You must supply the genome root directory or SV file"/>
            </param>
        </when>
    </conditional>

    <!-- Junction thresholds: four integers, each pre-filled with the default named in its label -->
    <param name="jctscore" type="integer" value="10"
           label="Junction score thresholds (discordant mate pair count) (default 10)">
        <validator type="empty_field" message="You must enter a value, for the default value enter 10"/>
    </param>
    <param name="jctside" type="integer" value="70"
           label="Junction side length threshold (default 70)">
        <validator type="empty_field" message="You must enter a value, for the default value enter 70"/>
    </param>
    <param name="jctdistance" type="integer" value="200"
           label="Distance tolerance for junction compatibility (default 200)">
        <validator type="empty_field" message="You must enter a value, for the default value enter 200"/>
    </param>
    <param name="jctlength" type="integer" value="500"
           label="Length threshold for compatible junctions (default 500)">
        <validator type="empty_field" message="You must enter a value, for the default value enter 500"/>
    </param>

    <!-- Switch for normal junction priority in the vcf output (flag or nothing) -->
    <param name="jctpriority" type="select" label="Use normal junction priority for vcf output?">
        <option value="" selected="true">no</option>
        <option value="--jctpriority">yes</option>
    </param>

    <!-- Switch for high confidence junctions in tumors (flag or nothing) -->
    <param name="jcttumor" type="select" label="Use high confidence junctions for tumors?">
        <option value="" selected="true">no</option>
        <option value="--jcttumor">yes</option>
    </param>

    <!-- Multi-select of vcf field names offered for SV data; "all" is pre-selected -->
    <param name="fields" type="select" multiple="true"
           label="Field names to be included in vcf file"
           help="Select all field names (default) or a collection of individual field names.">
        <option value="all" selected="true">-- all (default) --</option>
        <option value="GT">GT - Genotype</option>
        <option value="FT">FT - Sample genotype filters</option>
        <option value="NS">NS - Number of samples</option>
        <option value="CGA_XR">CGA_XR - External database reference</option>
        <option value="SVTYPE">SVTYPE - Type of structural variation</option>
        <option value="CGA_BF">CGA_BF - Frequency in set of baseline genomes</option>
        <option value="CGA_MEDEL">CGA_MEDEL - Mobile element deletion</option>
        <option value="MATEID">MATEID - ID of mate breakend</option>
        <option value="CGA_BNDG">CGA_BNDG - Transcript name and strand of genes containing breakend</option>
        <option value="CGA_BNDGO">CGA_BNDGO - Transcript name and strand of genes containing mate breakend</option>
        <option value="CGA_BNDP">CGA_BNDP - Precision of breakend</option>
        <option value="CGA_BNDMPC">CGA_BNDMPC - Mate pair count supporting a breakend</option>
        <option value="CGA_BNDPOS">CGA_BNDPOS - Position of breakend as detected in individual genome</option>
        <option value="CGA_BNDDEF">CGA_BNDDEF - Breakend definition in individual genome</option>
    </param>
</when>
|
|
370
|
|
<when value="MEI">
    <!-- Input location: "out" is the only choice offered for MEI data -->
    <conditional name="data_sources">
        <param name="data_source" type="select" label="Where are the input files?">
            <option value="out" selected="true">located outside Galaxy</option>
        </param>

        <when value="out">
            <!-- Free-text path of the genome root directory; an empty value is rejected -->
            <param name="input" type="text" size="200"
                   label="Genome root directory"
                   help="Enter full path /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01).">
                <validator type="empty_field" message="You must supply the genome root directory"/>
            </param>
        </when>
    </conditional>

    <!-- Multi-select of vcf field names offered for MEI data; "all" is pre-selected -->
    <param name="fields" type="select" multiple="true"
           label="Field names to be included in vcf file"
           help="Select all field names (default) or a collection of individual field names.">
        <option value="all" selected="true">-- all (default) --</option>
        <option value="GT">GT - Genotype</option>
        <option value="FT">FT - Sample genotype filters</option>
        <option value="CGA_IS">CGA_IS - Measure of confidence that there is a mobile element insertion</option>
        <option value="CGA_IDC">CGA_IDC - Count of paired ends consistently indicating a mobile element insertion</option>
        <option value="CGA_IDCL">CGA_IDCL - Count of paired ends indicating a mobile element insertion, anchored 5'</option>
        <option value="CGA_IDCR">CGA_IDCR - Count of paired ends indicating a mobile element insertion, anchored 3'</option>
        <option value="CGA_RDC">CGA_RDC - Count of paired ends supporting the presence of a reference allele</option>
        <option value="CGA_NBET">CGA_NBET - Next-best estimate of type of MEI</option>
        <option value="CGA_ETS">CGA_ETS - Measure of confidence that the ElementType (MEINFO:NAME) is correct</option>
        <option value="CGA_KES">CGA_KES - Fraction of known MEI with at least as good an InsertionScore</option>
    </param>
</when>
|
|
401
|
|
402 </conditional>
|
|
403 </when>
|
|
404
|
|
405 <when value="2">
|
|
406 <!--form field to select input sources-->
|
|
407 <conditional name="sources">
|
|
<!-- Test parameter of the "sources" conditional for two genomes; no MEI choice is offered here -->
<param name="source"
       type="select"
       label="Data sources to be included for each genome">
    <option value="masterVar,CNV,SV" selected="true">masterVar + CNV + SV</option>
    <option value="masterVar">masterVar</option>
    <option value="CNV">CNV</option>
    <option value="SV">SV</option>
</param>
|
|
414
|
|
<when value="masterVar,CNV,SV">
    <!-- Input location: "out" is the only choice offered for this source combination -->
    <conditional name="data_sources">
        <param name="data_source" type="select" label="Where are the input files?">
            <option value="out" selected="true">located outside Galaxy (data on server or mounted drive)</option>
        </param>

        <when value="out">
            <!-- Free-text path of a file that lists the genome root directories; an empty value is rejected -->
            <param name="input" type="text" size="200"
                   label="File with list of genome root directories"
                   help="Enter file name with full path (/path/file). This file should contain a list of genome root directory names, one per line in the format /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01). For normal/tumor comparisons list the baseline genome first.">
                <validator type="empty_field" message="You must supply the list of genome root directories"/>
            </param>
        </when>
    </conditional>

    <!-- No-calls switch: the selected value (a flag or nothing) is placed on the command line as is -->
    <param name="nocalls" type="select" label="Include no-calls?">
        <option value="" selected="true">no</option>
        <option value="--nocalls">yes</option>
    </param>

    <!-- Free-text path of the calibration root directory (no validator attached) -->
    <param name="calibration" type="text" size="300"
           label="Directory calibration data (/path/calibration-root)"
           help="The directory containing calibration data. For example, there should exist a file calibration-root/0.0.0/metrics.tsv. Calibration data can be downloaded from ftp://ftp.completegenomics.com/ScoreCalibrationFiles/var-calibration-v2.tgz"/>

    <!-- Junction thresholds: four integers, each pre-filled with the default named in its label -->
    <param name="jctscore" type="integer" value="10"
           label="Junction score thresholds (discordant mate pair count) (default 10)">
        <validator type="empty_field" message="You must enter a value, for the default value enter 10"/>
    </param>
    <param name="jctside" type="integer" value="70"
           label="Junction side length threshold (default 70)">
        <validator type="empty_field" message="You must enter a value, for the default value enter 70"/>
    </param>
    <param name="jctdistance" type="integer" value="200"
           label="Distance tolerance for junction compatibility (default 200)">
        <validator type="empty_field" message="You must enter a value, for the default value enter 200"/>
    </param>
    <param name="jctlength" type="integer" value="500"
           label="Length threshold for compatible junctions (default 500)">
        <validator type="empty_field" message="You must enter a value, for the default value enter 500"/>
    </param>

    <!-- Switch for normal junction priority in the vcf output (flag or nothing) -->
    <param name="jctpriority" type="select" label="Use normal junction priority for vcf output?">
        <option value="" selected="true">no</option>
        <option value="--jctpriority">yes</option>
    </param>

    <!-- Switch for high confidence junctions in tumors (flag or nothing) -->
    <param name="jcttumor" type="select" label="Use high confidence junctions for tumors?">
        <option value="" selected="true">no</option>
        <option value="--jcttumor">yes</option>
    </param>

    <!-- Multi-select of vcf field names; "all" is pre-selected -->
    <param name="fields" type="select" multiple="true"
           label="Field names to be included in vcf file"
           help="Select all field names (default) or a collection of individual field names.">
        <option value="all" selected="true">-- all (default) --</option>
        <option value="NS">NS - Number of samples</option>
        <option value="AN">AN - Total number of alleles in called genotypes</option>
        <option value="AC">AC - Allele count in genotypes</option>
        <option value="CGA_XR">CGA_XR - External database reference</option>
        <option value="CGA_FI">CGA_FI - Functional impact</option>
        <option value="CGA_PFAM">CGA_PFAM - PFAM domain </option>
        <option value="CGA_MIRB">CGA_MIRB - miRBaseId</option>
        <option value="CGA_SDO">CGA_SDO - Depth of overlapping segmental duplications</option>
        <option value="CGA_RPT">CGA_RPT - Overlapping repeatMasker annotations</option>
        <option value="GT">GT - Genotype</option>
        <option value="PS">PS - Phase set</option>
        <option value="FT">FT - Sample genotype filters</option>
        <option value="GL">GL - Genotype likelihoods</option>
        <option value="CGA_CEHQ">CGA_CEHQ - Calibrated haplotype quality based on EAF assumption</option>
        <option value="CGA_CEGL">CGA_CEGL - Genotype likelihoods based on CEHQ</option>
        <option value="SS">SS - Somatic status</option>
        <option value="HQ">HQ - Haplotype quality</option>
        <option value="EHQ">EHQ - Haplotype quality based on EAF assumption</option>
        <option value="GQ">GQ - Genotype quality</option>
        <option value="DP">DP - Total read depth</option>
        <option value="AD">AD - Allelic depths</option>
        <option value="CGA_RDP">CGA_RDP - Read depth in reference</option>
        <option value="CGA_ODP">CGA_ODP - Other total read depth: somatic comparison</option>
        <option value="CGA_OAD">CGA_OAD - Other allelic depths: somatic comparison</option>
        <option value="CGA_ORDP">CGA_ORDP - Other reference depth: somatic comparison </option>
        <option value="CGA_SOMC">CGA_SOMC - Somatic Category</option>
        <option value="CGA_SOMR">CGA_SOMR - Somatic Rank</option>
        <option value="CGA_SOMS">CGA_SOMS - Somatic Score</option>
        <option value="CGA_GP">CGA_GP - Normalized mean GC corrected coverage</option>
        <option value="CGA_NP">CGA_NP - Normalized mean coverage for 2k window</option>
        <option value="CGA_CP">CGA_CP - Diploid-model ploidy call for segment</option>
        <option value="CGA_PS">CGA_PS - Diploid-model called ploidy score</option>
        <option value="CGA_CT">CGA_CT - Diploid-model CNV type</option>
        <option value="CGA_TS">CGA_TS - Diploid-model CNV type score</option>
        <option value="CGA_CL">CGA_CL - Nondiploid-model called level</option>
        <option value="CGA_LS">CGA_LS - Nondiploid-model called level score</option>
        <option value="CGA_SCL">CGA_SCL - Nondiploid-model somatic called level</option>
        <option value="CGA_SLS">CGA_SLS - Non-diploid-model somatic called level score</option>
        <option value="CGA_LAF">CGA_LAF - Lesser Allele Fraction estimate, 100k window</option>
        <option value="CGA_LLAF">CGA_LLAF - Lesser Allele Fraction lower bound, 100k window</option>
        <option value="CGA_ULAF">CGA_ULAF - Lesser Allele Fraction upper bound, 100k window</option>
        <option value="SVTYPE">SVTYPE - Type of structural variation</option>
        <option value="CGA_BF">CGA_BF - Frequency in set of baseline genomes</option>
        <option value="CGA_MEDEL">CGA_MEDEL - Mobile element deletion</option>
        <option value="MATEID">MATEID - ID of mate breakend</option>
        <option value="CGA_BNDG">CGA_BNDG - Transcript name and strand of genes containing breakend</option>
        <option value="CGA_BNDGO">CGA_BNDGO - Transcript name and strand of genes containing mate breakend</option>
        <option value="CGA_BNDP">CGA_BNDP - Precision of breakend</option>
        <option value="CGA_BNDMPC">CGA_BNDMPC - Mate pair count supporting a breakend</option>
        <option value="CGA_BNDPOS">CGA_BNDPOS - Position of breakend as detected in individual genome</option>
        <option value="CGA_BNDDEF">CGA_BNDDEF - Breakend definition in individual genome</option>
    </param>
</when>
|
|
521
|
|
<when value="masterVar">
  <!-- masterVar-only source: inputs are either Galaxy datasets ("in") or a list file on the server ("out") -->
  <conditional name="data_sources">
    <param type="select" name="data_source" label="Where are the input files?">
      <option selected="true" value="in">imported into Galaxy</option>
      <option value="out">located outside Galaxy (data on server or mounted drive)</option>
    </param>

    <when value="in">
      <!-- repeatable dataset picker: at least one, at most two masterVar datasets -->
      <repeat name="files" title="MasterVar file" min="1" max="2">
        <param type="data" name="input" format="cg_mastervar" label="Dataset">
          <validator type="dataset_ok_validator"/>
          <!-- the dataset's dbkey metadata is checked against column 1 of cg_crr_files.loc -->
          <validator type="dataset_metadata_in_file"
                     filename="cg_crr_files.loc"
                     metadata_name="dbkey"
                     metadata_column="1"
                     message="cgatools is not currently available for this build."/>
        </param>
      </repeat>
    </when>

    <when value="out">
      <!-- free-text path to a file listing genome root directories or masterVar files; must not be empty -->
      <param type="text"
             name="input"
             size="200"
             label="File with list of genome root directories or masterVar files"
             help="Enter file name with full path (/path/file). This file should contain a list of genome root directory names, one per line in the format /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01), or a list of masterVar files, one per line in the format /path/masterVarfile (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01/ASM/masterVarBeta-GS00000YYYY-ASM.tsv.bz2).">
        <validator type="empty_field" message="You must supply the list of genome root directories or masterVar files"/>
      </param>
    </when>
  </conditional>

  <!-- no-call toggle: the option value is the literal flag text handed to the wrapper, empty for "no" -->
  <param type="select" name="nocalls" label="Include no-calls?">
    <option selected="true" value="">no</option>
    <option value="--nocalls">yes</option>
  </param>

  <!-- path of the calibration data directory (plain text, no validator attached) -->
  <param type="text"
         name="calibration"
         size="300"
         label="Directory calibration data (/path/calibration-root)"
         help="The directory containing calibration data. For example, there should exist a file calibration-root/0.0.0/metrics.tsv. Calibration data can be downloaded from ftp://ftp.completegenomics.com/ScoreCalibrationFiles/var-calibration-v2.tgz"/>

  <!-- multi-select of VCF field names offered for masterVar data; "all" is preselected -->
  <param type="select"
         name="fields"
         multiple="true"
         label="Field names to be included in vcf file"
         help="Select all field names (default) or a collection of individual field names.">
    <option selected="true" value="all">-- all (default) --</option>
    <option value="NS">NS - Number of samples</option>
    <option value="AN">AN - Total number of alleles in called genotypes</option>
    <option value="AC">AC - Allele count in genotypes</option>
    <option value="CGA_XR">CGA_XR - External database reference</option>
    <option value="CGA_FI">CGA_FI - Functional impact</option>
    <option value="CGA_PFAM">CGA_PFAM - PFAM domain </option>
    <option value="CGA_MIRB">CGA_MIRB - miRBaseId</option>
    <option value="CGA_SDO">CGA_SDO - Depth of overlapping segmental duplications</option>
    <option value="CGA_RPT">CGA_RPT - Overlapping repeatMasker annotations</option>
    <option value="GT">GT - Genotype</option>
    <option value="PS">PS - Phase set</option>
    <option value="FT">FT - Sample genotype filters</option>
    <option value="GL">GL - Genotype likelihoods</option>
    <option value="CGA_CEHQ">CGA_CEHQ - Calibrated haplotype quality based on EAF assumption</option>
    <option value="CGA_CEGL">CGA_CEGL - Genotype likelihoods based on CEHQ</option>
    <option value="SS">SS - Somatic status</option>
    <option value="HQ">HQ - Haplotype quality</option>
    <option value="EHQ">EHQ - Haplotype quality based on EAF assumption</option>
    <option value="GQ">GQ - Genotype quality</option>
    <option value="DP">DP - Total read depth</option>
    <option value="AD">AD - Allelic depths</option>
    <option value="CGA_RDP">CGA_RDP - Read depth in reference</option>
    <option value="CGA_ODP">CGA_ODP - Other total read depth: somatic comparison</option>
    <option value="CGA_OAD">CGA_OAD - Other allelic depths: somatic comparison</option>
    <option value="CGA_ORDP">CGA_ORDP - Other reference depth: somatic comparison </option>
    <option value="CGA_SOMC">CGA_SOMC - Somatic Category</option>
    <option value="CGA_SOMR">CGA_SOMR - Somatic Rank</option>
    <option value="CGA_SOMS">CGA_SOMS - Somatic Score</option>
  </param>
</when>
|
|
592
|
|
<when value="CNV">
  <!-- CNV source: the only input location offered is a list file outside Galaxy -->
  <conditional name="data_sources">
    <param type="select" name="data_source" label="Where are the input files?">
      <option selected="true" value="out">located outside Galaxy (data on server or mounted drive)</option>
    </param>

    <when value="out">
      <!-- free-text path to a file listing genome root directories; must not be empty -->
      <param type="text"
             name="input"
             size="200"
             label="File with list of genome root directories"
             help="Enter file name with full path (/path/file). This file should contain a list of genome root directory names, one per line in the format /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01).">
        <validator type="empty_field" message="You must supply the list of genome root directories"/>
      </param>
    </when>
  </conditional>

  <!-- multi-select of VCF field names offered for CNV data; "all" is preselected -->
  <param type="select"
         name="fields"
         multiple="true"
         label="Field names to be included in vcf file"
         help="Select all field names (default) or a collection of individual field names.">
    <option selected="true" value="all">-- all (default) --</option>
    <option value="GT">GT - Genotype</option>
    <option value="CGA_GP">CGA_GP - Normalized mean GC corrected coverage</option>
    <option value="CGA_NP">CGA_NP - Normalized mean coverage for 2k window</option>
    <option value="CGA_CP">CGA_CP - Diploid-model ploidy call for segment</option>
    <option value="CGA_PS">CGA_PS - Diploid-model called ploidy score</option>
    <option value="CGA_CT">CGA_CT - Diploid-model CNV type</option>
    <option value="CGA_TS">CGA_TS - Diploid-model CNV type score</option>
    <option value="CGA_CL">CGA_CL - Nondiploid-model called level</option>
    <option value="CGA_LS">CGA_LS - Nondiploid-model called level score</option>
    <option value="CGA_SCL">CGA_SCL - Nondiploid-model somatic called level</option>
    <option value="CGA_SLS">CGA_SLS - Non-diploid-model somatic called level score</option>
    <option value="CGA_LAF">CGA_LAF - Lesser Allele Fraction estimate, 100k window</option>
    <option value="CGA_LLAF">CGA_LLAF - Lesser Allele Fraction lower bound, 100k window</option>
    <option value="CGA_ULAF">CGA_ULAF - Lesser Allele Fraction upper bound, 100k window</option>
  </param>
</when>
|
|
627
|
|
<when value="SV">
  <!-- SV source: inputs are either Galaxy datasets ("in") or a list file on the server ("out") -->
  <conditional name="data_sources">
    <param type="select" name="data_source" label="Where are the input files?">
      <option selected="true" value="in">imported into Galaxy</option>
      <option value="out">located outside Galaxy (data on server or mounted drive)</option>
    </param>

    <when value="in">
      <!-- repeatable dataset picker for SV files (tabular format): at least one, at most two -->
      <repeat name="files" title="SV files" min="1" max="2">
        <param type="data" name="input" format="tabular" label="Dataset">
          <validator type="dataset_ok_validator"/>
          <!-- the dataset's dbkey metadata is checked against column 1 of cg_crr_files.loc -->
          <validator type="dataset_metadata_in_file"
                     filename="cg_crr_files.loc"
                     metadata_name="dbkey"
                     metadata_column="1"
                     message="cgatools is not currently available for this build."/>
        </param>
      </repeat>
    </when>

    <when value="out">
      <!-- free-text path to a file listing genome root directories or SV files; must not be empty -->
      <param type="text"
             name="input"
             size="200"
             label="File with list of genome root directories or SV files"
             help="Enter file name with full path (/path/file). This file should contain a list of genome root directory names, one per line in the format /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01), or a list of SV files, one per line in the format /path/SVfile (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01/ASM/SV/allJunctionsBeta-GS00000YYYY-ASM.tsv).">
        <validator type="empty_field" message="You must supply the list of genome root directories or SV files"/>
      </param>
    </when>
  </conditional>

  <!-- four integer junction thresholds, each prefilled with the default quoted in its label -->
  <param type="integer"
         name="jctscore"
         value="10"
         label="Junction score thresholds (discordant mate pair count) (default 10)">
    <validator type="empty_field" message="You must enter a value, for the default value enter 10"/>
  </param>
  <param type="integer"
         name="jctside"
         value="70"
         label="Junction side length threshold (default 70)">
    <validator type="empty_field" message="You must enter a value, for the default value enter 70"/>
  </param>
  <param type="integer"
         name="jctdistance"
         value="200"
         label="Distance tolerance for junction compatibility (default 200)">
    <validator type="empty_field" message="You must enter a value, for the default value enter 200"/>
  </param>
  <param type="integer"
         name="jctlength"
         value="500"
         label="Length threshold for compatible junctions (default 500)">
    <validator type="empty_field" message="You must enter a value, for the default value enter 500"/>
  </param>

  <!-- normal junction priority toggle: the option value is the literal flag text, empty for "no" -->
  <param type="select" name="jctpriority" label="Use normal junction priority for vcf output?">
    <option selected="true" value="">no</option>
    <option value="--jctpriority">yes</option>
  </param>

  <!-- high-confidence tumor junction toggle: the option value is the literal flag text, empty for "no" -->
  <param type="select" name="jcttumor" label="Use high confidence junctions for tumors?">
    <option selected="true" value="">no</option>
    <option value="--jcttumor">yes</option>
  </param>

  <!-- multi-select of VCF field names offered for SV data; "all" is preselected -->
  <param type="select"
         name="fields"
         multiple="true"
         label="Field names to be included in vcf file"
         help="Select all field names (default) or a collection of individual field names.">
    <option selected="true" value="all">-- all (default) --</option>
    <option value="GT">GT - Genotype</option>
    <option value="FT">FT - Sample genotype filters</option>
    <option value="SVTYPE">SVTYPE - Type of structural variation</option>
    <option value="CGA_BF">CGA_BF - Frequency in set of baseline genomes</option>
    <option value="CGA_MEDEL">CGA_MEDEL - Mobile element deletion</option>
    <option value="MATEID">MATEID - ID of mate breakend</option>
    <option value="CGA_BNDG">CGA_BNDG - Transcript name and strand of genes containing breakend</option>
    <option value="CGA_BNDGO">CGA_BNDGO - Transcript name and strand of genes containing mate breakend</option>
    <option value="CGA_BNDP">CGA_BNDP - Precision of breakend</option>
    <option value="CGA_BNDMPC">CGA_BNDMPC - Mate pair count supporting a breakend</option>
    <option value="CGA_BNDPOS">CGA_BNDPOS - Position of breakend as detected in individual genome</option>
    <option value="CGA_BNDDEF">CGA_BNDDEF - Breakend definition in individual genome</option>
  </param>
</when>
|
|
699 </conditional>
|
|
700 </when>
|
|
701
|
|
702 <when value="3">
|
|
703 <!--form field to select input sources-->
|
|
704 <conditional name="sources">
|
|
<!-- data-source selector for this genome-count branch; the combined masterVar + CNV choice is preselected -->
<param type="select"
       name="source"
       label="Data sources to be included for each genome">
  <option selected="true" value="masterVar,CNV">masterVar + CNV</option>
  <option value="masterVar">masterVar</option>
  <option value="CNV">CNV</option>
</param>
|
|
710
|
|
<when value="masterVar,CNV">
  <!-- combined masterVar + CNV source: the only input location offered is a list file outside Galaxy -->
  <conditional name="data_sources">
    <param type="select" name="data_source" label="Where are the input files?">
      <option selected="true" value="out">located outside Galaxy (data on server or mounted drive)</option>
    </param>

    <when value="out">
      <!-- free-text path to a file listing genome root directories; must not be empty -->
      <param type="text"
             name="input"
             size="200"
             label="File with list of genome root directories"
             help="Enter file name with full path (/path/file). This file should contain a list of genome root directory names, one per line in the format /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01).">
        <validator type="empty_field" message="You must supply the list of genome root directories"/>
      </param>
    </when>
  </conditional>

  <!-- no-call toggle: the option value is the literal flag text handed to the wrapper, empty for "no" -->
  <param type="select" name="nocalls" label="Include no-calls?">
    <option selected="true" value="">no</option>
    <option value="--nocalls">yes</option>
  </param>

  <!-- path of the calibration data directory (plain text, no validator attached) -->
  <param type="text"
         name="calibration"
         size="300"
         label="Directory calibration data (/path/calibration-root)"
         help="The directory containing calibration data. For example, there should exist a file calibration-root/0.0.0/metrics.tsv. Calibration data can be downloaded from ftp://ftp.completegenomics.com/ScoreCalibrationFiles/var-calibration-v2.tgz"/>

  <!-- multi-select of VCF field names: the masterVar options followed by the CNV options; "all" is preselected -->
  <param type="select"
         name="fields"
         multiple="true"
         label="Field names to be included in vcf file"
         help="Select all field names (default) or a collection of individual field names.">
    <option selected="true" value="all">-- all (default) --</option>
    <option value="NS">NS - Number of samples</option>
    <option value="AN">AN - Total number of alleles in called genotypes</option>
    <option value="AC">AC - Allele count in genotypes</option>
    <option value="CGA_XR">CGA_XR - External database reference</option>
    <option value="CGA_FI">CGA_FI - Functional impact</option>
    <option value="CGA_PFAM">CGA_PFAM - PFAM domain </option>
    <option value="CGA_MIRB">CGA_MIRB - miRBaseId</option>
    <option value="CGA_SDO">CGA_SDO - Depth of overlapping segmental duplications</option>
    <option value="CGA_RPT">CGA_RPT - Overlapping repeatMasker annotations</option>
    <option value="GT">GT - Genotype</option>
    <option value="PS">PS - Phase set</option>
    <option value="FT">FT - Sample genotype filters</option>
    <option value="GL">GL - Genotype likelihoods</option>
    <option value="CGA_CEHQ">CGA_CEHQ - Calibrated haplotype quality based on EAF assumption</option>
    <option value="CGA_CEGL">CGA_CEGL - Genotype likelihoods based on CEHQ</option>
    <option value="SS">SS - Somatic status</option>
    <option value="HQ">HQ - Haplotype quality</option>
    <option value="EHQ">EHQ - Haplotype quality based on EAF assumption</option>
    <option value="GQ">GQ - Genotype quality</option>
    <option value="DP">DP - Total read depth</option>
    <option value="AD">AD - Allelic depths</option>
    <option value="CGA_RDP">CGA_RDP - Read depth in reference</option>
    <option value="CGA_ODP">CGA_ODP - Other total read depth: somatic comparison</option>
    <option value="CGA_OAD">CGA_OAD - Other allelic depths: somatic comparison</option>
    <option value="CGA_ORDP">CGA_ORDP - Other reference depth: somatic comparison </option>
    <option value="CGA_SOMC">CGA_SOMC - Somatic Category</option>
    <option value="CGA_SOMR">CGA_SOMR - Somatic Rank</option>
    <option value="CGA_SOMS">CGA_SOMS - Somatic Score</option>
    <option value="CGA_GP">CGA_GP - Normalized mean GC corrected coverage</option>
    <option value="CGA_NP">CGA_NP - Normalized mean coverage for 2k window</option>
    <option value="CGA_CP">CGA_CP - Diploid-model ploidy call for segment</option>
    <option value="CGA_PS">CGA_PS - Diploid-model called ploidy score</option>
    <option value="CGA_CT">CGA_CT - Diploid-model CNV type</option>
    <option value="CGA_TS">CGA_TS - Diploid-model CNV type score</option>
    <option value="CGA_CL">CGA_CL - Nondiploid-model called level</option>
    <option value="CGA_LS">CGA_LS - Nondiploid-model called level score</option>
    <option value="CGA_SCL">CGA_SCL - Nondiploid-model somatic called level</option>
    <option value="CGA_SLS">CGA_SLS - Non-diploid-model somatic called level score</option>
    <option value="CGA_LAF">CGA_LAF - Lesser Allele Fraction estimate, 100k window</option>
    <option value="CGA_LLAF">CGA_LLAF - Lesser Allele Fraction lower bound, 100k window</option>
    <option value="CGA_ULAF">CGA_ULAF - Lesser Allele Fraction upper bound, 100k window</option>
  </param>
</when>
|
|
781
|
|
<when value="masterVar">
  <!-- masterVar-only source: inputs are either Galaxy datasets ("in") or a list file on the server ("out") -->
  <conditional name="data_sources">
    <param type="select" name="data_source" label="Where are the input files?">
      <option selected="true" value="in">imported into Galaxy</option>
      <option value="out">located outside Galaxy (data on server or mounted drive)</option>
    </param>

    <when value="in">
      <!-- repeatable dataset picker: at least one masterVar dataset, no upper limit declared -->
      <repeat name="files" title="MasterVar files" min="1">
        <param type="data" name="input" format="cg_mastervar" label="Dataset">
          <validator type="dataset_ok_validator"/>
          <!-- the dataset's dbkey metadata is checked against column 1 of cg_crr_files.loc -->
          <validator type="dataset_metadata_in_file"
                     filename="cg_crr_files.loc"
                     metadata_name="dbkey"
                     metadata_column="1"
                     message="cgatools is not currently available for this build."/>
        </param>
      </repeat>
    </when>

    <when value="out">
      <!-- free-text path to a file listing genome root directories or masterVar files; must not be empty -->
      <param type="text"
             name="input"
             size="200"
             label="File with list of genome root directories or masterVar files"
             help="Enter file name with full path (/path/file). This file should contain a list of genome root directory names, one per line in the format /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01), or a list of masterVar files, one per line in the format /path/masterVarfile (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01/ASM/masterVarBeta-GS00000YYYY-ASM.tsv.bz2).">
        <validator type="empty_field" message="You must supply the list of genome root directories or masterVar files"/>
      </param>
    </when>
  </conditional>

  <!-- no-call toggle: the option value is the literal flag text handed to the wrapper, empty for "no" -->
  <param type="select" name="nocalls" label="Include no-calls?">
    <option selected="true" value="">no</option>
    <option value="--nocalls">yes</option>
  </param>

  <!-- path of the calibration data directory (plain text, no validator attached) -->
  <param type="text"
         name="calibration"
         size="300"
         label="Directory calibration data (/path/calibration-root)"
         help="The directory containing calibration data. For example, there should exist a file calibration-root/0.0.0/metrics.tsv. Calibration data can be downloaded from ftp://ftp.completegenomics.com/ScoreCalibrationFiles/var-calibration-v2.tgz"/>

  <!-- multi-select of VCF field names offered for masterVar data; "all" is preselected -->
  <param type="select"
         name="fields"
         multiple="true"
         label="Field names to be included in vcf file"
         help="Select all field names (default) or a collection of individual field names.">
    <option selected="true" value="all">-- all (default) --</option>
    <option value="NS">NS - Number of samples</option>
    <option value="AN">AN - Total number of alleles in called genotypes</option>
    <option value="AC">AC - Allele count in genotypes</option>
    <option value="CGA_XR">CGA_XR - External database reference</option>
    <option value="CGA_FI">CGA_FI - Functional impact</option>
    <option value="CGA_PFAM">CGA_PFAM - PFAM domain </option>
    <option value="CGA_MIRB">CGA_MIRB - miRBaseId</option>
    <option value="CGA_SDO">CGA_SDO - Depth of overlapping segmental duplications</option>
    <option value="CGA_RPT">CGA_RPT - Overlapping repeatMasker annotations</option>
    <option value="GT">GT - Genotype</option>
    <option value="PS">PS - Phase set</option>
    <option value="FT">FT - Sample genotype filters</option>
    <option value="GL">GL - Genotype likelihoods</option>
    <option value="CGA_CEHQ">CGA_CEHQ - Calibrated haplotype quality based on EAF assumption</option>
    <option value="CGA_CEGL">CGA_CEGL - Genotype likelihoods based on CEHQ</option>
    <option value="SS">SS - Somatic status</option>
    <option value="HQ">HQ - Haplotype quality</option>
    <option value="EHQ">EHQ - Haplotype quality based on EAF assumption</option>
    <option value="GQ">GQ - Genotype quality</option>
    <option value="DP">DP - Total read depth</option>
    <option value="AD">AD - Allelic depths</option>
    <option value="CGA_RDP">CGA_RDP - Read depth in reference</option>
    <option value="CGA_ODP">CGA_ODP - Other total read depth: somatic comparison</option>
    <option value="CGA_OAD">CGA_OAD - Other allelic depths: somatic comparison</option>
    <option value="CGA_ORDP">CGA_ORDP - Other reference depth: somatic comparison </option>
    <option value="CGA_SOMC">CGA_SOMC - Somatic Category</option>
    <option value="CGA_SOMR">CGA_SOMR - Somatic Rank</option>
    <option value="CGA_SOMS">CGA_SOMS - Somatic Score</option>
  </param>
</when>
|
|
852
|
|
<when value="CNV">
  <!-- CNV source: the only input location offered is a list file outside Galaxy -->
  <conditional name="data_sources">
    <param type="select" name="data_source" label="Where are the input files?">
      <option selected="true" value="out">located outside Galaxy (data on server or mounted drive)</option>
    </param>

    <when value="out">
      <!-- free-text path to a file listing genome root directories; must not be empty -->
      <param type="text"
             name="input"
             size="200"
             label="File with list of genome root directories"
             help="Enter file name with full path (/path/file). This file should contain a list of genome root directory names, one per line in the format /path/dir (e.g. /harddrive/GS00000XXXX-DID/GS00000YYYY-ASM/GS00123-DNA_G01).">
        <validator type="empty_field" message="You must supply the list of genome root directories"/>
      </param>
    </when>
  </conditional>

  <!-- multi-select of VCF field names offered for CNV data; "all" is preselected -->
  <param type="select"
         name="fields"
         multiple="true"
         label="Field names to be included in vcf file"
         help="Select all field names (default) or a collection of individual field names.">
    <option selected="true" value="all">-- all (default) --</option>
    <option value="GT">GT - Genotype</option>
    <option value="CGA_GP">CGA_GP - Normalized mean GC corrected coverage</option>
    <option value="CGA_NP">CGA_NP - Normalized mean coverage for 2k window</option>
    <option value="CGA_CP">CGA_CP - Diploid-model ploidy call for segment</option>
    <option value="CGA_PS">CGA_PS - Diploid-model called ploidy score</option>
    <option value="CGA_CT">CGA_CT - Diploid-model CNV type</option>
    <option value="CGA_TS">CGA_TS - Diploid-model CNV type score</option>
    <option value="CGA_CL">CGA_CL - Nondiploid-model called level</option>
    <option value="CGA_LS">CGA_LS - Nondiploid-model called level score</option>
    <option value="CGA_SCL">CGA_SCL - Nondiploid-model somatic called level</option>
    <option value="CGA_SLS">CGA_SLS - Non-diploid-model somatic called level score</option>
    <option value="CGA_LAF">CGA_LAF - Lesser Allele Fraction estimate, 100k window</option>
    <option value="CGA_LLAF">CGA_LLAF - Lesser Allele Fraction lower bound, 100k window</option>
    <option value="CGA_ULAF">CGA_ULAF - Lesser Allele Fraction upper bound, 100k window</option>
  </param>
</when>
|
|
887 </conditional>
|
|
888 </when>
|
|
889 </conditional>
|
|
890 </inputs>
|
|
891
|
|
892 <help>
|
|
893
|
|
894 **What it does**
|
|
895
|
|
This tool uses cgatools mkvcf to convert Complete Genomics masterVar files, optionally together with CNV, SV and/or MEI data, to VCF format.
|
|
897
|
|
898 **cgatools 1.6.0 Documentation**
|
|
899
|
|
User guide: http://cgatools.sourceforge.net/docs/1.6.0/cgatools-user-guide.pdf
|
|
901
|
|
902 Release notes: http://cgatools.sourceforge.net/docs/1.6.0/cgatools-release-notes.pdf
|
|
903
|
|
904 **Command line reference**::
|
|
905
|
|
906 COMMAND NAME
|
|
907 mkvcf - Converts var file(s) or masterVar file(s) to VCF.
|
|
908
|
|
909 DESCRIPTION
|
|
910 Converts var file(s) or masterVar file(s) to VCF.
|
|
911
|
|
912 OPTIONS
|
|
913 -h [ --help ]
|
|
914 Print this help message.
|
|
915
|
|
916 --beta
|
|
917 This is a beta command. To run this command, you must pass the --beta
|
|
918 flag.
|
|
919
|
|
920 --reference arg
|
|
921 The reference crr file.
|
|
922
|
|
923 --output arg (=STDOUT)
|
|
924 The output file (may be omitted for stdout).
|
|
925
|
|
926 --field-names arg (=GT,PS,NS,AN,AC,SS,FT,CGA_XR,CGA_FI,GQ,HQ,EHQ,CGA_CEHQ,GL,
|
|
927 CGA_CEGL,DP,AD,CGA_RDP,CGA_ODP,CGA_OAD,CGA_ORDP,CGA_PFAM,CGA_MIRB,CGA_RPT,
|
|
928 CGA_SDO,CGA_SOMC,CGA_SOMR,CGA_SOMS,CGA_GP,CGA_NP,CGA_CP,CGA_PS,CGA_CT,
|
|
929 CGA_TS,CGA_CL,CGA_LS,CGA_SCL,CGA_SLS,CGA_LAF,CGA_LLAF,CGA_ULAF,CGA_IS,
|
|
930 CGA_IDC,CGA_IDCL,CGA_IDCR,CGA_RDC,CGA_NBET,CGA_ETS,CGA_KES,CGA_BF,
|
|
931 CGA_MEDEL,MATEID,SVTYPE,CGA_BNDG,CGA_BNDGO,CGA_BNDMPC,CGA_BNDPOS,CGA_BNDDEF,
|
|
932 CGA_BNDP)
|
|
933 Comma-separated list of field names. By default, all fields are
|
|
934 included, but you may override this option to ensure only a subset of
|
|
935 the fields is included in the VCF output. For a description of each
|
|
936 field, see the cgatools user guide.
|
|
937
|
|
938 --source-names arg (=masterVar,CNV,SV,MEI)
|
|
939 Comma-separated list of source names. The following source names are
|
|
940 available:
|
|
941 masterVar - Includes records extracted from the masterVar file.
|
|
942 CNV - Includes CNV-related records.
|
|
943 SV - Includes records derived from junctions files.
|
|
944 MEI - Includes records describing mobile element insertions.
|
|
945 Some of these source types are only available for more recent pipeline
|
|
946 versions, and some of these source types do not support multi-genome
|
|
947 VCFs. For more information about which source types are available for
|
|
948 which versions of the Complete Genomics pipeline software, see the
|
|
949 cgatools user guide.
|
|
950
|
|
951 --genome-root arg
|
|
952 For each genome to include in the VCF, the genome root directory, for
|
|
953 example /data/GS00118-DNA_A01; this directory is expected to contain
|
|
954 the ASM and LIB subdirectories, for example. You must supply this
|
|
955 option for each genome in the VCF, unless you are using
|
|
956 --source-names=masterVar and you have specified the --master-var option
|
|
957 for each genome in the VCF.
|
|
958
|
|
959 --master-var arg
|
|
960 For each genome to include in the VCF, the masterVar file. If
|
|
961 genome-roots parameter is given, this parameter defaults to the
|
|
962 masterVar in the given genome-root.
|
|
963
|
|
964 --include-no-calls
|
|
965 Small variants VCF records include loci that have no
|
|
966 reference-inconsistent calls.
|
|
967
|
|
968 --calibration-root arg
|
|
969 The directory containing calibration data. For example, there should
|
|
970 exist a file calibration-root/version0.0.0/metrics.tsv. This option is only
|
|
971 required if CGA_CEHQ or CGA_CEGL are included in the --field-names
|
|
972 parameter.
|
|
973
|
|
974 --junction-file arg
|
|
975 For each genome to include in the VCF, the junctions file. If
|
|
976 genome-roots parameter is given, this parameter defaults to the
|
|
977 respective junctions file in the export directory.
|
|
978
|
|
979 --junction-score-threshold arg (=10)
|
|
980 Junction score thresholds (discordant mate pair count).
|
|
981
|
|
982 --junction-side-length-threshold arg (=70)
|
|
983 Junction side length threshold.
|
|
984
|
|
985 --junction-distance-tolerance arg (=200)
|
|
986 Distance tolerance for junction compatibility.
|
|
987
|
|
988 --junction-length-threshold arg (=500)
|
|
989 Length threshold for compatible junctions.
|
|
990
|
|
991 --junction-normal-priority
|
|
992 Normal junction priority for vcf output.
|
|
993
|
|
994 --junction-tumor-hc
|
|
995 use high confidence junctions for tumors.
|
|
996
|
|
997
|
|
998 SUPPORTED FORMAT_VERSION
|
|
999 0.3 or later
|
|
1000 </help>
|
|
1001 </tool>
|