annotate gemini_roh.xml @ 16:ae03de7a9fee draft

Uploaded
author iuc
date Tue, 28 Apr 2015 22:55:56 -0400
parents 53a5647e5271
children 65f742e605ec
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
14caa57eca63 Uploaded
iuc
parents:
diff changeset
1 <tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0">
14caa57eca63 Uploaded
iuc
parents:
diff changeset
2 <description>Identifying runs of homozygosity</description>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
3 <expand macro="requirements" />
14caa57eca63 Uploaded
iuc
parents:
diff changeset
4 <expand macro="version_command" />
14caa57eca63 Uploaded
iuc
parents:
diff changeset
5 <macros>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
6 <import>gemini_macros.xml</import>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
7 <token name="@BINARY@">roh</token>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
8 </macros>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
9 <command>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
10 <![CDATA[
14caa57eca63 Uploaded
iuc
parents:
diff changeset
11 gemini @BINARY@
2
e6512af21622 Uploaded
iuc
parents: 0
diff changeset
12 --min-snps $min_snps
e6512af21622 Uploaded
iuc
parents: 0
diff changeset
13 --min-total-depth $min_total_depth
e6512af21622 Uploaded
iuc
parents: 0
diff changeset
14 --min-gt-depth $min_gt_depth
e6512af21622 Uploaded
iuc
parents: 0
diff changeset
15 --min-size $min_size
e6512af21622 Uploaded
iuc
parents: 0
diff changeset
16 --max-hets $max_hets
e6512af21622 Uploaded
iuc
parents: 0
diff changeset
17 --max-unknowns $max_unknowns
e6512af21622 Uploaded
iuc
parents: 0
diff changeset
18 #if $samples.strip() != '':
e6512af21622 Uploaded
iuc
parents: 0
diff changeset
19 -s "${samples}"
e6512af21622 Uploaded
iuc
parents: 0
diff changeset
20 #end if
0
14caa57eca63 Uploaded
iuc
parents:
diff changeset
21 "${ infile }"
2
e6512af21622 Uploaded
iuc
parents: 0
diff changeset
22 > "${ outfile }"
0
14caa57eca63 Uploaded
iuc
parents:
diff changeset
23 ]]>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
24 </command>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
25 <expand macro="stdio" />
14caa57eca63 Uploaded
iuc
parents:
diff changeset
26 <inputs>
11
53a5647e5271 Uploaded
iuc
parents: 2
diff changeset
27 <expand macro="infile" />
0
14caa57eca63 Uploaded
iuc
parents:
diff changeset
28
14caa57eca63 Uploaded
iuc
parents:
diff changeset
29 <param name="min_snps" type="integer" value="25" size="5" label="Minimum number of expected homozygous SNPs" help="default: 25 (--min-snps)">
14caa57eca63 Uploaded
iuc
parents:
diff changeset
30 <validator type="in_range" min="0"/>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
31 </param>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
32 <param name="min_total_depth" type="integer" value="20" size="10" label="The minimum overall sequencing depth required for a SNP to be considered" help="default: 20 (--min-total-depth)">
14caa57eca63 Uploaded
iuc
parents:
diff changeset
33 <validator type="in_range" min="0"/>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
34 </param>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
35 <param name="min_gt_depth" type="integer" value="0" size="10" label="The minimum required sequencing depth underlying a given sample's genotype for a SNP to be considered"
14caa57eca63 Uploaded
iuc
parents:
diff changeset
36 help="default: 0 (--min-gt-depth)">
14caa57eca63 Uploaded
iuc
parents:
diff changeset
37 <validator type="in_range" min="0"/>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
38 </param>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
39 <param name="min_size" type="integer" value="100000" size="10" label="Minimum run size in base pairs" help="default: 100000 (--min-size)">
14caa57eca63 Uploaded
iuc
parents:
diff changeset
40 <validator type="in_range" min="1"/>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
41 </param>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
42 <param name="max_hets" type="integer" value="1" size="5" label="Maximum number of allowed hets in the run" help="default: 1 (--max-hets)">
14caa57eca63 Uploaded
iuc
parents:
diff changeset
43 <validator type="in_range" min="1"/>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
44 </param>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
45 <param name="max_unknowns" type="integer" value="3" size="5" label="Maximum number of allowed unknowns in the run" help="default: 3 (--max-unknowns)">
14caa57eca63 Uploaded
iuc
parents:
diff changeset
46 <validator type="in_range" min="0"/>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
47 </param>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
48
14caa57eca63 Uploaded
iuc
parents:
diff changeset
49 <param name="samples" size="30" type="text" value="" label="Comma separated list of samples to screen for ROHs" help="e.g. S120,S450 (-s)"/>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
50
14caa57eca63 Uploaded
iuc
parents:
diff changeset
51 </inputs>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
52
14caa57eca63 Uploaded
iuc
parents:
diff changeset
53 <outputs>
11
53a5647e5271 Uploaded
iuc
parents: 2
diff changeset
54 <data name="outfile" format="tabular" />
0
14caa57eca63 Uploaded
iuc
parents:
diff changeset
55 </outputs>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
56 <tests>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
57 <test>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
58 </test>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
59 </tests>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
60 <help>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
61
14caa57eca63 Uploaded
iuc
parents:
diff changeset
62 **What it does**
14caa57eca63 Uploaded
iuc
parents:
diff changeset
63
14caa57eca63 Uploaded
iuc
parents:
diff changeset
64 ===========================================================================
14caa57eca63 Uploaded
iuc
parents:
diff changeset
65 ``ROH``: Identifying runs of homozygosity
14caa57eca63 Uploaded
iuc
parents:
diff changeset
66 ===========================================================================
14caa57eca63 Uploaded
iuc
parents:
diff changeset
67 Runs of homozygosity are long stretches of homozygous genotypes that reflect
14caa57eca63 Uploaded
iuc
parents:
diff changeset
68 segments shared identically by descent and are a result of consanguinity or
14caa57eca63 Uploaded
iuc
parents:
diff changeset
69 natural selection. Consanguinity elevates the occurrence of rare recessive
14caa57eca63 Uploaded
iuc
parents:
diff changeset
70 diseases (e.g. cystic fibrosis) that represent homozygotes for strongly deleterious
14caa57eca63 Uploaded
iuc
parents:
diff changeset
71 mutations. Hence, the identification of these runs holds medical value.
14caa57eca63 Uploaded
iuc
parents:
diff changeset
72
14caa57eca63 Uploaded
iuc
parents:
diff changeset
73 The 'roh' tool in GEMINI returns runs of homozygosity identified in whole genome data.
14caa57eca63 Uploaded
iuc
parents:
diff changeset
74 The tool basically looks at every homozygous position on the chromosome as a possible
14caa57eca63 Uploaded
iuc
parents:
diff changeset
75 start site for the run and looks for those that could give rise to a potentially long
14caa57eca63 Uploaded
iuc
parents:
diff changeset
76 stretch of homozygous genotypes.
14caa57eca63 Uploaded
iuc
parents:
diff changeset
77
14caa57eca63 Uploaded
iuc
parents:
diff changeset
78 For example, given the genotype run below and allowing ``1 HET`` genotype (h) and ``2 UKW`` genotypes (u),
14caa57eca63 Uploaded
iuc
parents:
diff changeset
79 the possible roh runs (H) would be:
14caa57eca63 Uploaded
iuc
parents:
diff changeset
80
14caa57eca63 Uploaded
iuc
parents:
diff changeset
81
14caa57eca63 Uploaded
iuc
parents:
diff changeset
82 ::
14caa57eca63 Uploaded
iuc
parents:
diff changeset
83
14caa57eca63 Uploaded
iuc
parents:
diff changeset
84 genotype_run = H H H H h H H H H u H H H H H u H H H H H H H h H H H H H h H H H H H
14caa57eca63 Uploaded
iuc
parents:
diff changeset
85 roh_run1 = H H H H h H H H H u H H H H H u H H H H H H H
14caa57eca63 Uploaded
iuc
parents:
diff changeset
86 roh_run2 = H H H H u H H H H H u H H H H H H H h H H H H H
14caa57eca63 Uploaded
iuc
parents:
diff changeset
87 roh_run3 = H H H H H u H H H H H H H h H H H H H
14caa57eca63 Uploaded
iuc
parents:
diff changeset
88 roh_run4 = H H H H H H H h H H H H H
14caa57eca63 Uploaded
iuc
parents:
diff changeset
89
14caa57eca63 Uploaded
iuc
parents:
diff changeset
90 roh returned for --min-snps = 20 would be:
14caa57eca63 Uploaded
iuc
parents:
diff changeset
91
14caa57eca63 Uploaded
iuc
parents:
diff changeset
92 ::
14caa57eca63 Uploaded
iuc
parents:
diff changeset
93
14caa57eca63 Uploaded
iuc
parents:
diff changeset
94 roh_run1 = H H H H h H H H H u H H H H H u H H H H H H H
14caa57eca63 Uploaded
iuc
parents:
diff changeset
95 roh_run2 = H H H H u H H H H H u H H H H H H H h H H H H H
14caa57eca63 Uploaded
iuc
parents:
diff changeset
96
14caa57eca63 Uploaded
iuc
parents:
diff changeset
97
14caa57eca63 Uploaded
iuc
parents:
diff changeset
98 As you can see, the immediate homozygous position right of a break (h or u) would be the possible
14caa57eca63 Uploaded
iuc
parents:
diff changeset
99 start of a new roh run and genotypes to the left of a break are pruned since they cannot
14caa57eca63 Uploaded
iuc
parents:
diff changeset
100 be part of a longer run than we have seen before.
14caa57eca63 Uploaded
iuc
parents:
diff changeset
101
14caa57eca63 Uploaded
iuc
parents:
diff changeset
102
14caa57eca63 Uploaded
iuc
parents:
diff changeset
103 @CITATION@
14caa57eca63 Uploaded
iuc
parents:
diff changeset
104 </help>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
105 <expand macro="citations"/>
14caa57eca63 Uploaded
iuc
parents:
diff changeset
106 </tool>