annotate MUMmer/mummer_clustering.xml @ 3:f807110e7c80

Corrected tool dependencies
author abossers
date Wed, 29 Oct 2014 10:36:29 +0100
parents 479eb076cd23
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
1 <tool id="mummer_clustering" name="MUMmer Clustering" version="0.9.alx" force_history_refresh="True">
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
2 <description>: order sequence matches in clusters</description>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
3 <command>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
4 <!-- update this path to the installed location -->
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
5 $tool.cmd
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
6 #if $tool.cmd=="gaps":
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
7 $in_reference
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
8 #if $tool.gaps_r=="yes":
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
9 -r
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
10 #end if
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
11 #end if
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
12 #if $tool.cmd=="mgaps":
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
13 #if $tool.cmd_C=="yes":
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
14 -C
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
15 #end if
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
16 -d $tool.cmd_d
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
17 #if $tool.cmd_e=="yes":
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
18 -e
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
19 #end if
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
20 -f $tool.cmd_f
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
21 -l $tool.cmd_l
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
22 -s $tool.cmd_s
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
23 #end if
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
24 &lt; $tool.in_match_list
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
25 &gt; $out_tool
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
26
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
27 </command>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
28 <inputs>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
29 <conditional name="tool">
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
30 <param name="cmd" type="select" label="MUMmer maximal matching" help="Algorithms are run with default parameters (none). For specific args see help below" >
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
31 <option value="gaps" selected="true">gaps</option>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
32 <option value="mgaps">mgaps</option>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
33 </param>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
34 <when value="gaps">
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
35 <param name="in_reference" type="data" format="fasta" label="Reference FastA file" />
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
36 <param name="gaps_r" type="select" label="Use reversed [-r]" >
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
37 <option value="no" selected="true">No</option>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
38 <option value="yes">Yes</option>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
39 </param>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
40 <param name="in_match_list" type="data" format="text" label="MUMmer match list" help="See help for more details" />
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
41 </when>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
42 <when value="mgaps">
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
43 <param name="in_match_list" type="data" format="text" label="MUMmer match list" help="See help for more details" />
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
44 <param name="cmd_C" type="select" label="Check input header labels have reversed keyword [-C]" >
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
45 <option value="no" selected="true">No</option>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
46 <option value="yes">Yes</option>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
47 </param>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
48 <param name="cmd_d" type="integer" size="5" value="5" label="Max fixed diagonal difference [-d]" />
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
49 <param name="cmd_e" type="select" label="Use extent of cluster [-e]" >
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
50 <option value="no" selected="true">No</option>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
51 <option value="yes">Yes</option>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
52 </param>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
53 <param name="cmd_f" type="float" size="5" value="0.05" label="Max fraction separation for diagonal difference [-f]" />
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
54 <param name="cmd_l" type="integer" size="5" value="200" label="Min cluster length [-l]" />
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
55 <param name="cmd_s" type="integer" size="5" value="1000" label="Max separation adjecent matches in cluster [-s]" />
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
56 </when>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
57 </conditional>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
58 </inputs>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
59 <outputs>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
60 <data name="out_tool" format="text" label="Clustering output" />
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
61 </outputs>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
62 <requirements>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
63 <!-- <requirement type="set_environment" version="3.23">MUMMER_PATH</requirement> -->
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
64 <requirement type="package" version="4.6.4">gnuplot</requirement>
3
f807110e7c80 Corrected tool dependencies
abossers
parents: 2
diff changeset
65 <requirement type="package" version="3.23">mummer</requirement>
2
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
66 </requirements>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
67 <tests>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
68 <test>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
69 </test>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
70 </tests>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
71 <help>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
72 |
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
73
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
74
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
75 **Reference**
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
76 =============
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
77
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
78 - **MUMmer clustering Galaxy tool wrapper:** Alex Bossers, CVI of Wageningen UR, The Netherlands.
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
79
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
80 - **MUMmer suite v3.22:** http://mummer.sourceforge.net
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
81
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
82 - **MUMmer tutorials:** http://mummer.sourceforge.net/examples/
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
83
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
84 If you found these tools/wrappers useful in your research, please acknowledge our work. If you improve
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
85 or modify the wrappers, please add yourself to the acknowledgement section instead of replacing existing names :)
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
86
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
87
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
88 **MUMmer Clustering**
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
89 =====================
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
90
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
91 MUMmer's clustering algorithms attempt to order small individual matches into larger match clusters
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
92 in order to make the output of mummer more intelligible. A dot plot makes it easy to spot alignment
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
93 regions from a match list, however when examining the data without graphic aids, it is very difficult
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
94 to draw any reasonable conclusions from the simple flat file list of matches. Clustering the matches
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
95 together into larger groups of neighboring matches makes this process much easier by ordering the
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
96 data and removing spurious matches.
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
97
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
98
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
99 Gaps
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
100 ----
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
101
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
102 *gaps* is the primary clustering algorithm for run-mummer1, and although classified as a "clustering"
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
103 step, gaps is more of a sorting routine. It implements the LIS (longest increasing subset) algorithm
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
104 to extract the longest consistent set of matches between two sequences, and generates a single
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
105 cluster that represents the best "straight-line" arrangement of matches between the sequences. By
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
106 straight-line, we mean no rearrangements or inversions, just a simple path of agreeing matches
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
107 between the two sequences. This limits the usability of this program to the alignment of genomes
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
108 that are very similar and with no large scale mutations. *gaps* is best suited for the comparison of
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
109 near identical sequences with the goal of finding minor mutations like SNPs and small indels.
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
110
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
111 Input can be filtered mummer output. The strange syntax is a result of a legacy issue described in
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
112 the Known problems (manual) section, and requires the header be stripped from the mummer output. In
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
113 addition, gaps is only designed to handle a single reference and a single query sequence, thus the
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
114 preceding mummer run must also follow this constraint. The -r is optional and designates the incoming
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
115 matches as reverse complement matches which must reference the reverse complement of the sequence,
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
116 therefore forcing mummer to be run without the -c option.
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
117
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
118 Reference: http://mummer.sourceforge.net/manual/#gaps
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
119
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
120 **Output:**
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
121 ::
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
122
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
123 > /home/aphillip/data/GHP.1con Consistent matches
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
124 183 17 22 none - -
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
125 238 72 108 none 33 33
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
126 347 181 92 none 1 1
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
127 458 292 50 none 19 19
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
128 705 539 44 none 1 1
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
129 750 584 38 none 1 1
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
130 807 641 23 -16 0 4
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
131 (output continues ...)
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
132 > Wrap around
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
133 334398 329917 47 none - 225
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
134 334446 329965 62 none 1 1
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
135 334539 330058 20 none 31 31
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
136 334560 330079 92 none 1 1
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
137 334653 330172 77 none 1 1
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
138 334740 330259 41 none 10 10
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
139 (output continues ...)
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
140 > /home/aphillip/data/GHP.1con Other matches
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
141 1317231 4891 21 none - -
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
142 1317275 4927 21 none - -
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
143 1317804 5399 25 none 508 451
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
144 947580 5436 36 none - -
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
145 23406 5518 34 none - -
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
146 333079 6592 32 none - -
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
147 (output continues ...)
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
148
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
149 Where the first line is the location of the reference file, and the first three columns are the same
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
150 as the three column match format described in the mummer section. The final three columns are the
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
151 overlap between this match and the previous match, the gap between the start of this match and the
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
152 end of the previous match in the reference, and the gap between the start of this match and the end
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
153 of the previous match in the query respectively.
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
154
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
155
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
156 mgaps
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
157 -----
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
158
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
159 *mgaps* was introduced into the MUMmer pipeline in an effort to better handle large-scale
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
160 rearrangements and duplications. Unlike gaps, mgaps is a full clustering algorithm that is capable
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
161 of generating multiple groups of consistently ordered matches. Clustering is controlled by a set of
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
162 command-line parameters that adjust the minimum cluster size, maximum gap between matches, etc. Only
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
163 matches that were included in clusters will appear in the output, so by adjusting the command-line
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
164 parameters it is possible to filter out many of the spurious matches, thus leaving only the larger
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
165 areas of conservation between the input sequences. The major advantage of mgaps is its ability to
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
166 identify these "islands" of conservation. This frees the user from the single LIS restraints of the
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
167 gaps program and allows for the identification of large-scale rearrangements, duplications, gene
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
168 families and so on.
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
169
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
170 Gaps can fail to identify clusters because they were not consistent with the LIS. However, by using
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
171 mgaps, all regions of conservation can now be identified. The only drawback is the increased
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
172 complexity of the output, where you once had only one cluster for the whole comparison, you usually
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
173 now get more. Because of this, it can sometimes be difficult separating the repetitive clusters from
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
174 "correct" clusters, *making mgaps more suited for global alignments instead of localized error detection*.
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
175
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
176 Input can be raw mummer output. *mgaps* is only designed to handle a single reference and one or
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
177 more query sequences, thus the preceding mummer run must also follow this constraint. Please refer
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
178 to the run-mummer3 script (see online manual) for an example of how to use this program in an
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
179 alignment pipeline. Note that in order to cluster reverse complement matches, the reverse complement
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
180 matches must reference the reverse complement strand of the query sequence, therefore forcing mummer
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
181 to be run without the -c option. A rewrite of this algorithm to handle multiple reference sequences
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
182 and a better coordinate system (forward coordinates for reverse complement matches) is doubtful but
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
183 may eventually appear.
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
184
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
185 The -d option can be interpreted as the number of insertions allowed between two matches in the same
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
186 cluster, while the -f option is a fraction equal to (diagonal difference / match separation) where
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
187 a higher value will increase the indel tolerance. Minimum cluster length is the sum of the contained
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
188 matches unless the -e option is used. The best way to get a feel for what each parameter controls
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
189 is to cluster the same data set numerous times with different values and observe the resulting
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
190 differences. It can also be helpful to set these parameters to the size of the element you wish to
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
191 capture, i.e. set the minimum cluster size to say the smallest exon you expect and set the max gap
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
192 to the smallest intron you expect to obtain clusters that could represent single exons (depending
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
193 of course on the similarity of the two sequences).
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
194
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
195 Reference: http://mummer.sourceforge.net/manual/#mgaps
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
196
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
197 **Output format**
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
198
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
199 Output of *mgaps* shares much in common with the output of mummer and gaps, with a slightly different
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
200 header formatting than gaps to allow for multiple query sequences and multiple clusters. The output
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
201 of mgaps run on both forward and reverse complement matches is as follows:
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
202 ::
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
203
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
204 > ID41
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
205 > ID41 Reverse
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
206 5177399 1 232 none - -
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
207 5177632 234 6794 none 1 1
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
208 5184433 7035 24 none 7 7
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
209 5184468 7069 23 none 11 10
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
210 > ID42
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
211 10181 43 1521 none - -
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
212 > ID42 Reverse
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
213 4654536 17 36 none - -
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
214 4654578 57 298 none 6 4
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
215 4654877 356 226 none 1 1
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
216 #
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
217 4655139 845 28 none - -
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
218 4655178 884 694 none 11 11
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
219 4655873 1579 20 none 1 1
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
220 #
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
221 4850044 17 1492 none - -
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
222 4851537 1510 711 none 1 1
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
223 4852249 2222 42 none 1 1
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
224 (output continues ...)
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
225
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
226
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
227 Headers containing the ID for each query sequence are listed after the '>' characters, and a
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
228 following Reverse keyword identifies the reverse matches for that query sequence. Individual clusters
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
229 for each sequence are separated by a '#' character, and the six columns are exactly the same as the
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
230 gaps output (see the gaps section for more details).
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
231
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
232
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
233 |
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
234 |
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
235
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
236 </help>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
237 </tool>
479eb076cd23 Add revised mummer toolshed files to testtoolshed
abossers
parents:
diff changeset
238