<?xml version="1.0" encoding="UTF-8"?>
<!--
    Galaxy tool wrapper for the second WindowMasker stage: masks the sequences
    of a FASTA file using a previously generated unit counts table and writes
    the result as FASTA, BED or one of the maskinfo formats.
-->
<tool id="windowmasker_ustat" name="WindowMasker_ustat" version="1.0">
    <description>Mask sequences using a WindowMasker unit counts table</description>

    <!-- Shared definitions (requirements, citations, command tokens) are imported from here. -->
    <macros>
        <import>windowmasker_macros.xml</import>
    </macros>

    <expand macro="requirements" />

    <command detect_errors="exit_code">
<![CDATA[
## NOTE(review): this token presumably expands to the Cheetah definition of
## optional_param() from windowmasker_macros.xml; confirm against the macros file.
@OPTIONAL_PARAM_FUNC@

## The BED branch below pipes windowmasker into a converter script. Without
## pipefail the pipeline reports only the exit status of the converter, so a
## windowmasker failure would go unnoticed by detect_errors="exit_code".
#if str($output_format) == "interval":
    set -o pipefail;
#end if

windowmasker -ustat "${mkcount_input}"
    -infmt fasta
    -in "${fasta_input}"
    -outfmt "${output_format}"
    ${parse_seqids}

    ## DUST options are only emitted when the user asked for DUST masking.
    #if str($use_dust.use_dust_selector) == "yes":
        -dust true -dust_level ${use_dust.dust_level}
    #end if

    ## Advanced options; all of them are optional inputs of the "adv" section.
    $optional_param("-set_t_high", $adv.set_t_high)
    $optional_param("-set_t_low", $adv.set_t_low)
    $optional_param("-t_extend", $adv.t_extend)
    $optional_param("-t_low", $adv.t_low)
    $optional_param("-t_high", $adv.t_high)
    $optional_param("-t_thres", $adv.t_thres)
    $optional_param("-window", $adv.window)

    ## Convert WindowMasker interval output to BED format
    #if str($output_format) == "interval":
        | "${__tool_directory__}/windowmasker_to_bed.pl" > "${mask_output}"
    #else
        -out "${mask_output}"
    #end if
]]>
    </command>

    <inputs>
        <!-- Sequences to be masked. -->
        <param name="fasta_input" type="data" format="fasta"
               label="FASTA sequence file" />

        <!-- Unit counts table passed to windowmasker via -ustat. -->
        <param name="mkcount_input" type="data" format="txt"
               label="Unit counts produced by WindowMasker mkcount" />

        <!-- Inserted verbatim into the command line: the flag when checked, nothing otherwise. -->
        <param name="parse_seqids" type="boolean" checked="false"
               truevalue="-parse_seqids" falsevalue=""
               label="Parse Seq-ids in FASTA input"
               help="-parse_seqids" />

        <conditional name="use_dust">
            <!-- "yes" is listed first and is therefore the default choice. -->
            <param name="use_dust_selector" type="select"
                   label="Use DUST to mask low complexity sequences?"
                   help="-dust">
                <option value="yes" selected="true">Yes</option>
                <option value="no">No</option>
            </param>

            <when value="yes">
                <param name="dust_level" type="integer"
                       min="1" value="20"
                       label="DUST level"
                       help="Score threshold for subwindows" />
            </when>

            <when value="no" />
        </conditional>

        <!--
            The option values are passed straight to -outfmt. "interval" is
            presented to the user as BED because the command converts the
            interval output to BED.
        -->
        <param name="output_format" type="select" label="Output format">
            <option value="fasta">FASTA</option>
            <option value="interval" selected="true">BED</option>
            <option value="maskinfo_asn1_bin">maskinfo ASN.1 binary</option>
            <option value="maskinfo_asn1_text">maskinfo ASN.1 text</option>
            <option value="maskinfo_xml">maskinfo XML</option>
        </param>

        <!-- Every option in this section is optional and has no default value here. -->
        <section name="adv" title="Advanced options" expanded="false">

            <param name="set_t_high" type="integer" label="set_t_high"
                   min="0" optional="true"
                   help="Score for units with unit count greater than T_high" />

            <param name="set_t_low" type="integer" label="set_t_low"
                   min="0" optional="true"
                   help="Score for units with unit count less than T_low" />

            <param name="t_extend" type="integer" label="t_extend"
                   min="0" optional="true"
                   help="Override the t_extend value in the unit counts file" />

            <param name="t_low" type="integer" label="t_low"
                   min="0" optional="true"
                   help="Override the t_low value in the unit counts file" />

            <param name="t_high" type="integer" label="t_high"
                   min="0" optional="true"
                   help="Override the t_high value in the unit counts file" />

            <param name="t_thres" type="integer" label="t_threshold"
                   min="0" optional="true"
                   help="Override the score threshold value in the unit counts file" />

            <param name="window" type="integer" label="window"
                   min="0" optional="true"
                   help="Size of the sliding window (default = unit_size + 4)" />
        </section>
    </inputs>

    <outputs>
        <!-- BED is the datatype for the default "interval" choice; other choices switch it below. -->
        <data name="mask_output" format="bed">
            <change_format>
                <when input="output_format" value="fasta" format="fasta" />
                <when input="output_format" value="maskinfo_asn1_bin" format="maskinfo-asn1-binary" />
                <when input="output_format" value="maskinfo_asn1_text" format="maskinfo-asn1" />
                <when input="output_format" value="maskinfo_xml" format="xml" />
            </change_format>
        </data>
    </outputs>

    <tests>
        <test>
            <!-- Test WindowMasker ustat with fasta output -->
            <param name="fasta_input" value="contigs.fa" ftype="fasta" />
            <param name="mkcount_input" value="chr4_part.oascii.counts" ftype="txt" />
            <param name="output_format" value="fasta" />
            <output name="mask_output" file="contigs.wm.fa" />
        </test>
        <test>
            <!-- Test WindowMasker ustat with ASN.1 text output -->
            <param name="fasta_input" value="contigs.fa" ftype="fasta" />
            <param name="mkcount_input" value="chr4_part.oascii.counts" ftype="txt" />
            <param name="output_format" value="maskinfo_asn1_text" />
            <output name="mask_output" file="contigs.wm.asn1" />
        </test>
        <test>
            <!-- Test WindowMasker ustat with ASN.1 binary output -->
            <param name="fasta_input" value="contigs.fa" ftype="fasta" />
            <param name="mkcount_input" value="chr4_part.oascii.counts" ftype="txt" />
            <param name="output_format" value="maskinfo_asn1_bin" />
            <output name="mask_output" file="contigs.wm.asnb" />
        </test>
        <test>
            <!-- Test WindowMasker ustat with XML output -->
            <param name="fasta_input" value="contigs.fa" ftype="fasta" />
            <param name="mkcount_input" value="chr4_part.oascii.counts" ftype="txt" />
            <param name="output_format" value="maskinfo_xml" />
            <output name="mask_output" file="contigs.wm.xml" />
        </test>
        <test>
            <!-- Test WindowMasker ustat with advanced settings -->
            <!-- The parameters of the "adv" section are addressed by their bare names. -->
            <param name="fasta_input" value="contigs.fa" ftype="fasta" />
            <param name="mkcount_input" value="chr4_part.oascii.counts" ftype="txt" />
            <param name="set_t_high" value="10" />
            <param name="set_t_low" value="9" />
            <param name="t_extend" value="5" />
            <param name="t_low" value="9" />
            <param name="t_high" value="10" />
            <param name="t_thres" value="20" />
            <param name="window" value="50" />
            <output name="mask_output" file="contigs.advanced_wm.bed" />
        </test>
        <test>
            <!-- Test WindowMasker ustat without dust -->
            <param name="fasta_input" value="contigs.fa" ftype="fasta" />
            <param name="mkcount_input" value="chr4_part.oascii.counts" ftype="txt" />
            <param name="use_dust_selector" value="no" />
            <output name="mask_output" file="contigs.nodust_wm.bed" />
        </test>
        <test>
            <!-- Test WindowMasker ustat with parse Seq-ids -->
            <!-- Boolean inputs are set with true/false rather than with their truevalue string. -->
            <param name="fasta_input" value="contigs.fa" ftype="fasta" />
            <param name="mkcount_input" value="chr4_part.oascii.counts" ftype="txt" />
            <param name="parse_seqids" value="true" />
            <output name="mask_output" file="contigs.seqid_wm.bed" />
        </test>
    </tests>

    <help>
<![CDATA[
**What it does**

This tool runs `stage 2 <https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/src/app/winmasker/>`_
of the WindowMasker analysis to identify repeats within the input sequences.

.. class:: infomark

**Output formats:**

* Use the **binary or text maskinfo ASN.1** output formats to generate the mask file for
  the `NCBI BLAST+ makeblastdb tool <https://www.ncbi.nlm.nih.gov/books/NBK279681/#_cookbook_Create_BLAST_database_with_the_>`_
* Use the BED output format to generate a list of masked regions


.. class:: infomark

**Advanced options:**

* See the `WindowMasker README file <https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/src/app/winmasker/README>`_
  for additional details on the WindowMasker repeat masking options

]]></help>

    <expand macro="citations" />
</tool>