comparison yahs.xml @ 5:c4b9b2d57fe1 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/yahs commit ab918ac1eab72932e78c6e45e46d745543eac810
author iuc
date Wed, 17 Sep 2025 06:27:53 +0000
parents d802668bc0fe
children
comparison
equal deleted inserted replaced
4:d802668bc0fe 5:c4b9b2d57fe1
1 <tool id="yahs" name="YAHS" version="@VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01"> 1 <tool id="yahs" name="YAHS" version="@VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
2 <description>yet another HI-C scaffolding tool</description> 2 <description>yet another HI-C scaffolding tool</description>
3 <macros> 3 <macros>
4 <token name="@VERSION@">1.2a.2</token> 4 <token name="@VERSION@">1.2a.2</token>
5 <token name="@VERSION_SUFFIX@">1</token> 5 <token name="@VERSION_SUFFIX@">3</token>
6 </macros> 6 </macros>
7 <requirements> 7 <requirements>
8 <requirement type="package" version="@VERSION@">yahs</requirement> 8 <requirement type="package" version="@VERSION@">yahs</requirement>
9 <requirement type="package" version="1.11">samtools</requirement> 9 <requirement type="package" version="1.11">samtools</requirement>
10 <requirement type="package" version="3.9">python</requirement> 10 <requirement type="package" version="3.9">python</requirement>
11 </requirements> 11 </requirements>
12 <command detect_errors="exit_code"><![CDATA[ 12 <command detect_errors="exit_code"><![CDATA[
13 #if $function.function_select == "yahs": 13 #if $function.function_select == "yahs":
14 ln -s '$function.fasta' input.fasta && 14 ln -s '$function.fasta' input.fasta &&
15 #if $function.bfile.ext == "bam": 15 ln -s '$function.bfile' input.$function.bfile.ext &&
16 ln -s '$function.bfile' input.bam &&
17 #else if $function.bfile.ext == "bed":
18 ln -s '$function.bfile' input.bed &&
19 #end if
20 #if $function.agp: 16 #if $function.agp:
21 ln -s '$function.agp' input.agp && 17 ln -s '$function.agp' input.agp &&
22 #end if 18 #end if
23 samtools faidx input.fasta && 19 samtools faidx input.fasta &&
24 mkdir initial_break agp_out agp_break final_outs && 20 mkdir initial_break agp_out agp_break final_outs &&
25 yahs --no-mem-check input.fasta 21 yahs --no-mem-check input.fasta input.$function.bfile.ext
26 #if $function.bfile.ext == "bam":
27 input.bam
28 #else if $function.bfile.ext == "bed":
29 input.bed
30 #end if
31 #if $agp: 22 #if $agp:
32 -a input.agp 23 -a input.agp
33 #end if 24 #end if
34 #if $function.res: 25 #if $function.res:
35 -r $function.res 26 -r $function.res
81 <option value="yahs">YAHS</option> 72 <option value="yahs">YAHS</option>
82 <option value="agp_to_fasta">AGP to fasta</option> 73 <option value="agp_to_fasta">AGP to fasta</option>
83 </param> 74 </param>
84 <when value="yahs"> 75 <when value="yahs">
85 <param name="fasta" type="data" format="fasta" label="Input contig sequences"/> 76 <param name="fasta" type="data" format="fasta" label="Input contig sequences"/>
86 <param name="bfile" type="data" format="bam,bed" label="Alignment file of Hi-C reads to contigs"/> 77 <param name="bfile" type="data" format="bam,bed,qname_sorted.bam,unsorted.bam" label="Alignment file of Hi-C reads to contigs. NOTE: The input BAM could either be sorted by read names (qname_sorted.bam) or not. The behaviours of the program are slightly different, which might lead to slightly different scaffolding results. For a BAM input sorted by read names, with each mapped read pair, a Hi-C link is counted between the middle positions of the read alignments; while for a BAM input sorted by coordinates or unsorted, Hi-C links are counted between the start positions of the read alignments. Also, for a BAM input not sorted by read names, the mapping quality filtering is suppressed (-q option). If a BED file is provided: the BAM file used to generate the BED file needs to have unmapped reads, supplementary/secondary alignment records, and PCR/optical duplicates filtered out, and to be sorted by read names (otherwise the resulting BED file needs to be sorted by the read name column)."/>
87 <param name="agp" argument="-a" type="data" format="agp" optional="true" label="Input AGP file (for rescaffolding)" 78 <param name="agp" argument="-a" type="data" format="agp" optional="true" label="Input AGP file (for rescaffolding)" help="You can specify a AGP format file to ask YaHS to do scaffolding with the scaffolds in the AGP file as the start point"/>
88 help="You can specify a AGP format file to ask YaHS to do scaffolding with the scaffolds in the AGP file as the start point"/> 79 <param name="res" argument="-r" type="text" label="Resolutions" optional="true" help="Comma separated, ascending list of range of resolutions with no spaces. Ex. 50000,100000,200000,500000,1000000,2000000,5000000. By default and the upper limit is automatically adjusted with the genome size">
89 <param name="res" argument="-r" type="text" label="Resolutions" optional="true" 80 <validator type="regex" message="Only numbers and commas can be used to define the list of range of resolutions.">^\d+(,\d+)*$</validator>
90 help="Comma separated, ascending list of range of resolutions with no spaces. Ex. 50000,100000,200000,500000,1000000,2000000,5000000. By default and the upper limit is automatically adjusted with the genome size"/> 81 </param>
91 <conditional name="enzyme_conditional"> 82 <conditional name="enzyme_conditional">
92 <param name="enzyme_options" type="select" label="Restriction enzyme used in Hi-C experiment" help="Hi-C experiments can use different restriction enzymes. 83 <param name="enzyme_options" type="select" label="Restriction enzyme used in Hi-C experiment" help="Hi-C experiments can use different restriction enzymes.
93 The enzyme frequency in contigs is used to normalize the Hi-C interaction frequency. Note that you need to specify the actual 84 The enzyme frequency in contigs is used to normalize the Hi-C interaction frequency. Note that you need to specify the actual
94 sequence of the cutting site for a restriction enzyme and not the enzyme name. You can also specify DNASE as an enzyme if you 85 sequence of the cutting site for a restriction enzyme and not the enzyme name. You can also specify DNASE as an enzyme if you
95 use an enzyme-free prep, e.g. Omin-C."> 86 use an enzyme-free prep, e.g. Omni-C.">
96 <option value="not_specified">Not specified</option> 87 <option value="not_specified">Not specified</option>
97 <option value="preconfigured">Preconfigured restriction enzymes</option> 88 <option value="preconfigured">Preconfigured restriction enzymes</option>
98 <option value="specific">Enter a specific sequence</option> 89 <option value="specific">Enter a specific sequence</option>
99 </param> 90 </param>
100 <when value="not_specified"/> 91 <when value="not_specified"/>
105 <option value="arima2">Arima Hi-C 2.0: GATC, GANTC, CTNAG, TTAA</option> 96 <option value="arima2">Arima Hi-C 2.0: GATC, GANTC, CTNAG, TTAA</option>
106 <option value="omnic">Dovetail Omni-C: enzyme-free prep</option> 97 <option value="omnic">Dovetail Omni-C: enzyme-free prep</option>
107 </param> 98 </param>
108 </when> 99 </when>
109 <when value="specific"> 100 <when value="specific">
110 <param name="manual_enzyme" argument="-e" type="text" label="Restriction enzyme sequence(s)" 101 <param name="manual_enzyme" argument="-e" type="text" label="Restriction enzyme sequence(s)" help="Restriction enzyme sequence. If multiple were used, include all as a comma separated list without spaces (ex. 'GATC,AAGCTT').">
111 help="Restriction enzyme sequence. If multiple were used, include all as a comma separated list without spaces (ex. 'GATC,AAGCTT')."> 102 <validator type="expression" message="Only alphabetical letters and the comma can be used to define restriction enzyme sequences.">value.replace(',', '').isalpha()</validator>
112 <validator type="expression" message="Only alphabetical letters and the comma can be used in to define restriction enzym sequences.">value.replace(',', '').isalpha()</validator>
113 </param> 103 </param>
114 </when> 104 </when>
115 </conditional> 105 </conditional>
116 <param name="length" argument="-l" type="integer" label="Minimum contig length included for scaffolding" min="1" optional="true"/> 106 <param name="length" argument="-l" type="integer" label="Minimum contig length included for scaffolding" min="1" optional="true"/>
117 <param name="quality" argument="-q" type="integer" label="Minimum read mapping quality (for BAM input only)" min="1" optional="true"/> 107 <param name="quality" argument="-q" type="integer" label="Minimum read mapping quality (for BAM input only)" min="1" optional="true"/>
119 <param argument="--no-scaffold-ec" type="boolean" label="Skip assembly/scaffolding error check each round" truevalue="--no-scaffold-ec" falsevalue="" help="Will also prevent any break.agp output files"/> 109 <param argument="--no-scaffold-ec" type="boolean" label="Skip assembly/scaffolding error check each round" truevalue="--no-scaffold-ec" falsevalue="" help="Will also prevent any break.agp output files"/>
120 </when> 110 </when>
121 <when value="agp_to_fasta"> 111 <when value="agp_to_fasta">
122 <param name="agp" type="data" format="agp" label="Input AGP file"/> 112 <param name="agp" type="data" format="agp" label="Input AGP file"/>
123 <param name="fasta" type="data" format="fasta" label="Contig fasta file"/> 113 <param name="fasta" type="data" format="fasta" label="Contig fasta file"/>
124 <param name="length" type='integer' label="Output fasta line length" value="60" min="1"/> 114 <param name="length" type="integer" label="Output fasta line length" value="60" min="1"/>
125 </when> 115 </when>
126 </conditional> 116 </conditional>
127 <param name="log_out" type="boolean" label="Output log file?" truevalue="yes" falsevalue="no"/> 117 <param name="log_out" type="boolean" label="Output log file?" truevalue="yes" falsevalue="no"/>
128 </inputs> 118 </inputs>
129 <outputs> 119 <outputs>
157 <tests> 147 <tests>
158 <!-- TEST 1 --> 148 <!-- TEST 1 -->
159 <test expect_num_outputs="5"> 149 <test expect_num_outputs="5">
160 <conditional name="function"> 150 <conditional name="function">
161 <param name="function_select" value="yahs"/> 151 <param name="function_select" value="yahs"/>
162 <param name="fasta" value="test.fasta"/> 152 <param name="fasta" value="test.fasta" ftype="fasta"/>
163 <param name="bfile" value="test.bed"/> 153 <param name="bfile" value="test.bed" ftype="bed" />
164 <param name="agp" value="test.agp"/> 154 <param name="agp" value="test.agp" ftype="agp"/>
165 <param name="res" value="50000,100000,150000,2000000,1000000"/> 155 <param name="res" value="50000,100000,150000,2000000,1000000"/>
166 </conditional> 156 </conditional>
167 <output name="final_agp_out" file="test_01_scaffolds_final.agp" ftype="agp"/> 157 <output name="final_agp_out" file="test_01_scaffolds_final.agp" ftype="agp"/>
168 <output name="final_fasta_out" file="test_01_scaffolds_final.fa" ftype="fasta"/> 158 <output name="final_fasta_out" file="test_01_scaffolds_final.fa" ftype="fasta"/>
169 <!-- COMMAND: yahs test.fasta test.bed -r 50000,100000,150000,2000000,1000000 -a test.agp -o test_1 --> 159 <!-- COMMAND: yahs test.fasta test.bed -r 50000,100000,150000,2000000,1000000 -a test.agp -o test_1 -->
170 </test> 160 </test>
171 <!-- TEST 2 --> 161 <!-- TEST 2 -->
172 <test expect_num_outputs="5"> 162 <test expect_num_outputs="5">
173 <conditional name="function"> 163 <conditional name="function">
174 <param name="function_select" value="yahs"/> 164 <param name="function_select" value="yahs"/>
175 <param name="fasta" value="test.fasta"/> 165 <param name="fasta" value="test.fasta" ftype="fasta"/>
176 <param name="bfile" value="test.bed"/> 166 <param name="bfile" value="test.bed" ftype="bed"/>
177 <param name="no_contig_ec" value="--no-contig-ec"/> 167 <param name="no_contig_ec" value="true"/>
178 <param name="no_scaffold_ec" value="--no-scaffold-ec"/> 168 <param name="no_scaffold_ec" value="true"/>
179 </conditional> 169 </conditional>
180 <output name="final_agp_out" file="test_02_scaffolds_final.agp" ftype="agp"/> 170 <output name="final_agp_out" file="test_02_scaffolds_final.agp" ftype="agp"/>
181 <output name="final_fasta_out" file="test_02_scaffolds_final.fa" ftype="fasta"/> 171 <output name="final_fasta_out" file="test_02_scaffolds_final.fa" ftype="fasta"/>
182 <output_collection name="agp_break"> 172 <output_collection name="agp_break">
183 <element name="yahs_out_no_break" file="test_02_no_break.agp" ftype="agp"/> 173 <element name="yahs_out_no_break" file="test_02_no_break.agp" ftype="agp"/>
186 </test> 176 </test>
187 <!-- TEST 3 --> 177 <!-- TEST 3 -->
188 <test expect_num_outputs="6"> 178 <test expect_num_outputs="6">
189 <conditional name="function"> 179 <conditional name="function">
190 <param name="function_select" value="yahs"/> 180 <param name="function_select" value="yahs"/>
191 <param name="fasta" value="test2.fasta"/> 181 <param name="fasta" value="test2.fasta" ftype="fasta"/>
192 <param name="bfile" value="test2.bam"/> 182 <param name="bfile" value="test2.bam" ftype="bam"/>
193 <param name="res" value="1000,2000,5000,10000,20000,50000,100000,200000,500000"/> 183 <param name="res" value="1000,2000,5000,10000,20000,50000,100000,200000,500000"/>
194 <conditional name="enzyme_conditional"> 184 <conditional name="enzyme_conditional">
195 <param name="enzyme_options" value="not_specified"/> 185 <param name="enzyme_options" value="not_specified"/>
196 </conditional> 186 </conditional>
197 </conditional> 187 </conditional>
198 <param name="log_out" value="yes"/> 188 <param name="log_out" value="yes"/>
199 <output name="log_file" ftype="txt"> 189 <output name="log_file" ftype="txt">
200 <assert_contents> 190 <assert_contents>
201 <has_text text="[I::dump_links_from_bam_file] dumped 6399 read pairs from 17675 records: 6297 intra links + 102 inter links" /> 191 <has_text text="[I::dump_links_from_bam_file] dumped 6399 read pairs from 17675 records: 6297 intra links + 102 inter links"/>
202 </assert_contents> 192 </assert_contents>
203 </output> 193 </output>
204 <!-- COMMAND: yahs test.fasta test.bam -r 1000,2000,5000,10000,20000,50000,100000,200000,500000 -o test_3 --> 194 <!-- COMMAND: yahs test.fasta test.bam -r 1000,2000,5000,10000,20000,50000,100000,200000,500000 -o test_3 -->
205 </test> 195 </test>
206 <!-- TEST 4 --> 196 <!-- TEST 4 -->
207 <test expect_num_outputs="5"> 197 <test expect_num_outputs="5">
208 <conditional name="function"> 198 <conditional name="function">
209 <param name="function_select" value="yahs"/> 199 <param name="function_select" value="yahs"/>
210 <param name="fasta" value="test2.fasta"/> 200 <param name="fasta" value="test2.fasta" ftype="fasta"/>
211 <param name="bfile" value="test2.bed"/> 201 <param name="bfile" value="test2.bed" ftype="bed"/>
212 <param name="qual" value="10"/> 202 <param name="quality" value="10"/>
213 <param name="length" value="20"/> 203 <param name="length" value="20"/>
214 <conditional name="enzyme_conditional"> 204 <conditional name="enzyme_conditional">
215 <param name="enzyme_options" value="not_specified"/> 205 <param name="enzyme_options" value="not_specified"/>
216 </conditional> 206 </conditional>
217 </conditional> 207 </conditional>
223 </test> 213 </test>
224 <!-- TEST 5 --> 214 <!-- TEST 5 -->
225 <test expect_num_outputs="1"> 215 <test expect_num_outputs="1">
226 <conditional name="function"> 216 <conditional name="function">
227 <param name="function_select" value="agp_to_fasta"/> 217 <param name="function_select" value="agp_to_fasta"/>
228 <param name="fasta" value="test.fasta"/> 218 <param name="fasta" value="test.fasta" ftype="fasta"/>
229 <param name="agp" value="test.agp"/> 219 <param name="agp" value="test.agp" ftype="agp"/>
230 <param name="length" value="20"/> 220 <param name="length" value="20"/>
231 </conditional> 221 </conditional>
232 <output name="fasta_from_agp" file="test_05.fasta" ftype="fasta"/> 222 <output name="fasta_from_agp" file="test_05.fasta" ftype="fasta"/>
233 <!-- COMMAND: agp_to_fasta test.fasta test.agp -l 20 -o test_4 --> 223 <!-- COMMAND: agp_to_fasta test.fasta test.agp -l 20 -o test_4 -->
234 </test> 224 </test>
235 <!-- TEST 6 --> 225 <!-- TEST 6 -->
236 <test expect_num_outputs="6"> 226 <test expect_num_outputs="6">
237 <conditional name="function"> 227 <conditional name="function">
238 <param name="function_select" value="yahs"/> 228 <param name="function_select" value="yahs"/>
239 <param name="fasta" value="test.fasta"/> 229 <param name="fasta" value="test.fasta" ftype="fasta"/>
240 <param name="bfile" value="test.bed"/> 230 <param name="bfile" value="test.bed" ftype="bed"/>
241 <param name="agp" value="test.agp"/> 231 <param name="agp" value="test.agp" ftype="agp"/>
242 <param name="res" value="50000,100000,150000,2000000,1000000"/> 232 <param name="res" value="50000,100000,150000,2000000,1000000"/>
243 </conditional> 233 </conditional>
244 <param name="log_out" value="yes"/> 234 <param name="log_out" value="yes"/>
245 <output name="final_agp_out" file="test_01_scaffolds_final.agp" ftype="agp"/> 235 <output name="final_agp_out" file="test_01_scaffolds_final.agp" ftype="agp"/>
246 <output name="final_fasta_out" file="test_01_scaffolds_final.fa" ftype="fasta"/> 236 <output name="final_fasta_out" file="test_01_scaffolds_final.fa" ftype="fasta"/>
253 </test> 243 </test>
254 <!-- TEST 7: omnic prep --> 244 <!-- TEST 7: omnic prep -->
255 <test expect_num_outputs="6"> 245 <test expect_num_outputs="6">
256 <conditional name="function"> 246 <conditional name="function">
257 <param name="function_select" value="yahs"/> 247 <param name="function_select" value="yahs"/>
258 <param name="fasta" value="test.fasta"/> 248 <param name="fasta" value="test.fasta" ftype="fasta"/>
259 <param name="bfile" value="test.bed"/> 249 <param name="bfile" value="test.bed" ftype="bed"/>
260 </conditional> 250 <conditional name="enzyme_conditional">
261 <conditional name="enzyme_conditional"> 251 <param name="enzyme_options" value="preconfigured"/>
262 <param name="enzyme_options" value="preconfigured"/> 252 <param name="preconfigured_enzymes" value="omnic"/>
263 <param name="preconfigured_enzymes" value="omnic"/> 253 </conditional>
264 </conditional> 254 </conditional>
265 <param name="log_out" value="yes"/> 255 <param name="log_out" value="yes"/>
266 <output name="log_file" ftype="txt"> 256 <output name="log_file" ftype="txt">
267 <assert_contents> 257 <assert_contents>
268 <not_has_text text="-e"/> 258 <not_has_text text="-e"/>
269 </assert_contents> 259 </assert_contents>
270 </output> 260 </output>
261 </test>
262 <!-- TEST 8: qname_sorted-->
263 <test expect_num_outputs="6">
264 <conditional name="function">
265 <param name="function_select" value="yahs"/>
266 <param name="fasta" value="test2.fasta" ftype="fasta"/>
267 <param name="bfile" value="test3.qname_sorted.bam" ftype="qname_sorted.bam"/>
268 <param name="res" value="1000,2000,5000,10000,20000,50000,100000,200000,500000"/>
269 <conditional name="enzyme_conditional">
270 <param name="enzyme_options" value="not_specified"/>
271 </conditional>
272 </conditional>
273 <param name="log_out" value="yes"/>
274 <output name="log_file" ftype="txt">
275 <assert_contents>
276 <has_text text="[I::dump_links_from_bam_file] dumped 6399 read pairs from 17675 records: 6399 intra links + 0 inter links"/>
277 </assert_contents>
278 </output>
279 </test>
280 <!-- TEST 9: unsorted-->
281 <test expect_num_outputs="6">
282 <conditional name="function">
283 <param name="function_select" value="yahs"/>
284 <param name="fasta" value="test2.fasta" ftype="fasta"/>
285 <param name="bfile" value="test2.unsorted.bam" ftype="bam"/>
286 <param name="res" value="1000,2000,5000,10000,20000,50000,100000,200000,500000"/>
287 <conditional name="enzyme_conditional">
288 <param name="enzyme_options" value="not_specified"/>
289 </conditional>
290 </conditional>
291 <param name="log_out" value="yes"/>
292 <output name="log_file" ftype="txt">
293 <assert_contents>
294 <has_text text="[I::dump_links_from_bam_file] dumped 6399 read pairs from 17675 records: 6297 intra links + 102 inter links"/>
295 </assert_contents>
296 </output>
297 <!-- COMMAND: yahs test.fasta test.bam -r 1000,2000,5000,10000,20000,50000,100000,200000,500000 -o test_3 -->
271 </test> 298 </test>
272 </tests> 299 </tests>
273 <help><![CDATA[ 300 <help><![CDATA[
274 YaHS is scaffolding tool using Hi-C data. It relies on a new algothrim for contig joining detection which considers the topological distribution of Hi-C signals aiming to distingush real interaction signals from mapping nosies. YaHS has been tested in a wide range of genome assemblies. Compared to other Hi-C scaffolding tools, it usually generates more contiguous scaffolds - especially with a higher N90 and L90 statistics. It is also super fast - takes less than 5 minutes to reconstruct the human genome from an assembly of 5,483 contigs with ~45X Hi-C data. 301 YaHS is a scaffolding tool using Hi-C data. It relies on a new algorithm for contig joining detection which considers the topological distribution of Hi-C signals aiming to distinguish real interaction signals from mapping noises. YaHS has been tested in a wide range of genome assemblies. Compared to other Hi-C scaffolding tools, it usually generates more contiguous scaffolds - especially with a higher N90 and L90 statistics. It is also super fast - takes less than 5 minutes to reconstruct the human genome from an assembly of 5,483 contigs with ~45X Hi-C data.
275 ]]></help> 302 ]]></help>
276 <citations> 303 <citations>
277 <citation type="doi">10.5281/zenodo.5848772</citation> 304 <citation type="doi">10.5281/zenodo.5848772</citation>
278 </citations> 305 </citations>
279 </tool> 306 </tool>