Mercurial > repos > iuc > necat
comparison necat.xml @ 0:cee311483327 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/necat commit 6946d81de9419c90e9bc4ea2f7bd5e4168dd6dd6
| author | iuc |
|---|---|
| date | Fri, 25 Nov 2022 14:23:52 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:cee311483327 |
|---|---|
| 1 <tool id="necat" name="necat" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01" license="MIT"> | |
| 2 <description>Error correction and de-novo assembly for ONT Nanopore reads</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <xrefs> | |
| 7 <xref type="bio.tools">necat</xref> | |
| 8 </xrefs> | |
| 9 <requirements> | |
| 10 <requirement type="package" version="@TOOL_VERSION@">necat</requirement> | |
| 11 </requirements> | |
| 12 <command detect_errors="exit_code"><![CDATA[ | |
| 13 ## helper function: returns "reads_<i>.<ext>" for an input, with the Galaxy extension "fastqsanger" mapped to "fastq" | |
| 14 #def make_filename($i, $input_param) | |
| 15 #set ext = $input_param.extension | |
| 16 #if $ext == "fastqsanger" | |
| 17 #set $ext = "fastq" | |
| 18 #end if | |
| 19 #set filename = "reads_" + str($i) + "." + $ext | |
| 20 #return $filename | |
| 21 #end def | |
| 22 | |
| 23 ## copy each input file (including everything in input collections) into the working directory and append its name to read_list.txt | |
| 24 #set i = 1 | |
| 25 #for input in $input_fastqs | |
| 26 #set filename = $make_filename($i, $input) | |
| 27 cp '$input' $filename | |
| 28 && echo $filename >> read_list.txt && | |
| 29 #set i = $i + 1 | |
| 30 #end for | |
| 31 | |
| 32 ## #for $i, $input in enumerate($input_fastqs): | |
| 33 ## #set filename = 'reads_${i}.$input.ext' | |
| 34 ## ln -s '$input' $filename && | |
| 35 ## echo $filename >> read_list.txt && | |
| 36 ## #end for | |
| 37 | |
| 38 ## necat commands | |
| 39 necat correct '${job_configfile}' | |
| 40 #if $assembly.should_assemble == "yes": | |
| 41 && necat assemble '${job_configfile}' | |
| 42 && necat bridge '${job_configfile}' | |
| 43 #end if | |
| 44 ]]></command> | |
| 45 <configfiles> | |
| 46 <expand macro="job_conf" /> | |
| 47 </configfiles> | |
| 48 <inputs> | |
| 49 <param name="input_fastqs" type="data" format="fastq,fastq.gz,fasta,fasta.gz" multiple="true" label="Input reads" help="Input read files (FASTQ or FASTA). To select more than one file or collection from your history, use the 'ctrl' key" /> | |
| 50 | |
| 51 <param name="genome_size" type="integer" value="" min="1" max="100000000000" label="Genome size" help="Estimated size of genome (bp)" /> | |
| 52 <param name="min_read_length" type="integer" value="1000" min="1" max="10000000" label="Min read length" help="Minimum length for input reads" /> | |
| 53 <param name="correction_coverage" type="integer" value="40" min="1" max="10000" label="Correction coverage" help="Number of reads to correct in terms of genome coverage. For a 4Gb genome, setting correction coverage = 10 will correct the longest 40Gb worth of reads from the input fastq. " /> | |
| 54 <conditional name="assembly"> | |
| 55 <param name="should_assemble" type="select" label="Assembly"> | |
| 56 <option value="no" selected="true">Don't perform assembly</option> | |
| 57 <option value="yes">Perform assembly on corrected reads</option> | |
| 58 </param> | |
| 59 <when value="no" /> | |
| 60 <when value="yes"> | |
| 61 <param name="assembly_coverage" type="integer" value="30" min="1" max="10000" label="Assembly coverage" help="Number of reads to use in genome assembly in terms of genome coverage" /> | |
| 62 <param name="polish_contigs" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Polish contigs" help="Polish contigs as final step after bridging" /> | |
| 63 </when> | |
| 64 </conditional> | |
| 65 | |
| 66 <section name="adv" title="Advanced options" expanded="false" help="Warning: only change these if you really know what you are doing"> | |
| 67 <expand macro="overlap_sensitive_options" /> | |
| 68 <expand macro="consensus_sensitive_options" /> | |
| 69 <expand macro="overlap_fast_options" /> | |
| 70 <expand macro="consensus_fast_options" /> | |
| 71 <expand macro="trimming_overlap_options" /> | |
| 72 <expand macro="assembly_overlap_options" /> | |
| 73 <expand macro="assembly_overlap_filtering" /> | |
| 74 <expand macro="contig_assembly" /> | |
| 75 <expand macro="contig_bridging" /> | |
| 76 </section> | |
| 77 </inputs> | |
| 78 <outputs> <!-- NECAT's bridge step writes bridged_contigs.fasta; polished_contigs.fasta is only produced when contig polishing is enabled, so each output collects the file matching its filter and label --> | |
| 79 <data name="out_reads" format="fasta.gz" from_work_dir="project/1-consensus/cns_final.fasta.gz" label="${tool.name} on ${on_string}: corrected reads" /> | |
| 80 <data name="out_assembly" format="fasta" from_work_dir="project/6-bridge_contigs/bridged_contigs.fasta" label="${tool.name} on ${on_string}: bridged assembly"> | |
| 81 <filter>assembly['should_assemble'] == 'yes' and not assembly['polish_contigs']</filter> | |
| 82 </data> | |
| 83 <data name="out_polished_assembly" format="fasta" from_work_dir="project/6-bridge_contigs/polished_contigs.fasta" label="${tool.name} on ${on_string}: polished assembly"> | |
| 84 <filter>assembly['should_assemble'] == 'yes' and assembly['polish_contigs']</filter> | |
| 85 </data> | |
| 86 </outputs> | |
| 87 <tests> | |
| 88 <!-- single input fasta --> | |
| 89 <test expect_num_outputs="2"> | |
| 90 <param name="input_fastqs" value="test1.fa" /> | |
| 91 <param name="genome_size" value="13000" /> | |
| 92 <param name="min_read_length" value="1000" /> | |
| 93 <param name="correction_coverage" value="40" /> | |
| 94 <conditional name="assembly"> | |
| 95 <param name="should_assemble" value="yes" /> | |
| 96 <param name="assembly_coverage" value="30"/> | |
| 97 <param name="polish_contigs" value="true"/> | |
| 98 </conditional> | |
| 99 <output name="out_reads" ftype="fasta.gz"> | |
| 100 <assert_contents> | |
| 101 <has_size value="75000" delta="2000" /> | |
| 102 </assert_contents> | |
| 103 </output> | |
| 104 <output name="out_polished_assembly" ftype="fasta"> | |
| 105 <assert_contents> | |
| 106 <has_line line=">bctg00000000 000000F" /> | |
| 107 <has_size value="13000" delta="1000" /> | |
| 108 </assert_contents> | |
| 109 </output> | |
| 110 </test> | |
| 111 <!-- multiple input files of different format --> | |
| 112 <test expect_num_outputs="2"> | |
| 113 <param name="input_fastqs" value="test1_head.fastq,test1_tail.fasta" /> | |
| 114 <param name="genome_size" value="13000" /> | |
| 115 <param name="min_read_length" value="1000" /> | |
| 116 <param name="correction_coverage" value="40" /> | |
| 117 <conditional name="assembly"> | |
| 118 <param name="should_assemble" value="yes" /> | |
| 119 <param name="assembly_coverage" value="30"/> | |
| 120 <param name="polish_contigs" value="true"/> | |
| 121 </conditional> | |
| 122 <output name="out_reads" ftype="fasta.gz"> | |
| 123 <assert_contents> | |
| 124 <has_size value="29000" delta="2000" /> | |
| 125 </assert_contents> | |
| 126 </output> | |
| 127 <output name="out_polished_assembly" ftype="fasta"> | |
| 128 <assert_contents> | |
| 129 <has_line line=">bctg00000000 000000F" /> | |
| 130 <has_size value="13000" delta="1000" /> | |
| 131 </assert_contents> | |
| 132 </output> | |
| 133 </test> | |
| 134 <!-- advanced params 1 --> | |
| 135 <test expect_num_outputs="2"> | |
| 136 <param name="input_fastqs" value="test1.fa" /> | |
| 137 <param name="genome_size" value="13000" /> | |
| 138 <param name="min_read_length" value="1000" /> | |
| 139 <param name="correction_coverage" value="40" /> | |
| 140 <conditional name="assembly"> | |
| 141 <param name="should_assemble" value="yes" /> | |
| 142 <param name="assembly_coverage" value="30"/> | |
| 143 <param name="polish_contigs" value="true"/> | |
| 144 </conditional> | |
| 145 <section name="adv"> | |
| 146 <section name="ovs"> | |
| 147 <param name="n" value="600" /> | |
| 148 <param name="k" value="14" /> | |
| 149 <param name="q" value="600" /> | |
| 150 <param name="z" value="15" /> | |
| 151 <param name="b" value="2500" /> | |
| 152 <param name="a" value="800" /> | |
| 153 <param name="d" value="0.25" /> | |
| 154 <param name="e" value="0.4" /> | |
| 155 <param name="m" value="600" /> | |
| 156 </section> | |
| 157 </section> | |
| 158 <output name="out_reads" ftype="fasta.gz"> | |
| 159 <assert_contents> | |
| 160 <has_size value="75000" delta="2000" /> | |
| 161 </assert_contents> | |
| 162 </output> | |
| 163 <output name="out_polished_assembly" ftype="fasta"> | |
| 164 <assert_contents> | |
| 165 <has_line line=">bctg00000000 000000F" /> | |
| 166 <has_size value="13000" delta="1000" /> | |
| 167 </assert_contents> | |
| 168 </output> | |
| 169 </test> | |
| 170 <!-- advanced params 2 --> | |
| 171 <test expect_num_outputs="2"> | |
| 172 <param name="input_fastqs" value="test1.fa" /> | |
| 173 <param name="genome_size" value="13000" /> | |
| 174 <param name="min_read_length" value="1000" /> | |
| 175 <param name="correction_coverage" value="40" /> | |
| 176 <conditional name="assembly"> | |
| 177 <param name="should_assemble" value="yes" /> | |
| 178 <param name="assembly_coverage" value="30"/> | |
| 179 <param name="polish_contigs" value="true"/> | |
| 180 </conditional> | |
| 181 <section name="adv"> | |
| 182 <section name="fol"> | |
| 183 <param name="min_length" value="2000" /> | |
| 184 <param name="max_length" value="200000" /> | |
| 185 <param name="min_aligned_length" value="2000" /> | |
| 186 <param name="max_overhang" value="20000" /> | |
| 187 <param name="min_coverage" value="5" /> | |
| 188 <param name="bestn" value="5" /> | |
| 189 <param name="overhang_local_deviation1" value="5" /> | |
| 190 </section> | |
| 191 </section> | |
| 192 <output name="out_reads" ftype="fasta.gz"> | |
| 193 <assert_contents> | |
| 194 <has_size value="75000" delta="2000" /> | |
| 195 </assert_contents> | |
| 196 </output> | |
| 197 <output name="out_polished_assembly" ftype="fasta"> | |
| 198 <assert_contents> | |
| 199 <has_line line=">bctg00000000 000000F" /> | |
| 200 <has_size value="13000" delta="1000" /> | |
| 201 </assert_contents> | |
| 202 </output> | |
| 203 </test> | |
| 204 <!-- advanced params 3 --> | |
| 205 <test expect_num_outputs="2"> | |
| 206 <param name="input_fastqs" value="test1.fa" /> | |
| 207 <param name="genome_size" value="13000" /> | |
| 208 <param name="min_read_length" value="1000" /> | |
| 209 <param name="correction_coverage" value="40" /> | |
| 210 <conditional name="assembly"> | |
| 211 <param name="should_assemble" value="yes" /> | |
| 212 <param name="assembly_coverage" value="30"/> | |
| 213 <param name="polish_contigs" value="true"/> | |
| 214 </conditional> | |
| 215 <section name="adv"> | |
| 216 <section name="fa"> | |
| 217 <param name="min_length" value="1000" /> | |
| 218 <param name="min_identity" value="40" /> | |
| 219 <param name="min_contig_length" value="600" /> | |
| 220 <param name="select_branch" value="true" /> | |
| 221 </section> | |
| 222 </section> | |
| 223 <output name="out_reads" ftype="fasta.gz"> | |
| 224 <assert_contents> | |
| 225 <has_size value="75000" delta="2000" /> | |
| 226 </assert_contents> | |
| 227 </output> | |
| 228 <output name="out_polished_assembly" ftype="fasta"> | |
| 229 <assert_contents> | |
| 230 <has_line line=">bctg00000000 000000F" /> | |
| 231 <has_size value="13000" delta="1000" /> | |
| 232 </assert_contents> | |
| 233 </output> | |
| 234 </test> | |
| 235 <!-- advanced params 4 --> | |
| 236 <test expect_num_outputs="2"> | |
| 237 <param name="input_fastqs" value="test1.fa" /> | |
| 238 <param name="genome_size" value="13000" /> | |
| 239 <param name="min_read_length" value="1000" /> | |
| 240 <param name="correction_coverage" value="40" /> | |
| 241 <conditional name="assembly"> | |
| 242 <param name="should_assemble" value="yes" /> | |
| 243 <param name="assembly_coverage" value="30"/> | |
| 244 <param name="polish_contigs" value="true"/> | |
| 245 </conditional> | |
| 246 <section name="adv"> | |
| 247 <section name="fcb"> | |
| 248 <param name="read_min_length" value="4000" /> | |
| 249 <param name="ctg_min_length" value="1000" /> | |
| 250 <param name="ctg2ctg_min_identity" value="90" /> | |
| 251 <param name="read2ctg_min_identity" value="60" /> | |
| 252 <param name="min_contig_length" value="1000" /> | |
| 253 </section> | |
| 254 </section> | |
| 255 <output name="out_reads" ftype="fasta.gz"> | |
| 256 <assert_contents> | |
| 257 <has_size value="75000" delta="2000" /> | |
| 258 </assert_contents> | |
| 259 </output> | |
| 260 <output name="out_polished_assembly" ftype="fasta"> | |
| 261 <assert_contents> | |
| 262 <has_line line=">bctg00000000 000000F" /> | |
| 263 <has_size value="13000" delta="1000" /> | |
| 264 </assert_contents> | |
| 265 </output> | |
| 266 </test> | |
| 267 </tests> | |
| 268 | |
| 269 <help><![CDATA[ | |
| 270 | |
| 271 NECAT | |
| 272 ..... | |
| 273 | |
| 274 **What it does** | |
| 275 | |
| 276 | NECAT performs error correction to remove complex errors in nanopore reads. It can also optionally perform de novo assembly. | |
| 277 | After assembly it is recommended to use MEDAKA for long-read polishing, then NextPolish for short-read polishing. | |
| 278 | | |
| 279 | Github: https://github.com/xiaochuanle/NECAT | |
| 280 | | |
| 281 | |
| 282 **Input** | |
| 283 | |
| 284 - One or more files or collections containing sequence reads (fastq / fasta) | |
| 285 | |
| 286 **Output** | |
| 287 | |
| 288 - Corrected reads (gzipped fasta) | |
| 289 - Genome assembly (fasta) (Optional) | |
| 290 | |
| 291 | | |
| 292 | |
| 293 **Advanced Settings** | |
| 294 | |
| 295 | Necat runs multiple subprograms in an assembly pipeline to create its final output. | |
| 296 | Each subprogram does a specific task, then hands its output to the next. | |
| 297 | The subprograms are listed in order below, alongside the settings which can be configured: | |
| 298 | | |
| 299 | |
| 300 *oc2pmov* | |
| 301 | |
| 302 | Finds overlaps between raw-reads | |
| 303 | *Overlap Sensitive Options & Overlap Fast Options* | |
| 304 | | |
| 305 | |
| 306 -k <Integer> kmer size | |
| 307 -z <Integer> scan window size | |
| 308 -q <Integer> kmer occurs > q times will be ignored | |
| 309 -b <Integer> block size | |
| 310 -n <Integer> number of candidates | |
| 311 -a <Integer> min align length | |
| 312 -d <Real> ddf score cutoff | |
| 313 -e <Real> sequencing error | |
| 314 -m <Integer> number of output | |
| 315 | |
| 316 | | |
| 317 | |
| 318 | DEFAULT OPTIONS: | |
| 319 | -k 15 -z 10 -q 500 -b 2000 -s 3 -n 500 -a 500 -d 0.250000 -e 0.500000 -m 500 -t 1 | |
| 320 | |
| 321 | | |
| 322 | | |
| 323 | |
| 324 *oc2cns* | |
| 325 | |
| 326 | Creates consensus reads from raw-read overlaps | |
| 327 | *Consensus Sensitive Options & Consensus Fast Options* | |
| 328 | | |
| 329 | |
| 330 -a <Integer> align length cutoff | |
| 331 -x <Integer> minimal coverage | |
| 332 -y <Integer> maximal coverage | |
| 333 -l <Integer> minimal length of corrected reads. | |
| 334 -f <0 or 1> full consensus or not: 1 = yes, 0 = no | |
| 335 -e <Real> sequencing error | |
| 336 -p <Real> minimal mapping ratio | |
| 337 -r <0 or 1> rescue long indels or not: 1 = yes, 0 = no | |
| 338 -u <0 or 1> use dynamic or fixed ident cutoff: 1 = fixed, 0 = dynamic | |
| 339 | |
| 340 | | |
| 341 | |
| 342 | DEFAULT OPTIONS: | |
| 343 | -a 400 -x 4 -y 12 -l 500 -f 0 -e 0.500000 -p 0.800000 -t 1 -r 0 -u 0 -s 0 | |
| 344 | |
| 345 | | |
| 346 | | |
| 347 | |
| 348 *oc2asmpm* | |
| 349 | |
| 350 | Identifies corrected-read overlaps for assembly | |
| 351 | *Trimming Overlap Options & Assembly Overlap Options* | |
| 352 | | |
| 353 | |
| 354 | |
| 355 -k <Integer> kmer size | |
| 356 -z <Integer> scan window size | |
| 357 -q <Integer> kmer occurs > q times will be ignored | |
| 358 -b <Integer> block size | |
| 359 -n <Integer> number of candidates | |
| 360 -a <Integer> min align length | |
| 361 -d <Real> ddf score cutoff | |
| 362 -e <Real> sequencing error | |
| 363 -m <Integer> number of output | |
| 364 | |
| 365 | | |
| 366 | | |
| 367 | |
| 368 *fsa_ol_filter* | |
| 369 | |
| 370 | Filters out low-quality corrected-read overlaps for assembly | |
| 371 | *Assembly Overlap Filtering Options* | |
| 372 | | |
| 373 | |
| 374 --min_length=INT minimum length of reads. default: 2500 | |
| 375 --max_length=INT maximum length of reads. default: 2147483647 | |
| 376 --min_identity=DOUBLE minimum identity of overlaps default: -1 | |
| 377 --min_aligned_length=INT minimum aligned length of overlaps default: 2500 | |
| 378 --max_overhang=INT maximum overhang of overlaps, negative number = determined by the program. default: -1 | |
| 379 --min_coverage=INT minimum base coverage, negative number = determined by the program. default: -1 | |
| 380 --max_coverage=INT maximum base coverage, negative number = determined by the program default: -1 | |
| 381 --max_diff_coverage=INT maximum difference of base coverage, negative number = determined by the program default: -1 | |
| 382 --coverage_discard=DOUBLE discard ratio of base coverage. If max_coverage or max_diff_coverage is negative, it will be reset to (100-coverage_discard)th percentile. default: 0.01 | |
| 383 --bestn=INT output best n overlaps on 5' or 3' end for each read. default: 10 | |
| 384 --genome_size=INT genome size. It determines the maximum length of reads with coverage together default: 0 | |
| 385 --coverage=INT coverage. It determines the maximum length of reads with genome_size together default: 40 | |
| 386 --identity_global_deviation1=DOUBLE If min_identity < 0, min_identity is set to min(m, deviation1) - 1.4826*mad*deviation2 default: 98 | |
| 387 --identity_global_deviation2=DOUBLE If min_identity < 0, min_identity is set to min(m, deviation1) - 1.4826*mad*deviation2 default: 6 | |
| 388 --overhang_global_deviation1=DOUBLE If max_overhang < 0, max_overhang is set to max(m, deviation1) + 1.4826*mad*deviation2 default: 30 | |
| 389 --overhang_global_deviation2=DOUBLE If max_overhang < 0, max_overhang is set to max(m, deviation1) + 1.4826*mad*deviation2 default: 6 | |
| 390 --identity_local_deviation1=DOUBLE The local threshold of identity is set to min(m, deviation1) - 1.4826*mad*deviation2 default: 99 | |
| 391 --identity_local_deviation2=DOUBLE The local threshold of identity is set to min(m, deviation1) - 1.4826*mad*deviation2 default: 6 | |
| 392 --overhang_local_deviation1=DOUBLE The local threshold of overhang is set to max(m, deviation1) + 1.253*mad*deviation2 default: 10 | |
| 393 --overhang_local_deviation2=DOUBLE The local threshold of overhang is set to max(m, deviation1) + 1.253*mad*deviation2 default: 6 | |
| 394 --identity_local_condition=INT Local filtering conditions. 0 = overlap identity < threshold, 1 = overlap identity < threshold and query identity >= target identity default: 0 | |
| 395 --local_low_coverage=INT If the coverage of reads is less than local_low_coverage, min_identity and max_overhang are used to filter out low-quality overlaps. Otherwise, the local threshold is used. default: 25 | |
| 396 | |
| 397 | | |
| 398 | | |
| 399 | |
| 400 *fsa_assemble* | |
| 401 | |
| 402 | Constructs contigs from filtered overlaps | |
| 403 | *Contig Assembly Options* | |
| 404 | | |
| 405 | |
| 406 --min_length=INT minimum length of reads default: 0 | |
| 407 --min_identity=DOUBLE minimum identity of overlaps default: 0 | |
| 408 --min_aligned_length=INT minimum aligned length of overlaps default: 0 | |
| 409 --min_contig_length=INT minimum length of contigs default: 500 | |
| 410 --select_branch=BOOL select the most probable branch default: "no" | |
| 411 --max_spur_length=INT branches shorter than the threshold are treated as spurs default: 50000 | |
| 412 | |
| 413 | | |
| 414 | | |
| 415 | |
| 416 *fsa_ctg_bridge* | |
| 417 | |
| 418 | Bridges contigs using input long raw-reads | |
| 419 | *Contig Bridging Options* | |
| 420 | | |
| 421 | |
| 422 --read_min_length=INT minimum rawread length default: 5000 | |
| 423 --ctg_min_length=INT minimum contig length default: 500 | |
| 424 --ctg2ctg_min_identity=DOUBLE minimum identity of overlaps between contigs default: 95 | |
| 425 --ctg2ctg_max_overhang=INT maximum overhang of overlaps between contigs default: 100 | |
| 426 --ctg2ctg_min_aligned_length=INT minimum aligned length of overlaps between contigs default: 2000 | |
| 427 --read2ctg_min_identity=DOUBLE minimum identity of overlaps between rawreads and contigs default: 80 | |
| 428 --read2ctg_max_overhang=INT maximum overhang of overlaps between rawreads and contigs default: 500 | |
| 429 --read2ctg_min_aligned_length=INT minimum aligned length of overlaps between rawreads and contigs default: 5000 | |
| 430 --read2ctg_min_coverage=INT minimum coverage of links between rawreads and contigs default: 3 | |
| 431 --min_contig_length=INT minimum length of bridged contig default: 500 | |
| 432 --select_branch=BOOL select the most probable branch default: "no" | |
| 433 --window_size=INT threshold is used to group rawreads that bridge contigs default: 1000 | |
| 434 | |
| 435 | | |
| 436 | |
| 437 | |
| 438 ]]></help> | |
| 439 <expand macro="citations" /> | |
| 440 </tool> |
