Mercurial > repos > iuc > ragtag
comparison ragtag.xml @ 0:7ec824b37dec draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ragtag commit 4c4b2a548b4ce46da88810992459b3ac8581d035"
| author | iuc |
|---|---|
| date | Wed, 10 Nov 2021 23:32:27 +0000 |
| parents | |
| children | c877619c2de2 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:7ec824b37dec |
|---|---|
| 1 <tool id='ragtag' name='RagTag' version='@TOOL_VERSION@+galaxy@VERSION_SUFFIX@' profile='20.01'> | |
| 2 <description>reference-guided scaffolding of draft genomes</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro='xrefs' /> | |
| 7 <expand macro='requirements' /> | |
| 8 <command detect_errors='exit_code'><![CDATA[ | |
| 9 #if $mode_conditional.mode_option != 'merge' | |
| 10 #if $mode_conditional.advanced_options.mapping_conditional.mapping_option == 'nucmer' | |
| 11 #set $nucmer_params = '%s -l %s -c %s' % ($mode_conditional.advanced_options.mapping_conditional.anchor_mode, | |
| 12 $mode_conditional.advanced_options.mapping_conditional.l, | |
| 13 $mode_conditional.advanced_options.mapping_conditional.c) | |
| 14 #end if | |
| 15 #end if | |
| 16 #if $mode_conditional.mode_option == 'merge' | |
| 17 #set $input_files = list() | |
| 18 mkdir merge_files && ## staging directory for the AGP inputs of merge mode | |
| 19 #for $i, $j in enumerate($mode_conditional.scaffold_files) | |
| 20 #set $out_file = './merge_files/scaffold_%s.agp' % $i | |
| 21 ln -s '${j}' $out_file && ## expose each AGP dataset under a path ending in .agp | |
| 22 #silent $input_files.append($out_file) | |
| 23 #end for | |
| 24 #set $merge_files = " ".join($input_files) | |
| 25 #end if | |
| 26 ragtag.py $mode_conditional.mode_option -u | |
| 27 #if $mode_conditional.mode_option == 'correct' | |
| 28 @INPUTS@ | |
| 29 @COMMON_PARAMETERS@ | |
| 30 #if $mode_conditional.validation_conditional.validation_option == 'true' | |
| 31 -R '${mode_conditional.validation_conditional.R}' | |
| 32 -T $mode_conditional.validation_conditional.read_type | |
| 33 -v $mode_conditional.validation_conditional.v | |
| 34 #if str($mode_conditional.validation_conditional.max_cov) != '' | |
| 35 --max-cov $mode_conditional.validation_conditional.max_cov ## passed for any supplied value, including 0 | |
| 36 #end if | |
| 37 #if str($mode_conditional.validation_conditional.min_cov) != '' | |
| 38 --min-cov $mode_conditional.validation_conditional.min_cov ## passed for any supplied value, including 0 | |
| 39 #end if | |
| 40 #end if | |
| 41 -b $mode_conditional.advanced_options.b | |
| 42 #if $mode_conditional.advanced_options.missasembly_break | |
| 43 $mode_conditional.advanced_options.missasembly_break | |
| 44 #end if | |
| 45 #if $mode_conditional.advanced_options.gff | |
| 46 --gff '${mode_conditional.advanced_options.gff}' | |
| 47 #end if | |
| 48 --read-aligner 'minimap2' ## minimap2 is the only read aligner allowed here | |
| 49 #else if $mode_conditional.mode_option == 'scaffold' | |
| 50 @INPUTS@ | |
| 51 @COMMON_PARAMETERS@ | |
| 52 -i $mode_conditional.advanced_options.i | |
| 53 -a $mode_conditional.advanced_options.a | |
| 54 -s $mode_conditional.advanced_options.s | |
| 55 #if $mode_conditional.advanced_options.gap_conditional.gap_option == 'true' | |
| 56 -r | |
| 57 -g '${mode_conditional.advanced_options.gap_conditional.g}' | |
| 58 -m '${mode_conditional.advanced_options.gap_conditional.m}' | |
| 59 #end if | |
| 60 #if $mode_conditional.advanced_options.unplaced_conditional.unplaced_option == 'true' | |
| 61 -C | |
| 62 #if $mode_conditional.advanced_options.unplaced_conditional.J | |
| 63 -J '${mode_conditional.advanced_options.unplaced_conditional.J}' | |
| 64 #end if | |
| 65 #end if | |
| 66 #else if $mode_conditional.mode_option == 'patch' | |
| 67 @INPUTS@ | |
| 68 @COMMON_PARAMETERS@ | |
| 69 -s $mode_conditional.advanced_options.s | |
| 70 -i $mode_conditional.advanced_options.i | |
| 71 #if $mode_conditional.advanced_options.patching_mode | |
| 72 $mode_conditional.advanced_options.patching_mode | |
| 73 #end if | |
| 74 #else | |
| 75 '$mode_conditional.assembly_fasta' | |
| 76 #if $mode_conditional.scaffold_files | |
| 77 $merge_files | |
| 78 #end if | |
| 79 #if $mode_conditional.merging_options.j | |
| 80 -j '$mode_conditional.merging_options.j' | |
| 81 #end if | |
| 82 -l $mode_conditional.merging_options.l | |
| 83 -e $mode_conditional.merging_options.e | |
| 84 --gap-func $mode_conditional.merging_options.function_merging | |
| 85 #if $mode_conditional.hic_options.b | |
| 86 -b '$mode_conditional.hic_options.b' | |
| 87 -r '$mode_conditional.hic_options.r' ## quoted so that site patterns such as GA[ATCG]TC are not glob-expanded by the shell | |
| 88 -p $mode_conditional.hic_options.p | |
| 89 #end if | |
| 90 #end if | |
| 91 -o ./ | |
| 92 #if $mode_conditional.mode_option != 'merge' | |
| 93 -t \${GALAXY_SLOTS:-2} ## falls back to 2 threads when GALAXY_SLOTS is unset | |
| 94 #end if | |
| 95 #if $mode_conditional.mode_option == 'patch' | |
| 96 && mv ragtag.patch.asm.paf.log ragtag.patch.log ## file name collected by the patch_log output | |
| 97 #end if | |
| 98 ]]> </command> | |
| 99 <inputs> | |
| 100 <conditional name="mode_conditional"> | |
| 101 <param name="mode_option" type="select" label="Operation mode"> <!-- the selected value is passed to ragtag.py as its subcommand --> | |
| 102 <option value="correct">Correct: homology-based misassembly correction</option> | |
| 103 <option value="scaffold">Scaffold: homology-based assembly scaffolding</option> | |
| 104 <option value="patch">Patch: homology-based assembly patching</option> | |
| 105 <option value="merge">Merge: scaffold merging</option> | |
| 106 </param> | |
| 107 <when value="correct"> <!-- Correct mode: optional read validation, break settings and the selection of output files --> | |
| 108 <expand macro="input_options"/> | |
| 109 <conditional name="validation_conditional"> | |
| 110 <param name="validation_option" type="select" label="Use validation reads"> | |
| 111 <option value="true">Enabled</option> | |
| 112 <option value="false" selected="true">Disabled</option> | |
| 113 </param> | |
| 114 <when value="true"> | |
| 115 <param argument="-R" type="data" format="fastq,fastqsanger" label="Validation reads" | |
| 116 help="Without validation, the module will break at any point of reference discordance as defined by the 'correction options'. | |
| 117 With validation, RagTag maps reads to the query assembly and verifies putative break points if they are near regions of | |
| 118 exceptionally low or high coverage. The reads used for validation should come from the same genotype as the query | |
| 119 assembly to ensure that coverage abnormalities don't arise from true biological variation" /> | |
| 120 <param name="read_type" type="select" label="Read type"> | |
| 121 <option value="sr">Illumina</option> | |
| 122 <option value="ont">Nanopore</option> | |
| 123 <option value="corr">Error corrected long-reads</option> | |
| 124 </param> | |
| 125 <param argument="-v" type="integer" min="0" value="10000" label="Coverage validation window size" | |
| 126 help="This parameter specifies the window around the putative misassembly break point that RagTag examines | |
| 127 for exceptionally low or high read coverage. The larger this window size, the more likely | |
| 128 it is to find an unrelated coverage abnormality"/> | |
| 129 <param argument="--max-cov" type="integer" min="0" value="" optional="true" label="Break sequences at regions at or above this coverage level"/> | |
| 130 <param argument="--min-cov" type="integer" min="0" value="" optional="true" label="Break sequences at regions at or below this coverage level"/> | |
| 131 </when> | |
| 132 <when value="false"/> | |
| 133 </conditional> | |
| 134 <section name="advanced_options" title="Advanced options"> | |
| 135 <expand macro="common_parameters"/> | |
| 136 <param argument="-b" type="integer" min="0" value="5000" label="Minimum break distance from contig ends" | |
| 137 help="Breaks will not be made within -b bp of query sequence termini"/> | |
| 138 <param name="missasembly_break" type="select" optional="true" label="Break misassembly option" | |
| 139 help="One can also direct RagTag to only break misassemblies between (--inter, query maps to >1 reference sequence) or within | |
| 140 (--intra, query maps discordantly to 1 reference sequence) reference sequences"> | |
| 141 <option value="--inter">Only break misassemblies between reference sequences (--inter)</option> | |
| 142 <option value="--intra">Only break misassemblies within reference sequences (--intra)</option> | |
| 143 </param> | |
| 144 <param argument="--gff" type="data" format="gff" optional="true" label="Don't break sequences within GFF intervals" | |
| 145 help="If one has annotations associated with the query assembly, provide them with the --gff option to ensure that the query assembly | |
| 146 is never broken within annotation intervals."/> | |
| 147 </section> | |
| 148 <param name="output_correct" type="select" multiple="true" label="Output files"> | |
| 149 <option value="fasta" selected="true">The corrected query assembly in FASTA format</option> | |
| 150 <option value="agp" selected="true">The AGP file defining the exact coordinates of query sequence breaks</option> | |
| 151 <option value="paf">The description of the approximate mapping positions between two set of sequences in PAF format</option> | |
| 152 <option value="log">Log file</option> | |
| 153 </param> | |
| 154 </when> | |
| 155 <when value="scaffold"> <!-- Scaffold mode: confidence thresholds, gap inference, unplaced-contig handling and output selection --> | |
| 156 <expand macro="input_options"/> | |
| 157 <section name="advanced_options" title="Advanced options"> | |
| 158 <expand macro="common_parameters"/> | |
| 159 <param argument="-i" type="float" min="0" max="1" value="0.2" label="Minimum grouping confidence score" | |
| 160 help="The grouping confidence score is the number of base pairs a contig covers in its assigned reference chromosome | |
| 161 divided by the total number of covered base pairs in the entire reference genome"/> | |
| 162 <param argument="-a" type="float" min="0" max="1" value="0" label="Minimum location confidence score" | |
| 163 help="To create a metric associated with contig ordering confidence, RagTag defines a location confidence. First, the smallest | |
| 164 and largest alignment positions, with respect to the reference, between a contig and its assigned reference chromosome are found. | |
| 165 The location confidence is then calculated as the number of covered base pairs in this range divided by the total number of | |
| 166 base pairs in the range"/> | |
| 167 <param argument="-s" type="float" min="0" max="1" value="0" label="Minimum orientation confidence score" | |
| 168 help="To calculate the orientation confidence, each base pair in each alignment between a contig and its assigned reference chromosome | |
| 169 casts a vote for the orientation of its alignment. The orientation confidence is the number of votes for the assigned orientation of | |
| 170 the contig divided by the total number of votes"/> | |
| 171 <conditional name="gap_conditional"> | |
| 172 <param name="gap_option" type="select" label="Infer gap sizes" help="When disabled, all gaps are 100 bp (-r)"> | |
| 173 <option value="true" selected="true">Enabled</option> | |
| 174 <option value="false">Disabled</option> | |
| 175 </param> | |
| 176 <when value="true"> | |
| 177 <param argument="-g" type="integer" min="0" value="100" label="Minimum inferred gap size" /> | |
| 178 <param argument="-m" type="integer" min="0" value="100000" label="Maximum inferred gap size"/> | |
| 179 </when> | |
| 180 <when value="false"/> | |
| 181 </conditional> | |
| 182 <conditional name="unplaced_conditional"> | |
| 183 <param name="unplaced_option" type="select" label="Concatenate unplaced contigs and make 'chr0' (-C)"> | |
| 184 <option value="true">Enabled</option> | |
| 185 <option value="false" selected="true">Disabled</option> | |
| 186 </param> | |
| 187 <when value="true"> | |
| 188 <param argument="-J" type="data" format="txt" optional="true" label="List of query headers to leave unplaced and exclude from 'chr0'"/> | |
| 189 </when> | |
| 190 <when value="false"/> | |
| 191 </conditional> | |
| 192 </section> | |
| 193 <param name="output_scaffold" type="select" multiple="true" label="Output files"> | |
| 194 <option value="fasta" selected="true">The scaffolds in FASTA format, defined by the ordering and orientations of the sequences contained in the AGP file</option> | |
| 195 <option value="agp" selected="true">The ordering and orientations of query sequences in AGP format</option> | |
| 196 <option value="paf">The description of the approximate mapping positions between two set of sequences in PAF format</option> | |
| 197 <option value="confidence">Confidence score values</option> | |
| 198 <option value="stats">Summary statistics for the scaffolding process</option> | |
| 199 <option value="log">Log file</option> | |
| 200 </param> | |
| 201 </when> | |
| 202 <when value="patch"> <!-- Patch mode: the patching_mode option values are written verbatim onto the ragtag.py command line --> | |
| 203 <expand macro="input_options"/> | |
| 204 <section name="advanced_options" title="Advanced options"> | |
| 205 <expand macro="common_parameters"/> | |
| 206 <param argument="-s" type="integer" min="0" value="50000" label="Minimum merged alignment length" | |
| 207 help="After merging, alignments less than -s bp long will be removed"/> | |
| 208 <param argument="-i" type="float" min="0" max="1" value="0.05" label="Maximum merged alignment distance" | |
| 209 help="Maximum merged alignment distance from sequence terminus as fraction of the sequence length. Alignments must | |
| 210 be within -i bp of a target sequence terminus or gap to be considered for patching"/> | |
| 211 <param name="patching_mode" type="select" optional="true" label="Patching mode"> | |
| 212 <option value="--fill-only">Only fill existing target gaps. Do not join target sequences</option> | |
| 213 <option value="--join-only">Only join and patch target sequences. Do not fill existing gaps</option> | |
| 214 </param> | |
| 215 </section> | |
| 216 <param name="output_patch" type="select" multiple="true" label="Output files"> | |
| 217 <option value="final_fasta" selected="true">The final FASTA file containing the patched assembly</option> | |
| 218 <option value="final_agp" selected="true">The final AGP file defining how final FASTA is built</option> | |
| 219 <option value="assembly_file" selected="true">Assembly alignment files</option> | |
| 220 <option value="split_assembly">The split target assembly and the renamed query assembly combined into one FASTA file</option> | |
| 221 <option value="split_description">An AGP file defining how the target assembly was split at gaps</option> | |
| 222 <option value="target_gaps">The target assembly split at gaps</option> | |
| 223 <option value="agp_renamed">An AGP file defining the new names for query sequences</option> | |
| 224 <option value="fasta_renamed">A FASTA file with the original query sequence, but with new names</option> | |
| 225 <option value="log">Log file</option> | |
| 226 </param> | |
| 227 </when> | |
| 228 <when value="merge"> <!-- Merge mode: one assembly FASTA plus AGP scaffoldings, with merging and optional Hi-C settings --> | |
| 229 <param name="assembly_fasta" type="data" format="fasta" label="Assembly FASTA file"/> | |
| 230 <param name="scaffold_files" type="data" format="agp" multiple="true" optional="true" label="Scaffold AGP files"/> | |
| 231 <section name="merging_options" title="Merging options"> | |
| 232 <param argument="-j" type="data" format="txt" optional="true" label="List of query headers to leave unplaced"/> | |
| 233 <param argument="-l" type="integer" min="0" value="100000" label="Minimum assembly sequence length" | |
| 234 help="Assembly sequences shorter than -l will also be left unplaced."/> | |
| 235 <param argument="-e" type="float" min="0" value="0" label="Minimum edge weight" | |
| 236 help="The edges in the merging graph represent scaffolding adjacencies. If an AGP file supports a particular adjacency, | |
| 237 its weight is added to the edge weight. Any edges with a weight lower than the minimum edge weight will be removed from the graph"/> | |
| 238 <param name="function_merging" type="select" label="Function for merging gap lengths" | |
| 239 help="Scaffold gaps can differ between input AGP files. For example, a Hi-C derived AGP file might place 100 bp gaps between sequences | |
| 240 while a reference-guided AGP file might infer gap sizes based on a reference genome. Use this parameter to specify how gap sizes | |
| 241 should be computed from the supporting AGP files (--gap-func)"> | |
| 242 <option value="min" selected="true">Min</option> | |
| 243 <option value="max">Max</option> | |
| 244 <option value="mean">Mean</option> | |
| 245 </param> | |
| 246 </section> | |
| 247 <section name="hic_options" title="Hi-C options"> | |
| 248 <param argument="-b" type="data" format="bam" optional="true" label="Hi-C alignments" help="Sorted by read name"/> | |
| 249 <param argument="-r" type="text" value="" optional="true" label="Restriction enzymes/sites or 'DNase'" help="List of restriction enzymes/sites or 'DNase', separated by comma. E.g. GATC,GACC"> | |
| 250 <sanitizer invalid_char=""> | |
| 251 <valid initial="string.letters,string.digits"> | |
| 252 <add value="," /> | |
| 253 <add value="[" /> | |
| 254 <add value="]" /> | |
| 255 </valid> | |
| 256 </sanitizer> | |
| 257 <validator type="regex">[0-9a-zA-Z,\]\[]+</validator> | |
| 258 </param> | |
| 259 <param argument="-p" type="float" min="0" max="1" value="1" optional="true" label="Portion of the sequence termini to consider for links"/> | |
| 260 </section> | |
| 261 </when> | |
| 262 </conditional> | |
| 263 </inputs> | |
| 264 <outputs> | |
| 265 <!-- Correct mode: each dataset is produced only when it is ticked in "output_correct" --> | |
| 266 <data name="correct_paf" format="paf" from_work_dir="ragtag.correct.asm.paf" label="${tool.name} on ${on_string}: PAF"> | |
| 267 <filter>mode_conditional["mode_option"] == "correct" and "paf" in mode_conditional["output_correct"]</filter> | |
| 268 </data> | |
| 269 <data name="correct_agp" format="agp" from_work_dir="ragtag.correct.agp" label="${tool.name} on ${on_string}: AGP"> | |
| 270 <filter>mode_conditional["mode_option"] == "correct" and "agp" in mode_conditional["output_correct"]</filter> | |
| 271 </data> | |
| 272 <data name="correct_fasta" format="fasta" from_work_dir="ragtag.correct.fasta" label="${tool.name} on ${on_string}: FASTA"> | |
| 273 <filter>mode_conditional["mode_option"] == "correct" and "fasta" in mode_conditional["output_correct"]</filter> | |
| 274 </data> | |
| 275 <data name="correct_log" format="txt" from_work_dir="ragtag.correct.asm.paf.log" label="${tool.name} on ${on_string}: log"> | |
| 276 <filter>mode_conditional["mode_option"] == "correct" and "log" in mode_conditional["output_correct"]</filter> | |
| 277 </data> | |
| 278 <!-- Scaffold mode: each dataset is produced only when it is ticked in "output_scaffold" --> | |
| 279 <data name="scaffold_paf" format="paf" from_work_dir="ragtag.scaffold.asm.paf" label="${tool.name} on ${on_string}: PAF"> | |
| 280 <filter>mode_conditional["mode_option"] == "scaffold" and "paf" in mode_conditional["output_scaffold"]</filter> | |
| 281 </data> | |
| 282 <data name="scaffold_agp" format="agp" from_work_dir="ragtag.scaffold.agp" label="${tool.name} on ${on_string}: AGP"> | |
| 283 <filter>mode_conditional["mode_option"] == "scaffold" and "agp" in mode_conditional["output_scaffold"]</filter> | |
| 284 </data> | |
| 285 <data name="scaffold_fasta" format="fasta" from_work_dir="ragtag.scaffold.fasta" label="${tool.name} on ${on_string}: FASTA"> | |
| 286 <filter>mode_conditional["mode_option"] == "scaffold" and "fasta" in mode_conditional["output_scaffold"]</filter> | |
| 287 </data> | |
| 288 <data name="scaffold_log" format="txt" from_work_dir="ragtag.scaffold.asm.paf.log" label="${tool.name} on ${on_string}: log"> | |
| 289 <filter>mode_conditional["mode_option"] == "scaffold" and "log" in mode_conditional["output_scaffold"]</filter> | |
| 290 </data> | |
| 291 <data name="scaffold_stats" format="tabular" from_work_dir="ragtag.scaffold.stats" label="${tool.name} on ${on_string}: stats"> | |
| 292 <filter>mode_conditional["mode_option"] == "scaffold" and "stats" in mode_conditional["output_scaffold"]</filter> | |
| 293 </data> | |
| 294 <data name="scaffold_confidence" format="tabular" from_work_dir="ragtag.scaffold.confidence.txt" label="${tool.name} on ${on_string}: confidence"> | |
| 295 <filter>mode_conditional["mode_option"] == "scaffold" and "confidence" in mode_conditional["output_scaffold"]</filter> | |
| 296 </data> | |
| 297 <!-- Patch mode: each dataset is produced only when it is ticked in "output_patch" --> | |
| 298 <data name="patch_agp" format="agp" from_work_dir="ragtag.patch.agp" label="${tool.name} on ${on_string}: final AGP"> | |
| 299 <filter>mode_conditional["mode_option"] == "patch" and "final_agp" in mode_conditional["output_patch"]</filter> | |
| 300 </data> | |
| 301 <data name="patch_paf" format="paf" from_work_dir="ragtag.patch.asm.paf" label="${tool.name} on ${on_string}: final PAF"> | |
| 302 <filter>mode_conditional["mode_option"] == "patch" and "assembly_file" in mode_conditional["output_patch"]</filter> | |
| 303 </data> | |
| 304 <data name="patch_log" format="txt" from_work_dir="ragtag.patch.log" label="${tool.name} on ${on_string}: log"> | |
| 305 <filter>mode_conditional["mode_option"] == "patch" and "log" in mode_conditional["output_patch"]</filter> | |
| 306 </data> | |
| 307 <data name="patch_comps_fasta" format="fasta" from_work_dir="ragtag.patch.comps.fasta" label="${tool.name} on ${on_string}: components FASTA"> | |
| 308 <filter>mode_conditional["mode_option"] == "patch" and "split_assembly" in mode_conditional["output_patch"]</filter> | |
| 309 </data> | |
| 310 <data name="patch_ctg_agp" format="agp" from_work_dir="ragtag.patch.ctg.agp" label="${tool.name} on ${on_string}: contigs AGP"> | |
| 311 <filter>mode_conditional["mode_option"] == "patch" and "split_description" in mode_conditional["output_patch"]</filter> | |
| 312 </data> | |
| 313 <data name="patch_ctg_fasta" format="fasta" from_work_dir="ragtag.patch.ctg.fasta" label="${tool.name} on ${on_string}: contigs FASTA"> | |
| 314 <filter>mode_conditional["mode_option"] == "patch" and "target_gaps" in mode_conditional["output_patch"]</filter> | |
| 315 </data> | |
| 316 <data name="patch_fasta" format="fasta" from_work_dir="ragtag.patch.fasta" label="${tool.name} on ${on_string}: final FASTA"> | |
| 317 <filter>mode_conditional["mode_option"] == "patch" and "final_fasta" in mode_conditional["output_patch"]</filter> | |
| 318 </data> | |
| 319 <data name="patch_rename_agp" format="agp" from_work_dir="ragtag.patch.rename.agp" label="${tool.name} on ${on_string}: renamed AGP"> | |
| 320 <filter>mode_conditional["mode_option"] == "patch" and "agp_renamed" in mode_conditional["output_patch"]</filter> | |
| 321 </data> | |
| 322 <data name="patch_rename_fasta" format="fasta" from_work_dir="ragtag.patch.rename.fasta" label="${tool.name} on ${on_string}: renamed FASTA"> | |
| 323 <filter>mode_conditional["mode_option"] == "patch" and "fasta_renamed" in mode_conditional["output_patch"]</filter> | |
| 324 </data> | |
| 325 <!-- Merge mode: both datasets are always produced, there is no output selector --> | |
| 326 <data name="merge_agp" format="agp" from_work_dir="ragtag.merge.agp" label="${tool.name} on ${on_string}: merged AGP"> | |
| 327 <filter>mode_conditional["mode_option"] == "merge"</filter> | |
| 328 </data> | |
| 329 <data name="merge_fasta" format="fasta" from_work_dir="ragtag.merge.fasta" label="${tool.name} on ${on_string}: merged FASTA"> | |
| 330 <filter>mode_conditional["mode_option"] == "merge"</filter> | |
| 331 </data> | |
| 332 </outputs> | |
| 333 <tests> | |
| 334 <test expect_num_outputs="4"> | |
| 335 <!-- Test 01: correct mode with the minimap2 mapping option; all four correct-mode outputs are requested --> | |
| 336 <conditional name="mode_conditional"> | |
| 337 <param name="mode_option" value="correct"/> | |
| 338 <param name="reference" value="genome.fna"/> | |
| 339 <param name="query" value="contigs.fna"/> | |
| 340 <param name="output_correct" value="fasta,agp,paf,log"/> | |
| 341 <section name="advanced_options"> | |
| 342 <param name="e" value="reference_headers_skip.txt"/> | |
| 343 <param name="j" value="query_headers_skip.txt"/> | |
| 344 <param name="f" value="1000"/> | |
| 345 <conditional name="mapping_conditional"> | |
| 346 <param name="mapping_option" value="minimap2"/> | |
| 347 <param name="mm2_params" value="asm5"/> | |
| 348 </conditional> | |
| 349 <param name="remove_small" value="false"/> | |
| 350 <param name="q" value="10"/> | |
| 351 <param name="d" value="100000"/> | |
| 352 <param name="b" value="5000"/> | |
| 353 <param name="missasembly_break" value="--inter"/> | |
| 354 <param name="gff" value="annotation.gff"/> | |
| 355 </section> | |
| 356 </conditional> | |
| 357 <output name="correct_paf" file="correct_paf_01.paf" ftype="paf"/> | |
| 358 <output name="correct_agp" file="correct_agp_01.agp" ftype="agp"/> | |
| 359 <output name="correct_fasta" file="correct_fasta_01.fasta" ftype="fasta"/> | |
| 360 <output name="correct_log" file="correct_log_01.txt" ftype="txt" lines_diff="20"/> | |
| 361 </test> | |
| 362 <!-- Test 02: correct mode with the nucmer mapping option; only the FASTA and AGP outputs are requested --> | |
| 363 <test expect_num_outputs="2"> | |
| 364 <conditional name="mode_conditional"> | |
| 365 <param name="mode_option" value="correct"/> | |
| 366 <param name="reference" value="genome.fna"/> | |
| 367 <param name="query" value="contigs.fna"/> | |
| 368 <param name="output_correct" value="fasta,agp"/> | |
| 369 <section name="advanced_options"> | |
| 370 <param name="f" value="1000"/> | |
| 371 <conditional name="mapping_conditional"> | |
| 372 <param name="mapping_option" value="nucmer"/> | |
| 373 </conditional> | |
| 374 <param name="remove_small" value="true"/> | |
| 375 <param name="q" value="10"/> | |
| 376 <param name="d" value="100000"/> | |
| 377 <param name="b" value="5000"/> | |
| 378 <param name="missasembly_break" value="--inter"/> | |
| 379 </section> | |
| 380 </conditional> | |
| 381 <output name="correct_fasta" file="correct_fasta_02.fasta" ftype="fasta"/> | |
| 382 <output name="correct_agp" file="correct_agp_02.agp" ftype="agp"/> | |
| 383 | |
| 384 </test> | |
| 385 <!-- Test 03: scaffold mode; all six scaffold-mode outputs are requested --> | |
| 386 <test expect_num_outputs="6"> | |
| 387 <conditional name="mode_conditional"> | |
| 388 <param name="mode_option" value="scaffold"/> | |
| 389 <param name="reference" value="genome.fna"/> | |
| 390 <param name="query" value="contigs.fna"/> | |
| 391 <param name="output_scaffold" value="fasta,agp,paf,confidence,log,stats"/> | |
| 392 <section name="advanced_options"> | |
| 393 <param name="f" value="1000"/> | |
| 394 <param name="remove_small" value="true"/> | |
| 395 <param name="q" value="10"/> | |
| 396 <param name="d" value="100000"/> | |
| 397 <param name="i" value="0.2"/> | |
| 398 <param name="a" value="0"/> | |
| 399 <param name="s" value="0"/> | |
| 400 </section> | |
| 401 </conditional> | |
| 402 <output name="scaffold_paf" file="scaffold_paf_03.paf" ftype="paf"/> | |
| 403 <output name="scaffold_agp" file="scaffold_apg.03.agp" ftype="agp"/> | |
| 404 <output name="scaffold_fasta" file="scaffold_fasta_03.fasta" ftype="fasta"/> | |
| 405 <output name="scaffold_log" file="scaffold_log_03.txt" ftype="txt" lines_diff="20"/> | |
| 406 <output name="scaffold_stats" file="scaffold_stats_03.tabular" ftype="tabular"/> | |
| 407 <output name="scaffold_confidence" file="scaffold_confidence_03.tabular" ftype="tabular"/> | |
| 408 </test> | |
| 409 <!-- Test 04: patch mode; all nine patch-mode outputs are requested, and the components FASTA is checked by size only --> | |
| 410 <test expect_num_outputs="9"> | |
| 411 <conditional name="mode_conditional"> | |
| 412 <param name="mode_option" value="patch"/> | |
| 413 <param name="reference" value="genome.fna"/> | |
| 414 <param name="query" value="contigs.fna"/> | |
| 415 <param name="output_patch" value="final_fasta,final_agp,assembly_file,split_assembly,split_description,target_gaps,agp_renamed,fasta_renamed,log"/> | |
| 416 <section name="advanced_options"> | |
| 417 <param name="s" value="50000"/> | |
| 418 <param name="i" value="0.05"/> | |
| 419 </section> | |
| 420 </conditional> | |
| 421 <output name="patch_agp" file="patch_agp_04.agp" ftype="agp"/> | |
| 422 <output name="patch_paf" file="patch_paf_04.paf" ftype="paf"/> | |
| 423 <output name="patch_log" file="patch_log_04.txt" ftype="txt" lines_diff="20"/> | |
| 424 <output name="patch_comps_fasta" ftype="fasta"> | |
| 425 <assert_contents> | |
| 426 <has_size value="603691" delta="100" /> | |
| 427 </assert_contents> | |
| 428 </output> | |
| 429 <output name="patch_ctg_fasta" file="patch_ctg_fasta_04.fasta" ftype="fasta"/> | |
| 430 <output name="patch_ctg_agp" file="patch_ctg_fasta_04.agp" ftype="agp"/> | |
| 431 <output name="patch_fasta" file="patch_fasta_04.fasta" ftype="fasta"/> | |
| 432 <output name="patch_rename_agp" file="patch_rename_agp.agp" ftype="agp"/> | |
| 433 <output name="patch_rename_fasta" file="patch_rename_fasta.fasta" ftype="fasta"/> | |
| 434 </test> | |
| 435 <test expect_num_outputs="2"> | |
| 436 <!-- Test 05: merge mode; two AGP files are merged against one assembly FASTA --> | |
| 437 <conditional name="mode_conditional"> | |
| 438 <param name="mode_option" value="merge"/> | |
| 439 <param name="assembly_fasta" value="correct_fasta_01.fasta"/> | |
| 440 <param name="scaffold_files" value="correct_agp_01.agp,correct_agp_02.agp"/> | |
| 441 <section name="merging_options"> | |
| 442 <param name="l" value="100000"/> | |
| 443 <param name="e" value="0"/> | |
| 444 <param name="function_merging" value="min"/> | |
| 445 </section> | |
| 446 </conditional> | |
| 447 <output name="merge_agp" file="merge_agp_05.agp" ftype="agp"/> | |
| 448 <output name="merge_fasta" file="merge_fasta_05.fasta" ftype="fasta"/> | |
| 449 </test> | |
| 450 </tests> | |
| 451 <help><![CDATA[ | |
| 452 .. class:: infomark | |
| 453 | |
| 454 **Purpose** | |
| 455 | |
| 456 RagTag is a collection of software tools for scaffolding and improving modern genome assemblies. Tasks include: | |
| 457 | |
| 458 - Homology-based misassembly correction | |
| 459 - Homology-based assembly scaffolding and patching | |
| 460 - Scaffold merging | |
| 461 | |
| 462 ---- | |
| 463 | |
| 464 .. class:: infomark | |
| 465 | |
| 466 **Correct mode** | |
| 467 | |
| 468 RagTag offers a correction module that uses a reference genome to identify and correct potential misassemblies in a query assembly. | |
| 469 RagTag also provides the option to verify putative misassemblies by aligning reads (from the same genotype) to the query assembly | |
| 470 and observing read coverage near misassembly break points. In all cases, sequence is never added or subtracted. Query sequences | |
| 471 are only broken at points of putative misassembly. | |
| 472 | |
| 473 *Misassemblies vs true variation* | |
| 474 | |
| 475 Reference-guided misassembly signatures are sometimes caused by true biological structural variation if the reference and query assemblies | |
| 476 represent distinct genotypes (or haplotypes). The read validation feature should help to avoid some of these misassembly false positives, | |
| 477 and the validation sensitivity can be tuned with command line parameters. However, it is ultimately up to the discretion of the user to decide | |
| 478 if misassembly correction is appropriate. One should validate all RagTag results with independent data (usually physical, optical, or genetic | |
| 479 maps), when possible. | |
| 480 | |
| 481 ---- | |
| 482 | |
| 483 .. class:: infomark | |
| 484 | |
| 485 **Scaffold mode** | |
| 486 | |
| 487 Scaffolding is the process of ordering and orienting draft assembly (query) sequences into longer sequences. Gaps (stretches of "N" characters) | |
| 488 are placed between adjacent query sequences to indicate the presence of unknown sequence. RagTag uses whole-genome alignments to a reference | |
| 489 assembly to scaffold query sequences. RagTag does not alter input query sequence in any way and only orders and orients sequences, joining them with gaps. | |
| 490 | |
| 491 ---- | |
| 492 | |
| 493 .. class:: infomark | |
| 494 | |
| 495 **Patch mode** | |
| 496 | |
| 497 This mode uses one genome assembly to *patch* another genome assembly. We define two types of patches: | |
| 498 | |
| 499 - Fills are patches that fill assembly gaps. This process is like traditional gap-filling, though it uses an assembly instead of WGS sequencing reads. | |
| 500 - Joins are patches that join distinct contigs. This is essentially scaffolding and gap-filling in a single step. | |
| 501 | |
| 502 ---- | |
| 503 | |
| 504 .. class:: infomark | |
| 505 | |
| 506 **Merge mode** | |
| 507 | |
| 508 Draft genome assemblies are often scaffolded multiple times using different approaches. For example, one might scaffold an assembly using different genome | |
| 509 maps (physical, linkage, Hi-C, etc.), different methods, or different method parameters. RagTag merge is a tool to merge and reconcile different scaffoldings | |
| 510 of the same assembly. In this way, one can leverage the advantages of multiple techniques to synergistically improve scaffolding. | |
| 511 | |
| 512 Most tools write scaffolding results in the AGP file format, which encodes adjacency and gap information in a plain text file. To run RagTag merge, | |
| 513 one must supply the assembly in FASTA format and at least two AGP files that define a scaffolding of the assembly. Each AGP file can optionally be | |
| 514 assigned a weight, allowing users to assign the relative influence of each AGP on the final result. | |
| 515 | |
| 516 If available, users can supply Hi-C alignments to the draft assembly to resolve conflicts in the merging graph. In this scenario, the input AGP | |
| 517 files are used to build the initial graph, but then Hi-C alignments are used to re-weight the graph before computing the scaffolding solution. | |
| 518 | |
| 519 | |
| 520 **List of accepted restriction enzymes** | |
| 521 | |
| 522 List of all accepted restriction enzymes and their restriction sites: | |
| 523 | |
| 524 - HindIII: AAGCTT | |
| 525 - Sau3AI: GATC | |
| 526 - MboI: GATC | |
| 527 - DpnII: GATC | |
| 528 - HinfI: GA[ATCG]TC | |
| 529 - DdeI: CT[ATCG]AG | |
| 530 - MseI: TTAA | |
| 531 | |
| 532 For RagTag, use a comma-separated list of enzymes or sites (or a mix). For example: | |
| 533 | |
| 534 - Arima Hi-C v1.0: *Sau3AI,HinfI* or *GATC,GA[ATCG]TC* | |
| 535 - Arima Hi-C v2.0: *Sau3AI,HinfI,DdeI,MseI* or *GATC,GA[ATCG]TC,CT[ATCG]AG,TTAA* | |
| 536 | |
| 537 Note that for restriction sites, wildcards are represented with python regex syntax, not IUPAC ambiguity codes. e.g. '[ATCG]' instead of 'N'. | |
| 538 | |
| 539 Restriction enzymes are not necessarily the enzyme used for sample prep. Each is only an enzyme that cuts at the corresponding restriction site. | |
| 540 | |
| 541 ]]> </help> | |
| 542 <expand macro="citations" /> | |
| 543 </tool> |
