Mercurial > repos > iuc > hybpiper
comparison hybpiper.xml @ 0:91a16438e849 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hybpiper commit b439a8bebdd20955135572a15672a12a166d7ff8
| author | iuc |
|---|---|
| date | Sat, 23 Sep 2023 16:49:12 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:91a16438e849 |
|---|---|
| 1 <tool id="hybpiper" name="HybPiper" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
| 2 <description>Analyse targeted sequence capture data</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="xrefs"/> | |
| 7 <expand macro="requirements"/> | |
| 8 <command detect_errors="exit_code"><![CDATA[ | |
| 9 | |
| 10 ## sample name checking: check_sample_name emits shell code that prints an error to stderr and exits 1 when the identifier contains any character other than letters, digits, underscores or hyphens; it emits nothing for a valid identifier. NOTE(review): the identifier is echoed inside single quotes, so a name containing a single quote would break the shell quoting; confirm how such identifiers are handled upstream | |
| 11 #import re | |
| 12 #def check_sample_name($sample_name): | |
| 13 #if re.search(r'[^A-Za-z0-9_\-]', $sample_name): | |
| 14 printf '%s\n' | |
| 15 'ERROR: special characters detected in sample identifier.' | |
| 16 'Identifiers may only contain letters, numbers, underscores and hyphens.' | |
| 17 'Check the identifier for the following sample:' | |
| 18 '${sample_name}' | |
| 19 1>&2 | |
| 20 && | |
| 21 exit 1 | |
| 22 && | |
| 23 #end if | |
| 24 #end def | |
| 25 | |
| 26 ## set up files | |
| 27 ln -s '${targetfile_dna}' ./target_file.fasta | |
| 28 && | |
| 29 | |
| 30 ############################### | |
| 31 ## hybpiper check_targetfile ## | |
| 32 ############################### | |
| 33 | |
| 34 #if str( $job_conditional.hybpiper_job ) == "check_and_fix_targetfile": | |
| 35 hybpiper check_targetfile | |
| 36 --targetfile_dna target_file.fasta | |
| 37 && | |
| 38 | |
| 39 mv fix_targetfile*.ctl hybpiper.ctl | |
| 40 && | |
| 41 | |
| 42 hybpiper fix_targetfile | |
| 43 --targetfile_dna target_file.fasta | |
| 44 --allow_gene_removal | |
| 45 hybpiper.ctl | |
| 46 && | |
| 47 | |
| 48 ####################### | |
| 49 ## hybpiper assemble ## | |
| 50 ####################### | |
| 51 | |
| 52 #elif str( $job_conditional.hybpiper_job ) == "assemble": | |
| 53 #set sample_prefix = str($job_conditional.paired_input.element_identifier) | |
| 54 | |
| 55 $check_sample_name($sample_prefix) | |
| 56 | |
| 57 hybpiper assemble | |
| 58 --readfiles | |
| 59 '${job_conditional.paired_input.forward}' | |
| 60 '${job_conditional.paired_input.reverse}' | |
| 61 --targetfile_dna target_file.fasta | |
| 62 --diamond | |
| 63 --cpu \${GALAXY_SLOTS:-1} | |
| 64 --prefix '${sample_prefix}' | |
| 65 && | |
| 66 | |
| 67 tar -cvf '${hybpiper_archive}' --directory='${sample_prefix}' . | |
| 68 && | |
| 69 | |
| 70 ####################################### | |
| 71 ## hybpiper stats/retrieve_sequences ## | |
| 72 ####################################### | |
| 73 | |
| 74 #elif str( $job_conditional.hybpiper_job ) == "stats": | |
| 75 | |
| 76 ## check logic of requested items | |
| 77 #unless $job_conditional.stats_type_select or $job_conditional.sequence_type_select: | |
| 78 printf '%s\n' | |
| 79 'ERROR: No outputs selected.' | |
| 80 1>&2 | |
| 81 && | |
| 82 exit 1 | |
| 83 && | |
| 84 #end unless | |
| 85 #if $job_conditional.heatmap and not $job_conditional.stats_type_select: | |
| 86 printf '%s\n' | |
| 87 'ERROR: heatmap requested, but no stats selected.' | |
| 88 1>&2 | |
| 89 && | |
| 90 exit 1 | |
| 91 && | |
| 92 #end if | |
| 93 | |
| 94 #for $sample in $job_conditional.hybpiper_results | |
| 95 #set sample_prefix = str($sample.element_identifier) | |
| 96 | |
| 97 $check_sample_name($sample_prefix) | |
| 98 | |
| 99 mkdir -p '${sample_prefix}' | |
| 100 && | |
| 101 | |
| 102 tar -xf '${sample}' -C '${sample_prefix}' | |
| 103 && | |
| 104 | |
| 105 echo '${sample_prefix}' >> namelist.txt | |
| 106 && | |
| 107 #end for | |
| 108 | |
| 109 ## Produce a stats file for each requested output type | |
| 110 #for $stats_output in $job_conditional.stats_type_select: | |
| 111 hybpiper stats | |
| 112 --targetfile_dna target_file.fasta | |
| 113 --stats_filename 'stats.${stats_output}' | |
| 114 --seq_lengths_filename 'seq_lengths.${stats_output}' | |
| 115 '${stats_output}' | |
| 116 namelist.txt | |
| 117 && | |
| 118 | |
| 119 ## Produce heatmaps if selected | |
| 120 #if $job_conditional.heatmap: | |
| 121 hybpiper recovery_heatmap | |
| 122 --heatmap_filename 'heatmap.${stats_output}' | |
| 123 --heatmap_filetype svg | |
| 124 'seq_lengths.${stats_output}.tsv' | |
| 125 && | |
| 126 #end if | |
| 127 #end for | |
| 128 | |
| 129 ## Produce sequences for each requested type | |
| 130 #for $sequence_output in $job_conditional.sequence_type_select: | |
| 131 mkdir 'fasta.${sequence_output}' | |
| 132 && | |
| 133 hybpiper retrieve_sequences | |
| 134 --targetfile_dna target_file.fasta | |
| 135 --sample_names namelist.txt | |
| 136 --fasta_dir 'fasta.${sequence_output}' | |
| 137 '${sequence_output}' | |
| 138 && | |
| 139 #end for | |
| 140 #end if | |
| 141 | |
| 142 wait | |
| 143 | |
| 144 ]]></command> | |
| 145 | |
| 146 <inputs> | |
| 147 <param argument="--targetfile_dna" type="data" format="fasta" label="Target file" help="Target file in FASTA format" /> | |
| 148 | |
| 149 <conditional name="job_conditional"> | |
| 150 <param name="hybpiper_job" type="select" label="Type of hybpiper run"> | |
| 151 <option value="check_and_fix_targetfile">Check and fix targetfile</option> | |
| 152 <option value="assemble" selected="true">Assemble target loci</option> | |
| 153 <option value="stats">Extract sequences and/or stats from Hybpiper runs</option> | |
| 154 </param> | |
| 155 | |
| 156 <when value="check_and_fix_targetfile"/> | |
| 157 | |
| 158 <when value="assemble"> | |
| 159 <param name="paired_input" format="fastqsanger" type="data_collection" collection_type="paired" label="Input reads" help="Your reads must be in a paired collection. See below for more information." /> | |
| 160 </when> | |
| 161 | |
| 162 <when value="stats"> | |
| 163 <param name="hybpiper_results" type="data_collection" collection_type="list" format="tar" multiple="true" label="Results from Hybpiper assemble runs" /> | |
| 164 <param name="stats_type_select" type="select" label="Choose statistics to report" display="checkboxes" multiple="true" optional="true"> | |
| 165 <option value="gene" selected="true">Gene</option> | |
| 166 <option value="supercontig">Supercontig</option> | |
| 167 </param> | |
| 168 <param name="heatmap" type="boolean" checked="false" label="Produce a heatmap for each of the selected statistics" /> | |
| 169 <param name="sequence_type_select" type="select" display="checkboxes" label="Choose sequences to extract" multiple="true" optional="true"> | |
| 170 <option value="dna" selected="true">DNA</option> | |
| 171 <option value="aa">Amino acid</option> | |
| 172 <option value="intron">Intron</option> | |
| 173 <option value="supercontig">Supercontig</option> | |
| 174 </param> | |
| 175 </when> | |
| 176 </conditional> | |
| 177 </inputs> | |
| 178 | |
| 179 <outputs> | |
| 180 <!-- check_targetfile output --> | |
| 181 <data name="fixed_targetfile" label="${targetfile_dna.element_identifier} (fixed)" format="fasta" from_work_dir="target_file_fixed.fasta"> | |
| 182 <filter>job_conditional['hybpiper_job'] == 'check_and_fix_targetfile'</filter> | |
| 183 </data> | |
| 184 <collection type="list" name="output_targetfile" label="Hybpiper logs for ${targetfile_dna.element_identifier}"> | |
| 185 <data name="targetfile_ctl_file" label="Hybpiper .ctl file for ${on_string}" format="txt" from_work_dir="hybpiper.ctl" /> | |
| 186 <data name="targetfile_report" label="Hybpiper targetfile report" format="tabular" from_work_dir="fix_targetfile_report.tsv" /> | |
| 187 <filter>job_conditional['hybpiper_job'] == 'check_and_fix_targetfile'</filter> | |
| 188 </collection> | |
| 189 | |
| 190 <!-- assemble output --> | |
| 191 <data name="hybpiper_archive" format="tar"> | |
| 192 <filter>job_conditional['hybpiper_job'] == 'assemble'</filter> | |
| 193 </data> | |
| 194 | |
| 195 <!-- stats output: statistics and sequence-length tables, created only for stats runs with at least one statistics type selected (the filter guards against an empty selection before testing membership) --> | |
| 196 <collection name="hybpiper_stats" type="list" label="Hybpiper statistics"> | |
| 197 <data name="stats_gene" label="Hybpiper statistics (gene)" format="tabular" from_work_dir="stats.gene.tsv"> | |
| 198 <actions> | |
| 199 <action name="column_names" type="metadata" default="Name,NumReads,ReadsMapped,PctOnTarget,GenesMapped,GenesWithContigs,GenesWithSeqs,GenesAt25pct,GenesAt50pct,GenesAt75pct,GenesAt150pct,ParalogWarningsLong,ParalogWarningsDepth,GenesWithoutStitchedContigs,GenesWithStitchedContigs,GenesWithStitchedContigsSkipped,GenesWithChimeraWarning,TotalBasesRecovered" /> | |
| 200 </actions> | |
| 201 </data> | |
| 202 <data name="stats_supercontig" label="Hybpiper statistics (supercontig)" format="tabular" from_work_dir="stats.supercontig.tsv"> | |
| 203 <actions> | |
| 204 <action name="column_names" type="metadata" default="Name,NumReads,ReadsMapped,PctOnTarget,GenesMapped,GenesWithContigs,GenesWithSeqs,GenesAt25pct,GenesAt50pct,GenesAt75pct,GenesAt150pct,ParalogWarningsLong,ParalogWarningsDepth,GenesWithoutStitchedContigs,GenesWithStitchedContigs,GenesWithStitchedContigsSkipped,GenesWithChimeraWarning,TotalBasesRecovered" /> | |
| 205 </actions> | |
| 206 </data> | |
| 207 <data name="seqlengths_gene" label="Assembled sequence lengths (gene)" format="tabular" from_work_dir="seq_lengths.gene.tsv"/> | |
| 208 <data name="seqlengths_supercontig" label="Assembled sequence lengths (supercontig)" format="tabular" from_work_dir="seq_lengths.supercontig.tsv"> | |
| 209 </data> | |
| 210 <filter>job_conditional['hybpiper_job'] == 'stats' and job_conditional['stats_type_select'] and ('gene' in job_conditional['stats_type_select'] or 'supercontig' in job_conditional['stats_type_select'])</filter> | |
| 211 </collection> | |
| 212 | |
| 213 <!-- stats/heatmap output: created only for stats runs with the heatmap option enabled; the filter is a Python expression, so the boolean parameter is tested directly --> | |
| 214 <collection name="hybpiper_heatmaps" type="list" label="Hybpiper heatmaps"> | |
| 215 <discover_datasets pattern="heatmap\.(?P<designation>.+)\.svg" format="svg" recurse="false" /> | |
| 216 <filter>job_conditional['hybpiper_job'] == 'stats' and job_conditional['heatmap']</filter> | |
| 217 </collection> | |
| 218 | |
| 219 <!-- stats/sequences output: one collection per sequence type, each created only when that type is selected (the filters guard against an empty selection before testing membership) --> | |
| 220 <collection name="dna_sequences" type="list" label="DNA sequences"> | |
| 221 <discover_datasets pattern="(?P<designation>.+)\.FNA" format="fasta" directory="fasta.dna" recurse="false" /> | |
| 222 <filter>job_conditional['hybpiper_job'] == 'stats' and job_conditional['sequence_type_select'] and 'dna' in job_conditional['sequence_type_select']</filter> | |
| 223 </collection> | |
| 224 <collection name="aa_sequences" type="list" label="Amino acid sequences"> | |
| 225 <discover_datasets pattern="(?P<designation>.+)\.FAA" format="fasta" directory="fasta.aa" recurse="false" /> | |
| 226 <filter>job_conditional['hybpiper_job'] == 'stats' and job_conditional['sequence_type_select'] and 'aa' in job_conditional['sequence_type_select']</filter> | |
| 227 </collection> | |
| 228 <collection name="intron_sequences" type="list" label="Intron sequences"> | |
| 229 <discover_datasets pattern="(?P<designation>.+)\.fasta" format="fasta" directory="fasta.intron" recurse="false" /> | |
| 230 <filter>job_conditional['hybpiper_job'] == 'stats' and job_conditional['sequence_type_select'] and 'intron' in job_conditional['sequence_type_select']</filter> | |
| 231 </collection> | |
| 232 <collection name="supercontig_sequences" type="list" label="Supercontig sequences"> | |
| 233 <discover_datasets pattern="(?P<designation>.+)\.fasta" format="fasta" directory="fasta.supercontig" recurse="false" /> | |
| 234 <filter>job_conditional['hybpiper_job'] == 'stats' and job_conditional['sequence_type_select'] and 'supercontig' in job_conditional['sequence_type_select']</filter> | |
| 235 </collection> | |
| 236 | |
| 237 <!-- dummy output, in case the user deselects everything --> | |
| 238 <data name="dummy_output" label="Stats or sequences from Hybpiper runs" from_work_dir="namelist.txt" format="txt"> | |
| 239 <filter>job_conditional['hybpiper_job'] == 'stats' and not (job_conditional['stats_type_select'] or job_conditional['sequence_type_select']) </filter> | |
| 240 </data> | |
| 241 | |
| 242 </outputs> | |
| 243 <tests> | |
| 244 | |
| 245 <!-- test1: check and fix targetfile --> | |
| 246 <test expect_num_outputs="4"> | |
| 247 <param name="targetfile_dna" value="test_targets.fasta.gz"/> | |
| 248 <conditional name="job_conditional"> | |
| 249 <param name="hybpiper_job" value="check_and_fix_targetfile"/> | |
| 250 </conditional> | |
| 251 <output name="fixed_targetfile" file="test1_out.fasta"/> | |
| 252 <output_collection name="output_targetfile" type="list" count="2"> | |
| 253 <element name="targetfile_ctl_file" file="test1_out.ctl"/> | |
| 254 <element name="targetfile_report" file="test1_out.tsv"/> | |
| 255 </output_collection> | |
| 256 </test> | |
| 257 | |
| 258 <!-- test2: assemble with paired collection --> | |
| 259 <!-- Not possible to test stats unless element_identifier can be set. --> | |
| 260 <test expect_failure="true"> | |
| 261 <param name="targetfile_dna" value="test_targets.fasta.gz"/> | |
| 262 <conditional name="job_conditional"> | |
| 263 <param name="hybpiper_job" value="assemble"/> | |
| 264 <param name="paired_input"> | |
| 265 <collection type="paired"> | |
| 266 <element name="forward" ftype="fastqsanger.gz" value="NZ874_R1_test.fastq.gz" /> | |
| 267 <element name="reverse" ftype="fastqsanger.gz" value="NZ874_R2_test.fastq.gz" /> | |
| 268 </collection> | |
| 269 </param> | |
| 270 </conditional> | |
| 271 <!-- <output name="hybpiper_archive"> | |
| 272 <assert_contents> | |
| 273 <has_size value="2386944" delta="200000" /> | |
| 274 </assert_contents> | |
| 275 </output> --> | |
| 276 </test> | |
| 277 | |
| 278 <!-- test3: all stats output --> | |
| 279 <test expect_num_outputs="10"> | |
| 280 <param name="targetfile_dna" value="test_targets.fasta.gz"/> | |
| 281 <conditional name="job_conditional"> | |
| 282 <param name="hybpiper_job" value="stats"/> | |
| 283 <param name="hybpiper_results" > | |
| 284 <collection type="list"> | |
| 285 <element name="NZ874" value="NZ874.tar.gz" /> | |
| 286 </collection> | |
| 287 </param> | |
| 288 <param name="stats_type_select" value="gene,supercontig"/> | |
| 289 <param name="heatmap" value="true"/> | |
| 290 <param name="sequence_type_select" value="dna,aa,intron,supercontig"/> | |
| 291 </conditional> | |
| 292 <output_collection name="hybpiper_stats" type="list" count="4" /> | |
| 293 <output_collection name="hybpiper_heatmaps" type="list" count="2"> | |
| 294 </output_collection> | |
| 295 <output_collection name="dna_sequences" type="list" count="13"> | |
| 296 </output_collection> | |
| 297 <output_collection name="aa_sequences" type="list" count="13"> | |
| 298 </output_collection> | |
| 299 <output_collection name="intron_sequences" type="list" count="13"> | |
| 300 </output_collection> | |
| 301 <output_collection name="supercontig_sequences" type="list" count="13"> | |
| 302 </output_collection> | |
| 303 </test> | |
| 304 | |
| 305 <!-- test4: no output selected --> | |
| 306 <test expect_failure="true"> | |
| 307 <param name="targetfile_dna" value="test_targets.fasta.gz"/> | |
| 308 <conditional name="job_conditional"> | |
| 309 <param name="hybpiper_job" value="stats"/> | |
| 310 <param name="hybpiper_results" > | |
| 311 <collection type="list"> | |
| 312 <element name="NZ874" value="NZ874.tar.gz" /> | |
| 313 </collection> | |
| 314 </param> | |
| 315 <param name="stats_type_select" value=""/> | |
| 316 <param name="heatmap" value="true"/> | |
| 317 <param name="sequence_type_select" value=""/> | |
| 318 </conditional> | |
| 319 </test> | |
| 320 | |
| 321 </tests> | |
| 322 <help><![CDATA[ | |
| 323 | |
| 324 Using HybPiper on Galaxy | |
| 325 ------------------------ | |
| 326 | |
| 327 Input | |
| 328 ~~~~~ | |
| 329 | |
| 330 On Galaxy, **you have to use paired collections as input** for | |
| 331 HybPiper assemblies. HybPiper relies on the directory hierarchy it creates for each | |
| 332 sample during assembly. The hierarchy is based on the name of the | |
| 333 sample, which you provide to Galaxy as the identifier in the collection. | |
| 334 | |
| 335 Using paired collections | |
| 336 ~~~~~~~~~~~~~~~~~~~~~~~~ | |
| 337 | |
| 338 If you have your sequencing reads in individual datasets, you can easily organise them into a paired | |
| 339 collection. See the Galaxy training material on `using dataset | |
| 340 collections <https://gxy.io/GTN:T00146>`__ | |
| 341 for a step-by-step guide. | |
| 342 | |
| 343 **Note**: because HybPiper uses sample | |
| 344 identifiers to create directories, you **can't use special characters** | |
| 345 in your sample identifiers. The only allowed characters are letters, | |
| 346 numbers, underscores and hyphens. | |
| 347 | |
| 348 You can't use single-end or unpaired reads as input to HybPiper on Galaxy. | |
| 349 | |
| 350 Running HybPiper | |
| 351 ~~~~~~~~~~~~~~~~ | |
| 352 | |
| 353 The following HybPiper analyses are available on Galaxy: | |
| 354 | |
| 355 1. Check your target file and fix issues (optional) | |
| 356 2. Assemble target loci per-sample | |
| 357 3. Extract sequences and summary statistics | |
| 358 | |
| 359 Use the *Type of hybpiper run* drop-down to select an analysis. | |
| 360 | |
| 361 .. class:: infomark | |
| 362 | |
| 363 What it does | |
| 364 ------------ | |
| 365 | |
| 366 HybPiper was designed for processing targeted sequence capture data. In | |
| 367 targeted sequence capture, DNA sequencing libraries are enriched for | |
| 368 gene regions of interest. This is used for sequencing many loci | |
| 369 simultaneously based on bait sequences. | |
| 370 | |
| 371 HybPiper is a suite of scripts that wrap and connect other tools to | |
| 372 extract target sequences from the sequencing reads. The HybPiper | |
| 373 pipeline starts with high-throughput sequencing reads (for example from | |
| 374 Illumina MiSeq), and assigns them to target genes using DIAMOND. The | |
| 375 reads are distributed to separate directories, where they are assembled | |
| 376 separately using SPAdes. The main output is a collection of FASTA files | |
| 377 of the (in frame) CDS portion of the sample for each target region. You | |
| 378 can also generate separate collections of files with the translated | |
| 379 protein sequences, the intronic regions flanking each exon, and putative | |
| 380 paralog sequences. | |
| 381 | |
| 382 For more information, please see `the HybPiper | |
| 383 wiki <https://github.com/mossmatters/HybPiper/wiki>`__. | |
| 384 | |
| 385 | |
| 386 ]]></help> | |
| 387 <expand macro="citations"/> | |
| 388 </tool> |
