Mercurial > repos > iuc > hybpiper

diff hybpiper.xml @ 0:91a16438e849 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hybpiper commit b439a8bebdd20955135572a15672a12a166d7ff8
author: iuc
date: Sat, 23 Sep 2023 16:49:12 +0000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hybpiper.xml	Sat Sep 23 16:49:12 2023 +0000
@@ -0,0 +1,388 @@
+<tool id="hybpiper" name="HybPiper" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Analyse targeted sequence capture data</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="xrefs"/>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+
+    ## sample name checking
+    #import re
+    #def check_sample_name($sample_name):
+        #if re.search(r'[^A-Za-z0-9_\-]', $sample_name):
+            printf '%s\n'
+            'ERROR: special characters detected in sample identifier.'
+            'Identifiers may only contain letters, numbers, underscores and hyphens.'
+            'Check the identifier for the following sample:' 
+            '${sample_name}'
+            1>&2
+            &&
+            exit 1
+            &&
+        #end if
+    #end def
+
+    ## set up files
+    ln -s '${targetfile_dna}' ./target_file.fasta
+    &&
+
+    ###############################
+    ## hybpiper check_targetfile ##
+    ###############################
+
+    #if str( $job_conditional.hybpiper_job ) == "check_and_fix_targetfile":
+        hybpiper check_targetfile 
+        --targetfile_dna target_file.fasta
+        &&
+
+        mv fix_targetfile*.ctl hybpiper.ctl
+        &&
+
+        hybpiper fix_targetfile
+        --targetfile_dna target_file.fasta
+        --allow_gene_removal
+        hybpiper.ctl
+        &&
+    
+    #######################
+    ## hybpiper assemble ##
+    #######################
+
+    #elif str( $job_conditional.hybpiper_job ) == "assemble":
+        #set sample_prefix = str($job_conditional.paired_input.element_identifier)
+
+        $check_sample_name($sample_prefix)
+
+        hybpiper assemble
+        --readfiles
+        '${job_conditional.paired_input.forward}'
+        '${job_conditional.paired_input.reverse}'
+        --targetfile_dna target_file.fasta
+        --diamond
+        --cpu \${GALAXY_SLOTS:-1}
+        --prefix '${sample_prefix}'
+        &&
+
+        tar -cvf '${hybpiper_archive}' --directory='${sample_prefix}' .
+        &&
+
+    #######################################
+    ## hybpiper stats/retrieve_sequences ##
+    #######################################
+
+    #elif str( $job_conditional.hybpiper_job ) == "stats":
+
+        ## check logic of requested items
+        #unless $job_conditional.stats_type_select or $job_conditional.sequence_type_select:
+            printf '%s\n'
+            'ERROR: No outputs selected.'
+            1>&2
+            &&
+            exit 1
+            &&
+        #end unless
+        #if $job_conditional.heatmap and not $job_conditional.stats_type_select:
+            printf '%s\n'
+            'ERROR: heatmap requested, but no stats selected.'
+            1>&2
+            &&
+            exit 1
+            &&
+        #end if
+
+        #for $sample in $job_conditional.hybpiper_results
+            #set sample_prefix = str($sample.element_identifier)
+
+            $check_sample_name($sample_prefix)
+
+            mkdir -p '${sample_prefix}'
+            &&
+
+            tar -xf '${sample}' -C '${sample_prefix}'
+            &&
+
+            echo '${sample_prefix}' >> namelist.txt
+            &&
+        #end for
+
+        ## Produce a stats file for each requested output type
+        #for $stats_output in $job_conditional.stats_type_select:
+            hybpiper stats
+            --targetfile_dna target_file.fasta
+            --stats_filename 'stats.${stats_output}'
+            --seq_lengths_filename 'seq_lengths.${stats_output}'
+            '${stats_output}'
+            namelist.txt
+            &&
+
+            ## Produce heatmaps if selected
+            #if $job_conditional.heatmap:
+                hybpiper recovery_heatmap
+                --heatmap_filename 'heatmap.${stats_output}'
+                --heatmap_filetype svg
+                'seq_lengths.${stats_output}.tsv'
+                &&
+            #end if
+        #end for
+
+        ## Produce sequences for each requested type
+        #for $sequence_output in $job_conditional.sequence_type_select:
+            mkdir 'fasta.${sequence_output}'
+            &&
+            hybpiper retrieve_sequences
+            --targetfile_dna target_file.fasta
+            --sample_names namelist.txt
+            --fasta_dir 'fasta.${sequence_output}'
+            '${sequence_output}'
+            &&
+        #end for
+    #end if
+    
+    wait
+
+]]></command>
+    
+    <inputs>
+        <param argument="--targetfile_dna" type="data" format="fasta" label="Target file" help="Target file in FASTA format" />
+    
+        <conditional name="job_conditional">
+            <param name="hybpiper_job" type="select" label="Type of hybpiper run">
+                <option value="check_and_fix_targetfile">Check and fix targetfile</option>
+                <option value="assemble" selected="true">Assemble target loci</option>
+                <option value="stats">Extract sequences and/or stats from Hybpiper runs</option>
+            </param>
+    
+            <when value="check_and_fix_targetfile"/>
+    
+            <when value="assemble">
+                <param name="paired_input" format="fastqsanger" type="data_collection" collection_type="paired" label="Input reads" help="Your reads must be in a paired collection. See below for more information." />
+            </when>
+    
+            <when value="stats">
+                <param name="hybpiper_results" type="data_collection" collection_type="list" format="tar" multiple="true" label="Results from Hybpiper assemble runs" />
+                <param name="stats_type_select" type="select" label="Choose statistics to report" display="checkboxes" multiple="true" optional="true">
+                    <option value="gene" selected="true">Gene</option>
+                    <option value="supercontig">Supercontig</option>
+                </param>
+                <param name="heatmap" type="boolean" checked="false" label="Produce a heatmap for each of the selected statistics" />
+                <param name="sequence_type_select" type="select" display="checkboxes" label="Choose sequences to extract" multiple="true" optional="true">
+                    <option value="dna" selected="true">DNA</option>
+                    <option value="aa">Amino acid</option>
+                    <option value="intron">Intron</option> 
+                    <option value="supercontig">Supercontig</option>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+    
+    <outputs>
+        <!-- check_targetfile output -->
+        <data name="fixed_targetfile" label="${targetfile_dna.element_identifier} (fixed)" format="fasta" from_work_dir="target_file_fixed.fasta">
+            <filter>job_conditional['hybpiper_job'] == 'check_and_fix_targetfile'</filter>
+        </data>
+        <collection type="list" name="output_targetfile" label="Hybpiper logs for ${targetfile_dna.element_identifier}">
+            <data name="targetfile_ctl_file" label="Hybpiper .ctl file for ${on_string}" format="txt" from_work_dir="hybpiper.ctl" />
+            <data name="targetfile_report" label="Hybpiper targetfile report" format="tabular" from_work_dir="fix_targetfile_report.tsv" />
+            <filter>job_conditional['hybpiper_job'] == 'check_and_fix_targetfile'</filter>
+        </collection>
+
+        <!-- assemble output -->
+        <data name="hybpiper_archive" format="tar">
+            <filter>job_conditional['hybpiper_job'] == 'assemble'</filter>
+        </data>
+
+        <!-- stats / stats output -->
+        <collection name="hybpiper_stats" type="list" label="Hybpiper statistics">
+            <data name="stats_gene" label="Hybpiper statistics (gene)" format="tabular" from_work_dir="stats.gene.tsv">
+                <actions>
+                    <action name="column_names" type="metadata" default="Name,NumReads,ReadsMapped,PctOnTarget,GenesMapped,GenesWithContigs,GenesWithSeqs,GenesAt25pct,GenesAt50pct,GenesAt75pct,GenesAt150pct,ParalogWarningsLong,ParalogWarningsDepth,GenesWithoutStitchedContigs,GenesWithStitchedContigs,GenesWithStitchedContigsSkipped,GenesWithChimeraWarning,TotalBasesRecovered" />
+                </actions>
+            </data>
+            <data name="stats_supercontig" label="Hybpiper statistics (supercontig)" format="tabular" from_work_dir="stats.supercontig.tsv">
+                <actions>
+                    <action name="column_names" type="metadata" default="Name,NumReads,ReadsMapped,PctOnTarget,GenesMapped,GenesWithContigs,GenesWithSeqs,GenesAt25pct,GenesAt50pct,GenesAt75pct,GenesAt150pct,ParalogWarningsLong,ParalogWarningsDepth,GenesWithoutStitchedContigs,GenesWithStitchedContigs,GenesWithStitchedContigsSkipped,GenesWithChimeraWarning,TotalBasesRecovered" />
+                </actions>
+            </data>
+            <data name="seqlengths_gene" label="Assembled sequence lengths (gene)" format="tabular" from_work_dir="seq_lengths.gene.tsv"/>
+            <data name="seqlengths_supercontig" label="Assembled sequence lengths (supercontig)" format="tabular" from_work_dir="seq_lengths.supercontig.tsv">
+            </data>
+            <filter>job_conditional['hybpiper_job'] == 'stats' and ('gene' in job_conditional['stats_type_select'] or 'supercontig' in job_conditional['stats_type_select'])</filter>
+        </collection>
+
+        <!-- stats/heatmap output -->
+        <collection name="hybpiper_heatmaps" type="list" label="Hybpiper heatmaps">
+            <discover_datasets pattern="heatmap\.(?P&lt;designation&gt;.+)\.svg" format="svg" recurse="false" />
+            <filter>job_conditional['hybpiper_job'] == 'stats' and job_conditional['heatmap'] and job_conditional['heatmap'] is true</filter>            
+        </collection>
+
+        <!-- stats/sequences output -->         
+        <collection name="dna_sequences" type="list" label="DNA sequences">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.FNA" format="fasta" directory="fasta.dna" recurse="false" />
+            <filter>job_conditional['hybpiper_job'] == 'stats' and 'dna' in job_conditional['sequence_type_select']</filter>
+        </collection>
+        <collection name="aa_sequences" type="list" label="Amino acid sequences">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.FAA" format="fasta" directory="fasta.aa" recurse="false" />
+            <filter>job_conditional['hybpiper_job'] == 'stats' and 'aa' in job_conditional['sequence_type_select']</filter>
+        </collection>
+        <collection name="intron_sequences" type="list" label="Intron sequences">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fasta" format="fasta" directory="fasta.intron" recurse="false" />
+            <filter>job_conditional['hybpiper_job'] == 'stats' and 'intron' in job_conditional['sequence_type_select']</filter>
+        </collection>
+        <collection name="supercontig_sequences" type="list" label="Supercontig sequences">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fasta" format="fasta" directory="fasta.supercontig" recurse="false" />
+            <filter>job_conditional['hybpiper_job'] == 'stats' and 'supercontig' in job_conditional['sequence_type_select']</filter>
+        </collection>
+
+        <!-- dummy output, in case the user deselects everything -->
+        <data name="dummy_output" label="Stats or sequences from Hybpiper runs" from_work_dir="namelist.txt" format="txt">
+            <filter>job_conditional['hybpiper_job'] == 'stats' and not (job_conditional['stats_type_select'] or job_conditional['sequence_type_select']) </filter>
+        </data>
+
+    </outputs>
+    <tests>
+
+    <!-- test1: check and fix targetfile -->
+    <test expect_num_outputs="4">
+        <param name="targetfile_dna" value="test_targets.fasta.gz"/>
+        <conditional name="job_conditional">
+            <param name="hybpiper_job" value="check_and_fix_targetfile"/>
+        </conditional>
+        <output name="fixed_targetfile" file="test1_out.fasta"/>
+        <output_collection name="output_targetfile" type="list" count="2">
+            <element name="targetfile_ctl_file" file="test1_out.ctl"/>    
+            <element name="targetfile_report" file="test1_out.tsv"/>
+        </output_collection>
+    </test>
+
+    <!-- test2: assemble with paired collection -->
+    <!-- Not possible to test stats unless element_identifier can be set. -->
+    <test expect_failure="true">
+        <param name="targetfile_dna" value="test_targets.fasta.gz"/>
+        <conditional name="job_conditional">
+            <param name="hybpiper_job" value="assemble"/>
+            <param name="paired_input">
+                <collection type="paired">
+                    <element name="forward" ftype="fastqsanger.gz" value="NZ874_R1_test.fastq.gz" />
+                    <element name="reverse" ftype="fastqsanger.gz" value="NZ874_R2_test.fastq.gz" />
+                </collection>
+            </param>
+        </conditional>
+        <!-- <output name="hybpiper_archive">
+            <assert_contents>
+                <has_size value="2386944" delta="200000" />
+            </assert_contents>
+        </output> -->
+    </test>
+
+    <!-- test3: all stats output -->
+    <test expect_num_outputs="10">
+        <param name="targetfile_dna" value="test_targets.fasta.gz"/>
+        <conditional name="job_conditional">
+            <param name="hybpiper_job" value="stats"/>
+            <param name="hybpiper_results" >
+                <collection type="list">
+                    <element name="NZ874" value="NZ874.tar.gz" />
+                </collection>
+            </param>
+            <param name="stats_type_select" value="gene,supercontig"/>
+            <param name="heatmap" value="true"/>
+            <param name="sequence_type_select" value="dna,aa,intron,supercontig"/>
+        </conditional>
+        <output_collection name="hybpiper_stats" type="list" count="4" />
+        <output_collection name="hybpiper_heatmaps" type="list" count="2">
+        </output_collection>
+        <output_collection name="dna_sequences" type="list" count="13">
+        </output_collection>
+        <output_collection name="aa_sequences" type="list" count="13">
+        </output_collection>
+        <output_collection name="intron_sequences" type="list" count="13">
+        </output_collection>
+        <output_collection name="supercontig_sequences" type="list" count="13">
+        </output_collection>
+    </test>
+
+    <!-- test4: no output selected -->
+    <test expect_failure="true">
+        <param name="targetfile_dna" value="test_targets.fasta.gz"/>
+        <conditional name="job_conditional">
+            <param name="hybpiper_job" value="stats"/>
+            <param name="hybpiper_results" >
+                <collection type="list">
+                    <element name="NZ874" value="NZ874.tar.gz" />
+                </collection>
+            </param>
+            <param name="stats_type_select" value=""/>
+            <param name="heatmap" value="true"/>
+            <param name="sequence_type_select" value=""/>
+        </conditional>
+    </test>
+
+</tests>
+    <help><![CDATA[
+
+Using HybPiper on Galaxy
+------------------------
+
+Input
+~~~~~
+
+On Galaxy, **you have to use paired collections as input** for
+HybPiper assemblies. HybPiper relies on the directory hierarchy it creates for each
+sample during assembly. The hierarchy is based on the name of the
+sample, which you provide to Galaxy as the identifier in the collection.
+
+Using paired collections
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+If you have your sequencing reads in individual datasets, you can easily organise them into a paired
+collection. See the Galaxy training material on `using dataset
+collections <https://gxy.io/GTN:T00146>`__
+for a step-by-step guide.
+
+**Note**: because HybPiper uses sample
+identifiers to create directories, you **can't use special characters**
+in your sample identifiers. The only allowed characters are letters,
+numbers, underscores and hyphens.
+
+You can't use single-end and unpaired reads as input to Hybpiper on Galaxy.
+
+Running HybPiper
+~~~~~~~~~~~~~~~~
+
+The following HybPiper analyses are available on Galaxy:
+
+1. Check your target file and fix issues (optional)
+2. Assemble target loci per-sample
+3. Extract sequences and summary statistics
+
+Use the *Type of hybpiper run* drop-down to select an analysis.
+
+.. class:: infomark
+
+What it does
+------------
+
+HybPiper was designed for processing targeted sequence capture data. In
+targeted sequence capture, DNA sequencing libraries are enriched for
+gene regions of interest. This is used for sequencing many loci
+simultaneously based on bait sequences.
+
+HybPiper is a suite of scripts that wrap and connect other tools to
+extract target sequences from the sequencing reads. The HybPiper
+pipeline starts with high-throughput sequencing reads (for example from
+Illumina MiSeq), and assigns them to target genes using DIAMOND. The
+reads are distributed to separate directories, where they are assembled
+separately using SPAdes. The main output is a collection of FASTA files
+of the (in frame) CDS portion of the sample for each target region. You
+can also generate a separate collections of files with the translated
+protein sequences, the intronic regions flanking each exon, and putative
+paralog sequences.
+
+For more information, please see `the HybPiper
+wiki <https://github.com/mossmatters/HybPiper/wiki>`__.
+
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>