Mercurial > repos > iuc > pbgcpp
comparison pbgcpp.xml @ 0:3090b3a0be9f draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/pbgcpp commit d8032f67869704a4f9308796d748966d1f4760ae
| author | iuc |
|---|---|
| date | Wed, 01 Mar 2023 22:42:11 +0000 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:3090b3a0be9f |
|---|---|
| 1 <tool id="pbgcpp" name="pbgcpp" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
| 2 <description>Compute genomic consensus and call variants using PacBio reads mapped to a reference.</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="xrefs"/> | |
| 7 <expand macro="requirements"/> | |
| 8 <command detect_errors="exit_code"><![CDATA[ | |
| 9 ## stage the alignment and its index under fixed names in the working directory | |
| 10 ln -s '$input' 'input.bam' && | |
| 11 ln -s '$input.metadata.bam_index' 'input.bam.bai' && | |
| 12 ## resolve the reference: link a history dataset to a fixed local name, or use the cached path as-is | |
| 13 #if $reference_source.reference_source_selector == 'history': | |
| 14 #set ref_path = 'reference.fa' | |
| 15 ln -fs '$reference_source.ref_file' '$ref_path' && | |
| 16 #else: | |
| 17 #set ref_path = $reference_source.ref_file.fields.path | |
| 18 #end if | |
| 19 | |
| 20 ## one "output.<extension>" name per selected format, comma-separated | |
| 21 #set output_line = ','.join('output.' + str(ext) for ext in $output_selector) | |
| 22 | |
| 23 ## run gcpp | |
| 24 gcpp | |
| 25 --num-threads \${GALAXY_SLOTS:-4} | |
| 26 --reference '$ref_path' | |
| 27 --output $output_line | |
| 28 'input.bam' | |
| 29 ]]></command> | |
| 30 <inputs> | |
| 31 <!-- Reference selection, adapted from the tools-iuc minimap2 wrapper: either a cached genome from the all_fasta data table or a FASTA dataset from the history. --> | |
| 32 <conditional name="reference_source"> | |
| 33 <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a built-in index?"> | |
| 34 <option value="cached">Use a built-in genome index</option> | |
| 35 <option value="history">Use a genome from history and build index</option> | |
| 36 </param> | |
| 37 <when value="cached"> | |
| 38 <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list"> | |
| 39 <options from_data_table="all_fasta"> | |
| 40 <filter type="sort_by" column="2" /> | |
| 41 <validator type="no_options" message="No reference genomes are available" /> | |
| 42 </options> | |
| 43 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> | |
| 44 </param> | |
| 45 </when> | |
| 46 <when value="history"> | |
| 47 <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" /> | |
| 48 </when> | |
| 49 </conditional> | |
| 50 <param type="data" name="input" format="bam" label="bam" help="The input BAM alignment file" /> | |
| 51 <!-- Output options: optional="false" forces at least one format, because the command joins the selection into the value of gcpp's output option and an empty selection would leave that option without a value. --> | |
| 52 <param name="output_selector" type="select" multiple="true" optional="false" display="checkboxes" label="Output formats"> | |
| 53 <!-- Use the format's extension as the value, so the command template can append it to "output." directly and the outputs can pick the files up by the same names. --> | |
| 54 <option value="fa" selected="true">Computed consensus (fasta)</option> | |
| 55 <option value="vcf">Variants (vcf)</option> | |
| 56 <option value="gff">Variants (gff)</option> | |
| 57 </param> | |
| 58 </inputs> | |
| 59 <outputs> <!-- each dataset is collected from output.<extension> and exists only when that format is selected in output_selector --> | |
| 60 <data name="fa" label="${tool.name} on ${on_string} (consensus)" format="fasta" from_work_dir="output.fa"> | |
| 61 <filter>output_selector and 'fa' in output_selector</filter> | |
| 62 </data> | |
| 63 <data name="gff" label="${tool.name} on ${on_string} (gff)" format="gff" from_work_dir="output.gff"> | |
| 64 <filter>output_selector and 'gff' in output_selector</filter> | |
| 65 </data> | |
| 66 <data name="vcf" label="${tool.name} on ${on_string} (vcf)" format="vcf" from_work_dir="output.vcf"> | |
| 67 <filter>output_selector and 'vcf' in output_selector</filter> | |
| 68 </data> | |
| 69 </outputs> | |
| 70 <tests> | |
| 71 <!-- test1: basic test with a history reference and only the consensus requested (output from pbmm2 1.10.0) --> | |
| 72 <test expect_num_outputs="1"> | |
| 73 <param name="reference_source_selector" value="history" /> | |
| 74 <param name="ref_file" value="bnd-ref.fasta"/> | |
| 75 <param name="input" value="pbmm2_3.bam"/> | |
| 76 <param name="output_selector" value="fa"/> | |
| 77 <output name="fa" ftype="fasta" file="pbgcpp_test1_out.fa"/> | |
| 78 </test> | |
| 79 <!-- test2: output selector; all three formats are requested, so all three filtered outputs must be produced --> | |
| 80 <test expect_num_outputs="3"> | |
| 81 <param name="reference_source_selector" value="history" /> | |
| 82 <param name="ref_file" value="bnd-ref.fasta"/> | |
| 83 <param name="input" value="pbmm2.bam"/> | |
| 84 <param name="output_selector" value="fa,gff,vcf"/> | |
| 85 <output name="fa" ftype="fasta" file="pbgcpp_test2_out.fa"/> | |
| 86 <output name="gff" ftype="gff"> | |
| 87 <assert_contents> | |
| 88 <has_text text="gff-version 3" /> | |
| 89 </assert_contents> | |
| 90 </output> | |
| 91 <output name="vcf" ftype="vcf"> | |
| 92 <assert_contents> | |
| 93 <has_text text="fileformat=VCFv4.2" /> | |
| 94 </assert_contents> | |
| 95 </output> | |
| 96 </test> | |
| 97 <!-- test3: cached genome; selects the "bnd-ref" entry of the all_fasta data table (presumably provided by the test data-table configuration, verify) and expects only the consensus output --> | |
| 98 <test expect_num_outputs="1"> | |
| 99 <param name="reference_source_selector" value="cached" /> | |
| 100 <param name="ref_file" value="bnd-ref"/> | |
| 101 <param name="input" value="pbmm2_3.bam"/> | |
| 102 <param name="output_selector" value="fa"/> | |
| 103 <output name="fa" ftype="fasta" file="pbgcpp_test3_out.fa"/> | |
| 104 </test> | |
| 105 </tests> | |
| 106 <help><![CDATA[ | |
| 107 **What it does** | |
| 108 | |
| 109 Compute genomic consensus and call variants relative to the reference. | |
| 110 | |
| 111 This tool requires a PacBio BAM file. | |
| 112 | |
| 113 You can create one by mapping PacBio reads to the reference genome with | |
| 114 the `pbmm2 <root?tool_id=pbmm2>`__ tool. When doing this, you must give | |
| 115 pbmm2 the CLR reads in unaligned BAM format, not FASTQ or FASTA. This is | |
| 116 because the pbgcpp algorithm uses additional information that PacBio | |
| 117 stores in its unaligned BAM files. | |
| 118 | |
| 119 **NOTE**: The pbgcpp tool used to be called GenomicConsensus. It works for PacBio Sequel data and RS data with the P6-C4 chemistry. | |
| 120 | |
| 121 -------------- | |
| 122 | |
| 123 pbgcpp is Pacific Biosciences’ tool to generate accurate reference | |
| 124 contigs. It takes an alignment in the form of a BAM file and polishes | |
| 125 the references with the provided subreads from the alignment. It uses | |
| 126 the Arrow algorithm in multi-molecule consensus setting and can reach up | |
| 127 to QV60 at coverage 100. pbgcpp is the successor of the venerable | |
| 128 GenomicConsensus suite which has reached EOL. | |
| 129 | |
| 130 See the `Pacific Biosciences GitHub | |
| 131 page <https://github.com/PacificBiosciences/pbbioconda>`__ for more | |
| 132 information. | |
| 133 | |
| 134 **Input**: Aligned subreads in PacBio BAM format (.bam). Compatible with PacBio Sequel data and RS data with the P6-C4 chemistry. | |
| 135 | |
| 136 **Output**: Polished contigs in FASTA format and, if selected, variant calls in VCF and/or GFF format. | |
| 137 | |
| 138 **Why am I getting “Missing valid chemistry from input file, is this a | |
| 139 proper PBBAM input file?”** | |
| 140 | |
| 141 pbgcpp expects metadata in the BAM file that most aligners (like | |
| 142 minimap2) don’t include by default. Align the PacBio reads with pbmm2 | |
| 143 instead. | |
| 144 | |
| 145 ]]></help> | |
| 146 <expand macro="creator"/> | |
| 147 </tool> |
