Mercurial > repos > iuc > featurecounts
comparison featurecounts.xml @ 0:8e70555968a1 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/featurecounts commit 03f64004f90ac0a7be67ecfc355a7b361f3c3314
| author | iuc |
|---|---|
| date | Tue, 20 Sep 2016 18:00:30 -0400 |
| parents | |
| children | 94fdae43f062 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:8e70555968a1 |
|---|---|
| 1 <tool id="featurecounts" name="featureCounts" version="1.4.6.p5" profile="16.04"> | |
| 2 <description>Measure gene expression in RNA-Seq experiments from SAM or BAM files.</description> | |
| 3 <requirements> | |
| 4 <requirement type="package" version="1.4.6p5">subread</requirement> | |
| 5 </requirements> | |
| 6 <!-- Version string: stderr is merged into stdout and empty lines are dropped by `grep .` --> | |
| 7 <version_command>featureCounts -v 2>&1 | grep .</version_command> | |
| 8 <command><![CDATA[ | |
| 9 ## Run featureCounts on the selected SAM or BAM alignment against the annotation file | |
| 10 featureCounts | |
| 11 -a "$reference_gene_sets" | |
| 12 -o "output" | |
| 13 -T \${GALAXY_SLOTS:-2} | |
| 14 | |
| 15 -t "$extended_parameters.gff_feature_type" | |
| 16 -g "$extended_parameters.gff_feature_attribute" | |
| 17 $extended_parameters.summarization_level | |
| 18 $extended_parameters.contribute_to_multiple_features | |
| 19 -s $extended_parameters.strand_specificity | |
| 20 $extended_parameters.multimapping_enabled.multimapping_counts | |
| 21 ## The fraction flag only exists (and is only passed) when -M is enabled | |
| 22 #if str($extended_parameters.multimapping_enabled.multimapping_counts) == " -M" | |
| 23 $extended_parameters.multimapping_enabled.fraction | |
| 24 #end if | |
| 25 | |
| 26 -Q $extended_parameters.mapping_quality | |
| 27 $extended_parameters.largest_overlap | |
| 28 --minOverlap $extended_parameters.min_overlap | |
| 29 $extended_parameters.read_reduction | |
| 30 $extended_parameters.primary | |
| 31 $extended_parameters.ignore_dup | |
| 32 $extended_parameters.count_split_alignments_only | |
| 33 #if str($extended_parameters.read_extension_5p) != "0" | |
| 34 --readExtension5 $extended_parameters.read_extension_5p | |
| 35 #end if | |
| 36 | |
| 37 #if str($extended_parameters.read_extension_3p) != "0" | |
| 38 --readExtension3 $extended_parameters.read_extension_3p | |
| 39 #end if | |
| 40 ## Paired-end options: -P, -d and -D are only passed when fragment counting (-p) is enabled | |
| 41 $pe_parameters.fragment_counting_enabled.fragment_counting | |
| 42 #if str($pe_parameters.fragment_counting_enabled.fragment_counting) == " -p" | |
| 43 $pe_parameters.fragment_counting_enabled.check_distance_enabled.check_distance | |
| 44 #if str($pe_parameters.fragment_counting_enabled.check_distance_enabled.check_distance) == " -P" | |
| 45 -d $pe_parameters.fragment_counting_enabled.check_distance_enabled.minimum_fragment_length | |
| 46 -D $pe_parameters.fragment_counting_enabled.check_distance_enabled.maximum_fragment_length | |
| 47 #end if | |
| 48 #end if | |
| 49 | |
| 50 $pe_parameters.only_both_ends | |
| 51 -S $pe_parameters.orientation | |
| 52 $pe_parameters.exclude_chimerics | |
| 53 | |
| 54 "${alignment}" | |
| 55 | |
| 56 ## Removal of comment and column-header line | |
| 57 && grep -v "^#" "output" | tail -n+2 > body.txt | |
| 58 | |
| 59 ## Set the right columns for the tabular formats | |
| 60 #if $format.value == "tabdel_medium" | |
| 61 && cut -f 1,7 body.txt > expression_matrix.txt | |
| 62 | |
| 63 ## cut always emits columns in file order, whatever order is requested: -f 1,7,6 would return columns 1,6,7 | |
| 64 ## Thus the gene length column (last column) has to be added separately | |
| 65 && cut -f 6 body.txt > gene_lengths.txt | |
| 66 && paste expression_matrix.txt gene_lengths.txt > expression_matrix.txt.bak | |
| 67 && mv -f expression_matrix.txt.bak "${output_medium}" | |
| 68 #elif $format.value == "tabdel_short" | |
| 69 && cut -f 1,7 body.txt > "${output_short}" | |
| 70 #else | |
| 71 && cp body.txt "${output_full}" | |
| 72 #end if | |
| 73 | |
| 74 | |
| 75 #if str($include_feature_length_file) == "true" | |
| 76 && cut -f 1,6 body.txt > "${output_feature_lengths}" | |
| 77 #end if | |
| 78 ## Strip the first (header) line from the featureCounts summary file | |
| 79 && tail -n+2 "output.summary" > "${output_summary}" | |
| 80 | |
| 81 ]]></command> | |
| 82 <inputs> | |
| 83 <param name="alignment" | |
| 84 type="data" | |
| 85 multiple="false" | |
| 86 format="bam,sam" | |
| 87 label="Alignment file" | |
| 88 help="The input alignment file in which the gene expression has to be counted. The file can be in SAM or BAM format." /> | |
| 89 | |
| 90 <param name="reference_gene_sets" | |
| 91 type="data" | |
| 92 format="gff,gtf,gff3" | |
| 93 label="Gene annotation file" | |
| 94 help="The program assumes that the provided annotation file is in GTF format. Make sure that the gene annotation file corresponds to the same reference genome as used for the alignment" /> | |
| 95 <!-- Selects which columns of the featureCounts table are kept; the chosen value also decides which of output_short / output_medium / output_full is created --> | |
| 96 <param name="format" | |
| 97 type="select" | |
| 98 label="Output format" | |
| 99 help="The output format will be tabular, select the preferred columns here"> | |
| 100 <option value="tabdel_short" selected="true">Gene-ID "\t" read-count (DESeq2 IUC wrapper compatible)</option> | |
| 101 <option value="tabdel_medium">Gene-ID "\t" read-count "\t" gene-length</option> | |
| 102 <option value="tabdel_full">featureCounts 1.4.0+ default (includes regions provided by the GTF file)</option> | |
| 103 </param> | |
| 104 | |
| 105 <param name="include_feature_length_file" | |
| 106 type="boolean" | |
| 107 truevalue="true" | |
| 108 falsevalue="false" | |
| 109 checked="false" | |
| 110 label="Create gene-length file" | |
| 111 help="Creates a tabular file that contains the effective (nucleotides used for counting reads) length of the feature; might be useful for estimating FPKM/RPKM" /> | |
| 112 | |
| 113 | |
| 114 <section name="pe_parameters" title="Options for paired-end reads"> | |
| 115 <conditional name="fragment_counting_enabled"> | |
| 116 <!-- -p switches counting from reads to fragments; the distance check (-P, -d, -D) is only offered when it is enabled --> | |
| 117 <param name="fragment_counting" | |
| 118 type="select" | |
| 119 argument="-p" | |
| 121 label="Count fragments instead of reads" | |
| 122 help="If specified, fragments (or templates) will be counted instead of reads."> | |
| 123 <option value="" selected="true">Disabled; all reads/mates will be counted individually</option> | |
| 124 <option value=" -p">Enabled; fragments (or templates) will be counted instead of reads</option> | |
| 125 </param> | |
| 126 | |
| 127 <when value=" -p"> | |
| 128 <conditional name="check_distance_enabled"> | |
| 129 <param name="check_distance" | |
| 130 type="boolean" | |
| 131 truevalue=" -P" | |
| 132 falsevalue="" | |
| 133 argument="-P" | |
| 134 label="Check paired-end distance" | |
| 135 help="If specified, paired-end distance will be checked when assigning fragments to meta-features or features. This option is only applicable when -p (Count fragments instead of reads) is specified. The distance thresholds should be specified using -d and -D (minimum and maximum fragment/template length) options." /> | |
| 136 <when value=" -P"> | |
| 137 <param name="minimum_fragment_length" | |
| 138 type="integer" | |
| 139 value="50" | |
| 140 argument="-d" | |
| 141 label="Minimum fragment/template length." /> | |
| 142 <param name="maximum_fragment_length" | |
| 143 type="integer" | |
| 144 value="600" | |
| 145 argument="-D" | |
| 146 label="Maximum fragment/template length." /> | |
| 147 </when> | |
| 148 <when value="" /> | |
| 149 </conditional> | |
| 150 </when> | |
| 151 <when value="" /> | |
| 152 </conditional> | |
| 153 | |
| 154 <param name="only_both_ends" | |
| 155 type="boolean" | |
| 156 truevalue=" -B" | |
| 157 falsevalue="" | |
| 158 argument="-B" | |
| 159 label="Only allow fragments with both reads aligned" | |
| 160 help="If specified, only fragments that have both ends successfully aligned will be considered for summarization. This option is only applicable for paired-end reads." /> | |
| 161 | |
| 162 <param name="orientation" | |
| 163 type="select" | |
| 164 label="Orientation of the two reads from the same pair" | |
| 165 argument="-S" | |
| 166 help="Default is 'fr'"> | |
| 167 <option value="fr" selected="true">Forward, Reverse (fr)</option> | |
| 168 <option value="ff">Forward, Forward (ff)</option> | |
| 169 <option value="rf">Reverse, Forward (rf)</option> | |
| 170 </param> | |
| 171 | |
| 172 <param name="exclude_chimerics" | |
| 173 type="boolean" | |
| 174 truevalue=" -C" | |
| 175 falsevalue="" | |
| 176 argument="-C" | |
| 177 checked="true" | |
| 178 label="Exclude chimeric fragments" | |
| 179 help="If specified, the chimeric fragments (those fragments that have their two ends aligned to different chromosomes) will NOT be included for summarization. This option is only applicable for paired-end read data." /> | |
| 180 </section> | |
| 181 | |
| 182 <section name="extended_parameters" title="Advanced options"> | |
| 183 <param name="gff_feature_type" | |
| 184 type="text" | |
| 185 value="exon" | |
| 186 argument="-t" | |
| 187 label="GFF feature type filter" | |
| 188 help="Specify the feature type. Only rows which have the matched feature type in the provided GTF annotation file will be included for read counting. `exon' by default." /> | |
| 189 | |
| 190 <param name="gff_feature_attribute" | |
| 191 type="text" | |
| 192 value="gene_id" | |
| 193 argument="-g" | |
| 194 label="GFF gene identifier" | |
| 195 help="Specify the attribute type used to group features (eg. exons) into meta-features (eg. genes), when GTF annotation is provided. `gene_id' by default. This attribute type is usually the gene identifier. This argument is useful for the meta-feature level summarization." /> | |
| 196 | |
| 197 <param name="summarization_level" | |
| 198 type="boolean" | |
| 199 truevalue=" -f" | |
| 200 falsevalue="" | |
| 201 argument="-f" | |
| 202 label="On feature level" | |
| 203 help="If specified, read summarization will be performed at the feature level. By default (-f is not specified), the read summarization is performed at the meta-feature level." /> | |
| 204 | |
| 205 <param name="contribute_to_multiple_features" | |
| 206 type="boolean" | |
| 207 truevalue=" -O" | |
| 208 falsevalue="" | |
| 209 argument="-O" | |
| 210 label="Allow read to contribute to multiple features" | |
| 211 help="If specified, reads (or fragments if -p is specified) will be allowed to be assigned to more than one matched meta-feature (or matched feature if -f is specified)" /> | |
| 212 | |
| 213 <param name="strand_specificity" | |
| 214 type="select" | |
| 215 label="Strand specificity of the protocol" | |
| 216 argument="-s" | |
| 217 help="Indicate if strand-specific read counting should be performed."> | |
| 218 <option value="0" selected="true">Unstranded</option> | |
| 219 <option value="1">Stranded (forwards)</option> | |
| 220 <option value="2">Stranded (reverse)</option> | |
| 221 </param> | |
| 222 <!-- The fraction option is only offered when multi-mapping reads are counted (-M) --> | |
| 223 <conditional name="multimapping_enabled"> | |
| 224 <param name="multimapping_counts" | |
| 225 type="select" | |
| 226 argument="-M" | |
| 227 label="Count multi-mapping reads/fragments" | |
| 228 help="If specified, multi-mapping reads/fragments will be counted (ie. a multi-mapping read will be counted up to N times if it has N reported mapping locations). The program uses the `NH' tag to find multi-mapping reads."> | |
| 229 <option value="" selected="true">Disabled; multi-mapping reads are excluded (default)</option> | |
| 230 <option value=" -M">Enabled; multi-mapping reads are included</option> | |
| 231 </param> | |
| 232 <when value=" -M"> | |
| 233 <param name="fraction" | |
| 234 type="boolean" | |
| 235 truevalue="--fraction" | |
| 236 falsevalue="" | |
| 237 argument="--fraction" | |
| 238 label="Assign fractions to multimapping reads" | |
| 239 help="If specified, a fractional count 1/n will be generated for each multi-mapping read, where n is the number of alignments (indicated by 'NH' tag) reported for the read. This option must be used together with the '-M' option." /> | |
| 240 </when> | |
| 241 <when value="" /> | |
| 242 </conditional> | |
| 243 | |
| 244 <param name="mapping_quality" | |
| 245 type="integer" | |
| 246 value="12" | |
| 247 argument="-Q" | |
| 248 label="Minimum mapping quality per read" | |
| 249 help="The minimum mapping quality score a read must satisfy in order to be counted. For paired-end reads, at least one end should satisfy this criteria. 12 by default." /> | |
| 250 | |
| 251 <param name="largest_overlap" | |
| 252 type="boolean" | |
| 253 truevalue=" --largestOverlap" | |
| 254 falsevalue="" | |
| 255 argument="--largestOverlap" | |
| 256 label="Largest overlap" | |
| 257 help="If specified, reads (or fragments) will be assigned to the target that has the largest number of overlapping bases" /> | |
| 258 | |
| 259 <param name="min_overlap" | |
| 260 type="integer" | |
| 261 value="1" | |
| 262 argument="--minOverlap" | |
| 263 label="Minimum overlap" | |
| 264 help="Specify the minimum required number of overlapping bases between a read (or a fragment) and a feature. 1 by default. If a negative value is provided, the read will be extended from both ends." /> | |
| 265 <!-- A value of 0 keeps the corresponding read extension option off the command line --> | |
| 266 <param name="read_extension_5p" | |
| 267 type="integer" | |
| 268 value="0" | |
| 269 argument="--readExtension5" | |
| 270 label="Read 5' extension" | |
| 271 help="Reads are extended upstream by ... bases from their 5' end" /> | |
| 272 | |
| 273 <param name="read_extension_3p" | |
| 274 type="integer" | |
| 275 value="0" | |
| 276 argument="--readExtension3" | |
| 277 label="Read 3' extension" | |
| 278 help="Reads are extended downstream by ... bases from their 3' end" /> | |
| 279 | |
| 280 <param name="read_reduction" | |
| 281 type="select" | |
| 282 label="Reduce read to single position" | |
| 283 argument="--read2pos" | |
| 284 help="The read is reduced to its 5' most base or 3' most base. Read summarization is then performed based on the single base which the read is reduced to."> | |
| 285 <option value="" selected="true">Leave the read as it is</option> | |
| 286 <option value="--read2pos 5">Reduce it to the 5' end</option> | |
| 287 <option value="--read2pos 3">Reduce it to the 3' end</option> | |
| 288 </param> | |
| 289 | |
| 290 <param name="primary" | |
| 291 type="boolean" | |
| 292 truevalue=" --primary" | |
| 293 falsevalue="" | |
| 294 argument="--primary" | |
| 295 label="Only count primary alignments" | |
| 296 help="If specified, only primary alignments will be counted. Primary and secondary alignments are identified using bit 0x100 in the Flag field of SAM/BAM files. All primary alignments in a dataset will be counted no matter they are from multi-mapping reads or not ('-M' is ignored)." /> | |
| 297 | |
| 298 <param name="ignore_dup" | |
| 299 type="boolean" | |
| 300 truevalue=" --ignoreDup" | |
| 301 falsevalue="" | |
| 302 argument="--ignoreDup" | |
| 303 label="Ignore reads marked as duplicate" | |
| 304 help="If specified, reads that were marked as duplicates will be ignored. Bit 0x400 in FLAG field of SAM/BAM file is used for identifying duplicate reads. In paired end data, the entire read pair will be ignored if at least one end is found to be a duplicate read." /> | |
| 305 | |
| 306 <param name="count_split_alignments_only" | |
| 307 type="boolean" | |
| 308 truevalue=" --countSplitAlignmentsOnly" | |
| 309 falsevalue="" | |
| 310 argument="--countSplitAlignmentsOnly" | |
| 311 label="Count split alignments only" | |
| 312 help="If specified, only split alignments (CIGAR strings containing letter `N') will be counted. All the other alignments will be ignored. An example of split alignments is the exon-spanning reads in RNA-seq data." /> | |
| 313 </section> | |
| 314 </inputs> | |
| 315 <outputs> <!-- Exactly one of output_medium / output_short / output_full is created, chosen by the "format" parameter --> | |
| 316 <data format="tabular" | |
| 317 name="output_medium" | |
| 318 label="${tool.name} on ${on_string}"> | |
| 319 <filter>format == "tabdel_medium"</filter> | |
| 320 <actions> | |
| 321 <action name="column_names" type="metadata" default="Geneid,${alignment.name},Length" /> | |
| 322 </actions> | |
| 323 </data> | |
| 324 <!-- Two-column table (columns 1 and 7 of the featureCounts table) --> | |
| 325 <data format="tabular" | |
| 326 name="output_short" | |
| 327 label="${tool.name} on ${on_string}"> | |
| 328 <filter>format == "tabdel_short"</filter> | |
| 329 <actions> | |
| 330 <action name="column_names" type="metadata" default="Geneid,${alignment.name}" /> | |
| 331 </actions> | |
| 332 </data> | |
| 333 <!-- Complete featureCounts table with its comment and column-header lines removed --> | |
| 334 <data format="tabular" | |
| 335 name="output_full" | |
| 336 label="${tool.name} on ${on_string}: count table"> | |
| 337 <filter>format == "tabdel_full"</filter> | |
| 338 <actions> | |
| 339 <action name="column_names" type="metadata" default="Geneid,Chr,Start,End,Strand,Length,${alignment.name}" /> | |
| 340 </actions> | |
| 341 </data> | |
| 342 <!-- featureCounts summary without its header line; has no filter, so it is created on every run, and is declared hidden --> | |
| 343 <data format="tabular" | |
| 344 name="output_summary" | |
| 345 hidden="true" | |
| 346 label="${tool.name} on ${on_string}: summary"> | |
| 347 <actions> | |
| 348 <action name="column_names" type="metadata" default="Status,${alignment.name}" /> | |
| 349 </actions> | |
| 350 </data> | |
| 351 <!-- Feature id and length (columns 1 and 6); only created when include_feature_length_file is enabled --> | |
| 352 <data format="tabular" | |
| 353 name="output_feature_lengths" | |
| 354 label="${tool.name} on ${on_string}: feature lengths"> | |
| 355 <filter>include_feature_length_file</filter> | |
| 356 <actions> | |
| 357 <action name="column_names" type="metadata" default="Feature,Length" /> | |
| 358 </actions> | |
| 359 </data> | |
| 360 </outputs> | |
| 361 <tests> | |
| 362 <test> | |
| 363 <param name="alignment" value="featureCounts_input1.bam" ftype="bam" /> | |
| 364 <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> | |
| 365 <param name="format" value="tabdel_short" /> | |
| 366 <param name="include_feature_length_file" value="true"/> | |
| 367 <output name="output_short" file="output_1_short.tab"/> | |
| 368 <output name="output_summary" file="output_1_summary.tab"/> | |
| 369 </test> | |
| 370 <test> | |
| 371 <param name="alignment" value="featureCounts_input1.bam" ftype="bam" /> | |
| 372 <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> | |
| 373 <param name="format" value="tabdel_medium" /> | |
| 374 <param name="include_feature_length_file" value="true"/> | |
| 375 <output name="output_medium" file="output_1_medium.tab"/> | |
| 376 <output name="output_summary" file="output_1_summary.tab"/> | |
| 377 </test> | |
| 378 <test> | |
| 379 <param name="alignment" value="featureCounts_input1.bam" ftype="bam" /> | |
| 380 <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> | |
| 381 <param name="format" value="tabdel_full" /> | |
| 382 <param name="include_feature_length_file" value="true"/> | |
| 383 <output name="output_full" file="output_1_full.tab"/> | |
| 384 <output name="output_summary" file="output_1_summary.tab"/> | |
| 385 <output name="output_feature_lengths" file="output_feature_lengths.tab"/> | |
| 386 </test> | |
| 387 <!-- NOTE(review): the three tests below use the same inputs and parameters as the three above but expect different files (output_2_*); confirm which input or parameter they were meant to vary --> | |
| 388 <test> | |
| 389 <param name="alignment" value="featureCounts_input1.bam" ftype="bam" /> | |
| 390 <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> | |
| 391 <param name="format" value="tabdel_short" /> | |
| 392 <param name="include_feature_length_file" value="true"/> | |
| 393 <output name="output_short" file="output_2_short.tab"/> | |
| 394 <output name="output_summary" file="output_2_summary.tab"/> | |
| 395 </test> | |
| 396 <test> | |
| 397 <param name="alignment" value="featureCounts_input1.bam" ftype="bam" /> | |
| 398 <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> | |
| 399 <param name="format" value="tabdel_medium" /> | |
| 400 <param name="include_feature_length_file" value="true"/> | |
| 401 <output name="output_medium" file="output_2_medium.tab"/> | |
| 402 <output name="output_summary" file="output_2_summary.tab"/> | |
| 403 </test> | |
| 404 <test> | |
| 405 <param name="alignment" value="featureCounts_input1.bam" ftype="bam" /> | |
| 406 <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> | |
| 407 <param name="format" value="tabdel_full" /> | |
| 408 <param name="include_feature_length_file" value="true"/> | |
| 409 <output name="output_full" file="output_2_full.tab"/> | |
| 410 <output name="output_summary" file="output_2_summary.tab"/> | |
| 411 <output name="output_feature_lengths" file="output_feature_lengths.tab"/> | |
| 412 </test> | |
| 413 </tests> | |
| 414 | |
| 415 <help><![CDATA[ | |
| 416 featureCounts | |
| 417 ############# | |
| 418 | |
| 419 Overview | |
| 420 -------- | |
| 421 FeatureCounts is a light-weight read counting program written entirely in the C programming language. It can be used to count both gDNA-seq and RNA-seq reads for genomic features in SAM/BAM files. | |
| 422 | |
| 423 Input formats | |
| 424 ------------- | |
| 425 Alignments should be provided in either: | |
| 426 | |
| 427 - SAM format, http://samtools.sourceforge.net/samtools.shtml#5 | |
| 428 - BAM format | |
| 429 | |
| 430 Gene regions should be provided in the GFF/GTF format: | |
| 431 | |
| 432 - http://genome.ucsc.edu/FAQ/FAQformat.html#format3 | |
| 433 - http://www.ensembl.org/info/website/upload/gff.html | |
| 434 | |
| 435 Output format | |
| 436 ------------- | |
| 437 FeatureCounts produces a table containing the counted reads, per gene, per row. Optionally the last column can be set to be the effective gene-length. These tables are compatible with the DESeq2 Galaxy wrapper by IUC. | |
| 438 ]]></help> | |
| 439 <citations> <!-- Publication to cite for this tool, referenced by DOI --> | |
| 440 <citation type="doi">10.1093/bioinformatics/btt656</citation> | |
| 441 </citations> | |
| 442 </tool> |
