annotate umi-tools_dedup.xml @ 11:cf4494361a56 draft

"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
author iuc
date Wed, 10 Feb 2021 19:26:42 +0000
parents 0ac9b15f11c2
children 083c516d19a9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
1 <tool id="umi_tools_dedup" name="UMI-tools deduplicate" version="@VERSION@+galaxy1">
0
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
2 <description>Deduplicate reads using UMI and mapping coordinates</description>
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
3 <macros>
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
4 <import>macros.xml</import>
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
5 </macros>
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
6 <expand macro="requirements">
10
0ac9b15f11c2 "planemo upload commit 6ba769440f8f6a62e9ebfac069a30edc541bac0a"
iuc
parents: 9
diff changeset
7 <requirement type="package" version="1.9">samtools</requirement>
0
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
8 </expand>
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
9 <command detect_errors="exit_code"><![CDATA[
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
10 #if $input.is_of_type("sam"):
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
11 #set $input_file = $input
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
12 #else:
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
13 ln -sf '${input}' 'input.bam' &&
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
14 ln -sf '$input.metadata.bam_index' 'input.bam.bai' &&
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
15 #set $input_file = 'input.bam'
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
16 #end if
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
17
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
18 umi_tools dedup
11
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
19 '$output_stats_bool'
0
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
20 --random-seed 0
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
21 --extract-umi-method $extract_umi_method
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
22 #if str($extract_umi_method) != 'read_id':
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
23 --umi-separator '$umi_separator' --umi-tag '$umi_tag'
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
24 #end if
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
25 --method $method --edit-distance-threshold $edit_distance_threshold
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
26 $paired $spliced_is_unique --soft-clip-threshold $soft_clip_threshold
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
27 $read_length $whole_contig --subset $subset $per_contig $per_gene
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
28 #if $gene_transcript_map:
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
29 --gene-transcript-map '$gene_transcript_map'
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
30 #end if
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
31 #if len(str($gene_tag)) > 0:
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
32 --gene-tag '$gene_tag'
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
33 #end if
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
34 #if $input.is_of_type("sam"):
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
35 --in-sam
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
36 #end if
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
37 -I '$input_file' -S deduped.bam &&
9
a289db9d3bbc "planemo upload commit 5d3fc4232e0e036ac1ed9e2c36adc41d6af4987f"
iuc
parents: 0
diff changeset
38 samtools sort deduped.bam -@ \${GALAXY_SLOTS:-1} -T "\${TMPDIR:-.}" -o '$output' -O BAM
0
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
39 ]]></command>
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
40 <inputs>
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
41 <param name="input" type="data" format="sam,bam" label="Reads to deduplicate in SAM or BAM format" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
42 <param name="extract_umi_method" argument="--extract-umi-method" type="select">
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
43 <option value="read_id" selected="True">Read ID</option>
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
44 <option value="tag">Tag</option>
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
45 </param>
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
46 <param name="umi_separator" argument="--umi-separator" type="text" label="Separator between read id and UMI." help="Ignored unless extracting by tag" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
47 <param name="umi_tag" argument="--umi-tag" type="text" label="Tag which contains UMI." />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
48 <param argument="--method" type="select" label="Method used to identify PCR duplicates within reads." help="All methods start by identifying the reads with the same mapping position">
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
49 <option value="unique">Reads group share the exact same UMI</option>
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
50 <option value="percentile">Reads group share the exact same UMI. UMIs with counts less than 1% of the median counts for UMIs at the same position are ignored</option>
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
51 <option value="cluster">Identify clusters based on hamming distance</option>
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
52 <option value="adjacency">Identify clusters based on hamming distance and resolve networks by using the node counts</option>
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
53 <option value="directional">Identify clusters based on distance and counts, restrict network expansion by threshold</option>
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
54 </param>
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
55 <param name="edit_distance_threshold" argument="--edit-distance-threshold" type="integer" value="1" label="Edit distance threshold" help="For the adjacency and cluster methods the threshold for the edit distance to connect two UMIs in the network can be increased. The default value of 1 works best unless the UMI is very long (&gt;14bp)" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
56 <param argument="--paired" type="boolean" truevalue="--paired" falsevalue="" label="BAM is paired end" help="This will also force the use of the template length to determine reads with the same mapping coordinates." />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
57 <param name="spliced_is_unique" argument="--spliced-is-unique" type="boolean" truevalue="--spliced-is-unique" falsevalue="" label="Spliced reads are unique" help="Causes two reads that start in the same position on the same strand and having the same UMI to be considered unique if one is spliced and the other is not. (Uses the 'N' cigar operation to test for splicing)" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
58 <param name="soft_clip_threshold" argument="--soft-clip-threshold" type="integer" value="4" label="Soft clip threshold" help="Mappers that soft clip, will sometimes do so rather than mapping a spliced read if there is only a small overhang over the exon junction. By setting this option, you can treat reads with at least this many bases soft-clipped at the 3' end as spliced." />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
59 <param name="read_length" argument="--read-length" type="boolean" truevalue="--read-length" falsevalue="" label="Use the read length as a criterion when deduping" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
60 <param name="whole_contig" argument="--whole-contig" type="boolean" truevalue="--whole-contig" falsevalue="" label="Consider all alignments to a single contig together" help="This is useful if you have aligned to a transcriptome multi-fasta" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
61 <param argument="--subset" type="float" min="0.0" max="1.0" value="1.0" label="Only consider a random selection of the reads" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
62 <param argument="--chrom" type="boolean" truevalue="--chrom" falsevalue="" label="Only consider a single chromosome" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
63 <param name="per_contig" argument="--per-contig" type="boolean" truevalue="--per-contig" falsevalue="" label="Deduplicate per contig" help="Field 3 in BAM; RNAME. All reads with the same contig will be considered to have the same alignment position. This is useful if your library prep generates PCR duplicates with non identical alignment positions such as CEL-Seq. In this case, you would align to a reference transcriptome with one transcript per gene" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
64 <param name="per_gene" argument="--per-gene" type="boolean" truevalue="--per-gene" falsevalue="" label="Deduplicate per gene" help="As above except with this option you can align to a reference transcriptome with more than one transcript per gene. You need to also provide a map of genes to transcripts. This will also add a metacontig ('MC') tag to the output BAM file." />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
65 <param name="gene_transcript_map" argument="--gene-transcript-map" type="data" format="tabular" optional="True" label="Tabular file mapping genes to transcripts" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
66 <param name="gene_tag" argument="--gene-tag" type="text" optional="True" label="Deduplicate by this gene tag" help="As --per-gene except here the gene information is encoded in the bam read tag specified so you do not need to supply the mapping file." />
11
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
67 <param name="output_stats_bool" type="boolean" truevalue="--output-stats=stats_outputs" falsevalue="" checked="false" label="Output UMI related statistics files?"/>
0
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
68 </inputs>
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
69 <outputs>
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
70 <data format="bam" name="output" />
11
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
71 <collection name="output_stats" type="list" label="UMI_tools dedup stats">
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
72 <filter>output_stats_bool</filter>
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
73 <data name="edit_distance" format="tabular" from_work_dir="stats_outputs_edit_distance.tsv"/>
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
74 <data name="per_umi" format="tabular" from_work_dir="stats_outputs_per_umi.tsv"/>
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
75 <data name="per_umi_per_position" format="tabular" from_work_dir="stats_outputs_per_umi_per_position.tsv"/>
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
76 </collection>
0
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
77 </outputs>
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
78 <tests>
11
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
79 <test expect_num_outputs="1">
0
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
80 <param name="input" value="group_in1.sam" ftype="sam" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
81 <param name="extract_umi_method" value="read_id" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
82 <param name="method" value="unique" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
83 <output name="output" file="dedup_out1.bam" ftype="bam" sort="True"/>
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
84 </test>
11
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
85 <test expect_num_outputs="1">
0
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
86 <param name="input" value="group_in2.bam" ftype="bam" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
87 <param name="extract_umi_method" value="read_id" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
88 <param name="paired" value="True" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
89 <param name="method" value="unique" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
90 <output name="output" file="dedup_out2.bam" ftype="bam" sort="True" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
91 </test>
11
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
92 <test expect_num_outputs="1">
0
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
93 <param name="input" value="group_in3.bam" ftype="bam" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
94 <param name="extract_umi_method" value="read_id" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
95 <param name="method" value="unique" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
96 <output name="output" file="dedup_out3.bam" ftype="bam" sort="True" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
97 </test>
11
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
98 <test expect_num_outputs="1">
0
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
99 <param name="input" value="group_in4.bam" ftype="bam" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
100 <param name="extract_umi_method" value="tag" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
101 <param name="umi_tag" value="BX" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
102 <param name="method" value="unique" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
103 <output name="output" file="dedup_out4.bam" ftype="bam" sort="True" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
104 </test>
11
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
105 <test expect_num_outputs="1">
0
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
106 <param name="input" value="group_in5.bam" ftype="bam" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
107 <param name="extract_umi_method" value="read_id" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
108 <param name="umi_tag" value="BX" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
109 <param name="method" value="cluster" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
110 <output name="output" file="dedup_out5.bam" ftype="bam" sort="True" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
111 </test>
11
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
112 <test expect_num_outputs="1">
0
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
113 <param name="input" value="group_in6.bam" ftype="bam" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
114 <param name="extract_umi_method" value="read_id" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
115 <param name="umi_tag" value="BX" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
116 <param name="method" value="directional" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
117 <output name="output" file="dedup_out6.bam" ftype="bam" sort="True" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
118 </test>
11
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
119 <test expect_num_outputs="5">
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
120 <param name="input" value="group_in6.bam" ftype="bam" />
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
121 <param name="extract_umi_method" value="read_id" />
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
122 <param name="umi_tag" value="BX" />
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
123 <param name="method" value="directional" />
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
124 <param name="output_stats_bool" value="true"/>
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
125 <output name="output" file="dedup_out6.bam" ftype="bam" sort="True" />
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
126 <output_collection name="output_stats">
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
127 <element name="edit_distance" file="stats_outputs_edit_distance.tsv" />
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
128 <element name="per_umi" file="stats_outputs_per_umi.tsv" />
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
129 <element name="per_umi_per_position" file="stats_outputs_per_umi_per_position.tsv" />
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
130 </output_collection>
cf4494361a56 "planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents: 10
diff changeset
131 </test>
0
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
132 </tests>
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
133 <help><![CDATA[
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
134 umi_tools dedup - Deduplicate reads based on their UMI
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
135 ======================================================
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
136
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
137 Purpose
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
138 -------
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
139
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
140 The purpose of this command is to deduplicate BAM files based on the first
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
141 mapping co-ordinate and the UMI attached to the read. It is assumed that the
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
142 FASTQ files were processed with extract_umi.py before mapping and thus the UMI
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
143 is the last word of the read name. e.g:
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
144
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
145 @HISEQ:87:00000000_AATT
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
146
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
147 where AATT is the UMI sequence.
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
148
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
149 If you have used an alternative method which does not separate the
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
150 read id and UMI with a "_", such as bcl2fastq which uses ":", you can
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
151 specify the separator with the option "--umi-separator=<sep>",
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
152 replacing <sep> with e.g ":".
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
153
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
154 Alternatively, if your UMIs are encoded in a tag, you can specify this
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
155 by setting the option --extract-umi-method=tag and set the tag name
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
156 with the --umi-tag option. For example, if your UMIs are encoded in
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
157 the 'UM' tag, provide the following options:
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
158 "--extract-umi-method=tag --umi-tag=UM"
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
159
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
160 The start position of a read is considered to be the start of its alignment
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
161 minus any soft clipped bases. A read aligned at position 500 with
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
162 cigar 2S98M will be assumed to start at position 498.
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
163
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
164
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
165 Methods
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
166 -------
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
167
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
168 dedup can be run with multiple methods to identify groups of reads with
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
169 the same (or similar) UMI(s). All methods start by identifying the
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
170 reads with the same mapping position.
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
171
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
172 The simplest method, "unique", groups reads with the exact same
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
173 UMI. The network-based methods, "cluster", "adjacency" and
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
174 "directional", build networks where nodes are UMIs and edges connect
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
175 UMIs with an edit distance <= threshold (usually 1). The groups of
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
176 reads are then defined from the network in a method-specific manner.
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
177
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
178 "unique"
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
179 Reads group share the exact same UMI
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
180
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
181 "percentile"
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
182 Reads group share the exact same UMI. UMIs with counts < 1% of the
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
183 median counts for UMIs at the same position are ignored.
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
184
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
185 "cluster"
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
186 Identify clusters of connected UMIs (based on hamming distance
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
187 threshold). Each network is a read group
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
188
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
189 "adjacency"
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
190 Cluster UMIs as above. For each cluster, select the node(UMI)
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
191 with the highest counts. Visit all nodes one edge away. If all
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
192 nodes have been visited, stop. Otherwise, repeat with remaining
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
193 nodes until all nodes have been visited. Each step
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
194 defines a read group.
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
195
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
196 "directional" (default)
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
197 Identify clusters of connected UMIs (based on hamming distance
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
198 threshold) and umi A counts >= (2* umi B counts) - 1. Each
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
199 network is a read group.
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
200
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
201 Options
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
202 -------
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
203
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
204 --extract-umi-method (choice)
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
205 How are the UMIs encoded in the read?
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
206
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
207 Options are:
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
208
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
209 - "read_id" (default)
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
210 UMIs contained at the end of the read separated as
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
211 specified with --umi-separator option
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
212
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
213 - "tag"
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
214 UMIs contained in a tag, see --umi-tag option
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
215
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
216 --umi-separator (string)
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
217 Separator between read id and UMI. See --extract-umi-method above
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
218
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
219 --umi-tag (string)
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
220 Tag which contains UMI. See --extract-umi-method above
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
221
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
222 --edit-distance-threshold (int)
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
223 For the adjacency and cluster methods the threshold for the
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
224 edit distance to connect two UMIs in the network can be
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
225 increased. The default value of 1 works best unless the UMI is
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
226 very long (>14bp)
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
227
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
228 --paired
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
229 BAM is paired end - output both read pairs. This will also
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
230 force the use of the template length to determine reads with
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
231 the same mapping coordinates.
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
232
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
233 --spliced-is-unique
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
234 Causes two reads that start in the same position on the same
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
235 strand and having the same UMI to be considered unique if one is
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
236 spliced and the other is not. (Uses the 'N' cigar operation to test
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
237 for splicing)
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
238
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
239 --soft-clip-threshold (int)
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
240 Mappers that soft clip will sometimes do so rather than mapping a
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
241 spliced read if there is only a small overhang over the exon
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
242 junction. By setting this option, you can treat reads with at least
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
243 this many bases soft-clipped at the 3' end as spliced.
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
244
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
245 --multimapping-detection-method (string, choice)
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
246 If the sam/bam contains tags to identify multimapping reads, you can
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
247 specify one for use when selecting the best read at a given locus.
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
248 Supported tags are "NH", "X0" and "XT". If not specified, the read
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
249 with the highest mapping quality will be selected
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
250
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
251 --read-length
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
252 Use the read length as a criterion when deduping, e.g. for sRNA-Seq
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
253
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
254 --whole-contig
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
255 Consider all alignments to a single contig together. This is useful if
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
256 you have aligned to a transcriptome multi-fasta
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
257
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
258 --subset (float, [0-1])
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
259 Only consider a fraction of the reads, chosen at random. This is useful
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
260 for doing saturation analyses.
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
261
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
262 --chrom
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
263 Only consider a single chromosome. This is useful for debugging purposes
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
264
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
265 --per-contig (string)
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
266 Deduplicate per contig (field 3 in BAM; RNAME).
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
267 All reads with the same contig will be
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
268 considered to have the same alignment position. This is useful
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
269 if your library prep generates PCR duplicates with non identical
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
270 alignment positions such as CEL-Seq. In this case, you would
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
271 align to a reference transcriptome with one transcript per gene
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
272
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
273 --per-gene (string)
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
274 Deduplicate per gene. As above except with this option you can
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
275 align to a reference transcriptome with more than one transcript
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
276 per gene. You need to also provide --gene-transcript-map option.
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
277 This will also add a metacontig ('MC') tag to the reads if used
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
278 in conjunction with --output-bam
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
279
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
280 --gene-transcript-map (string)
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
281 File mapping genes to transcripts (tab separated), e.g.:
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
282
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
283 gene1 transcript1
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
284 gene1 transcript2
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
285 gene2 transcript3
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
286
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
287 --gene-tag (string)
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
288 Deduplicate per gene. As per --per-gene except here the gene
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
289 information is encoded in the bam read tag specified so you do
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
290 not need to supply --gene-transcript-map
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
291
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
292 --output-bam (string, filename)
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
293 Output a tagged bam file to stdout or -S <filename>
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
294
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
295 -i, --in-sam/-o, --out-sam
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
296 By default, inputs are assumed to be in BAM format and outputs are written
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
297 in BAM format. Use these options to specify the use of SAM format for
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
298 inputs or outputs.
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
299
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
300 -I (string, filename) input file name
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
301 The input file must be sorted and indexed.
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
302
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
303 -S (string, filename) output file name
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
304
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
305 -L (string, filename) log file name
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
306
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
307 Usage
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
308 -----
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
309 umi_tools dedup -I infile.bam -S deduplicated.bam
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
310
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
311 ]]></help>
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
312 <expand macro="citations" />
ec7b02a30ed3 planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff changeset
313 </tool>