Mercurial > repos > iuc > umi_tools_dedup
annotate umi-tools_dedup.xml @ 11:cf4494361a56 draft
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
| author | iuc |
|---|---|
| date | Wed, 10 Feb 2021 19:26:42 +0000 |
| parents | 0ac9b15f11c2 |
| children | 083c516d19a9 |
| rev | line source |
|---|---|
|
11
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
1 <tool id="umi_tools_dedup" name="UMI-tools deduplicate" version="@VERSION@+galaxy1"> |
|
0
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
2 <description>Deduplicate reads based on their UMI</description> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
3 <macros> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
4 <import>macros.xml</import> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
5 </macros> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
6 <expand macro="requirements"> |
|
10
0ac9b15f11c2
"planemo upload commit 6ba769440f8f6a62e9ebfac069a30edc541bac0a"
iuc
parents:
9
diff
changeset
|
7 <requirement type="package" version="1.9">samtools</requirement> |
|
0
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
8 </expand> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
9 <command detect_errors="exit_code"><![CDATA[ |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
10 #if $input.is_of_type("sam"): |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
11 #set $input_file = $input |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
12 #else: |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
13 ln -sf '${input}' 'input.bam' && |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
14 ln -sf '$input.metadata.bam_index' 'input.bam.bai' && |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
15 #set $input_file = 'input.bam' |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
16 #end if |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
17 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
18 umi_tools dedup |
|
11
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
19 '$output_stats_bool' |
|
0
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
20 --random-seed 0 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
21 --extract-umi-method $extract_umi_method |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
22 #if str($extract_umi_method) != 'read_id': |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
23 --umi-separator '$umi_separator' --umi-tag '$umi_tag' |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
24 #end if |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
25 --method $method --edit-distance-threshold $edit_distance_threshold |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
26 $paired $spliced_is_unique --soft-clip-threshold $soft_clip_threshold |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
27 $read_length $whole_contig --subset $subset $per_contig $per_gene |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
28 #if $gene_transcript_map: |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
29 --gene-transcript-map '$gene_transcript_map' |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
30 #end if |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
31 #if len(str($gene_tag)) > 0: |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
32 --gene-tag '$gene_tag' |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
33 #end if |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
34 #if $input.is_of_type("sam"): |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
35 --in-sam |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
36 #end if |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
37 -I '$input_file' -S deduped.bam && |
|
9
a289db9d3bbc
"planemo upload commit 5d3fc4232e0e036ac1ed9e2c36adc41d6af4987f"
iuc
parents:
0
diff
changeset
|
38 samtools sort deduped.bam -@ \${GALAXY_SLOTS:-1} -T "\${TMPDIR:-.}" -o '$output' -O BAM |
|
0
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
39 ]]></command> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
40 <inputs> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
41 <param name="input" type="data" format="sam,bam" label="Reads to deduplicate in SAM or BAM format" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
42 <param name="extract_umi_method" argument="--extract-umi-method" type="select"> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
43 <option value="read_id" selected="True">Read ID</option> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
44 <option value="tag">Tag</option> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
45 </param> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
46 <param name="umi_separator" argument="--umi-separator" type="text" label="Separator between read id and UMI." help="Ignored unless extracting by tag" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
47 <param name="umi_tag" argument="--umi-tag" type="text" label="Tag which contains UMI." /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
48 <param argument="--method" type="select" label="Method used to identify PCR duplicates within reads." help="All methods start by identifying the reads with the same mapping position"> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
49 <option value="unique">Reads group share the exact same UMI</option> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
50 <option value="percentile">Reads group share the exact same UMI. UMIs with counts less than 1% of the median counts for UMIs at the same position are ignored</option> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
51 <option value="cluster">Identify clusters based on hamming distance</option> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
52 <option value="adjacency">Identify clusters based on hamming distance and resolve networks by using the node counts</option> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
53 <option value="directional">Identify clusters based on distance and counts, restrict network expansion by threshold</option> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
54 </param> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
55 <param name="edit_distance_threshold" argument="--edit-distance-threshold" type="integer" value="1" label="Edit distance threshold" help="For the adjacency and cluster methods the threshold for the edit distance to connect two UMIs in the network can be increased. The default value of 1 works best unless the UMI is very long (>14bp)" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
56 <param argument="--paired" type="boolean" truevalue="--paired" falsevalue="" label="BAM is paired end" help="This will also force the use of the template length to determine reads with the same mapping coordinates." /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
57 <param name="spliced_is_unique" argument="--spliced-is-unique" type="boolean" truevalue="--spliced-is-unique" falsevalue="" label="Spliced reads are unique" help="Causes two reads that start in the same position on the same strand and having the same UMI to be considered unique if one is spliced and the other is not. (Uses the 'N' cigar operation to test for splicing)" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
58 <param name="soft_clip_threshold" argument="--soft-clip-threshold" type="integer" value="4" label="Soft clip threshold" help="Mappers that soft clip, will sometimes do so rather than mapping a spliced read if there is only a small overhang over the exon junction. By setting this option, you can treat reads with at least this many bases soft-clipped at the 3' end as spliced." /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
59 <param name="read_length" argument="--read-length" type="boolean" truevalue="--read-length" falsevalue="" label="Use the read length as a criterion when deduping" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
60 <param name="whole_contig" argument="--whole-contig" type="boolean" truevalue="--whole-contig" falsevalue="" label="Consider all alignments to a single contig together" help="This is useful if you have aligned to a transcriptome multi-fasta" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
61 <param argument="--subset" type="float" min="0.0" max="1.0" value="1.0" label="Only consider a random selection of the reads" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
62 <param argument="--chrom" type="boolean" truevalue="--chrom" falsevalue="" label="Only consider a single chromosome" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
63 <param name="per_contig" argument="--per-contig" type="boolean" truevalue="--per-contig" falsevalue="" label="Deduplicate per contig" help="Field 3 in BAM; RNAME. All reads with the same contig will be considered to have the same alignment position. This is useful if your library prep generates PCR duplicates with non identical alignment positions such as CEL-Seq. In this case, you would align to a reference transcriptome with one transcript per gene" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
64 <param name="per_gene" argument="--per-gene" type="boolean" truevalue="--per-gene" falsevalue="" label="Deduplicate per gene" help="As above except with this option you can align to a reference transcriptome with more than one transcript per gene. You need to also provide a map of genes to transcripts. This will also add a metacontig ('MC') tag to the output BAM file." /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
65 <param name="gene_transcript_map" argument="--gene-transcript-map" type="data" format="tabular" optional="True" label="Tabular file mapping genes to transcripts" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
66 <param name="gene_tag" argument="--gene-tag" type="text" optional="True" label="Deduplicate by this gene tag" help="As --per-gene except here the gene information is encoded in the bam read tag specified so you do not need to supply the mapping file." /> |
|
11
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
67 <param name="output_stats_bool" type="boolean" truevalue="--output-stats=stats_outputs" falsevalue="" checked="false" label="Output UMI related statistics files?"/> |
|
0
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
68 </inputs> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
69 <outputs> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
70 <data format="bam" name="output" /> |
|
11
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
71 <collection name="output_stats" type="list" label="UMI_tools dedup stats"> |
|
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
72 <filter>output_stats_bool</filter> |
|
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
73 <data name="edit_distance" format="tabular" from_work_dir="stats_outputs_edit_distance.tsv"/> |
|
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
74 <data name="per_umi" format="tabular" from_work_dir="stats_outputs_per_umi.tsv"/> |
|
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
75 <data name="per_umi_per_position" format="tabular" from_work_dir="stats_outputs_per_umi_per_position.tsv"/> |
|
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
76 </collection> |
|
0
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
77 </outputs> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
78 <tests> |
|
11
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
79 <test expect_num_outputs="1"> |
|
0
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
80 <param name="input" value="group_in1.sam" ftype="sam" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
81 <param name="extract_umi_method" value="read_id" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
82 <param name="method" value="unique" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
83 <output name="output" file="dedup_out1.bam" ftype="bam" sort="True"/> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
84 </test> |
|
11
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
85 <test expect_num_outputs="1"> |
|
0
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
86 <param name="input" value="group_in2.bam" ftype="bam" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
87 <param name="extract_umi_method" value="read_id" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
88 <param name="paired" value="True" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
89 <param name="method" value="unique" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
90 <output name="output" file="dedup_out2.bam" ftype="bam" sort="True" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
91 </test> |
|
11
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
92 <test expect_num_outputs="1"> |
|
0
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
93 <param name="input" value="group_in3.bam" ftype="bam" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
94 <param name="extract_umi_method" value="read_id" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
95 <param name="method" value="unique" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
96 <output name="output" file="dedup_out3.bam" ftype="bam" sort="True" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
97 </test> |
|
11
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
98 <test expect_num_outputs="1"> |
|
0
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
99 <param name="input" value="group_in4.bam" ftype="bam" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
100 <param name="extract_umi_method" value="tag" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
101 <param name="umi_tag" value="BX" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
102 <param name="method" value="unique" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
103 <output name="output" file="dedup_out4.bam" ftype="bam" sort="True" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
104 </test> |
|
11
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
105 <test expect_num_outputs="1"> |
|
0
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
106 <param name="input" value="group_in5.bam" ftype="bam" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
107 <param name="extract_umi_method" value="read_id" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
108 <param name="umi_tag" value="BX" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
109 <param name="method" value="cluster" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
110 <output name="output" file="dedup_out5.bam" ftype="bam" sort="True" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
111 </test> |
|
11
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
112 <test expect_num_outputs="1"> |
|
0
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
113 <param name="input" value="group_in6.bam" ftype="bam" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
114 <param name="extract_umi_method" value="read_id" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
115 <param name="umi_tag" value="BX" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
116 <param name="method" value="directional" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
117 <output name="output" file="dedup_out6.bam" ftype="bam" sort="True" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
118 </test> |
|
11
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
119 <test expect_num_outputs="5"> |
|
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
120 <param name="input" value="group_in6.bam" ftype="bam" /> |
|
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
121 <param name="extract_umi_method" value="read_id" /> |
|
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
122 <param name="umi_tag" value="BX" /> |
|
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
123 <param name="method" value="directional" /> |
|
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
124 <param name="output_stats_bool" value="true"/> |
|
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
125 <output name="output" file="dedup_out6.bam" ftype="bam" sort="True" /> |
|
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
126 <output_collection name="output_stats"> |
|
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
127 <element name="edit_distance" file="stats_outputs_edit_distance.tsv" /> |
|
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
128 <element name="per_umi" file="stats_outputs_per_umi.tsv" /> |
|
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
129 <element name="per_umi_per_position" file="stats_outputs_per_umi_per_position.tsv" /> |
|
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
130 </output_collection> |
|
cf4494361a56
"planemo upload commit 2da1197aac6a18df9252e5da096645d2ecaece88"
iuc
parents:
10
diff
changeset
|
131 </test> |
|
0
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
132 </tests> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
133 <help><![CDATA[ |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
134 umi_tools dedup - Deduplicate reads based on their UMI |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
135 ====================================================== |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
136 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
137 Purpose |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
138 ------- |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
139 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
140 The purpose of this command is to deduplicate BAM files based on the first |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
141 mapping co-ordinate and the UMI attached to the read. It is assumed that the |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
142 FASTQ files were processed with extract_umi.py before mapping and thus the UMI |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
143 is the last word of the read name. e.g: |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
144 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
145 @HISEQ:87:00000000_AATT |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
146 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
147 where AATT is the UMI sequence. |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
148 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
149 If you have used an alternative method which does not separate the |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
150 read id and UMI with a "_", such as bcl2fastq which uses ":", you can |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
151 specify the separator with the option "--umi-separator=<sep>", |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
152 replacing <sep> with e.g ":". |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
153 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
154 Alternatively, if your UMIs are encoded in a tag, you can specify this |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
155 by setting the option --extract-umi-method=tag and set the tag name |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
156 with the --umi-tag option. For example, if your UMIs are encoded in |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
157 the 'UM' tag, provide the following options: |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
158 "--extract-umi-method=tag --umi-tag=UM" |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
159 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
160 The start position of a read is considered to be the start of its alignment |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
161 minus any soft clipped bases. A read aligned at position 500 with |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
162 cigar 2S98M will be assumed to start at position 498. |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
163 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
164 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
165 Methods |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
166 ------- |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
167 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
168 dedup can be run with multiple methods to identify groups of reads with |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
169 the same (or similar) UMI(s). All methods start by identifying the |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
170 reads with the same mapping position. |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
171 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
172 The simplest method, "unique", groups reads with the exact same |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
173 UMI. The network-based methods, "cluster", "adjacency" and |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
174 "directional", build networks where nodes are UMIs and edges connect |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
175 UMIs with an edit distance <= threshold (usually 1). The groups of |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
176 reads are then defined from the network in a method-specific manner. |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
177 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
178 "unique" |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
179 Reads group share the exact same UMI |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
180 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
181 "percentile" |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
182 Reads group share the exact same UMI. UMIs with counts < 1% of the |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
183 median counts for UMIs at the same position are ignored. |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
184 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
185 "cluster" |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
186 Identify clusters of connected UMIs (based on hamming distance |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
187 threshold). Each network is a read group |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
188 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
189 "adjacency" |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
190 Cluster UMIs as above. For each cluster, select the node(UMI) |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
191 with the highest counts. Visit all nodes one edge away. If all |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
192 nodes have been visited, stop. Otherwise, repeat with remaining |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
193 nodes until all nodes have been visited. Each step |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
194 defines a read group. |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
195 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
196 "directional" (default) |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
197 Identify clusters of connected UMIs (based on hamming distance |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
198 threshold) and umi A counts >= (2* umi B counts) - 1. Each |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
199 network is a read group. |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
200 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
201 Options |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
202 ------- |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
203 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
204 --extract-umi-method (choice) |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
205 How are the UMIs encoded in the read? |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
206 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
207 Options are: |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
208 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
209 - "read_id" (default) |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
210 UMIs contained at the end of the read separated as |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
211 specified with --umi-separator option |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
212 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
213 - "tag" |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
214 UMIs contained in a tag, see --umi-tag option |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
215 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
216 --umi-separator (string) |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
217 Separator between read id and UMI. See --extract-umi-method above |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
218 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
219 --umi-tag (string) |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
220 Tag which contains UMI. See --extract-umi-method above |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
221 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
222 --edit-distance-threshold (int) |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
223 For the adjacency and cluster methods the threshold for the |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
224 edit distance to connect two UMIs in the network can be |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
225 increased. The default value of 1 works best unless the UMI is |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
226 very long (>14bp) |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
227 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
228 --paired |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
229 BAM is paired end - output both read pairs. This will also |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
230 force the use of the template length to determine reads with |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
231 the same mapping coordinates. |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
232 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
233 --spliced-is-unique |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
234 Causes two reads that start in the same position on the same |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
235 strand and having the same UMI to be considered unique if one is |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
236 spliced and the other is not. (Uses the 'N' cigar operation to test |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
237 for splicing) |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
238 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
239 --soft-clip-threshold (int) |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
240 Mappers that soft clip will sometimes do so rather than mapping a |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
241 spliced read if there is only a small overhang over the exon |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
242 junction. By setting this option, you can treat reads with at least |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
243 this many bases soft-clipped at the 3' end as spliced. |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
244 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
245 --multimapping-detection-method (string, choice) |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
246 If the sam/bam contains tags to identify multimapping reads, you can |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
247 specify for use when selecting the best read at a given locus. |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
248 Supported tags are "NH", "X0" and "XT". If not specified, the read |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
249 with the highest mapping quality will be selected |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
250 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
251 --read-length |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
252 Use the read length as a criterion when deduping, e.g. for sRNA-Seq |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
253 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
254 --whole-contig |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
255 Consider all alignments to a single contig together. This is useful if |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
256 you have aligned to a transcriptome multi-fasta |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
257 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
258 --subset (float, [0-1]) |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
259 Only consider a fraction of the reads, chosen at random. This is useful |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
260 for doing saturation analyses. |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
261 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
262 --chrom |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
263 Only consider a single chromosome. This is useful for debugging purposes |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
264 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
265 --per-contig (string) |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
266 Deduplicate per contig (field 3 in BAM; RNAME). |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
267 All reads with the same contig will be |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
268 considered to have the same alignment position. This is useful |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
269 if your library prep generates PCR duplicates with non-identical |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
270 alignment positions such as CEL-Seq. In this case, you would |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
271 align to a reference transcriptome with one transcript per gene |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
272 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
273 --per-gene (string) |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
274 Deduplicate per gene. As above except with this option you can |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
275 align to a reference transcriptome with more than one transcript |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
276 per gene. You need to also provide --gene-transcript-map option. |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
277 This will also add a metacontig ('MC') tag to the reads if used |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
278 in conjunction with --output-bam |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
279 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
280 --gene-transcript-map (string) |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
281 File mapping genes to transcripts (tab separated), e.g.: |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
282 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
283 gene1 transcript1 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
284 gene1 transcript2 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
285 gene2 transcript3 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
286 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
287 --gene-tag (string) |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
288 Deduplicate per gene. As per --per-gene except here the gene |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
289 information is encoded in the bam read tag specified so you do |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
290 not need to supply --gene-transcript-map |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
291 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
292 --output-bam (string, filename) |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
293 Output a tagged bam file to stdout or -S <filename> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
294 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
295 -i, --in-sam/-o, --out-sam |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
296 By default, inputs are assumed to be in BAM format and outputs are written |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
297 in BAM format. Use these options to specify the use of SAM format for |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
298 inputs or outputs. |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
299 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
300 -I (string, filename) input file name |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
301 The input file must be sorted and indexed. |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
302 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
303 -S (string, filename) output file name |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
304 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
305 -L (string, filename) log file name |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
306 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
307 Usage |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
308 ----- |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
309 umi_tools dedup -I infile.bam -S grouped.bam -- |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
310 |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
311 ]]></help> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
312 <expand macro="citations" /> |
|
ec7b02a30ed3
planemo upload commit eea727c3bdfe36d9d16036d5ab79fb8b27c4e82e
iuc
parents:
diff
changeset
|
313 </tool> |
