comparison mismatch_frequencies.xml @ 12:848d799e6fe8

Uploaded
author mvdbeek
date Tue, 31 Mar 2015 09:43:20 -0400
parents
children 7c5b252bf6af
comparison
equal deleted inserted replaced
11:f69b6df76e0e 12:848d799e6fe8
<!--
  Galaxy tool wrapper around mismatch_frequencies.py.
  Takes one or more SAM/BAM alignment files and produces a tabular file and a
  PDF with per-mismatch frequencies (see the help section for details).
-->
<tool id="mismatch_frequencies" name="Mismatch Frequencies" version="0.0.5" hidden="false">
  <description>Analyze mismatch frequencies in BAM/SAM alignments</description>

  <!-- Python packages the script is declared to depend on. -->
  <requirements>
    <requirement type="package" version="0.8.1">pysam</requirement>
    <requirement type="package" version="0.14.1">pandas</requirement>
    <requirement type="package" version="1.4">matplotlib</requirement>
  </requirements>

  <!--
    Command line template. The "rep" repeat is iterated twice: first to emit
    the path of every alignment file after "input", then to emit the dataset
    name of every file after "name". Both loops walk the repeat in the same
    order, so the n-th name belongs to the n-th path.
  -->
  <command interpreter="python">mismatch_frequencies.py --input
    #for i in $rep
      "$i.input_file"
    #end for
    --name
    #for i in $rep
      "$i.input_file.name"
    #end for
    --output_pdf $output_pdf --output_tab $output_tab --min $min_length --max $max_length
    --n_mm $number_of_mismatches
    --five_p $five_p
    --three_p $three_p
  </command>

  <inputs>
    <!-- At least one alignment file is required (min="1"); more can be added. -->
    <repeat name="rep" title="alignment files" min="1">
      <param name="input_file"
             type="data"
             format="bam,sam"
             label="Alignment file"
             help="The input alignment file(s) for which you want to calculate mismatch frequencies."/>
    </repeat>
    <param name="number_of_mismatches"
           label="Maximum number of allowed mismatches per read"
           help="Discard reads with more than the chosen number of mismatches from the frequency calculation"
           type="integer"
           value="3"/>
    <!-- min_length and max_length both default to 21, i.e. only 21-nt reads are analysed by default. -->
    <param name="min_length" label="Minimum read length to analyse" type="integer" value="21"/>
    <param name="max_length" label="Maximum read length to analyse" type="integer" value="21"/>
    <param name="five_p"
           label="Ignore mismatches in the first N nucleotides of a read"
           type="integer"
           value="0"/>
    <param name="three_p"
           label="Ignore mismatches in the last N nucleotides of a read"
           help="useful to discriminate between tailing events and editing events"
           type="integer"
           value="3"/>
  </inputs>

  <outputs>
    <data format="tabular" name="output_tab" />
    <data format="pdf" name="output_pdf" />
  </outputs>

  <tests>
    <!--
      Two BAM inputs, at most one mismatch, 21-nt reads only; five_p and
      three_p are left at their defaults. Only the tabular output is compared.
    -->
    <test>
      <param name="rep_0|input_file" value="3mismatches_ago2ip_s2.bam" ftype="bam" />
      <param name="rep_1|input_file" value="3mismatches_ago2ip_ovary.bam" ftype="bam" />
      <param name="number_of_mismatches" value="1" />
      <param name="min_length" value="21" />
      <param name="max_length" value="21" />
      <!-- The name must match the <data> output declared above ("output_tab"). -->
      <output name="output_tab" file="mismatch.tab" ftype="tabular"/>
    </test>
  </tests>

  <help>

.. class:: infomark


**What it does**

This tool reconstitutes for each aligned read of an alignment file in SAM/BAM format whether
a mismatch is annotated in the MD tag, and if that is the case counts the identity of the
mismatch relative to the reference sequence. The output is a PDF document with the calculated
frequency for each mismatch that occurred relative to the total number of valid reads and a table
with the corresponding values. Read length can be limited to a specific read length, and 5 prime and
3 prime-most nucleotides of a read can be ignored.

----

.. class:: warningmark

**Warning**

This tool skips all reads that have insertions and has been tested only with bowtie and bowtie2
generated alignment files.

Written by Marius van den Beek, m.vandenbeek at gmail . com
  </help>
</tool>