Mercurial > repos > drosofff > msp_sr_signature
annotate signature.py @ 13:7b1f4bc21749 draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_signature commit a0ce81ec2c12e7f635b1a1056772ec306c4fb364
author | drosofff |
---|---|
date | Wed, 07 Jun 2017 18:14:58 -0400 |
parents | b1a15b5a3f1b |
children |
rev | line source |
---|---|
0 | 1 #!/usr/bin/python |
2 # script for computing overlap signatures from a bowtie output | |
3 # Christophe Antoniewski <drosofff@gmail.com> | |
4 # Usage signature.py <1:input> <2:format of input> <3:minsize query> <4:maxsize query> <5:minsize target> <6:maxsize target> | |
11
b1a15b5a3f1b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_signature commit 062e78aef14c4655d1b32d5f29ca543a50389b08
drosofff
parents:
2
diff
changeset
|
5 # <7:minscope> <8:maxscope> <9:output> <10:bowtie index> <11:procedure option> <12: graph (global or lattice)> |
b1a15b5a3f1b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_signature commit 062e78aef14c4655d1b32d5f29ca543a50389b08
drosofff
parents:
2
diff
changeset
|
6 # <13: R code> |
0 | 7 # version 2.0.0 |
8 | |
2 | 9 import subprocess |
10 import argparse | |
11
b1a15b5a3f1b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_signature commit 062e78aef14c4655d1b32d5f29ca543a50389b08
drosofff
parents:
2
diff
changeset
|
11 from smRtools import HandleSmRNAwindows |
2 | 12 |
0 | 13 |
14 def Parser(): | |
2 | 15 the_parser = argparse.ArgumentParser() |
16 the_parser.add_argument( | |
17 '--input', action="store", type=str, help="input alignment file") | |
18 the_parser.add_argument('--inputFormat', action="store", type=str, choices=[ | |
19 "tabular", "bam", "sam"], help="format of alignment file (tabular/bam/sam)") | |
20 the_parser.add_argument( | |
21 '--minquery', type=int, help="Minimum readsize of query reads (nt) - must be an integer") | |
22 the_parser.add_argument( | |
23 '--maxquery', type=int, help="Maximum readsize of query reads (nt) - must be an integer") | |
24 the_parser.add_argument( | |
25 '--mintarget', type=int, help="Minimum readsize of target reads (nt) - must be an integer") | |
26 the_parser.add_argument( | |
27 '--maxtarget', type=int, help="Maximum readsize of target reads (nt) - must be an integer") | |
28 the_parser.add_argument( | |
29 '--minscope', type=int, help="Minimum overlap analyzed (nt) - must be an integer") | |
30 the_parser.add_argument( | |
31 '--maxscope', type=int, help="Maximum overlap analyzed (nt) - must be an integer") | |
32 the_parser.add_argument( | |
33 '--outputOverlapDataframe', action="store", type=str, help="Overlap dataframe") | |
34 the_parser.add_argument('--referenceGenome', action='store', | |
35 help="path to the bowtie-indexed or fasta reference") | |
36 the_parser.add_argument('--extract_index', action='store_true', | |
37 help="specify if the reference is an indexed Bowtie reference") | |
38 the_parser.add_argument('--graph', action='store', choices=[ | |
39 "global", "lattice"], help="small RNA signature is computed either globally or by item (global-lattice)") | |
40 the_parser.add_argument( | |
41 '--rcode', type=str, help="R code to be passed to the python script") | |
42 args = the_parser.parse_args() | |
43 return args | |
0 | 44 |
45 args = Parser() | |
46 | |
47 if args.extract_index: | |
2 | 48 GenomeFormat = "bowtieIndex" |
0 | 49 else: |
2 | 50 GenomeFormat = "fastaSource" |
0 | 51 |
52 if args.inputFormat == "tabular": | |
2 | 53 Genome = HandleSmRNAwindows( |
54 args.input, args.inputFormat, args.referenceGenome, GenomeFormat) | |
0 | 55 elif args.inputFormat == "sam": |
2 | 56 Genome = HandleSmRNAwindows( |
57 args.input, args.inputFormat, args.referenceGenome, GenomeFormat) | |
0 | 58 else: |
2 | 59 Genome = HandleSmRNAwindows( |
60 args.input, args.inputFormat, args.referenceGenome, GenomeFormat) | |
0 | 61 |
62 # replace objDic by Genome.instanceDict or... objDic = Genome.instanceDict | |
63 objDic = Genome.instanceDict | |
64 | |
65 args.maxscope += 1 | |
66 | |
2 | 67 general_frequency_table = dict( |
68 [(i, 0) for i in range(args.minscope, args.maxscope)]) | |
69 general_percent_table = dict( | |
70 [(i, 0) for i in range(args.minscope, args.maxscope)]) | |
71 OUT = open(args.outputOverlapDataframe, "w") | |
0 | 72 |
73 if args.graph == "global": | |
2 | 74 # for normalized summing of local_percent_table(s) |
75 readcount_dic = {} | |
76 Total_read_in_objDic = 0 | |
77 for item in objDic: | |
78 readcount_dic[item] = objDic[item].readcount( | |
79 args.minquery, args.maxquery) | |
80 Total_read_in_objDic += readcount_dic[item] | |
81 ###### | |
82 for x in (objDic): | |
83 local_frequency_table = objDic[x].signature( | |
84 args.minquery, args.maxquery, args.mintarget, args.maxtarget, range(args.minscope, args.maxscope)) | |
85 local_percent_table = objDic[x].hannon_signature( | |
86 args.minquery, args.maxquery, args.mintarget, args.maxtarget, range(args.minscope, args.maxscope)) | |
87 try: | |
88 for overlap in local_frequency_table.keys(): | |
89 general_frequency_table[overlap] = general_frequency_table.get( | |
90 overlap, 0) + local_frequency_table[overlap] | |
91 except: | |
92 pass | |
93 try: | |
94 for overlap in local_percent_table.keys(): | |
95 general_percent_table[overlap] = general_percent_table.get( | |
96 overlap, 0) + (1. / Total_read_in_objDic * readcount_dic[x] * local_percent_table[overlap]) | |
97 except: | |
98 pass | |
99 print >> OUT, "overlap\tnum of pairs\tprobability" | |
100 for classe in sorted(general_frequency_table): | |
101 print >> OUT, "%i\t%i\t%f" % ( | |
102 classe, general_frequency_table[classe], general_percent_table[classe]) | |
0 | 103 |
104 else: | |
2 | 105 print >> OUT, "overlap\tnum of pairs\tprobability\titem" |
106 for x in (objDic): | |
107 local_frequency_table = objDic[x].signature( | |
108 args.minquery, args.maxquery, args.mintarget, args.maxtarget, range(args.minscope, args.maxscope)) | |
109 local_percent_table = objDic[x].hannon_signature( | |
110 args.minquery, args.maxquery, args.mintarget, args.maxtarget, range(args.minscope, args.maxscope)) | |
111 for classe in range(args.minscope, args.maxscope): | |
112 print >> OUT, "%i\t%i\t%f\t%s" % ( | |
113 classe, local_frequency_table[classe], local_percent_table[classe], x) | |
0 | 114 |
115 OUT.close() | |
116 | |
2 | 117 # Run the R script that is defined in the xml using the Rscript binary |
118 # provided with R. | |
119 R_command = "Rscript " + args.rcode | |
0 | 120 process = subprocess.Popen(R_command.split()) |