Mercurial > repos > drosofff > pirna_signatures
comparison piRNAsignature.py @ 2:8d4ca527888b draft
Uploaded
author | drosofff |
---|---|
date | Mon, 23 Jun 2014 04:10:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1:d1b99ef50f79 | 2:8d4ca527888b |
---|---|
1 #!/usr/bin/python | |
2 # script for computing overlap signatures from a bowtie output | |
3 # Christophe Antoniewski <drosofff@gmail.com> | |
4 # Usage piRNAsignature.py <1:input> <2:format of input> <3:minsize query> <4:maxsize query> <5:minsize target> <6:maxsize target> | |
5 # <7:minscope> <8:maxscope> <9:output> <10:bowtie index> <11:procedure option> <12: graph (global or lattice)> | |
6 # <13: R code> | |
7 | |
8 import sys, subprocess | |
9 from smRtools import * | |
10 from collections import defaultdict # test whether it is required | |
11 | |
12 if sys.argv[11] == "--extract_index": | |
13 if sys.argv[2] == "tabular": | |
14 Genome = HandleSmRNAwindows (sys.argv[1],"tabular",sys.argv[10],"bowtieIndex") | |
15 elif sys.argv[2] == "sam": | |
16 Genome = HandleSmRNAwindows (sys.argv[1],"sam",sys.argv[10],"bowtieIndex") | |
17 else: | |
18 Genome = HandleSmRNAwindows (sys.argv[1],"bam",sys.argv[10],"bowtieIndex") | |
19 else: | |
20 if sys.argv[2] == "tabular": | |
21 Genome = HandleSmRNAwindows (sys.argv[1],"tabular",sys.argv[10],"fastaSource") | |
22 elif sys.argv[2] == "sam": | |
23 Genome = HandleSmRNAwindows (sys.argv[1],"sam",sys.argv[10],"fastaSource") | |
24 else: | |
25 Genome = HandleSmRNAwindows (sys.argv[1],"bam",sys.argv[10],"fastaSource") | |
26 # this decisional tree may be simplified if sam and bam inputs are treated the same way by pysam | |
27 | |
28 # replace objDic by Genome.instanceDict or... objDic = Genome.instanceDict | |
29 objDic = Genome.instanceDict | |
30 | |
31 minquery = int(sys.argv[3]) | |
32 maxquery = int(sys.argv[4]) | |
33 mintarget = int(sys.argv[5]) | |
34 maxtarget = int(sys.argv[6]) | |
35 minscope = int(sys.argv[7]) | |
36 maxscope = int(sys.argv[8]) + 1 | |
37 general_frequency_table = dict ([(i,0) for i in range(minscope,maxscope)]) | |
38 general_percent_table = dict ([(i,0) for i in range(minscope,maxscope)]) | |
39 OUT = open (sys.argv[9], "w") | |
40 | |
41 if sys.argv[12] == "global": | |
42 ###### for normalized summing of local_percent_table(s) | |
43 readcount_dic = {} | |
44 Total_read_in_objDic = 0 | |
45 for item in objDic: | |
46 readcount_dic[item] = objDic[item].readcount(minquery, maxquery) | |
47 Total_read_in_objDic += readcount_dic[item] | |
48 ###### | |
49 for x in (objDic): | |
50 local_frequency_table = objDic[x].signature( minquery, maxquery, mintarget, maxtarget, range(minscope,maxscope) ) | |
51 local_percent_table = objDic[x].hannon_signature( minquery, maxquery, mintarget, maxtarget, range(minscope,maxscope) ) | |
52 try: | |
53 for overlap in local_frequency_table.keys(): | |
54 general_frequency_table[overlap] = general_frequency_table.get(overlap, 0) + local_frequency_table[overlap] | |
55 except: | |
56 pass | |
57 try: | |
58 for overlap in local_percent_table.keys(): | |
59 general_percent_table[overlap] = general_percent_table.get(overlap, 0) + (1./Total_read_in_objDic*readcount_dic[x]*local_percent_table[overlap]) | |
60 except: | |
61 pass | |
62 print >> OUT, "overlap\tnum of pairs\tprobability" | |
63 for classe in sorted(general_frequency_table): | |
64 print >> OUT, "%i\t%i\t%f" % (classe, general_frequency_table[classe], general_percent_table[classe]) | |
65 | |
66 else: | |
67 print >> OUT, "overlap\tnum of pairs\tprobability\titem" | |
68 for x in (objDic): | |
69 local_frequency_table = objDic[x].signature( minquery, maxquery, mintarget, maxtarget, range(minscope,maxscope) ) | |
70 local_percent_table = objDic[x].hannon_signature( minquery, maxquery, mintarget, maxtarget, range(minscope,maxscope) ) | |
71 for classe in range(minscope,maxscope): | |
72 print >> OUT, "%i\t%i\t%f\t%s" % (classe, local_frequency_table[classe], local_percent_table[classe], x) | |
73 | |
74 OUT.close() | |
75 | |
76 ## Run the R script that is defined in the xml using the Rscript binary provided with R. | |
77 R_command="Rscript "+ sys.argv[13] | |
78 process = subprocess.Popen(R_command.split()) |