annotate MirParser.py @ 1:101fec3cba04 draft

planemo upload for repository https://bitbucket.org/drosofff/gedtools/
author drosofff
date Thu, 13 Aug 2015 06:16:29 -0400
parents 035df35a257e
children 74394e39ad22
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
1 #!/usr/bin/env python
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
2 # python parser module for pre-mir and mature miRNAs, guided by mirbase.org GFF3
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
3 # version 0.0.9 (1-6-2014)
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
4 # Usage: MirParser.py <1:index source> <2:extraction directive> <3:output pre-mir> <4:output mature miRs> <5:mirbase GFF3>
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
5 # <6:pathToLatticeDataframe or "dummy_dataframe_path"> <7:Rcode or "dummy_plotCode"> <8:latticePDF or "dummy_latticePDF">
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
6 # <9:10:11 filePath:FileExt:FileLabel> <... further filePath:FileExt:FileLabel triplets, ad libitum>
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
7
1
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
8 import sys
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
9 import subprocess
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
10
0
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
11 from smRtools import *
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
12
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
13 IndexSource = sys.argv[1]
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
14 ExtractionDirective = sys.argv[2]
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
15 if ExtractionDirective == "--do_not_extract_index":
1
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
16 genomeRefFormat = "fastaSource"
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
17 elif ExtractionDirective == "--extract_index":
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
18 genomeRefFormat = "bowtieIndex"
0
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
19 OutputPre_mirs = sys.argv[3]
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
20 OutputMature_Mirs = sys.argv[4]
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
21 GFF3_file = sys.argv[5]
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
22 lattice = sys.argv[6]
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
23 Rcode = sys.argv[7]
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
24 latticePDF = sys.argv[8]
1
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
25 Triplets = [sys.argv[9:][i:i + 3] for i in xrange(0, len(sys.argv[9:]), 3)]
0
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
26 MasterListOfGenomes = {}
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
27
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
28 for [filePath, FileExt, FileLabel] in Triplets:
1
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
29 print FileLabel
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
30 MasterListOfGenomes[FileLabel] = HandleSmRNAwindows(alignmentFile=filePath,
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
31 alignmentFileFormat=FileExt,
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
32 genomeRefFile=IndexSource,
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
33 genomeRefFormat=genomeRefFormat,
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
34 biosample=FileLabel)
0
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
35
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
36 header = ["gene"]
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
37 for [filePath, FileExt, FileLabel] in Triplets:
1
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
38 header.append(FileLabel)
0
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
39
1
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
40 hit_table = ["\t".join(header)] # table header: gene, sample1, sample2, sample3, etc. separated by tabulation
0
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
41
1
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
42 # read the GFF3 file line by line and count the reads of each annotated item in every sample
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
43 gff3 = open(GFF3_file, "r")
0
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
44 lattice_dataframe = []
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
45 for line in gff3:
1
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
46 if line[0] == "#":
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
47 continue
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
48 gff_fields = line[:-1].split("\t")
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
49 chrom = gff_fields[0]
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
50 gff_name = gff_fields[-1].split("Name=")[-1].split(";")[0] # to isolate the GFF Name
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
51 item_upstream_coordinate = int(gff_fields[3])
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
52 item_downstream_coordinate = int(gff_fields[4])
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
53 if gff_fields[6] == "+":
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
54 item_polarity = "forward"
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
55 else:
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
56 item_polarity = "reverse"
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
57 item_line = [gff_name]
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
58 for sample in header[1:]:
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
59 count = MasterListOfGenomes[sample].instanceDict[chrom].readcount(upstream_coord=item_upstream_coordinate,
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
60 downstream_coord=item_downstream_coordinate,
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
61 polarity=item_polarity)
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
62 item_line.append(str(count))
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
63 # subtreatment for lattice
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
64 if lattice != "dummy_dataframe_path":
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
65 if ("5p" not in gff_name) and ("3p" not in gff_name):
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
66 lattice_dataframe.append(MasterListOfGenomes[sample].instanceDict[chrom].readcoverage(
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
67 upstream_coord=item_upstream_coordinate,
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
68 downstream_coord=item_downstream_coordinate,
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
69 windowName=gff_name + "_" + sample))
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
70 # end of subtreatment for lattice
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
71 hit_table.append("\t".join(item_line))
0
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
72 gff3.close()
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
73
1
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
74 Fpremirs = open(OutputPre_mirs, "w")
0
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
75 print >> Fpremirs, hit_table[0]
1
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
76 finalPreList = [i for i in sorted(hit_table[1:]) if ("5p" not in i) and ("3p" not in i)]
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
77 print >> Fpremirs, "\n".join(finalPreList)
0
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
78 Fpremirs.close()
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
79
1
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
80 Fmaturemires = open(OutputMature_Mirs, "w")
0
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
81 print >> Fmaturemires, hit_table[0]
1
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
82 finalMatureList = [i for i in sorted(hit_table[1:]) if ("5p" in i) or ("3p" in i)]
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
83 print >> Fmaturemires, "\n".join(finalMatureList)
0
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
84 Fmaturemires.close()
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
85
035df35a257e planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff changeset
86 if lattice != "dummy_dataframe_path":
1
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
87 Flattice = open(lattice, "w")
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
88 print >> Flattice, "%s\t%s\t%s\t%s\t%s\t%s\t%s" % ("sample",
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
89 "mir",
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
90 "offset",
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
91 "offsetNorm",
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
92 "counts",
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
93 "countsNorm",
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
94 "polarity")
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
95 print >> Flattice, "\n".join(lattice_dataframe)
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
96 Flattice.close()
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
97 R_command = "Rscript " + Rcode
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
98 process = subprocess.Popen(R_command.split())
101fec3cba04 planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents: 0
diff changeset
99 process.wait()