mir_parser: MirParser.py comparison

comparison MirParser.py @ 1:101fec3cba04 draft

planemo upload for repository https://bitbucket.org/drosofff/gedtools/

author	drosofff
date	Thu, 13 Aug 2015 06:16:29 -0400
parents	035df35a257e
children	74394e39ad22

comparison

Legend: equal | deleted | inserted | replaced

-:035df35a257e
+:101fec3cba04
 # version 0.0.9 (1-6-2014)
 # Usage MirParser.py  <1:index source> <2:extraction directive> <3:output pre-mir> <4: output mature miRs> <5:mirbase GFF3>
 #                     <6:pathToLatticeDataframe or "dummy_dataframe_path"> <7:Rcode or "dummy_plotCode"> <8:latticePDF or "dummy_latticePDF">
 #                     <9:10:11 filePath:FileExt:FileLabel> <.. ad  lib>
-import sys, subprocess
+import sys
+import subprocess
 from smRtools import *
 IndexSource = sys.argv[1]
 ExtractionDirective = sys.argv[2]
 if ExtractionDirective == "--do_not_extract_index":
 genomeRefFormat = "fastaSource"
-elif  ExtractionDirective == "--extract_index":
+elif ExtractionDirective == "--extract_index":
 genomeRefFormat = "bowtieIndex"
 OutputPre_mirs = sys.argv[3]
 OutputMature_Mirs = sys.argv[4]
 GFF3_file = sys.argv[5]
 lattice = sys.argv[6]
 Rcode = sys.argv[7]
 latticePDF = sys.argv[8]
-Triplets = [sys.argv[9:][i:i+3] for i in xrange(0, len(sys.argv[9:]), 3)]
+Triplets = [sys.argv[9:][i:i + 3] for i in xrange(0, len(sys.argv[9:]), 3)]
 MasterListOfGenomes = {}
 for [filePath, FileExt, FileLabel] in Triplets:
 print FileLabel
-MasterListOfGenomes[FileLabel] = HandleSmRNAwindows (alignmentFile=filePath, alignmentFileFormat=FileExt, genomeRefFile=IndexSource, genomeRefFormat=genomeRefFormat, biosample=FileLabel)
+MasterListOfGenomes[FileLabel] = HandleSmRNAwindows(alignmentFile=filePath,
+alignmentFileFormat=FileExt,
+genomeRefFile=IndexSource,
+genomeRefFormat=genomeRefFormat,
+biosample=FileLabel)
 header = ["gene"]
 for [filePath, FileExt, FileLabel] in Triplets:
 header.append(FileLabel)
-hit_table = ["\t".join(header)] # table header: gene, sample1, sample2, sample3, etc. separated by tabulation
+hit_table = ["\t".join(header)]  # table header: gene, sample1, sample2, sample3, etc. separated by tabulation
-## read GFF3 to subinstantiate
+# read GFF3 to subinstantiate
-gff3 = open (GFF3_file, "r")
+gff3 = open(GFF3_file, "r")
 lattice_dataframe = []
 for line in gff3:
-if line[0] == "#": continue
+if line[0] == "#":
-gff_fields = line[:-1].split("\t")
+continue
-chrom = gff_fields[0]
+gff_fields = line[:-1].split("\t")
-gff_name = gff_fields[-1].split("Name=")[-1].split(";")[0] # to isolate the GFF Name
+chrom = gff_fields[0]
-item_upstream_coordinate = int(gff_fields[3])
+gff_name = gff_fields[-1].split("Name=")[-1].split(";")[0]  # to isolate the GFF Name
-item_downstream_coordinate = int(gff_fields[4])
+item_upstream_coordinate = int(gff_fields[3])
-if gff_fields[6] == "+":
+item_downstream_coordinate = int(gff_fields[4])
-item_polarity = "forward"
+if gff_fields[6] == "+":
-else:
+item_polarity = "forward"
-item_polarity = "reverse"
+else:
-item_line = [gff_name]
+item_polarity = "reverse"
-for sample in header[1:]:
+item_line = [gff_name]
-count = MasterListOfGenomes[sample].instanceDict[chrom].readcount(upstream_coord=item_upstream_coordinate, downstream_coord=item_downstream_coordinate, polarity=item_polarity)
+for sample in header[1:]:
-item_line.append(str(count))
+count = MasterListOfGenomes[sample].instanceDict[chrom].readcount(upstream_coord=item_upstream_coordinate,
-## subtreatement for lattice
+downstream_coord=item_downstream_coordinate,
-if lattice != "dummy_dataframe_path":
+polarity=item_polarity)
-if ("5p" not in gff_name) and  ("3p" not in gff_name):
+item_line.append(str(count))
-lattice_dataframe.append(MasterListOfGenomes[sample].instanceDict[chrom].readcoverage(upstream_coord=item_upstream_coordinate, downstream_coord=item_downstream_coordinate, windowName=gff_name+"_"+sample) )
+# subtreatement for lattice
-## end of subtreatement for lattice
+if lattice != "dummy_dataframe_path":
-hit_table.append("\t".join(item_line) )
+if ("5p" not in gff_name) and ("3p" not in gff_name):
+lattice_dataframe.append(MasterListOfGenomes[sample].instanceDict[chrom].readcoverage(
+upstream_coord=item_upstream_coordinate,
+downstream_coord=item_downstream_coordinate,
+windowName=gff_name + "_" + sample))
+# end of subtreatement for lattice
+hit_table.append("\t".join(item_line))
 gff3.close()
-Fpremirs = open (OutputPre_mirs, "w")
+Fpremirs = open(OutputPre_mirs, "w")
 print >> Fpremirs, hit_table[0]
-finalPreList = [ i for i in sorted(hit_table[1:]) if ("5p" not in i) and  ("3p" not in i)]
+finalPreList = [i for i in sorted(hit_table[1:]) if ("5p" not in i) and ("3p" not in i)]
-print >> Fpremirs, "\n".join(finalPreList )
+print >> Fpremirs, "\n".join(finalPreList)
 Fpremirs.close()
-Fmaturemires = open (OutputMature_Mirs, "w")
+Fmaturemires = open(OutputMature_Mirs, "w")
 print >> Fmaturemires, hit_table[0]
-finalMatureList = [ i for i in sorted(hit_table[1:]) if ("5p" in i) or ("3p" in i)]
+finalMatureList = [i for i in sorted(hit_table[1:]) if ("5p" in i) or ("3p" in i)]
-print >> Fmaturemires, "\n".join(finalMatureList )
+print >> Fmaturemires, "\n".join(finalMatureList)
 Fmaturemires.close()
 if lattice != "dummy_dataframe_path":
 Flattice = open(lattice, "w")
-print >> Flattice, "%s\t%s\t%s\t%s\t%s\t%s\t%s" % ("sample", "mir", "offset", "offsetNorm", "counts","countsNorm",  "polarity")
+print >> Flattice, "%s\t%s\t%s\t%s\t%s\t%s\t%s" % ("sample",
-print >> Flattice, "\n".join(lattice_dataframe)
+"mir",
-Flattice.close()
+"offset",
-R_command="Rscript "+ Rcode
+"offsetNorm",
-process = subprocess.Popen(R_command.split())
+"counts",
-process.wait()
+"countsNorm",
+"polarity")
+print >> Flattice, "\n".join(lattice_dataframe)
+Flattice.close()
+R_command = "Rscript " + Rcode
+process = subprocess.Popen(R_command.split())
+process.wait()

Mercurial > repos > drosofff > mir_parser

comparison MirParser.py @ 1:101fec3cba04 draft