Mercurial > repos > drosofff > mississipi_gcc2
view FeaturesParser.py @ 0:de6a6afc5a79 draft default tip
Uploaded
| author | drosofff |
|---|---|
| date | Tue, 24 Jun 2014 12:16:43 -0400 |
| parents | |
| children |
line wrap: on
line source
#!/usr/bin/python # python parser module to analyse Features in sRbowtie alignments (guided by a GFF3 file) # version 0.9 # Usage FeaturesParser.py <1:index source> <2:extraction directive> <3:output> <4:GFF3 guide file> <5:6:7 filePath:FileExt:FileLabel> <.. ad lib> import sys from smRtools import * from collections import * IndexSource = sys.argv[1] ExtractionDirective = sys.argv[2] if ExtractionDirective == "--do_not_extract_index": genomeRefFormat = "fastaSource" elif ExtractionDirective == "--extract_index": genomeRefFormat = "bowtieIndex" Output = sys.argv[3] GFF3_file = sys.argv[4] Triplets = [sys.argv[5:][i:i+3] for i in xrange(0, len(sys.argv[5:]), 3)] MasterListOfGenomes = {} FeatureDict = defaultdict(dict) for [filePath, FileExt, FileLabel] in Triplets: MasterListOfGenomes[FileLabel] = HandleSmRNAwindows (filePath, FileExt, IndexSource, genomeRefFormat) FeatureDict[FileLabel] = MasterListOfGenomes[FileLabel].CountFeatures(GFF3=GFF3_file) # add some code to pick up the GFF3 features in their order of appearence. F = open(GFF3_file, "r") featureList = [] for line in F: if line[0] == "#": continue feature = line.split()[2] if feature not in featureList: featureList.append(feature) F.close() header = ["#Feature"] for [filePath, FileExt, FileLabel] in Triplets: header.append(FileLabel) F = open (sys.argv[3], "w") print >> F, "\t".join(header) for feature in featureList: line=[feature] for sample in header[1:]: count = str (FeatureDict[sample][feature]) # uncomment to get percentage in addition to counts # percent = float(FeatureDict[sample][feature]) / MasterListOfGenomes[sample].alignedReads # value = "%s | %0.2f" % (count, percent) # line.append(value) line.append(count) print >> F, "\t".join(line ) line = ["Unfeatured"] for sample in header[1:]: matched = 0 for feature in FeatureDict[sample]: matched += FeatureDict[sample][feature] unmatched = MasterListOfGenomes[sample].alignedReads - matched # uncomment to get percentage in addition to counts # percent = float (unmatched) / (matched + unmatched) # value = "%s | %0.2f" % (unmatched, percent) # line.append(value) line.append("%s" % unmatched) print >> F, "\t".join(line) F.close()
