Mercurial > repos > drosofff > mississipi_gcc2
comparison FeaturesParser.py @ 0:de6a6afc5a79 draft default tip
Uploaded
| author | drosofff |
|---|---|
| date | Tue, 24 Jun 2014 12:16:43 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:de6a6afc5a79 |
|---|---|
| 1 #!/usr/bin/python | |
| 2 # python parser module to analyse Features in sRbowtie alignments (guided by a GFF3 file) | |
| 3 # version 0.9 | |
| 4 # Usage FeaturesParser.py <1:index source> <2:extraction directive> <3:output> <4:GFF3 guide file> <5:6:7 filePath:FileExt:FileLabel> <.. ad lib> | |
| 5 | |
| 6 import sys | |
| 7 from smRtools import * | |
| 8 from collections import * | |
| 9 | |
| 10 IndexSource = sys.argv[1] | |
| 11 ExtractionDirective = sys.argv[2] | |
| 12 if ExtractionDirective == "--do_not_extract_index": | |
| 13 genomeRefFormat = "fastaSource" | |
| 14 elif ExtractionDirective == "--extract_index": | |
| 15 genomeRefFormat = "bowtieIndex" | |
| 16 Output = sys.argv[3] | |
| 17 GFF3_file = sys.argv[4] | |
| 18 Triplets = [sys.argv[5:][i:i+3] for i in xrange(0, len(sys.argv[5:]), 3)] | |
| 19 MasterListOfGenomes = {} | |
| 20 FeatureDict = defaultdict(dict) | |
| 21 | |
| 22 for [filePath, FileExt, FileLabel] in Triplets: | |
| 23 MasterListOfGenomes[FileLabel] = HandleSmRNAwindows (filePath, FileExt, IndexSource, genomeRefFormat) | |
| 24 FeatureDict[FileLabel] = MasterListOfGenomes[FileLabel].CountFeatures(GFF3=GFF3_file) | |
| 25 | |
| 26 # add some code to pick up the GFF3 features in their order of appearence. | |
| 27 F = open(GFF3_file, "r") | |
| 28 featureList = [] | |
| 29 for line in F: | |
| 30 if line[0] == "#": continue | |
| 31 feature = line.split()[2] | |
| 32 if feature not in featureList: | |
| 33 featureList.append(feature) | |
| 34 F.close() | |
| 35 | |
| 36 header = ["#Feature"] | |
| 37 for [filePath, FileExt, FileLabel] in Triplets: | |
| 38 header.append(FileLabel) | |
| 39 | |
| 40 F = open (sys.argv[3], "w") | |
| 41 print >> F, "\t".join(header) | |
| 42 for feature in featureList: | |
| 43 line=[feature] | |
| 44 for sample in header[1:]: | |
| 45 count = str (FeatureDict[sample][feature]) | |
| 46 # uncomment to get percentage in addition to counts | |
| 47 # percent = float(FeatureDict[sample][feature]) / MasterListOfGenomes[sample].alignedReads | |
| 48 # value = "%s | %0.2f" % (count, percent) | |
| 49 # line.append(value) | |
| 50 line.append(count) | |
| 51 print >> F, "\t".join(line ) | |
| 52 line = ["Unfeatured"] | |
| 53 for sample in header[1:]: | |
| 54 matched = 0 | |
| 55 for feature in FeatureDict[sample]: | |
| 56 matched += FeatureDict[sample][feature] | |
| 57 unmatched = MasterListOfGenomes[sample].alignedReads - matched | |
| 58 # uncomment to get percentage in addition to counts | |
| 59 # percent = float (unmatched) / (matched + unmatched) | |
| 60 # value = "%s | %0.2f" % (unmatched, percent) | |
| 61 # line.append(value) | |
| 62 line.append("%s" % unmatched) | |
| 63 print >> F, "\t".join(line) | |
| 64 F.close() |
