Mercurial > repos > drosofff > msp_blastparser_and_hits
annotate BlastParser_and_hits.py @ 0:3959a271cf3f draft
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
author | drosofff |
---|---|
date | Tue, 09 Jun 2015 04:15:34 -0400 |
parents | |
children | e0985bad7b92 |
rev | line source |
---|---|
0
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
1 #!/usr/bin/python |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
2 # blastn blastx parser revised debugged: 3-4-2015. Commit issue. |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
3 # drosofff@gmail.com |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
4 |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
5 import sys |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
6 import argparse |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
7 from collections import defaultdict |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
8 |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
def Parser(argv=None):
    """Parse and validate the command-line options of the script.

    Args:
        argv: optional list of argument strings. When None (the default)
            argparse falls back to sys.argv[1:], which is what the
            zero-argument call Parser() relies on.

    Returns:
        The argparse namespace with the attributes blast, sequences,
        fastaOutput, tabularOutput (strings) and flanking (int, 0 when the
        option is not given).

    The four file options are mandatory: when one of them is missing or
    empty the parser prints its usage message and exits through
    the_parser.error().
    """
    the_parser = argparse.ArgumentParser()
    # The blast parser of this script reads a 13th field (subject length),
    # so the help text must not advertise a plain 12-column table.
    the_parser.add_argument('--blast', action="store", type=str,
                            help="Path to the blast output (tabular format, 12 standard columns plus the subject length as 13th column)")
    the_parser.add_argument('--sequences', action="store", type=str,
                            help="Path to the fasta file with blasted sequences")
    the_parser.add_argument('--fastaOutput', action="store", type=str,
                            help="fasta output file of blast hits")
    the_parser.add_argument('--tabularOutput', action="store", type=str,
                            help="tabular output file of blast analysis")
    # default=0 replaces the former post-parsing "if not args.flanking" patch.
    the_parser.add_argument('--flanking', action="store", type=int, default=0,
                            help="number of flanking nucleotides added to the hit sequences")
    args = the_parser.parse_args(argv)
    required = (args.sequences, args.blast, args.fastaOutput, args.tabularOutput)
    if not all(required):
        the_parser.error('argument(s) missing, call the -h option of the script')
    return args
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
22 |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
def median(lst):
    """Return the median of the numbers in lst, or None when lst is empty.

    The input is not modified; a sorted copy is used. For an odd number of
    values the middle value is returned unchanged; for an even number the
    mean of the two middle values is returned as a float.
    """
    ordered = sorted(lst)
    size = len(ordered)
    if size < 1:
        return None
    # Floor division keeps the index an int on Python 3 as well as Python 2
    # (a plain "/" yields a float there and list indexing raises TypeError).
    middle = size // 2
    if size % 2 == 1:
        return ordered[middle]
    return float(ordered[middle - 1] + ordered[middle]) / 2.0
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
31 |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
def getfasta(fastafile):
    """Load a fasta file into a dictionary {header: sequence}.

    Args:
        fastafile: path to the fasta file.

    Returns:
        A dict whose keys are the full header lines without the leading ">"
        and whose values are the sequences joined on a single line. When a
        header occurs several times, the last record wins.

    Sequence lines found before the first header are ignored.
    """
    chunks = {}
    header = None
    # "with" guarantees the handle is closed, even if reading fails.
    with open(fastafile) as handle:
        for line in handle:
            # Strip the line terminator explicitly instead of dropping the
            # last character: the final line may have no newline at all, and
            # files written on Windows end lines with "\r\n".
            line = line.rstrip("\r\n")
            if line.startswith(">"):
                header = line[1:]
                chunks[header] = []
            elif header is not None:
                chunks[header].append(line)
    # Join once per record rather than growing a string line after line.
    return dict((name, "".join(parts)) for name, parts in chunks.items())
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
43 |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
def insert_newlines(string, every=60):
    """Wrap string into lines of at most `every` characters.

    Args:
        string: the text (here a sequence) to wrap.
        every: maximum number of characters per line (default 60).

    Returns:
        The chunks joined with "\\n", without a trailing newline; an empty
        input gives an empty string.
    """
    # range() exists on both Python 2 and Python 3, unlike xrange().
    return '\n'.join(string[start:start + every]
                     for start in range(0, len(string), every))
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
49 |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
def getblast(blastfile):
    """Parse a 13-column tabular blast output.

    Args:
        blastfile: path to the tab-separated blast output. Fields 1 and 2
            are the query (transcript) and subject identifiers; a 13th field
            holding the subject length is required.

    Returns:
        A defaultdict(dict) such that result[subject][transcript] is the
        list of hits of that transcript on that subject, in file order.
        Each hit is a list "blastinfo":

        blastinfo [0]   Percentage of identical matches (float)
        blastinfo [1]   Alignment length (int)
        blastinfo [2]   Number of mismatches (int)
        blastinfo [3]   Number of gap openings (int)
        blastinfo [4]   Start of alignment in query (int)
        blastinfo [5]   End of alignment in query (int)
        blastinfo [6]   Start of alignment in subject (database hit) (int)
        blastinfo [7]   End of alignment in subject (database hit) (int)
        blastinfo [8]   Expectation value (E-value), kept as a string
        blastinfo [9]   Bit score (float)
        blastinfo [10]  Subject length (int) (NEED TO BE SPECIFIED WHEN RUNNING BLAST)

    Raises:
        ValueError: when a data line has fewer than 13 fields or a numeric
            field cannot be converted.

    Blank lines and lines starting with "#" are skipped.
    """
    blastdic = defaultdict(dict)
    # "with" guarantees the handle is closed, even if parsing fails.
    with open(blastfile) as handle:
        for line_number, line in enumerate(handle, 1):
            # Strip the terminator explicitly: cutting the last character
            # would truncate the subject length when the last line has no
            # trailing newline.
            line = line.rstrip("\r\n")
            if not line.strip() or line.startswith("#"):
                continue
            fields = line.split("\t")
            if len(fields) < 13:
                raise ValueError(
                    "%s, line %d: expected 13 tab-separated fields "
                    "(12 standard blast columns + subject length), found %d"
                    % (blastfile, line_number, len(fields)))
            transcript, subject = fields[0], fields[1]
            blastinfo = [float(fields[2])]                       # blastinfo[0]
            blastinfo.extend(int(value) for value in fields[3:10])  # blastinfo[1:8]
            blastinfo.append(fields[10])                         # blastinfo[8], E-value stays a string
            blastinfo.append(float(fields[11]))                  # blastinfo[9], bit score
            blastinfo.append(int(fields[12]))                    # blastinfo[10], subject length
            # setdefault replaces the former bare try/except, which also
            # swallowed unrelated errors.
            blastdic[subject].setdefault(transcript, []).append(blastinfo)
    return blastdic
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
77 |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
def getseq(fastadict, transcript, up, down, orientation="direct"):
    """Extract a subsequence, optionally reverse-complemented.

    Args:
        fastadict: dictionary {header: sequence}.
        transcript: key of the sequence in fastadict.
        up: 1-based, inclusive start coordinate (values below 1 are
            treated as 1).
        down: 1-based, inclusive end coordinate; a value past the end of
            the sequence is truncated by the slice.
        orientation: "direct" returns the slice as is; any other value
            returns its reverse complement.

    Returns:
        The requested (sub)sequence as a string.

    Raises:
        KeyError: when transcript is not a key of fastadict.
    """
    def reverse(seq):
        """Return the reverse complement of seq, preserving letter case."""
        # Nucleotide complement table including the IUPAC ambiguity codes.
        # Lowercase (soft-masked) bases are handled as well, and characters
        # that are not in the table are kept unchanged instead of raising
        # KeyError.
        complement = {"A": "T", "T": "A", "G": "C", "C": "G", "U": "A",
                      "R": "Y", "Y": "R", "K": "M", "M": "K",
                      "B": "V", "V": "B", "D": "H", "H": "D",
                      "S": "S", "W": "W", "N": "N"}
        complement.update(dict((key.lower(), value.lower())
                               for key, value in list(complement.items())))
        return "".join(complement.get(base, base) for base in reversed(seq))

    # Clamp the start so that up <= 0 cannot turn into a negative index,
    # which would silently slice from the end of the sequence.
    pickseq = fastadict[transcript][max(up - 1, 0):down]
    if orientation == "direct":
        return pickseq
    return reverse(pickseq)
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
88 |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
def subjectCoverage(fastadict, blastdict, subject, QueriesFlankingNucleotides=0):
    """Summarise all the blast hits recorded for one subject.

    Args:
        fastadict: dictionary {header: sequence} of the blasted sequences.
        blastdict: blastdict[subject][transcript] is a list of hits, each
            hit being a list where [0] is the identity percentage, [1] the
            alignment length, [4] and [5] the query start/end, [6] and [7]
            the subject start/end, [8] the E-value, [9] the bit score and
            [10] the subject length.
        subject: the subject identifier to summarise.
        QueriesFlankingNucleotides: flanking size handed to GetHitSequence.

    Returns:
        A 6-tuple (HitDic, subjectLength, TotalSubjectCoverage,
        RelativeSubjectCoverage, max bit score, median bit score) where
        HitDic maps a descriptive fasta header to the sequence returned by
        GetHitSequence for the hit, TotalSubjectCoverage is the number of
        distinct subject positions covered by at least one hit and
        RelativeSubjectCoverage is that number divided by subjectLength.

    Raises:
        ValueError: when no hit is recorded for subject.
    """
    coveredPositions = set()
    HitDic = {}
    bitScores = []
    subjectLength = None
    for transcript, hits in blastdict[subject].items():
        prefix = "%s--%s_" % (subject, transcript)
        for hitNumber, hit in enumerate(hits, 1):
            suffix = "hit%s_IdMatch=%s,AligLength=%s,E-val=%s" % (hitNumber, hit[0], hit[1], hit[8])
            # query coverage by a hit is in hit[4:6]
            HitDic[prefix + suffix] = GetHitSequence(fastadict, transcript, hit[4], hit[5],
                                                     QueriesFlankingNucleotides)
            # subject coverage by a hit is in hit[6:8]; start may be greater
            # than end, hence min/max. Feeding a set directly avoids building
            # a list with one entry per covered position of every hit.
            coveredPositions.update(range(min(hit[6], hit[7]), max(hit[6], hit[7]) + 1))
            bitScores.append(hit[9])
            # The last value seen is kept, as before; it is expected to be
            # the same for every hit of a given subject.
            subjectLength = hit[10]
    if subjectLength is None:
        # Explicit error instead of a NameError on an undefined loop variable.
        raise ValueError("no blast hit recorded for subject %r" % (subject,))
    TotalSubjectCoverage = len(coveredPositions)
    RelativeSubjectCoverage = TotalSubjectCoverage / float(subjectLength)
    return (HitDic, subjectLength, TotalSubjectCoverage, RelativeSubjectCoverage,
            max(bitScores), median(bitScores))
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
106 |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
def GetHitSequence(fastadict, FastaHeader, leftCoordinate, rightCoordinate, FlankingValue):
    """Return the sequence of a hit, extended by flanking nucleotides.

    Args:
        fastadict: dictionary {header: sequence}.
        FastaHeader: key of the sequence in fastadict.
        leftCoordinate: 1-based start of the hit on that sequence.
        rightCoordinate: 1-based end of the hit on that sequence. When it
            is smaller than leftCoordinate the hit is considered to be in
            reverse orientation and its reverse complement is returned.
        FlankingValue: number of nucleotides added on each side of the hit.

    Returns:
        The string returned by getseq for the extended interval.
    """
    # ">=" so that a single-nucleotide hit (start == end), which carries no
    # orientation information, is not reverse-complemented.
    if rightCoordinate >= leftCoordinate:
        polarity = "direct"
    else:
        polarity = "reverse"
        leftCoordinate, rightCoordinate = rightCoordinate, leftCoordinate
    # Extend on the left without going below the first position.
    leftCoordinate = max(leftCoordinate - FlankingValue, 1)
    # Extend on the right too: previously only the left side received the
    # flanking nucleotides. getseq slices the sequence, so an end coordinate
    # beyond the sequence length is truncated at the last nucleotide.
    rightCoordinate += FlankingValue
    return getseq(fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity)
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
118 |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
def __main__():
    """Entry point: parse the blast output and write the two report files.

    Reads the command-line options, loads the fasta and blast files, then
    writes:
      * the tabular output: a header line, then for each subject (sorted by
        decreasing total coverage) a commented summary followed by one line
        per hit;
      * the fasta output: the sequence of every hit, wrapped with
        insert_newlines, with an empty line after each subject.
    """
    args = Parser()
    fastadict = getfasta(args.sequences)
    Xblastdict = getblast(args.blast)
    results = defaultdict(dict)
    for subject in Xblastdict:
        (HitDic, subjectLength, TotalCoverage, RelativeSubjectCoverage,
         maxBitScores, medianBitScores) = subjectCoverage(fastadict, Xblastdict, subject, args.flanking)
        results[subject]["HitDic"] = HitDic
        results[subject]["subjectLength"] = subjectLength
        results[subject]["TotalCoverage"] = TotalCoverage
        results[subject]["RelativeSubjectCoverage"] = RelativeSubjectCoverage
        results[subject]["maxBitScores"] = maxBitScores
        results[subject]["medianBitScores"] = medianBitScores
    ## data output
    # Context managers close both files even when an error occurs, and
    # file.write() works on Python 2 and Python 3 (unlike "print >>").
    with open(args.tabularOutput, "w") as F:
        with open(args.fastaOutput, "w") as Fasta:
            # The header is followed by an empty line, as in the historical output.
            F.write("# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n\n")
            for subject in sorted(results, key=lambda x: results[x]["TotalCoverage"], reverse=True):
                summary = results[subject]
                F.write("#\n# %s\n" % subject)
                # NOTE: the "Suject" spelling is kept on purpose so that
                # tools parsing the existing output format keep working.
                F.write("# Suject Length: %s\n" % (summary["subjectLength"]))
                F.write("# Total Subject Coverage: %s\n" % (summary["TotalCoverage"]))
                F.write("# Relative Subject Coverage: %s\n" % (summary["RelativeSubjectCoverage"]))
                F.write("# Maximum Bit Score: %s\n" % (summary["maxBitScores"]))
                F.write("# Median Bit Score: %s\n" % (summary["medianBitScores"]))
                for header in summary["HitDic"]:
                    Fasta.write(">%s\n%s\n" % (header, insert_newlines(summary["HitDic"][header])))
                for transcript in Xblastdict[subject]:
                    transcriptSize = float(len(fastadict[transcript]))
                    for hit in Xblastdict[subject][transcript]:
                        # Query coordinates are inclusive, so a hit spans
                        # |end - start| + 1 nucleotides (the "+ 1" was missing).
                        queryCov = "%.1f" % ((abs(hit[5] - hit[4]) + 1) / transcriptSize * 100)
                        # transcript, %identity, alignment length, subject
                        # start, subject end, query coverage, E-value, bit score
                        info = [transcript, hit[0], hit[1], hit[6], hit[7], queryCov, hit[8], hit[9]]
                        F.write("\t".join(str(i) for i in info) + "\n")
                # empty line between the hits of two subjects
                Fasta.write("\n")


if __name__ == "__main__":
    __main__()