Mercurial > repos > drosofff > msp_blastparser_and_hits
annotate BlastParser_and_hits.py @ 4:22641bb68b91 draft
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
author | drosofff |
---|---|
date | Mon, 29 Jun 2015 06:06:11 -0400 |
parents | e0985bad7b92 |
children | 3f7cfa1cf90c |
rev | line source |
---|---|
0
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
1 #!/usr/bin/python |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
2 # blastn blastx parser revised debugged: 3-4-2015. Commit issue. |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
3 # drosofff@gmail.com |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
4 |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
5 import sys |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
6 import argparse |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
7 from collections import defaultdict |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
8 |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
9 def Parser(): |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
10 the_parser = argparse.ArgumentParser() |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
11 the_parser.add_argument('--blast', action="store", type=str, help="Path to the blast output (tabular format, 12 column)") |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
12 the_parser.add_argument('--sequences', action="store", type=str, help="Path to the fasta file with blasted sequences") |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
13 the_parser.add_argument('--fastaOutput', action="store", type=str, help="fasta output file of blast hits") |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
14 the_parser.add_argument('--tabularOutput', action="store", type=str, help="tabular output file of blast analysis") |
2
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
15 the_parser.add_argument('--flanking', action="store", type=int, help="number of flanking nucleotides added to the hit sequences") |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
16 the_parser.add_argument('--mode', action="store", choices=["verbose", "short"], type=str, help="reporting (verbose) or not reporting (short) oases contigs") |
4
22641bb68b91
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
2
diff
changeset
|
17 the_parser.add_argument('--filter_relativeCov', action="store", type=float, default=0, help="filter out relative coverages below the specified ratio (float number)") |
22641bb68b91
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
2
diff
changeset
|
18 the_parser.add_argument('--filter_maxScore', action="store", type=float, default=0, help="filter out maximum BitScore below the specified float number") |
22641bb68b91
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
2
diff
changeset
|
19 the_parser.add_argument('--filter_meanScore', action="store", type=float, default=0, help="filter out maximum BitScore below the specified float number") |
0
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
20 args = the_parser.parse_args() |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
21 if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ): |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
22 the_parser.error('argument(s) missing, call the -h option of the script') |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
23 if not args.flanking: |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
24 args.flanking = 0 |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
25 return args |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
26 |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
27 def median(lst): |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
28 lst = sorted(lst) |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
29 if len(lst) < 1: |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
30 return None |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
31 if len(lst) %2 == 1: |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
32 return lst[((len(lst)+1)/2)-1] |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
33 if len(lst) %2 == 0: |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
34 return float(sum(lst[(len(lst)/2)-1:(len(lst)/2)+1]))/2.0 |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
35 |
2
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
36 def mean(lst): |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
37 if len(lst) < 1: |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
38 return 0 |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
39 return sum(lst) / float(len(lst)) |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
40 |
0
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
41 def getfasta (fastafile): |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
42 fastadic = {} |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
43 for line in open (fastafile): |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
44 if line[0] == ">": |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
45 header = line[1:-1] |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
46 fastadic[header] = "" |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
47 else: |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
48 fastadic[header] += line |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
49 for header in fastadic: |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
50 fastadic[header] = "".join(fastadic[header].split("\n")) |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
51 return fastadic |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
52 |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
53 def insert_newlines(string, every=60): |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
54 lines = [] |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
55 for i in xrange(0, len(string), every): |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
56 lines.append(string[i:i+every]) |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
57 return '\n'.join(lines) |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
58 |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
59 def getblast (blastfile): |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
60 '''blastinfo [0] Percentage of identical matches |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
61 blastinfo [1] Alignment length |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
62 blastinfo [2] Number of mismatches |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
63 blastinfo [3] Number of gap openings |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
64 blastinfo [4] Start of alignment in query |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
65 blastinfo [5] End of alignment in query |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
66 blastinfo [6] Start of alignment in subject (database hit) |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
67 blastinfo [7] End of alignment in subject (database hit) |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
68 blastinfo [8] Expectation value (E-value) |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
69 blastinfo [9] Bit score |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
70 blastinfo [10] Subject length (NEED TO BE SPECIFIED WHEN RUNNING BLAST) ''' |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
71 blastdic = defaultdict (dict) |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
72 for line in open (blastfile): |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
73 fields = line[:-1].split("\t") |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
74 transcript = fields[0] |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
75 subject = fields[1] |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
76 blastinfo = [float(fields[2]) ] # blastinfo[0] |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
77 blastinfo = blastinfo + [int(i) for i in fields[3:10] ] # blastinfo[1:8] insets 1 to 7 |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
78 blastinfo.append(fields[10]) # blastinfo[8] E-value remains as a string type |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
79 blastinfo.append(float(fields[11])) # blastinfo[9] Bit score |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
80 blastinfo.append(int(fields[12])) # blastinfo[10] Subject length MUST BE RETRIEVED THROUGH A 13 COLUMN BLAST OUTPUT |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
81 try: |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
82 blastdic[subject][transcript].append(blastinfo) |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
83 except: |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
84 blastdic[subject][transcript] = [ blastinfo ] |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
85 return blastdic |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
86 |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
87 def getseq (fastadict, transcript, up, down, orientation="direct"): |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
88 def reverse (seq): |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
89 revdict = {"A":"T","T":"A","G":"C","C":"G","N":"N"} |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
90 revseq = [revdict[i] for i in seq[::-1]] |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
91 return "".join(revseq) |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
92 pickseq = fastadict[transcript][up-1:down] |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
93 if orientation == "direct": |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
94 return pickseq |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
95 else: |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
96 return reverse(pickseq) |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
97 |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
98 def subjectCoverage (fastadict, blastdict, subject, QueriesFlankingNucleotides=0): |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
99 SubjectCoverageList = [] |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
100 HitDic = {} |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
101 bitScores = [] |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
102 for transcript in blastdict[subject]: |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
103 prefix = "%s--%s_" % (subject, transcript) |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
104 hitNumber = 0 |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
105 for hit in blastdict[subject][transcript]: |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
106 hitNumber += 1 |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
107 suffix = "hit%s_IdMatch=%s,AligLength=%s,E-val=%s" % (hitNumber, hit[0], hit[1], hit[8]) |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
108 HitDic[prefix+suffix] = GetHitSequence (fastadict, transcript, hit[4], hit[5], QueriesFlankingNucleotides) #query coverage by a hit is in hit[4:6] |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
109 SubjectCoverageList += range (min([hit[6], hit[7]]), max([hit[6], hit[7]]) + 1) # subject coverage by a hit is in hit[6:8] |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
110 bitScores.append(hit[9]) |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
111 subjectLength = hit [10] # always the same value for a given subject. Stupid but simple |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
112 TotalSubjectCoverage = len ( set (SubjectCoverageList) ) |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
113 RelativeSubjectCoverage = TotalSubjectCoverage/float(subjectLength) |
2
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
114 return HitDic, subjectLength, TotalSubjectCoverage, RelativeSubjectCoverage, max(bitScores), mean(bitScores) |
0
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
115 |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
116 def GetHitSequence (fastadict, FastaHeader, leftCoordinate, rightCoordinate, FlankingValue): |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
117 if rightCoordinate > leftCoordinate: |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
118 polarity = "direct" |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
119 else: |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
120 polarity = "reverse" |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
121 leftCoordinate, rightCoordinate = rightCoordinate, leftCoordinate |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
122 if leftCoordinate - FlankingValue > 0: |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
123 leftCoordinate -= FlankingValue |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
124 else: |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
125 leftCoordinate = 1 |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
126 return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity) |
2
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
127 |
4
22641bb68b91
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
2
diff
changeset
|
128 def outputParsing (F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, mode="verbose"): |
2
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
129 F= open(F, "w") |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
130 Fasta=open(Fasta, "w") |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
131 if mode == "verbose": |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
132 print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n" |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
133 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): |
4
22641bb68b91
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
2
diff
changeset
|
134 if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore: |
22641bb68b91
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
2
diff
changeset
|
135 continue |
2
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
136 print >> F, "#\n# %s" % subject |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
137 print >> F, "# Suject Length: %s" % (results[subject]["subjectLength"]) |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
138 print >> F, "# Total Subject Coverage: %s" % (results[subject]["TotalCoverage"]) |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
139 print >> F, "# Relative Subject Coverage: %s" % (results[subject]["RelativeSubjectCoverage"]) |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
140 print >> F, "# Maximum Bit Score: %s" % (results[subject]["maxBitScores"]) |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
141 print >> F, "# Mean Bit Score: %s" % (results[subject]["meanBitScores"]) |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
142 for header in results[subject]["HitDic"]: |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
143 print >> Fasta, ">%s\n%s" % (header, insert_newlines(results[subject]["HitDic"][header]) ) |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
144 print >> Fasta, "" # final carriage return for the sequence |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
145 for transcript in Xblastdict[subject]: |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
146 transcriptSize = float(len(fastadict[transcript])) |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
147 for hit in Xblastdict[subject][transcript]: |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
148 percentIdentity, alignLenght, subjectStart, subjectEnd, queryCov = hit[0], hit[1], hit[6], hit[7], "%.1f" % (abs(hit[5]-hit[4])/transcriptSize*100) |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
149 Eval, BitScore = hit[8], hit[9] |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
150 info = [transcript] + [percentIdentity, alignLenght, subjectStart, subjectEnd, queryCov, Eval, BitScore] |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
151 info = [str(i) for i in info] |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
152 info = "\t".join(info) |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
153 print >> F, info |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
154 else: |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
155 print >>F, "# subject\tsubject length\tTotal Subject Coverage\tRelative Subject Coverage\tMaximum Bit Score\tMean Bit Score" |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
156 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): |
4
22641bb68b91
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
2
diff
changeset
|
157 if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore: |
22641bb68b91
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
2
diff
changeset
|
158 continue |
2
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
159 line = [] |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
160 line.append(subject) |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
161 line.append(results[subject]["subjectLength"]) |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
162 line.append(results[subject]["TotalCoverage"]) |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
163 line.append(results[subject]["RelativeSubjectCoverage"]) |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
164 line.append(results[subject]["maxBitScores"]) |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
165 line.append(results[subject]["meanBitScores"]) |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
166 line = [str(i) for i in line] |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
167 print >> F, "\t".join(line) |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
168 for header in results[subject]["HitDic"]: |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
169 print >> Fasta, ">%s\n%s" % (header, insert_newlines(results[subject]["HitDic"][header]) ) |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
170 print >> Fasta, "" # final carriage return for the sequence |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
171 F.close() |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
172 Fasta.close() |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
173 |
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
174 |
0
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
175 |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
176 def __main__ (): |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
177 args = Parser() |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
178 fastadict = getfasta (args.sequences) |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
179 Xblastdict = getblast (args.blast) |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
180 results = defaultdict(dict) |
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
181 for subject in Xblastdict: |
2
e0985bad7b92
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
0
diff
changeset
|
182 results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking) |
4
22641bb68b91
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
2
diff
changeset
|
183 outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, |
22641bb68b91
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
2
diff
changeset
|
184 filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore, |
22641bb68b91
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
2
diff
changeset
|
185 filter_meanScore=args.filter_meanScore, mode=args.mode) |
0
3959a271cf3f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
186 if __name__=="__main__": __main__() |