pep_pointer: pep_pointer.py comparison

comparison pep_pointer.py @ 3:a26f551d819b draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff

author	galaxyp
date	Fri, 06 Apr 2018 18:12:50 -0400
parents	aac535d694d4
children	3a3aff93ee9e

comparison

Legend: equal, deleted, inserted, replaced

-:ac3494caea96
+:a26f551d819b
 #
 # Author: Praveen Kumar
-# Updated: Nov 8th, 2017
+# Updated: April 6th, 2018
 #
 #
 #
 import re
 end = a[4].strip()
 elif int(a[4].strip()) < int(a[3].strip()):
 start = a[4].strip()
 end = a[3].strip()
 else:
-print "Something fishy in start end coordinates"
+print "Please check the start end coordinates in the GTF file"
 else:
-print "Something fishy in reading"
+print "Please check the strand information in the GTF file. It should be '+' or '-'."
 if not gtf.has_key(strand):
 gtf[strand] = {}
 if not gtf[strand].has_key(type):
 gtf[strand][type] = []
 b = re.search("gene_id \"(.+?)\";", a[8].strip())
 if strand == "+":
 st = "positive"
 elif strand == "-":
 st = "negative"
 else:
-print "Something fishy in writing . . ."
+print "Please check the strand information in the GTF file. It should be '+' or '-'."
 for type in gtf[strand].keys():
 data = gtf[strand][type]
 c.executemany('INSERT INTO gtf_data VALUES (?,?,?,?,?,?,?)', data)
 # infh = open("Mouse_Data_All_peptides_withNewDBs.txt", "r")
 data = infh.readlines()
 # output file
 outfh = open(inputFile[3], 'w')
 # outfh = open("classified_1_Mouse_Data_All_peptides_withNewDBs.txt", "w")
 for each in data:
-a = each.split("\t")
+a = each.strip().split("\t")
 chr = a[0].strip()
-pep_start = a[1].strip()
+pep_start = str(int(a[1].strip())+1)
 pep_end = a[2].strip()
 strand = a[5].strip()
-c.execute("select * from gtf_data where type = 'CDS' and chr = '"+chr+"' and start <= "+pep_start+" and end >= "+pep_end+" and strand = '"+strand+"' ")
+each = "\t".join(a[:6])
-rows = c.fetchall()
+if (len(a) == 12 and int(a[9]) == 1) or (len(a) == 6):
-if len(rows) > 0:
+c.execute("select * from gtf_data where type = 'CDS' and chr = '"+chr+"' and start <= "+pep_start+" and end >= "+pep_end+" and strand = '"+strand+"' ")
-outfh.write(each.strip() + "\tCDS\n")
-else:
-c.execute("select * from gtf_data where type = 'five_prime_utr' and chr = '"+chr+"' and start <= "+pep_start+" and end >= "+pep_end+" and strand = '"+strand+"' ")
 rows = c.fetchall()
 if len(rows) > 0:
-outfh.write(each.strip() + "\tfive_prime_utr\n")
+outfh.write(each.strip() + "\tCDS\n")
 else:
-c.execute("select * from gtf_data where type = 'three_prime_utr' and chr = '"+chr+"' and start <= "+pep_start+" and end >= "+pep_end+" and strand = '"+strand+"' ")
+c.execute("select * from gtf_data where type = 'five_prime_utr' and chr = '"+chr+"' and start <= "+pep_start+" and end >= "+pep_end+" and strand = '"+strand+"' ")
 rows = c.fetchall()
 if len(rows) > 0:
-outfh.write(each.strip() + "\tthree_prime_utr\n")
+outfh.write(each.strip() + "\tfive_prime_utr\n")
 else:
-c.execute("select * from gtf_data where type = 'exon' and chr = '"+chr+"' and start <= "+pep_start+" and end >= "+pep_end+" and strand = '"+strand+"' ")
+c.execute("select * from gtf_data where type = 'three_prime_utr' and chr = '"+chr+"' and start <= "+pep_start+" and end >= "+pep_end+" and strand = '"+strand+"' ")
 rows = c.fetchall()
 if len(rows) > 0:
-outfh.write(each.strip() + "\texon\n")
+outfh.write(each.strip() + "\tthree_prime_utr\n")
 else:
-c.execute("select * from gtf_data where type = 'intron' and chr = '"+chr+"' and start <= "+pep_start+" and end >= "+pep_end+" and strand = '"+strand+"' ")
+c.execute("select * from gtf_data where type = 'exon' and chr = '"+chr+"' and start <= "+pep_start+" and end >= "+pep_end+" and strand = '"+strand+"' ")
 rows = c.fetchall()
 if len(rows) > 0:
-outfh.write(each.strip() + "\tintron\n")
+outfh.write(each.strip() + "\texon\n")
 else:
-c.execute("select * from gtf_data where type = 'gene' and chr = '"+chr+"' and start <= "+pep_start+" and end >= "+pep_end+" and strand = '"+strand+"' ")
+c.execute("select * from gtf_data where type = 'intron' and chr = '"+chr+"' and start <= "+pep_start+" and end >= "+pep_end+" and strand = '"+strand+"' ")
 rows = c.fetchall()
 if len(rows) > 0:
-outfh.write(each.strip() + "\tgene\n")
+outfh.write(each.strip() + "\tintron\n")
 else:
-c.execute("select * from gtf_data where type = 'intergenic' and chr = '"+chr+"' and start <= "+pep_start+" and end >= "+pep_end+" and strand = '"+strand+"' ")
+c.execute("select * from gtf_data where type = 'gene' and chr = '"+chr+"' and start <= "+pep_start+" and end >= "+pep_end+" and strand = '"+strand+"' ")
 rows = c.fetchall()
 if len(rows) > 0:
-outfh.write(each.strip() + "\tintergene\n")
+outfh.write(each.strip() + "\tgene\n")
 else:
-outfh.write(each.strip() + "\tOVERLAPPING_ON_TWO_REGIONS: PLEASE_LOOK_MANUALLY (Will be updated in next version)\n")
+c.execute("select * from gtf_data where type = 'intergenic' and chr = '"+chr+"' and start <= "+pep_start+" and end >= "+pep_end+" and strand = '"+strand+"' ")
+rows = c.fetchall()
+if len(rows) > 0:
+outfh.write(each.strip() + "\tintergene\n")
+else:
+outfh.write(each.strip() + "\tOVERLAPPING_ON_TWO_REGIONS: PLEASE_LOOK_MANUALLY (Will be updated in next version)\n")
+elif (len(a) == 12 and int(a[9]) == 2):
+outfh.write(each.strip() + "\tSpliceJunction\n")
+else:
+outfh.write(each.strip() + "\tPlease check\n")
 conn.close()
 outfh.close()
 else:
 print "USAGE: python pep_pointer.py <input GTF file> <input tblastn file> <name of output file>"

Mercurial > repos > galaxyp > pep_pointer

comparison pep_pointer.py @ 3:a26f551d819b draft