comparison readmap.py @ 24:bf7388df53cf draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit 3effd45f45c37a6cdaf9b7b1da1ed4d10d3b0e38
author drosofff
date Sat, 08 Oct 2016 07:18:45 -0400
parents d6b93af0da55
children
comparison
equal deleted inserted replaced
23:d6b93af0da55 24:bf7388df53cf
50 print fileLabel[i] 50 print fileLabel[i]
51 MasterListOfGenomes[fileLabel[i]] = HandleSmRNAwindows (alignmentFile=filePath, alignmentFileFormat=fileExt[i], genomeRefFile=genomeRefFile, genomeRefFormat=genomeRefFormat,\ 51 MasterListOfGenomes[fileLabel[i]] = HandleSmRNAwindows (alignmentFile=filePath, alignmentFileFormat=fileExt[i], genomeRefFile=genomeRefFile, genomeRefFormat=genomeRefFormat,\
52 biosample=fileLabel[i], size_inf=minquery, size_sup=maxquery, norm=norm) 52 biosample=fileLabel[i], size_inf=minquery, size_sup=maxquery, norm=norm)
53 return MasterListOfGenomes 53 return MasterListOfGenomes
54 54
55 def dataframe_sanityzer (listofdatalines): 55 def remove_null_entries(listofdatalines):
56 Dict = defaultdict(float) 56 """
57 This function removes genes that have no reads aligned.
58 """
59 Dict = defaultdict(float)
57 for line in listofdatalines: 60 for line in listofdatalines:
58 fields= line.split("\t") 61 fields= line.split("\t")
59 Dict[fields[0]] += float (fields[2]) 62 Dict[fields[0]] += abs(float(fields[2]))
60 filtered_list = [] 63 filtered_list = []
61 for line in listofdatalines: 64 for line in listofdatalines:
62 fields= line.split("\t") 65 fields= line.split("\t")
63 if Dict[fields[0]] != 0: 66 if Dict[fields[0]] != 0:
64 filtered_list.append(line) 67 filtered_list.append(line)
65 return filtered_list 68 return filtered_list
66 69
67 70
68 def listify_plottable_item(item): 71 def listify_plottable_item(item):
69 """ 72 """
106 dict=readDict[sample].instanceDict 109 dict=readDict[sample].instanceDict
107 for gene in dict.keys(): 110 for gene in dict.keys():
108 plottable = dict[gene].readplot() 111 plottable = dict[gene].readplot()
109 plottable = handle_start_stop_coordinates(plottable, readDict) 112 plottable = handle_start_stop_coordinates(plottable, readDict)
110 for line in plottable: 113 for line in plottable:
111 #print >>readmap, "%s\t%s" % (line, sample)
112 listoflines.append ("%s\t%s" % (line, sample)) 114 listoflines.append ("%s\t%s" % (line, sample))
113 listoflines = dataframe_sanityzer(listoflines) 115 listoflines = remove_null_entries(listoflines)
114 for line in listoflines: 116 for line in listoflines:
115 print >>readmap, line 117 print >>readmap, line
116 118
117 def write_size_distribution_dataframe(readDict, size_distribution_file): 119 def write_size_distribution_dataframe(readDict, size_distribution_file):
118 listoflines = [] 120 listoflines = []
122 if args.gff: 124 if args.gff:
123 dict=readDict[sample] 125 dict=readDict[sample]
124 else: 126 else:
125 dict=readDict[sample].instanceDict 127 dict=readDict[sample].instanceDict
126 for gene in dict.keys(): 128 for gene in dict.keys():
127 histogram = dict[gene].size_histogram(minquery=args.minquery, maxquery=args.maxquery) 129 histogram = dict[gene].size_histogram(minquery=minquery, maxquery=maxquery)
128 for polarity in histogram.keys(): 130 for polarity in histogram.keys():
129 if polarity=='both': 131 if polarity=='both':
130 continue 132 continue
131 #for size in xrange(args.minquery, args.maxquery):
132 # if not size in histogram[polarity].keys():
133 # histogram[size]=0
134 for size, count in histogram[polarity].iteritems(): 133 for size, count in histogram[polarity].iteritems():
135 #print >>size_distrib, "%s\t%s\t%s\t%s\t%s" % (gene, size, count, polarity, sample) # test, changed the order accordingly
136 listoflines.append ("%s\t%s\t%s\t%s\t%s" % (gene, size, count, polarity, sample) ) 134 listoflines.append ("%s\t%s\t%s\t%s\t%s" % (gene, size, count, polarity, sample) )
137 listoflines = dataframe_sanityzer(listoflines) 135 listoflines = remove_null_entries(listoflines)
138 for line in listoflines: 136 for line in listoflines:
139 print >>size_distrib, line 137 print >>size_distrib, line
140 138
141 def gff_item_subinstances(readDict, gff3): 139 def gff_item_subinstances(readDict, gff3):
142 GFFinstanceDict=OrderedDict() 140 GFFinstanceDict=OrderedDict()
143 for sample in readDict.keys(): 141 for sample in readDict.keys():
144 GFFinstanceDict[sample]={} # to implement the 2nd level of dictionary in an OrderedDict Class object (would not be required with defaultdict Class) 142 GFFinstanceDict[sample]={} # to implement the 2nd level of dictionary in an OrderedDict Class object (would not be required with defaultdict Class)