Mercurial > repos > drosofff > msp_sr_readmap_and_size_histograms
comparison readmap.py @ 24:bf7388df53cf draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit 3effd45f45c37a6cdaf9b7b1da1ed4d10d3b0e38
author | drosofff |
---|---|
date | Sat, 08 Oct 2016 07:18:45 -0400 |
parents | d6b93af0da55 |
children |
Comparison legend: equal | deleted | inserted | replaced
23:d6b93af0da55 | 24:bf7388df53cf |
---|---|
50 print fileLabel[i] | 50 print fileLabel[i] |
51 MasterListOfGenomes[fileLabel[i]] = HandleSmRNAwindows (alignmentFile=filePath, alignmentFileFormat=fileExt[i], genomeRefFile=genomeRefFile, genomeRefFormat=genomeRefFormat,\ | 51 MasterListOfGenomes[fileLabel[i]] = HandleSmRNAwindows (alignmentFile=filePath, alignmentFileFormat=fileExt[i], genomeRefFile=genomeRefFile, genomeRefFormat=genomeRefFormat,\ |
52 biosample=fileLabel[i], size_inf=minquery, size_sup=maxquery, norm=norm) | 52 biosample=fileLabel[i], size_inf=minquery, size_sup=maxquery, norm=norm) |
53 return MasterListOfGenomes | 53 return MasterListOfGenomes |
54 | 54 |
55 def dataframe_sanityzer (listofdatalines): | 55 def remove_null_entries(listofdatalines): |
56 Dict = defaultdict(float) | 56 """ |
57 This function removes genes that have no reads aligned. | |
58 """ | |
59 Dict = defaultdict(float) | |
57 for line in listofdatalines: | 60 for line in listofdatalines: |
58 fields= line.split("\t") | 61 fields= line.split("\t") |
59 Dict[fields[0]] += float (fields[2]) | 62 Dict[fields[0]] += abs(float(fields[2])) |
60 filtered_list = [] | 63 filtered_list = [] |
61 for line in listofdatalines: | 64 for line in listofdatalines: |
62 fields= line.split("\t") | 65 fields= line.split("\t") |
63 if Dict[fields[0]] != 0: | 66 if Dict[fields[0]] != 0: |
64 filtered_list.append(line) | 67 filtered_list.append(line) |
65 return filtered_list | 68 return filtered_list |
66 | 69 |
67 | 70 |
68 def listify_plottable_item(item): | 71 def listify_plottable_item(item): |
69 """ | 72 """ |
106 dict=readDict[sample].instanceDict | 109 dict=readDict[sample].instanceDict |
107 for gene in dict.keys(): | 110 for gene in dict.keys(): |
108 plottable = dict[gene].readplot() | 111 plottable = dict[gene].readplot() |
109 plottable = handle_start_stop_coordinates(plottable, readDict) | 112 plottable = handle_start_stop_coordinates(plottable, readDict) |
110 for line in plottable: | 113 for line in plottable: |
111 #print >>readmap, "%s\t%s" % (line, sample) | |
112 listoflines.append ("%s\t%s" % (line, sample)) | 114 listoflines.append ("%s\t%s" % (line, sample)) |
113 listoflines = dataframe_sanityzer(listoflines) | 115 listoflines = remove_null_entries(listoflines) |
114 for line in listoflines: | 116 for line in listoflines: |
115 print >>readmap, line | 117 print >>readmap, line |
116 | 118 |
117 def write_size_distribution_dataframe(readDict, size_distribution_file): | 119 def write_size_distribution_dataframe(readDict, size_distribution_file): |
118 listoflines = [] | 120 listoflines = [] |
122 if args.gff: | 124 if args.gff: |
123 dict=readDict[sample] | 125 dict=readDict[sample] |
124 else: | 126 else: |
125 dict=readDict[sample].instanceDict | 127 dict=readDict[sample].instanceDict |
126 for gene in dict.keys(): | 128 for gene in dict.keys(): |
127 histogram = dict[gene].size_histogram(minquery=args.minquery, maxquery=args.maxquery) | 129 histogram = dict[gene].size_histogram(minquery=minquery, maxquery=maxquery) |
128 for polarity in histogram.keys(): | 130 for polarity in histogram.keys(): |
129 if polarity=='both': | 131 if polarity=='both': |
130 continue | 132 continue |
131 #for size in xrange(args.minquery, args.maxquery): | |
132 # if not size in histogram[polarity].keys(): | |
133 # histogram[size]=0 | |
134 for size, count in histogram[polarity].iteritems(): | 133 for size, count in histogram[polarity].iteritems(): |
135 #print >>size_distrib, "%s\t%s\t%s\t%s\t%s" % (gene, size, count, polarity, sample) # test, changed the order accordingly | |
136 listoflines.append ("%s\t%s\t%s\t%s\t%s" % (gene, size, count, polarity, sample) ) | 134 listoflines.append ("%s\t%s\t%s\t%s\t%s" % (gene, size, count, polarity, sample) ) |
137 listoflines = dataframe_sanityzer(listoflines) | 135 listoflines = remove_null_entries(listoflines) |
138 for line in listoflines: | 136 for line in listoflines: |
139 print >>size_distrib, line | 137 print >>size_distrib, line |
140 | 138 |
141 def gff_item_subinstances(readDict, gff3): | 139 def gff_item_subinstances(readDict, gff3): |
142 GFFinstanceDict=OrderedDict() | 140 GFFinstanceDict=OrderedDict() |
143 for sample in readDict.keys(): | 141 for sample in readDict.keys(): |
144 GFFinstanceDict[sample]={} # to implement the 2nd level of directionary in an OrderedDict Class object (would not be required with defaultdict Class) | 142 GFFinstanceDict[sample]={} # to implement the 2nd level of directionary in an OrderedDict Class object (would not be required with defaultdict Class) |