Mercurial > repos > mvdbeek > dapars
changeset 5:a5d8b08af089 draft
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
author | mvdbeek |
---|---|
date | Thu, 29 Oct 2015 15:51:10 -0400 |
parents | 73b932244237 |
children | 1cc6c9e327fb |
files | dapars.py dapars.xml test-data/100_3L.gtf test-data/breakpoint.bed test-data/c1.bam test-data/c2.bam test-data/c3.bam test-data/dapars.tab test-data/example.gtf test-data/t1.bam test-data/t2.bam test-data/t3.bam tool_dependencies.xml |
diffstat | 13 files changed, 291 insertions(+), 68 deletions(-) [+] |
line wrap: on
line diff
--- a/dapars.py Wed Oct 28 06:22:18 2015 -0400 +++ b/dapars.py Thu Oct 29 15:51:10 2015 -0400 @@ -2,19 +2,27 @@ import os import csv import numpy as np +from scipy import stats from collections import OrderedDict, namedtuple import filter_utr import subprocess from multiprocessing import Pool import warnings +import matplotlib.pyplot as plt +import matplotlib.gridspec as gridspec +from tabulate import tabulate +def directory_path(str): + if os.path.exists(str): + return str + else: + os.mkdir(str) + return str def parse_args(): """ Returns floating point values except for input files. My initial approach will not filter anything. (FDR. fold_change, PDUI, Num_least ...) - :param argv: - :return: """ parser = argparse.ArgumentParser(prog='DaPars', description='Determines the usage of proximal polyA usage') parser.add_argument("-c", "--control_alignments", nargs="+", required=True, @@ -33,7 +41,11 @@ help="minimum coverage in each aligment to be considered for determining breakpoints") parser.add_argument("-b", "--breakpoint_bed", required=False, type=argparse.FileType('w'), help="Write bedfile with coordinates of breakpoint positions to supplied path.") - parser.add_argument("-v", "--version", action='version', version='%(prog)s 0.1.5') + parser.add_argument("-v", "--version", action='version', version='%(prog)s 0.2.0') + parser.add_argument("-p", "--plot_path", default=None, required=False, type=directory_path, + help="If plot_path is specified will write a coverage plot for every UTR in that directory.") + parser.add_argument("-html", "--html_file", default=None, required=False, type=argparse.FileType('w'), + help="Write an html file to the specified location. Only to be used within a galaxy wrapper") return parser.parse_args() @@ -48,6 +60,8 @@ self.n_cpus = args.cpu self.search_start = args.search_start self.coverage_threshold = args.coverage_threshold + self.plot_path = args.plot_path + self.html_file = args.html_file self.utr = args.utr_bed_file self.gtf_fields = filter_utr.get_gtf_fields() self.result_file = args.output_file @@ -67,7 +81,8 @@ if args.breakpoint_bed: self.bed_output = args.breakpoint_bed self.write_bed() - + if self.plot_path: + self.write_html() def dump_utr_dict_to_bedfile(self): w = csv.writer(open("tmp_bedfile.bed", "w"), delimiter="\t") @@ -110,6 +125,10 @@ return coverage_dict def get_utr_dict(self, shift): + """ + The utr end is extended by UTR length * shift, to discover novel distal polyA sites. + Set to 0 to disable. + """ utr_dict = OrderedDict() for line in self.utr: if not line.startswith("#"): @@ -139,11 +158,11 @@ utr_coverage.append(np.sum(vector)) coverage_per_alignment.append(utr_coverage) coverages = np.array([ sum(x) for x in zip(*coverage_per_alignment) ]) - coverage_weights = coverages / np.mean(coverages) # TODO: proabably median is better suited? + coverage_weights = coverages / np.mean(coverages) # TODO: proabably median is better suited? Or even no normalization! return coverage_weights def get_result_tuple(self): - static_desc = ["chr", "start", "end", "strand", "gene", "breakpoint", + static_desc = ["chr", "start", "end", "strand", "gene", "t_stat", "p_value", "breakpoint", "breakpoint_type", "control_mean_percent", "treatment_mean_percent" ] samples_desc = [] for statistic in ["coverage_long", "coverage_short", "percent_long"]: @@ -162,18 +181,22 @@ "num_treatment":len(self.treatment_alignments), "result_d":result_d} pool = Pool(self.n_cpus) - tasks = [ (self.utr_coverages[utr], utr, utr_d, self.result_tuple._fields, self.coverage_weights, self.num_samples, - len(self.control_alignments), len(self.treatment_alignments), self.search_start, - self.coverage_threshold) for utr, utr_d in self.utr_dict.iteritems() ] + tasks = [ (self.utr_coverages[utr], self.plot_path, utr, utr_d, self.coverage_weights, len(self.control_alignments), + len(self.treatment_alignments), self.search_start, self.coverage_threshold) \ + for utr, utr_d in self.utr_dict.iteritems() ] processed_tasks = [ pool.apply_async(calculate_all_utr, t) for t in tasks] - result = [res.get() for res in processed_tasks] - for res_control, res_treatment in result: - if isinstance(res_control, dict): - t = self.result_tuple(**res_control) - result_d[res_control["gene"]+"_bp_control"] = t - if isinstance(res_treatment, dict): - t = self.result_tuple(**res_treatment) - result_d[res_treatment["gene"]+"_bp_treatment"] = t + result_list = [res.get() for res in processed_tasks] + for res_control, res_treatment in result_list: + if not res_control: + continue + for i, result in enumerate(res_control): + if isinstance(result, dict): + t = self.result_tuple(**result) + result_d[result["gene"]+"_bp_control_{i}".format(i=i)] = t + for i, result in enumerate(res_treatment): + if isinstance(result, dict): + t = self.result_tuple(**result) + result_d[result["gene"]+"_bp_treatment_{i}".format(i=i)] = t return result_d def write_results(self): @@ -183,51 +206,47 @@ w.writerow(header) # field header w.writerows( self.result_d.values()) + def write_html(self): + output_lines = [(gene_str_to_link(result.gene), result.breakpoint, result.breakpoint_type, result.p_value ) for result in self.result_d.itervalues()] + if self.html_file: + self.html_file.write(tabulate(output_lines, headers=["gene", "breakpoint", "breakpoint_type", "p_value"], tablefmt="html")) + else: + with open(os.path.join(self.plot_path, "index.html"), "w") as html_file: + html_file.write(tabulate(output_lines, headers=["gene", "breakpoint", "breakpoint_type", "p_value"], tablefmt="html")) + def write_bed(self): w = csv.writer(self.bed_output, delimiter='\t') bed = [(result.chr, result.breakpoint, int(result.breakpoint)+1, result.gene+"_"+result.breakpoint_type, 0, result.strand) for result in self.result_d.itervalues()] w.writerows(bed) -def calculate_all_utr(utr_coverage, utr, utr_d, result_tuple_fields, coverage_weights, num_samples, num_control, - num_treatment, search_start, coverage_threshold): - res_control = dict(zip(result_tuple_fields, result_tuple_fields)) - res_treatment = res_control.copy() +def calculate_all_utr(utr_coverage, plot_path, utr, utr_d, coverage_weights, num_control, num_treatment, search_start, coverage_threshold): if utr_d["strand"] == "+": is_reverse = False else: is_reverse = True - control_breakpoint, \ - control_abundance, \ - treatment_breakpoint, \ - treatment_abundance = optimize_breakpoint(utr_coverage, utr_d["new_start"], utr_d["new_end"], coverage_weights, - search_start, coverage_threshold, num_control) - if control_breakpoint: - breakpoint_to_result(res_control, utr, utr_d, control_breakpoint, "control_breakpoint", control_abundance, is_reverse, num_samples, + control_breakpoints, control_abundances, treatment_breakpoints, treatment_abundances = \ + optimize_breakpoint(plot_path, utr, utr_coverage, utr_d["new_start"], utr_d["new_end"], coverage_weights, search_start, coverage_threshold, num_control) + res_control = breakpoints_to_result(utr, utr_d, control_breakpoints, "control_breakpoint", control_abundances, is_reverse, num_control, num_treatment) - if treatment_breakpoint: - breakpoint_to_result(res_treatment, utr, utr_d, treatment_breakpoint, "treatment_breakpoint", treatment_abundance, is_reverse, - num_samples, num_control, num_treatment) - if res_control == dict(zip(result_tuple_fields, result_tuple_fields)): - res_control = False - if res_treatment == dict(zip(result_tuple_fields, result_tuple_fields)): - res_treatment == False + res_treatment = breakpoints_to_result(utr, utr_d, treatment_breakpoints, "treatment_breakpoint", treatment_abundances, is_reverse, + num_control, num_treatment) return res_control, res_treatment -def breakpoint_to_result(res, utr, utr_d, breakpoint, breakpoint_type, - abundances, is_reverse, num_samples, num_control, num_treatment): +def breakpoints_to_result(utr, utr_d, breakpoints, breakpoint_type, + abundances, is_reverse, num_control, num_treatment): """ Takes in a result dictionary res and fills the necessary fields """ - long_coverage_vector = abundances[0] - short_coverage_vector = abundances[1] - num_non_zero = sum((np.array(long_coverage_vector) + np.array(short_coverage_vector)) > 0) # TODO: This introduces bias - if num_non_zero == num_samples: - percentage_long = [] - for i in range(num_samples): - ratio = float(long_coverage_vector[i]) / (long_coverage_vector[i] + short_coverage_vector[i]) # long 3'UTR percentage - percentage_long.append(ratio) + if not breakpoints: + return False + result = [] + for breakpoint, abundance in zip(breakpoints, abundances): + res = {} + long_coverage_vector = abundance[0] + short_coverage_vector = abundance[1] + percentage_long = long_coverage_vector/(long_coverage_vector+short_coverage_vector) for i in range(num_control): res["control_{i}_coverage_long".format(i=i)] = float(long_coverage_vector[i]) res["control_{i}_coverage_short".format(i=i)] = float(short_coverage_vector[i]) @@ -237,6 +256,7 @@ res["treatment_{i}_coverage_long".format(i=k)] = float(long_coverage_vector[i]) res["treatment_{i}_coverage_short".format(i=k)] = float(short_coverage_vector[i]) res["treatment_{i}_percent_long".format(i=k)] = percentage_long[i] + res["t_stat"], res["p_value"] = stat_test(percentage_long[:num_control], percentage_long[num_control:]) control_mean_percent = np.mean(np.array(percentage_long[:num_control])) treatment_mean_percent = np.mean(np.array(percentage_long[num_control:])) res["chr"] = utr_d["chr"] @@ -252,43 +272,85 @@ res["control_mean_percent"] = control_mean_percent res["treatment_mean_percent"] = treatment_mean_percent res["gene"] = utr + result.append(res) + return result -def optimize_breakpoint(utr_coverage, UTR_start, UTR_end, coverage_weigths, search_start, coverage_threshold, num_control): +def optimize_breakpoint(plot_path, utr, utr_coverage, UTR_start, UTR_end, coverage_weigths, search_start, coverage_threshold, num_control): """ We are searching for a point within the UTR that minimizes the mean squared error, if the coverage vector was divided at that point. utr_coverage is a list with items corresponding to numpy arrays of coverage for a sample. """ - search_point_end = int(abs((UTR_end - UTR_start)) * 0.1) # TODO: This is 10% of total UTR end. Why? num_samples = len(utr_coverage) - normalized_utr_coverage = np.array([coverage/ coverage_weigths[i] for i, coverage in enumerate( utr_coverage.values() )]) + normalized_utr_coverage = np.array(utr_coverage.values())/np.expand_dims(coverage_weigths, axis=1) start_coverage = [np.mean(coverage[0:99]) for coverage in utr_coverage.values()] # filters threshold on mean coverage over first 100 nt is_above_threshold = sum(np.array(start_coverage) >= coverage_threshold) >= num_samples # This filters on the raw threshold. Why? is_above_length = UTR_end - UTR_start >= 150 if (is_above_threshold) and (is_above_length): - search_end = UTR_end - UTR_start - search_point_end + search_end = UTR_end - UTR_start breakpoints = range(search_start, search_end + 1) mse_list = [ estimate_mse(normalized_utr_coverage, bp, num_samples, num_control) for bp in breakpoints ] + mse_list = [mse_list[0] for i in xrange(search_start)] + mse_list + if plot_path: + plot_coverage_breakpoint(plot_path, utr, mse_list, normalized_utr_coverage, num_control) if len(mse_list) > 0: - return mse_to_breakpoint(mse_list, normalized_utr_coverage, breakpoints, num_samples) + return mse_to_breakpoint(mse_list, normalized_utr_coverage, num_samples) return False, False, False, False -def mse_to_breakpoint(mse_list, normalized_utr_coverage, breakpoints, num_samples): +def plot_coverage_breakpoint(plot_path, utr, mse_list, normalized_utr_coverage, num_control): """ - Take in mse_list with control and treatment mse and return breakpoint and utr abundance + """ - mse_control = [mse[0] for mse in mse_list] - mse_treatment = [mse[1] for mse in mse_list] - control_index = mse_control.index(min(mse_control)) - treatment_index = mse_treatment.index(min(mse_treatment)) - control_breakpoint = breakpoints[control_index] - treatment_breakpoint = breakpoints[treatment_index] - control_abundance = estimate_abundance(normalized_utr_coverage, control_breakpoint, num_samples) - treatment_abundance = estimate_abundance(normalized_utr_coverage, treatment_breakpoint, num_samples) - return control_breakpoint, control_abundance, treatment_breakpoint, treatment_abundance + fig = plt.figure(figsize=(8, 8)) + gs = gridspec.GridSpec(2, 1) + ax1 = plt.subplot(gs[0, :]) + ax2 = plt.subplot(gs[1, :]) + ax1.set_title("mean-squared error plot") + ax1.set_ylabel("mean-squared error") + ax1.set_xlabel("nt after UTR start") + ax2.set_title("coverage plot") + ax2.set_xlabel("nt after UTR start") + ax2.set_ylabel("normalized nucleotide coverage") + mse_control = [ condition[0] for condition in mse_list] + mse_treatment = [ condition[1] for condition in mse_list] + minima_control = get_minima(np.array(mse_control)) + minima_treatment = get_minima(np.array(mse_treatment)) + control = normalized_utr_coverage[:num_control] + treatment = normalized_utr_coverage[num_control:] + ax1.plot(mse_control, "b-") + ax1.plot(mse_treatment, "r-") + [ax2.plot(cov, "b-") for cov in control] + [ax2.plot(cov, "r-") for cov in treatment] + [ax2.axvline(val, color="b", alpha=0.25) for val in minima_control] + ax2.axvline(mse_control.index(min(mse_control)), color="b", alpha=1) + [ax2.axvline(val, color="r", alpha=0.25) for val in minima_treatment] + ax2.axvline(mse_treatment.index(min(mse_treatment)), color="r", alpha=1) + fig.add_subplot(ax1) + fig.add_subplot(ax2) + gs.tight_layout(fig) + fig.savefig(os.path.join(plot_path, "{utr}.svg".format(utr=utr))) +def mse_to_breakpoint(mse_list, normalized_utr_coverage, num_samples): + """ + Take in mse_list with control and treatment mse and return breakpoint and utr abundance for all local minima + in mse_list + """ + mse_control = np.array([mse[0] for mse in mse_list]) + mse_treatment = np.array([mse[1] for mse in mse_list]) + control_breakpoints = list(get_minima(mse_control)) + treatment_breakpoints = list(get_minima(mse_treatment)) + control_abundances = [estimate_abundance(normalized_utr_coverage, bp, num_samples) for bp in control_breakpoints] + treatment_abundances = [estimate_abundance(normalized_utr_coverage, bp, num_samples) for bp in treatment_breakpoints] + return control_breakpoints, control_abundances, treatment_breakpoints, treatment_abundances + +def get_minima(a): + """ + get minima for numpy array a + """ + return np.where(np.r_[True, a[1:] < a[:-1]] & np.r_[a[:-1] < a[1:], True])[0]+1 + def estimate_mse(cov, bp, num_samples, num_control): """ get abundance of long utr vs short utr with breakpoint specifying the position of long and short utr. @@ -315,6 +377,11 @@ mean_short_utr = np.mean(short_utr_vector, 1) return mean_long_utr, mean_short_utr +def stat_test(a,b): + return stats.ttest_ind(a,b) + +def gene_str_to_link(str): + return "<a href=\"{str}.svg\" type=\"image/svg+xml\" target=\"_blank\">{str}</a>".format(str=str) if __name__ == '__main__': args = parse_args()
--- a/dapars.xml Wed Oct 28 06:22:18 2015 -0400 +++ b/dapars.xml Thu Oct 29 15:51:10 2015 -0400 @@ -1,17 +1,16 @@ -<tool id="dapars" name="dapars" version="0.1.5"> +<tool id="dapars" name="dapars" version="0.2.0"> <description>infer de-novo alternative polyadenylation from rna-seq</description> <requirements> <requirement type="package" version="1.9">numpy</requirement> - <requirement type="package" version="2.22">bedtools</requirement> + <requirement type="package" version="0.14">scipy</requirement> + <requirement type="package" version="1.4">matplotlib</requirement> + <requirement type="package" version="0.7.5">tabulate</requirement> </requirements> <stdio> <exit_code range="1:" /> </stdio> <command interpreter="python"><![CDATA[ - dapars.py -c - #for $c in $controls: - "$c" - #end for + dapars.py -c $controls -t #for $t in $treatments: "$t" @@ -24,6 +23,10 @@ #if $make_breakpoint: -b "$breakpoint_bed" #end if + #if $make_html: + -p "$html_file.files_path" + -html "$html_file" + #end if ]]></command> <inputs> <param type="data" name="utr" format="gtf" label="GFF file containing 3prime UTRs" help="featureType of the UTRs @@ -33,15 +36,33 @@ <param type="integer" name="search_start" value="100" optional="False" min="1" label="Search start" help="Search start in nucleotides downstream of the start of the UTR. Necessary to correct for proximal drops in coverage. Select 200 for humans. Genomes with short UTRs may require more prpximal search start points."/> <param type="float" name="coverage_threshold" value="20" optional="False" label="Coverage threshold" help="Skip the analysis of UTRs whose mean coverage is below the Coverage Threshold in any of the alignment files."/> <param name="make_breakpoint" type="boolean" checked="False" label="Output bedfile with breakpoint positions?"/> + <param name="make_html" type="boolean" checked="False" label="Output HTML table with plot for every UTR?"/> </inputs> <outputs> <data name="apa_sites" format="tabular" /> <data name="breakpoint_bed" format="bed6"> <filter>(make_breakpoint == True)</filter> </data> + <data name="html_file" format="html"> + <filter>(make_html == True)</filter> + </data> </outputs> + <tests> + <test> + <param name="utr" value="example.gtf"></param> + <param name="controls" value="c1.bam,c2.bam,c3.bam"></param> + <param name="treatments" value="t1.bam,t2.bam,t3.bam"></param> + <param name="coverage_threshold" value="5"></param> + <param name="search_start" value="1"></param> + <param name="make_breakpoint" value="True"></param> + <param name="make_html" value="True"></param> + <output name="apa_sites" file="dapars.tab"></output> + <output name="breakpoint_bed" file="breakpoint.bed"></output> + </test> + </tests> <help><![CDATA[ - TODO: Fill in help. + DaPars works on RNAseq aligment files to find drops of coverage within UTRs. The coverage is then divided into + proximal and distal, and the ratio is calculated for each sample. ]]></help> <citations> <citation type="doi">10.1038/ncomms6274</citation>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/100_3L.gtf Thu Oct 29 15:51:10 2015 -0400 @@ -0,0 +1,100 @@ +3L FlyBase UTR 26079 26308 . + . gene_id "FBgn0052475"; gene_version "1"; transcript_id "FBtr0329896"; transcript_version "1"; gene_name "mthl8"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mthl8-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 53873 54480 . - . gene_id "FBgn0262679"; gene_version "1"; transcript_id "FBtr0305596"; transcript_version "1"; gene_name "CG43149"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG43149-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 68899 68995 . + . gene_id "FBgn0002564"; gene_version "1"; transcript_id "FBtr0072463"; transcript_version "1"; gene_name "Lsp1gamma"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Lsp1gamma-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 73945 74637 . - . gene_id "FBgn0035097"; gene_version "1"; transcript_id "FBtr0305595"; transcript_version "1"; gene_name "CG13405"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13405-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 97628 97922 . - . gene_id "FBgn0040688"; gene_version "1"; transcript_id "FBtr0072540"; transcript_version "1"; gene_name "CG12483"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG12483-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 143821 145895 . + . gene_id "FBgn0020386"; gene_version "1"; transcript_id "FBtr0072464"; transcript_version "1"; gene_name "Pdk1"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Pdk1-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 148971 149056 . + . gene_id "FBgn0035099"; gene_version "1"; transcript_id "FBtr0273352"; transcript_version "1"; gene_name "CG6845"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG6845-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 149679 149807 . - . gene_id "FBgn0263988"; gene_version "1"; transcript_id "FBtr0072538"; transcript_version "1"; gene_name "Dic61B"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Dic61B-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 173909 174517 . + . gene_id "FBgn0035101"; gene_version "1"; transcript_id "FBtr0299864"; transcript_version "1"; gene_name "p130CAS"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "p130CAS-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 175112 175192 . - . gene_id "FBgn0035102"; gene_version "1"; transcript_id "FBtr0072537"; transcript_version "1"; gene_name "CG7049"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG7049-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 176306 176689 . - . gene_id "FBgn0035103"; gene_version "1"; transcript_id "FBtr0301890"; transcript_version "1"; gene_name "Vdup1"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Vdup1-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 182417 182535 . + . gene_id "FBgn0035104"; gene_version "1"; transcript_id "FBtr0334068"; transcript_version "1"; gene_name "CG13875"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13875-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 182522 183272 . - . gene_id "FBgn0083976"; gene_version "1"; transcript_id "FBtr0110975"; transcript_version "1"; gene_name "CG34140"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34140-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 182522 182653 . - . gene_id "FBgn0083992"; gene_version "1"; transcript_id "FBtr0344094"; transcript_version "1"; gene_name "Mkp"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Mkp-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 185999 186310 . + . gene_id "FBgn0027786"; gene_version "1"; transcript_id "FBtr0305544"; transcript_version "1"; gene_name "Mtch"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Mtch-RE"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 187207 188548 . - . gene_id "FBgn0035106"; gene_version "1"; transcript_id "FBtr0072532"; transcript_version "1"; gene_name "rno"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "rno-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 202032 202152 . + . gene_id "FBgn0035107"; gene_version "1"; transcript_id "FBtr0346614"; transcript_version "1"; gene_name "mri"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mri-RE"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 202098 202213 . - . gene_id "FBgn0025592"; gene_version "1"; transcript_id "FBtr0072530"; transcript_version "1"; gene_name "Gyk"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Gyk-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 205243 205309 . + . gene_id "FBgn0024945"; gene_version "1"; transcript_id "FBtr0072483"; transcript_version "1"; gene_name "NitFhit"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "NitFhit-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 207113 207527 . + . gene_id "FBgn0035109"; gene_version "1"; transcript_id "FBtr0072484"; transcript_version "1"; gene_name "CG13876"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13876-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 207184 208113 . - . gene_id "FBgn0027587"; gene_version "1"; transcript_id "FBtr0344842"; transcript_version "1"; gene_name "CG7028"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG7028-RE"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 211636 211697 . - . gene_id "FBgn0035110"; gene_version "1"; transcript_id "FBtr0072527"; transcript_version "1"; gene_name "thoc7"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "thoc7-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 216110 216479 . + . gene_id "FBgn0035111"; gene_version "1"; transcript_id "FBtr0072487"; transcript_version "1"; gene_name "CG16940"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG16940-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 217893 218058 . + . gene_id "FBgn0063923"; gene_version "1"; transcript_id "FBtr0100851"; transcript_version "1"; gene_name "Kaz1-ORFB"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Kaz1-ORFB-RE"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 217932 218668 . - . gene_id "FBgn0035113"; gene_version "1"; transcript_id "FBtr0072525"; transcript_version "1"; gene_name "pyx"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "pyx-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 218591 218663 . + . gene_id "FBgn0035112"; gene_version "1"; transcript_id "FBtr0300710"; transcript_version "1"; gene_name "CG13877"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13877-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 224758 225605 . + . gene_id "FBgn0053229"; gene_version "1"; transcript_id "FBtr0345592"; transcript_version "1"; gene_name "CG33229"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG33229-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 230971 231015 . - . gene_id "FBgn0262035"; gene_version "1"; transcript_id "FBtr0303931"; transcript_version "1"; gene_name "CG42846"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG42846-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 231681 231833 . - . gene_id "FBgn0085483"; gene_version "1"; transcript_id "FBtr0112760"; transcript_version "1"; gene_name "CG34454"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34454-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 232480 232656 . - . gene_id "FBgn0085482"; gene_version "1"; transcript_id "FBtr0299518"; transcript_version "1"; gene_name "CG34453"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34453-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 233926 234298 . - . gene_id "FBgn0000541"; gene_version "1"; transcript_id "FBtr0301348"; transcript_version "1"; gene_name "E(bx)"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "E(bx)-RE"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 249965 250305 . + . gene_id "FBgn0052476"; gene_version "1"; transcript_id "FBtr0072501"; transcript_version "1"; gene_name "mthl14"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mthl14-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 250229 250509 . - . gene_id "FBgn0024806"; gene_version "1"; transcript_id "FBtr0332109"; transcript_version "1"; gene_name "DIP2"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "DIP2-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 258998 259097 . + . gene_id "FBgn0035120"; gene_version "1"; transcript_id "FBtr0332106"; transcript_version "1"; gene_name "wac"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "wac-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 259043 259502 . - . gene_id "FBgn0035121"; gene_version "1"; transcript_id "FBtr0332107"; transcript_version "1"; gene_name "Tudor-SN"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Tudor-SN-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 265445 265932 . + . gene_id "FBgn0035122"; gene_version "1"; transcript_id "FBtr0310415"; transcript_version "1"; gene_name "mRpL17"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mRpL17-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 269120 270336 . + . gene_id "FBgn0027111"; gene_version "1"; transcript_id "FBtr0310416"; transcript_version "1"; gene_name "miple"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "miple-RC"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 274890 275018 . + . gene_id "FBgn0029002"; gene_version "1"; transcript_id "FBtr0332839"; transcript_version "1"; gene_name "miple2"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "miple2-RG"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 271420 272209 . - . gene_id "FBgn0052845"; gene_version "1"; transcript_id "FBtr0072518"; transcript_version "1"; gene_name "CG32845"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG32845-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 276568 276667 . + . gene_id "FBgn0035124"; gene_version "1"; transcript_id "FBtr0072507"; transcript_version "1"; gene_name "ttm2"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "ttm2-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 303884 305292 . + . gene_id "FBgn0264707"; gene_version "1"; transcript_id "FBtr0333962"; transcript_version "1"; gene_name "RhoGEF3"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "RhoGEF3-RM"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 305393 306358 . - . gene_id "FBgn0004373"; gene_version "1"; transcript_id "FBtr0072516"; transcript_version "1"; gene_name "fwd"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "fwd-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 317622 317789 . + . gene_id "FBgn0085293"; gene_version "1"; transcript_id "FBtr0306849"; transcript_version "1"; gene_name "CG34264"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34264-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 321233 321928 . - . gene_id "FBgn0052344"; gene_version "1"; transcript_id "FBtr0072576"; transcript_version "1"; gene_name "CG32344"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG32344-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 324985 325235 . - . gene_id "FBgn0052343"; gene_version "1"; transcript_id "FBtr0299831"; transcript_version "1"; gene_name "Atac3"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Atac3-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 355528 356050 . + . gene_id "FBgn0261985"; gene_version "1"; transcript_id "FBtr0306563"; transcript_version "1"; gene_name "Ptpmeg"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Ptpmeg-RK"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 329822 330039 . - . gene_id "FBgn0035131"; gene_version "1"; transcript_id "FBtr0072574"; transcript_version "1"; gene_name "mthl9"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mthl9-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 332801 333474 . - . gene_id "FBgn0035132"; gene_version "1"; transcript_id "FBtr0333904"; transcript_version "1"; gene_name "mthl10"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mthl10-RG"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 343378 343543 . - . gene_id "FBgn0023000"; gene_version "1"; transcript_id "FBtr0072571"; transcript_version "1"; gene_name "mth"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mth-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 357460 357664 . + . gene_id "FBgn0035134"; gene_version "1"; transcript_id "FBtr0072546"; transcript_version "1"; gene_name "CG1231"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG1231-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 357851 358496 . - . gene_id "FBgn0265574"; gene_version "1"; transcript_id "FBtr0072569"; transcript_version "1"; gene_name "Cdc5"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Cdc5-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 358400 358486 . + . gene_id "FBgn0040291"; gene_version "1"; transcript_id "FBtr0072547"; transcript_version "1"; gene_name "Roc1b"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Roc1b-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 365247 365681 . + . gene_id "FBgn0035137"; gene_version "1"; transcript_id "FBtr0072549"; transcript_version "1"; gene_name "CG1233"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG1233-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 365664 365830 . - . gene_id "FBgn0035138"; gene_version "1"; transcript_id "FBtr0072568"; transcript_version "1"; gene_name "CG13884"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13884-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 366524 367389 . - . gene_id "FBgn0262139"; gene_version "1"; transcript_id "FBtr0330146"; transcript_version "1"; gene_name "trh"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "trh-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 433146 433229 . - . gene_id "FBgn0035139"; gene_version "1"; transcript_id "FBtr0072566"; transcript_version "1"; gene_name "CG13891"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13891-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 434083 435117 . - . gene_id "FBgn0001316"; gene_version "1"; transcript_id "FBtr0110818"; transcript_version "1"; gene_name "klar"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "klar-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 500014 500076 . - . gene_id "FBgn0085296"; gene_version "1"; transcript_id "FBtr0112462"; transcript_version "1"; gene_name "CG34267"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34267-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 501340 501400 . + . gene_id "FBgn0085297"; gene_version "1"; transcript_id "FBtr0112463"; transcript_version "1"; gene_name "CG34268"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34268-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 528995 528996 . + . gene_id "FBgn0085298"; gene_version "1"; transcript_id "FBtr0112464"; transcript_version "1"; gene_name "CG34269"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34269-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 542063 542147 . + . gene_id "FBgn0035140"; gene_version "1"; transcript_id "FBtr0072550"; transcript_version "1"; gene_name "CG17180"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG17180-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 542122 542229 . - . gene_id "FBgn0035141"; gene_version "1"; transcript_id "FBtr0072564"; transcript_version "1"; gene_name "Cypl"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Cypl-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 574202 581099 . + . gene_id "FBgn0035142"; gene_version "1"; transcript_id "FBtr0072551"; transcript_version "1"; gene_name "hipk"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "hipk-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 582622 582810 . + . gene_id "FBgn0035143"; gene_version "1"; transcript_id "FBtr0072553"; transcript_version "1"; gene_name "Ppm1"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Ppm1-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 582824 583983 . - . gene_id "FBgn0035144"; gene_version "1"; transcript_id "FBtr0072563"; transcript_version "1"; gene_name "Kah"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Kah-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 598575 599239 . + . gene_id "FBgn0035145"; gene_version "1"; transcript_id "FBtr0344912"; transcript_version "1"; gene_name "MED14"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "MED14-RC"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 599094 599483 . - . gene_id "FBgn0035146"; gene_version "1"; transcript_id "FBtr0072562"; transcript_version "1"; gene_name "CG13893"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13893-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 605165 605195 . - . gene_id "FBgn0016715"; gene_version "1"; transcript_id "FBtr0072561"; transcript_version "1"; gene_name "Reg-2"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Reg-2-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 620695 621645 . - . gene_id "FBgn0263042"; gene_version "1"; transcript_id "FBtr0306919"; transcript_version "1"; gene_name "CG43337"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG43337-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 651106 651311 . + . gene_id "FBgn0035147"; gene_version "1"; transcript_id "FBtr0072556"; transcript_version "1"; gene_name "Gale"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Gale-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 651542 652087 . - . gene_id "FBgn0035148"; gene_version "1"; transcript_id "FBtr0072559"; transcript_version "1"; gene_name "CG3402"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG3402-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 652987 653053 . - . gene_id "FBgn0035149"; gene_version "1"; transcript_id "FBtr0072558"; transcript_version "1"; gene_name "MED30"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "MED30-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 657795 657971 . + . gene_id "FBgn0035150"; gene_version "1"; transcript_id "FBtr0072557"; transcript_version "1"; gene_name "Rev1"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Rev1-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 677879 677999 . - . gene_id "FBgn0035151"; gene_version "1"; transcript_id "FBtr0072612"; transcript_version "1"; gene_name "CG17129"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG17129-RC"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 680436 680857 . - . gene_id "FBgn0035152"; gene_version "1"; transcript_id "FBtr0333379"; transcript_version "1"; gene_name "CG3386"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG3386-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 682885 683065 . - . gene_id "FBgn0035153"; gene_version "1"; transcript_id "FBtr0072610"; transcript_version "1"; gene_name "ebd1"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "ebd1-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 685942 685990 . - . gene_id "FBgn0035154"; gene_version "1"; transcript_id "FBtr0072609"; transcript_version "1"; gene_name "CG3344"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG3344-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 688523 688584 . - . gene_id "FBgn0052483"; gene_version "1"; transcript_id "FBtr0072608"; transcript_version "1"; gene_name "CG32483"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG32483-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 691438 691951 . + . gene_id "FBgn0035155"; gene_version "1"; transcript_id "FBtr0330078"; transcript_version "1"; gene_name "RabX6"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "RabX6-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 691486 692021 . - . gene_id "FBgn0260862"; gene_version "1"; transcript_id "FBtr0072607"; transcript_version "1"; gene_name "Vti1"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Vti1-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 693083 693282 . - . gene_id "FBgn0035157"; gene_version "1"; transcript_id "FBtr0072606"; transcript_version "1"; gene_name "CG13894"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13894-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 705948 706134 . - . gene_id "FBgn0035158"; gene_version "1"; transcript_id "FBtr0072605"; transcript_version "1"; gene_name "CG13895"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13895-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 708601 708709 . - . gene_id "FBgn0025676"; gene_version "1"; transcript_id "FBtr0072604"; transcript_version "1"; gene_name "CkIIalpha-i3"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CkIIalpha-i3-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 710820 710941 . - . gene_id "FBgn0035159"; gene_version "1"; transcript_id "FBtr0331770"; transcript_version "1"; gene_name "CG13896"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13896-RC"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 737680 737863 . - . gene_id "FBgn0035160"; gene_version "1"; transcript_id "FBtr0072601"; transcript_version "1"; gene_name "hng3"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "hng3-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 752308 753492 . + . gene_id "FBgn0000575"; gene_version "1"; transcript_id "FBtr0072578"; transcript_version "1"; gene_name "emc"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "emc-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 803427 803517 . - . gene_id "FBgn0035161"; gene_version "1"; transcript_id "FBtr0072599"; transcript_version "1"; gene_name "CG13898"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13898-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 826720 827877 . - . gene_id "FBgn0035162"; gene_version "1"; transcript_id "FBtr0331404"; transcript_version "1"; gene_name "CG13900"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13900-RC"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 835465 835515 . + . gene_id "FBgn0260755"; gene_version "1"; transcript_id "FBtr0301252"; transcript_version "1"; gene_name "CG42553"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG42553-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 836554 836770 . + . gene_id "FBgn0260756"; gene_version "1"; transcript_id "FBtr0301253"; transcript_version "1"; gene_name "CG42554"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG42554-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 836667 836782 . - . gene_id "FBgn0035164"; gene_version "1"; transcript_id "FBtr0331403"; transcript_version "1"; gene_name "CG13901"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13901-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 839433 839931 . + . gene_id "FBgn0035165"; gene_version "1"; transcript_id "FBtr0345588"; transcript_version "1"; gene_name "CG13887"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13887-RE"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 839980 840057 . - . gene_id "FBgn0035166"; gene_version "1"; transcript_id "FBtr0331402"; transcript_version "1"; gene_name "CG13902"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13902-RC"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 843412 843701 . + . gene_id "FBgn0035167"; gene_version "1"; transcript_id "FBtr0072585"; transcript_version "1"; gene_name "Gr61a"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Gr61a-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 851017 851080 . + . gene_id "FBgn0035168"; gene_version "1"; transcript_id "FBtr0332752"; transcript_version "1"; gene_name "CG13889"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13889-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 852349 852673 . + . gene_id "FBgn0035169"; gene_version "1"; transcript_id "FBtr0072587"; transcript_version "1"; gene_name "CG13890"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13890-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 852720 853280 . - . gene_id "FBgn0035170"; gene_version "1"; transcript_id "FBtr0072594"; transcript_version "1"; gene_name "dpr20"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "dpr20-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 870597 871701 . + . gene_id "FBgn0035171"; gene_version "1"; transcript_id "FBtr0300345"; transcript_version "1"; gene_name "CG12502"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG12502-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 892176 895313 . + . gene_id "FBgn0052479"; gene_version "1"; transcript_id "FBtr0072589"; transcript_version "1"; gene_name "CG32479"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG32479-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 877291 878452 . - . gene_id "FBgn0035173"; gene_version "1"; transcript_id "FBtr0072593"; transcript_version "1"; gene_name "CG13907"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13907-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/breakpoint.bed Thu Oct 29 15:51:10 2015 -0400 @@ -0,0 +1,12 @@ +3L 3371595 3371596 FBgn0027616_control_breakpoint 0 - +3L 3371497 3371498 FBgn0027616_control_breakpoint 0 - +3L 3371403 3371404 FBgn0027616_control_breakpoint 0 - +3L 3371303 3371304 FBgn0027616_control_breakpoint 0 - +3L 3371189 3371190 FBgn0027616_control_breakpoint 0 - +3L 3370936 3370937 FBgn0027616_control_breakpoint 0 - +3L 3371400 3371401 FBgn0027616_treatment_breakpoint 0 - +3L 3371361 3371362 FBgn0027616_treatment_breakpoint 0 - +3L 3371337 3371338 FBgn0027616_treatment_breakpoint 0 - +3L 3371318 3371319 FBgn0027616_treatment_breakpoint 0 - +3L 3371310 3371311 FBgn0027616_treatment_breakpoint 0 - +3L 3371195 3371196 FBgn0027616_treatment_breakpoint 0 -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/dapars.tab Thu Oct 29 15:51:10 2015 -0400 @@ -0,0 +1,13 @@ +#chr start end strand gene breakpoint breakpoint_type control_mean_percent treatment_mean_percent control_0_coverage_long control_1_coverage_long control_2_coverage_long treatment_0_coverage_long treatment_1_coverage_long control_0_coverage_short control_1_coverage_short control_2_coverage_short treatment_0_coverage_short treatment_1_coverage_short control_0_percent_long control_1_percent_long control_2_percent_long treatment_0_percent_long treatment_1_percent_long +3L 3370451 3371600 - FBgn0027616 3371595 control_breakpoint 0.60624805294465911 0.64703056230715239 24.737291739777078 24.785958478598335 24.72955959248966 24.77557161847185 24.75965846300016 19.115611620795107 10.209598416504893 20.530594574391877 12.110393819651023 15.022501270971024 0.56409701169337378 0.70826015293577238 0.54638699420483106 0.6716801722333523 0.62238095238095237 +3L 3370451 3371600 - FBgn0027616 3371497 control_breakpoint 0.53167446986389089 0.50722931382807834 25.17339084590076 24.83086675847875 25.108450107102474 25.34486540101996 24.121432697028514 21.00523960096197 23.72215396429967 21.520352063080335 19.645096770550406 29.349412490560265 0.54513073692966096 0.51141754702049802 0.53847512564151367 0.56334489245326924 0.45111373520288744 +3L 3370451 3371600 - FBgn0027616 3371403 control_breakpoint 0.42175237278876598 0.43968252706676059 21.43265545361876 23.24127468492176 23.840243530439217 24.668855460430212 21.796132549444817 36.722792421490546 30.085068034525737 27.88682196696674 24.845774122380487 35.388813029194914 0.36854080291232694 0.43583102646276056 0.46088528899121045 0.49821347081215273 0.38115158332136839 +3L 3370451 3371600 - FBgn0027616 3371303 control_breakpoint 0.39108916961365203 0.32605616375337437 18.144907864186806 22.269351498851368 22.18854207447699 20.899986765802232 15.578958905406301 38.471119193978524 29.819508472106396 29.989017803369812 32.691946952542786 43.853564316268944 0.32049065974808444 0.42752618335797105 0.42525066573490061 0.38998381502043045 0.2621285124863183 +3L 3370451 3371600 - FBgn0027616 3371189 control_breakpoint 0.36108879385216808 0.31348344456581922 16.42870568302662 19.697783365598664 19.006203838429983 18.29248418141787 13.92595176003268 34.958610236835646 30.910044445037187 31.76652614656724 32.650427132988575 38.05812786896196 0.31970351805583574 0.38922404334965222 0.37433882015101616 0.35907810742345286 0.26788878170818553 +3L 3370451 3371600 - FBgn0027616 3370936 control_breakpoint 0.28090300350568276 0.29742764206502253 11.46529404386468 11.683852531070386 11.717151025282648 13.766111724681437 11.199166362163194 29.811874585497957 29.72761107235841 29.714773098686205 28.924812347110773 29.914478029045522 0.27776357789494338 0.28214053584193971 0.28280489678016518 0.32245991446639399 0.27239536966365108 +3L 3370451 3371600 - FBgn0027616 3371400 treatment_breakpoint 0.42145916899796149 0.43851061373185951 21.401278818382607 23.215540467097224 23.801920635167825 24.63708242685255 21.731267864203808 36.60639625382263 30.075054318450007 27.96408571339583 24.957503263330818 35.418435688866296 0.36893874460135312 0.43564048328830879 0.45979827910422255 0.49676959861626907 0.38025162884745001 +3L 3370451 3371600 - FBgn0027616 3371361 treatment_breakpoint 0.41022686556727411 0.38864357642733532 20.50124466178987 22.696466576127253 23.216860024806003 23.358299973732187 19.390276970900924 36.68975893439792 30.434754232875903 28.951959510908406 28.548944426311216 39.85531959337436 0.3584697482587354 0.42717758467694211 0.44503326376614494 0.45000077048421888 0.32728638237045177 +3L 3370451 3371600 - FBgn0027616 3371337 treatment_breakpoint 0.40286144007500929 0.35591939407775497 19.63139192604695 22.507577656074748 22.83332278545971 22.312985873192638 17.50778335689563 37.385458192346604 30.200461900984376 29.386718364840192 30.686571411834358 42.69044233657633 0.34430860149732984 0.42702361623048329 0.4372521024972148 0.42100325014405965 0.29083553801145029 +3L 3370451 3371600 - FBgn0027616 3371318 treatment_breakpoint 0.39318546325416648 0.33313189900140355 18.647118432026687 22.269249189393072 22.414766533494532 21.35928000871836 16.143664556084484 38.416093760166575 30.221344032507872 29.892123941952075 32.28006863275775 44.079936217085454 0.32678003420525586 0.42425219115453905 0.42852416440270436 0.39820170359418766 0.26806209440861944 +3L 3370451 3371600 - FBgn0027616 3371310 treatment_breakpoint 0.39141424351796555 0.32838393490616724 18.343263676520557 22.241652724683444 22.284741160633022 21.08265781222581 15.796554450012511 38.530840978593275 30.061926839480794 29.968320926900674 32.579743373440486 44.06334722928318 0.32252399906345114 0.42524150182490184 0.42647722966554374 0.3928757816721995 0.26389208814013498 +3L 3370451 3371600 - FBgn0027616 3371195 treatment_breakpoint 0.36099881812575446 0.31267620725828044 16.37902538523203 19.824462944669694 19.116066163474617 18.379188290373293 13.918532544261875 35.29630105334693 30.91506563825953 31.81586648348289 32.752884025821615 38.4250759005065 0.31696026932118287 0.39071042830578101 0.37532575675029961 0.35944540203100878 0.2659070124855521
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/example.gtf Thu Oct 29 15:51:10 2015 -0400 @@ -0,0 +1,1 @@ +3L FlyBase UTR 3370451 3371600 . - . gene_id "FBgn0027616"; gene_version "1"; transcript_id "FBtr0073078"; transcript_version "1"; gene_name "YT521-B"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "YT521-B-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
--- a/tool_dependencies.xml Wed Oct 28 06:22:18 2015 -0400 +++ b/tool_dependencies.xml Thu Oct 29 15:51:10 2015 -0400 @@ -6,4 +6,13 @@ <package name="numpy" version="1.9"> <repository changeset_revision="816d3480b0b1" name="package_numpy_1_9" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> </package> + <package name="scipy" version="0.14"> + <repository changeset_revision="01f0415771cf" name="package_scipy_0_14" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + </package> + <package name="matplotlib" version="1.4"> + <repository changeset_revision="fb53242ebfbe" name="package_matplotlib_1_4" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + </package> + <package name="tabulate" version="0.7.5"> + <repository changeset_revision="78951742defa" name="package_tabulate_0_7_5" owner="mvdbeek" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + </package> </tool_dependency>