# HG changeset patch # User mvdbeek # Date 1446148270 14400 # Node ID a5d8b08af08984885ba79ef988c914377d5985ff # Parent 73b932244237182286674051d50c246f8b1a72be planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42 diff -r 73b932244237 -r a5d8b08af089 dapars.py --- a/dapars.py Wed Oct 28 06:22:18 2015 -0400 +++ b/dapars.py Thu Oct 29 15:51:10 2015 -0400 @@ -2,19 +2,27 @@ import os import csv import numpy as np +from scipy import stats from collections import OrderedDict, namedtuple import filter_utr import subprocess from multiprocessing import Pool import warnings +import matplotlib.pyplot as plt +import matplotlib.gridspec as gridspec +from tabulate import tabulate +def directory_path(str): + if os.path.exists(str): + return str + else: + os.mkdir(str) + return str def parse_args(): """ Returns floating point values except for input files. My initial approach will not filter anything. (FDR. fold_change, PDUI, Num_least ...) - :param argv: - :return: """ parser = argparse.ArgumentParser(prog='DaPars', description='Determines the usage of proximal polyA usage') parser.add_argument("-c", "--control_alignments", nargs="+", required=True, @@ -33,7 +41,11 @@ help="minimum coverage in each aligment to be considered for determining breakpoints") parser.add_argument("-b", "--breakpoint_bed", required=False, type=argparse.FileType('w'), help="Write bedfile with coordinates of breakpoint positions to supplied path.") - parser.add_argument("-v", "--version", action='version', version='%(prog)s 0.1.5') + parser.add_argument("-v", "--version", action='version', version='%(prog)s 0.2.0') + parser.add_argument("-p", "--plot_path", default=None, required=False, type=directory_path, + help="If plot_path is specified will write a coverage plot for every UTR in that directory.") + parser.add_argument("-html", "--html_file", default=None, required=False, type=argparse.FileType('w'), + help="Write an html file to the specified location. Only to be used within a galaxy wrapper") return parser.parse_args() @@ -48,6 +60,8 @@ self.n_cpus = args.cpu self.search_start = args.search_start self.coverage_threshold = args.coverage_threshold + self.plot_path = args.plot_path + self.html_file = args.html_file self.utr = args.utr_bed_file self.gtf_fields = filter_utr.get_gtf_fields() self.result_file = args.output_file @@ -67,7 +81,8 @@ if args.breakpoint_bed: self.bed_output = args.breakpoint_bed self.write_bed() - + if self.plot_path: + self.write_html() def dump_utr_dict_to_bedfile(self): w = csv.writer(open("tmp_bedfile.bed", "w"), delimiter="\t") @@ -110,6 +125,10 @@ return coverage_dict def get_utr_dict(self, shift): + """ + The utr end is extended by UTR length * shift, to discover novel distal polyA sites. + Set to 0 to disable. + """ utr_dict = OrderedDict() for line in self.utr: if not line.startswith("#"): @@ -139,11 +158,11 @@ utr_coverage.append(np.sum(vector)) coverage_per_alignment.append(utr_coverage) coverages = np.array([ sum(x) for x in zip(*coverage_per_alignment) ]) - coverage_weights = coverages / np.mean(coverages) # TODO: proabably median is better suited? + coverage_weights = coverages / np.mean(coverages) # TODO: proabably median is better suited? Or even no normalization! return coverage_weights def get_result_tuple(self): - static_desc = ["chr", "start", "end", "strand", "gene", "breakpoint", + static_desc = ["chr", "start", "end", "strand", "gene", "t_stat", "p_value", "breakpoint", "breakpoint_type", "control_mean_percent", "treatment_mean_percent" ] samples_desc = [] for statistic in ["coverage_long", "coverage_short", "percent_long"]: @@ -162,18 +181,22 @@ "num_treatment":len(self.treatment_alignments), "result_d":result_d} pool = Pool(self.n_cpus) - tasks = [ (self.utr_coverages[utr], utr, utr_d, self.result_tuple._fields, self.coverage_weights, self.num_samples, - len(self.control_alignments), len(self.treatment_alignments), self.search_start, - self.coverage_threshold) for utr, utr_d in self.utr_dict.iteritems() ] + tasks = [ (self.utr_coverages[utr], self.plot_path, utr, utr_d, self.coverage_weights, len(self.control_alignments), + len(self.treatment_alignments), self.search_start, self.coverage_threshold) \ + for utr, utr_d in self.utr_dict.iteritems() ] processed_tasks = [ pool.apply_async(calculate_all_utr, t) for t in tasks] - result = [res.get() for res in processed_tasks] - for res_control, res_treatment in result: - if isinstance(res_control, dict): - t = self.result_tuple(**res_control) - result_d[res_control["gene"]+"_bp_control"] = t - if isinstance(res_treatment, dict): - t = self.result_tuple(**res_treatment) - result_d[res_treatment["gene"]+"_bp_treatment"] = t + result_list = [res.get() for res in processed_tasks] + for res_control, res_treatment in result_list: + if not res_control: + continue + for i, result in enumerate(res_control): + if isinstance(result, dict): + t = self.result_tuple(**result) + result_d[result["gene"]+"_bp_control_{i}".format(i=i)] = t + for i, result in enumerate(res_treatment): + if isinstance(result, dict): + t = self.result_tuple(**result) + result_d[result["gene"]+"_bp_treatment_{i}".format(i=i)] = t return result_d def write_results(self): @@ -183,51 +206,47 @@ w.writerow(header) # field header w.writerows( self.result_d.values()) + def write_html(self): + output_lines = [(gene_str_to_link(result.gene), result.breakpoint, result.breakpoint_type, result.p_value ) for result in self.result_d.itervalues()] + if self.html_file: + self.html_file.write(tabulate(output_lines, headers=["gene", "breakpoint", "breakpoint_type", "p_value"], tablefmt="html")) + else: + with open(os.path.join(self.plot_path, "index.html"), "w") as html_file: + html_file.write(tabulate(output_lines, headers=["gene", "breakpoint", "breakpoint_type", "p_value"], tablefmt="html")) + def write_bed(self): w = csv.writer(self.bed_output, delimiter='\t') bed = [(result.chr, result.breakpoint, int(result.breakpoint)+1, result.gene+"_"+result.breakpoint_type, 0, result.strand) for result in self.result_d.itervalues()] w.writerows(bed) -def calculate_all_utr(utr_coverage, utr, utr_d, result_tuple_fields, coverage_weights, num_samples, num_control, - num_treatment, search_start, coverage_threshold): - res_control = dict(zip(result_tuple_fields, result_tuple_fields)) - res_treatment = res_control.copy() +def calculate_all_utr(utr_coverage, plot_path, utr, utr_d, coverage_weights, num_control, num_treatment, search_start, coverage_threshold): if utr_d["strand"] == "+": is_reverse = False else: is_reverse = True - control_breakpoint, \ - control_abundance, \ - treatment_breakpoint, \ - treatment_abundance = optimize_breakpoint(utr_coverage, utr_d["new_start"], utr_d["new_end"], coverage_weights, - search_start, coverage_threshold, num_control) - if control_breakpoint: - breakpoint_to_result(res_control, utr, utr_d, control_breakpoint, "control_breakpoint", control_abundance, is_reverse, num_samples, + control_breakpoints, control_abundances, treatment_breakpoints, treatment_abundances = \ + optimize_breakpoint(plot_path, utr, utr_coverage, utr_d["new_start"], utr_d["new_end"], coverage_weights, search_start, coverage_threshold, num_control) + res_control = breakpoints_to_result(utr, utr_d, control_breakpoints, "control_breakpoint", control_abundances, is_reverse, num_control, num_treatment) - if treatment_breakpoint: - breakpoint_to_result(res_treatment, utr, utr_d, treatment_breakpoint, "treatment_breakpoint", treatment_abundance, is_reverse, - num_samples, num_control, num_treatment) - if res_control == dict(zip(result_tuple_fields, result_tuple_fields)): - res_control = False - if res_treatment == dict(zip(result_tuple_fields, result_tuple_fields)): - res_treatment == False + res_treatment = breakpoints_to_result(utr, utr_d, treatment_breakpoints, "treatment_breakpoint", treatment_abundances, is_reverse, + num_control, num_treatment) return res_control, res_treatment -def breakpoint_to_result(res, utr, utr_d, breakpoint, breakpoint_type, - abundances, is_reverse, num_samples, num_control, num_treatment): +def breakpoints_to_result(utr, utr_d, breakpoints, breakpoint_type, + abundances, is_reverse, num_control, num_treatment): """ Takes in a result dictionary res and fills the necessary fields """ - long_coverage_vector = abundances[0] - short_coverage_vector = abundances[1] - num_non_zero = sum((np.array(long_coverage_vector) + np.array(short_coverage_vector)) > 0) # TODO: This introduces bias - if num_non_zero == num_samples: - percentage_long = [] - for i in range(num_samples): - ratio = float(long_coverage_vector[i]) / (long_coverage_vector[i] + short_coverage_vector[i]) # long 3'UTR percentage - percentage_long.append(ratio) + if not breakpoints: + return False + result = [] + for breakpoint, abundance in zip(breakpoints, abundances): + res = {} + long_coverage_vector = abundance[0] + short_coverage_vector = abundance[1] + percentage_long = long_coverage_vector/(long_coverage_vector+short_coverage_vector) for i in range(num_control): res["control_{i}_coverage_long".format(i=i)] = float(long_coverage_vector[i]) res["control_{i}_coverage_short".format(i=i)] = float(short_coverage_vector[i]) @@ -237,6 +256,7 @@ res["treatment_{i}_coverage_long".format(i=k)] = float(long_coverage_vector[i]) res["treatment_{i}_coverage_short".format(i=k)] = float(short_coverage_vector[i]) res["treatment_{i}_percent_long".format(i=k)] = percentage_long[i] + res["t_stat"], res["p_value"] = stat_test(percentage_long[:num_control], percentage_long[num_control:]) control_mean_percent = np.mean(np.array(percentage_long[:num_control])) treatment_mean_percent = np.mean(np.array(percentage_long[num_control:])) res["chr"] = utr_d["chr"] @@ -252,43 +272,85 @@ res["control_mean_percent"] = control_mean_percent res["treatment_mean_percent"] = treatment_mean_percent res["gene"] = utr + result.append(res) + return result -def optimize_breakpoint(utr_coverage, UTR_start, UTR_end, coverage_weigths, search_start, coverage_threshold, num_control): +def optimize_breakpoint(plot_path, utr, utr_coverage, UTR_start, UTR_end, coverage_weigths, search_start, coverage_threshold, num_control): """ We are searching for a point within the UTR that minimizes the mean squared error, if the coverage vector was divided at that point. utr_coverage is a list with items corresponding to numpy arrays of coverage for a sample. """ - search_point_end = int(abs((UTR_end - UTR_start)) * 0.1) # TODO: This is 10% of total UTR end. Why? num_samples = len(utr_coverage) - normalized_utr_coverage = np.array([coverage/ coverage_weigths[i] for i, coverage in enumerate( utr_coverage.values() )]) + normalized_utr_coverage = np.array(utr_coverage.values())/np.expand_dims(coverage_weigths, axis=1) start_coverage = [np.mean(coverage[0:99]) for coverage in utr_coverage.values()] # filters threshold on mean coverage over first 100 nt is_above_threshold = sum(np.array(start_coverage) >= coverage_threshold) >= num_samples # This filters on the raw threshold. Why? is_above_length = UTR_end - UTR_start >= 150 if (is_above_threshold) and (is_above_length): - search_end = UTR_end - UTR_start - search_point_end + search_end = UTR_end - UTR_start breakpoints = range(search_start, search_end + 1) mse_list = [ estimate_mse(normalized_utr_coverage, bp, num_samples, num_control) for bp in breakpoints ] + mse_list = [mse_list[0] for i in xrange(search_start)] + mse_list + if plot_path: + plot_coverage_breakpoint(plot_path, utr, mse_list, normalized_utr_coverage, num_control) if len(mse_list) > 0: - return mse_to_breakpoint(mse_list, normalized_utr_coverage, breakpoints, num_samples) + return mse_to_breakpoint(mse_list, normalized_utr_coverage, num_samples) return False, False, False, False -def mse_to_breakpoint(mse_list, normalized_utr_coverage, breakpoints, num_samples): +def plot_coverage_breakpoint(plot_path, utr, mse_list, normalized_utr_coverage, num_control): """ - Take in mse_list with control and treatment mse and return breakpoint and utr abundance + """ - mse_control = [mse[0] for mse in mse_list] - mse_treatment = [mse[1] for mse in mse_list] - control_index = mse_control.index(min(mse_control)) - treatment_index = mse_treatment.index(min(mse_treatment)) - control_breakpoint = breakpoints[control_index] - treatment_breakpoint = breakpoints[treatment_index] - control_abundance = estimate_abundance(normalized_utr_coverage, control_breakpoint, num_samples) - treatment_abundance = estimate_abundance(normalized_utr_coverage, treatment_breakpoint, num_samples) - return control_breakpoint, control_abundance, treatment_breakpoint, treatment_abundance + fig = plt.figure(figsize=(8, 8)) + gs = gridspec.GridSpec(2, 1) + ax1 = plt.subplot(gs[0, :]) + ax2 = plt.subplot(gs[1, :]) + ax1.set_title("mean-squared error plot") + ax1.set_ylabel("mean-squared error") + ax1.set_xlabel("nt after UTR start") + ax2.set_title("coverage plot") + ax2.set_xlabel("nt after UTR start") + ax2.set_ylabel("normalized nucleotide coverage") + mse_control = [ condition[0] for condition in mse_list] + mse_treatment = [ condition[1] for condition in mse_list] + minima_control = get_minima(np.array(mse_control)) + minima_treatment = get_minima(np.array(mse_treatment)) + control = normalized_utr_coverage[:num_control] + treatment = normalized_utr_coverage[num_control:] + ax1.plot(mse_control, "b-") + ax1.plot(mse_treatment, "r-") + [ax2.plot(cov, "b-") for cov in control] + [ax2.plot(cov, "r-") for cov in treatment] + [ax2.axvline(val, color="b", alpha=0.25) for val in minima_control] + ax2.axvline(mse_control.index(min(mse_control)), color="b", alpha=1) + [ax2.axvline(val, color="r", alpha=0.25) for val in minima_treatment] + ax2.axvline(mse_treatment.index(min(mse_treatment)), color="r", alpha=1) + fig.add_subplot(ax1) + fig.add_subplot(ax2) + gs.tight_layout(fig) + fig.savefig(os.path.join(plot_path, "{utr}.svg".format(utr=utr))) +def mse_to_breakpoint(mse_list, normalized_utr_coverage, num_samples): + """ + Take in mse_list with control and treatment mse and return breakpoint and utr abundance for all local minima + in mse_list + """ + mse_control = np.array([mse[0] for mse in mse_list]) + mse_treatment = np.array([mse[1] for mse in mse_list]) + control_breakpoints = list(get_minima(mse_control)) + treatment_breakpoints = list(get_minima(mse_treatment)) + control_abundances = [estimate_abundance(normalized_utr_coverage, bp, num_samples) for bp in control_breakpoints] + treatment_abundances = [estimate_abundance(normalized_utr_coverage, bp, num_samples) for bp in treatment_breakpoints] + return control_breakpoints, control_abundances, treatment_breakpoints, treatment_abundances + +def get_minima(a): + """ + get minima for numpy array a + """ + return np.where(np.r_[True, a[1:] < a[:-1]] & np.r_[a[:-1] < a[1:], True])[0]+1 + def estimate_mse(cov, bp, num_samples, num_control): """ get abundance of long utr vs short utr with breakpoint specifying the position of long and short utr. @@ -315,6 +377,11 @@ mean_short_utr = np.mean(short_utr_vector, 1) return mean_long_utr, mean_short_utr +def stat_test(a,b): + return stats.ttest_ind(a,b) + +def gene_str_to_link(str): + return "{str}".format(str=str) if __name__ == '__main__': args = parse_args() diff -r 73b932244237 -r a5d8b08af089 dapars.xml --- a/dapars.xml Wed Oct 28 06:22:18 2015 -0400 +++ b/dapars.xml Thu Oct 29 15:51:10 2015 -0400 @@ -1,17 +1,16 @@ - + infer de-novo alternative polyadenylation from rna-seq numpy - bedtools + scipy + matplotlib + tabulate + (make_breakpoint == True) + + (make_html == True) + + + + + + + + + + + + + + 10.1038/ncomms6274 diff -r 73b932244237 -r a5d8b08af089 test-data/100_3L.gtf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/100_3L.gtf Thu Oct 29 15:51:10 2015 -0400 @@ -0,0 +1,100 @@ +3L FlyBase UTR 26079 26308 . + . gene_id "FBgn0052475"; gene_version "1"; transcript_id "FBtr0329896"; transcript_version "1"; gene_name "mthl8"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mthl8-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 53873 54480 . - . gene_id "FBgn0262679"; gene_version "1"; transcript_id "FBtr0305596"; transcript_version "1"; gene_name "CG43149"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG43149-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 68899 68995 . + . gene_id "FBgn0002564"; gene_version "1"; transcript_id "FBtr0072463"; transcript_version "1"; gene_name "Lsp1gamma"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Lsp1gamma-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 73945 74637 . - . gene_id "FBgn0035097"; gene_version "1"; transcript_id "FBtr0305595"; transcript_version "1"; gene_name "CG13405"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13405-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 97628 97922 . - . gene_id "FBgn0040688"; gene_version "1"; transcript_id "FBtr0072540"; transcript_version "1"; gene_name "CG12483"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG12483-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 143821 145895 . + . gene_id "FBgn0020386"; gene_version "1"; transcript_id "FBtr0072464"; transcript_version "1"; gene_name "Pdk1"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Pdk1-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 148971 149056 . + . gene_id "FBgn0035099"; gene_version "1"; transcript_id "FBtr0273352"; transcript_version "1"; gene_name "CG6845"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG6845-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 149679 149807 . - . gene_id "FBgn0263988"; gene_version "1"; transcript_id "FBtr0072538"; transcript_version "1"; gene_name "Dic61B"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Dic61B-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 173909 174517 . + . gene_id "FBgn0035101"; gene_version "1"; transcript_id "FBtr0299864"; transcript_version "1"; gene_name "p130CAS"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "p130CAS-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 175112 175192 . - . gene_id "FBgn0035102"; gene_version "1"; transcript_id "FBtr0072537"; transcript_version "1"; gene_name "CG7049"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG7049-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 176306 176689 . - . gene_id "FBgn0035103"; gene_version "1"; transcript_id "FBtr0301890"; transcript_version "1"; gene_name "Vdup1"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Vdup1-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 182417 182535 . + . gene_id "FBgn0035104"; gene_version "1"; transcript_id "FBtr0334068"; transcript_version "1"; gene_name "CG13875"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13875-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 182522 183272 . - . gene_id "FBgn0083976"; gene_version "1"; transcript_id "FBtr0110975"; transcript_version "1"; gene_name "CG34140"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34140-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 182522 182653 . - . gene_id "FBgn0083992"; gene_version "1"; transcript_id "FBtr0344094"; transcript_version "1"; gene_name "Mkp"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Mkp-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 185999 186310 . + . gene_id "FBgn0027786"; gene_version "1"; transcript_id "FBtr0305544"; transcript_version "1"; gene_name "Mtch"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Mtch-RE"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 187207 188548 . - . gene_id "FBgn0035106"; gene_version "1"; transcript_id "FBtr0072532"; transcript_version "1"; gene_name "rno"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "rno-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 202032 202152 . + . gene_id "FBgn0035107"; gene_version "1"; transcript_id "FBtr0346614"; transcript_version "1"; gene_name "mri"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mri-RE"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 202098 202213 . - . gene_id "FBgn0025592"; gene_version "1"; transcript_id "FBtr0072530"; transcript_version "1"; gene_name "Gyk"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Gyk-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 205243 205309 . + . gene_id "FBgn0024945"; gene_version "1"; transcript_id "FBtr0072483"; transcript_version "1"; gene_name "NitFhit"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "NitFhit-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 207113 207527 . + . gene_id "FBgn0035109"; gene_version "1"; transcript_id "FBtr0072484"; transcript_version "1"; gene_name "CG13876"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13876-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 207184 208113 . - . gene_id "FBgn0027587"; gene_version "1"; transcript_id "FBtr0344842"; transcript_version "1"; gene_name "CG7028"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG7028-RE"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 211636 211697 . - . gene_id "FBgn0035110"; gene_version "1"; transcript_id "FBtr0072527"; transcript_version "1"; gene_name "thoc7"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "thoc7-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 216110 216479 . + . gene_id "FBgn0035111"; gene_version "1"; transcript_id "FBtr0072487"; transcript_version "1"; gene_name "CG16940"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG16940-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 217893 218058 . + . gene_id "FBgn0063923"; gene_version "1"; transcript_id "FBtr0100851"; transcript_version "1"; gene_name "Kaz1-ORFB"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Kaz1-ORFB-RE"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 217932 218668 . - . gene_id "FBgn0035113"; gene_version "1"; transcript_id "FBtr0072525"; transcript_version "1"; gene_name "pyx"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "pyx-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 218591 218663 . + . gene_id "FBgn0035112"; gene_version "1"; transcript_id "FBtr0300710"; transcript_version "1"; gene_name "CG13877"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13877-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 224758 225605 . + . gene_id "FBgn0053229"; gene_version "1"; transcript_id "FBtr0345592"; transcript_version "1"; gene_name "CG33229"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG33229-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 230971 231015 . - . gene_id "FBgn0262035"; gene_version "1"; transcript_id "FBtr0303931"; transcript_version "1"; gene_name "CG42846"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG42846-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 231681 231833 . - . gene_id "FBgn0085483"; gene_version "1"; transcript_id "FBtr0112760"; transcript_version "1"; gene_name "CG34454"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34454-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 232480 232656 . - . gene_id "FBgn0085482"; gene_version "1"; transcript_id "FBtr0299518"; transcript_version "1"; gene_name "CG34453"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34453-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 233926 234298 . - . gene_id "FBgn0000541"; gene_version "1"; transcript_id "FBtr0301348"; transcript_version "1"; gene_name "E(bx)"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "E(bx)-RE"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 249965 250305 . + . gene_id "FBgn0052476"; gene_version "1"; transcript_id "FBtr0072501"; transcript_version "1"; gene_name "mthl14"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mthl14-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 250229 250509 . - . gene_id "FBgn0024806"; gene_version "1"; transcript_id "FBtr0332109"; transcript_version "1"; gene_name "DIP2"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "DIP2-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 258998 259097 . + . gene_id "FBgn0035120"; gene_version "1"; transcript_id "FBtr0332106"; transcript_version "1"; gene_name "wac"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "wac-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 259043 259502 . - . gene_id "FBgn0035121"; gene_version "1"; transcript_id "FBtr0332107"; transcript_version "1"; gene_name "Tudor-SN"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Tudor-SN-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 265445 265932 . + . gene_id "FBgn0035122"; gene_version "1"; transcript_id "FBtr0310415"; transcript_version "1"; gene_name "mRpL17"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mRpL17-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 269120 270336 . + . gene_id "FBgn0027111"; gene_version "1"; transcript_id "FBtr0310416"; transcript_version "1"; gene_name "miple"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "miple-RC"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 274890 275018 . + . gene_id "FBgn0029002"; gene_version "1"; transcript_id "FBtr0332839"; transcript_version "1"; gene_name "miple2"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "miple2-RG"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 271420 272209 . - . gene_id "FBgn0052845"; gene_version "1"; transcript_id "FBtr0072518"; transcript_version "1"; gene_name "CG32845"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG32845-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 276568 276667 . + . gene_id "FBgn0035124"; gene_version "1"; transcript_id "FBtr0072507"; transcript_version "1"; gene_name "ttm2"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "ttm2-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 303884 305292 . + . gene_id "FBgn0264707"; gene_version "1"; transcript_id "FBtr0333962"; transcript_version "1"; gene_name "RhoGEF3"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "RhoGEF3-RM"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 305393 306358 . - . gene_id "FBgn0004373"; gene_version "1"; transcript_id "FBtr0072516"; transcript_version "1"; gene_name "fwd"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "fwd-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 317622 317789 . + . gene_id "FBgn0085293"; gene_version "1"; transcript_id "FBtr0306849"; transcript_version "1"; gene_name "CG34264"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34264-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 321233 321928 . - . gene_id "FBgn0052344"; gene_version "1"; transcript_id "FBtr0072576"; transcript_version "1"; gene_name "CG32344"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG32344-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 324985 325235 . - . gene_id "FBgn0052343"; gene_version "1"; transcript_id "FBtr0299831"; transcript_version "1"; gene_name "Atac3"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Atac3-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 355528 356050 . + . gene_id "FBgn0261985"; gene_version "1"; transcript_id "FBtr0306563"; transcript_version "1"; gene_name "Ptpmeg"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Ptpmeg-RK"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 329822 330039 . - . gene_id "FBgn0035131"; gene_version "1"; transcript_id "FBtr0072574"; transcript_version "1"; gene_name "mthl9"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mthl9-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 332801 333474 . - . gene_id "FBgn0035132"; gene_version "1"; transcript_id "FBtr0333904"; transcript_version "1"; gene_name "mthl10"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mthl10-RG"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 343378 343543 . - . gene_id "FBgn0023000"; gene_version "1"; transcript_id "FBtr0072571"; transcript_version "1"; gene_name "mth"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mth-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 357460 357664 . + . gene_id "FBgn0035134"; gene_version "1"; transcript_id "FBtr0072546"; transcript_version "1"; gene_name "CG1231"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG1231-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 357851 358496 . - . gene_id "FBgn0265574"; gene_version "1"; transcript_id "FBtr0072569"; transcript_version "1"; gene_name "Cdc5"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Cdc5-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 358400 358486 . + . gene_id "FBgn0040291"; gene_version "1"; transcript_id "FBtr0072547"; transcript_version "1"; gene_name "Roc1b"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Roc1b-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 365247 365681 . + . gene_id "FBgn0035137"; gene_version "1"; transcript_id "FBtr0072549"; transcript_version "1"; gene_name "CG1233"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG1233-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 365664 365830 . - . gene_id "FBgn0035138"; gene_version "1"; transcript_id "FBtr0072568"; transcript_version "1"; gene_name "CG13884"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13884-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 366524 367389 . - . gene_id "FBgn0262139"; gene_version "1"; transcript_id "FBtr0330146"; transcript_version "1"; gene_name "trh"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "trh-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 433146 433229 . - . gene_id "FBgn0035139"; gene_version "1"; transcript_id "FBtr0072566"; transcript_version "1"; gene_name "CG13891"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13891-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 434083 435117 . - . gene_id "FBgn0001316"; gene_version "1"; transcript_id "FBtr0110818"; transcript_version "1"; gene_name "klar"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "klar-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 500014 500076 . - . gene_id "FBgn0085296"; gene_version "1"; transcript_id "FBtr0112462"; transcript_version "1"; gene_name "CG34267"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34267-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 501340 501400 . + . gene_id "FBgn0085297"; gene_version "1"; transcript_id "FBtr0112463"; transcript_version "1"; gene_name "CG34268"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34268-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 528995 528996 . + . gene_id "FBgn0085298"; gene_version "1"; transcript_id "FBtr0112464"; transcript_version "1"; gene_name "CG34269"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34269-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 542063 542147 . + . gene_id "FBgn0035140"; gene_version "1"; transcript_id "FBtr0072550"; transcript_version "1"; gene_name "CG17180"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG17180-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 542122 542229 . - . gene_id "FBgn0035141"; gene_version "1"; transcript_id "FBtr0072564"; transcript_version "1"; gene_name "Cypl"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Cypl-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 574202 581099 . + . gene_id "FBgn0035142"; gene_version "1"; transcript_id "FBtr0072551"; transcript_version "1"; gene_name "hipk"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "hipk-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 582622 582810 . + . gene_id "FBgn0035143"; gene_version "1"; transcript_id "FBtr0072553"; transcript_version "1"; gene_name "Ppm1"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Ppm1-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 582824 583983 . - . gene_id "FBgn0035144"; gene_version "1"; transcript_id "FBtr0072563"; transcript_version "1"; gene_name "Kah"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Kah-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 598575 599239 . + . gene_id "FBgn0035145"; gene_version "1"; transcript_id "FBtr0344912"; transcript_version "1"; gene_name "MED14"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "MED14-RC"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 599094 599483 . - . gene_id "FBgn0035146"; gene_version "1"; transcript_id "FBtr0072562"; transcript_version "1"; gene_name "CG13893"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13893-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 605165 605195 . - . gene_id "FBgn0016715"; gene_version "1"; transcript_id "FBtr0072561"; transcript_version "1"; gene_name "Reg-2"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Reg-2-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 620695 621645 . - . gene_id "FBgn0263042"; gene_version "1"; transcript_id "FBtr0306919"; transcript_version "1"; gene_name "CG43337"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG43337-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 651106 651311 . + . gene_id "FBgn0035147"; gene_version "1"; transcript_id "FBtr0072556"; transcript_version "1"; gene_name "Gale"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Gale-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 651542 652087 . - . gene_id "FBgn0035148"; gene_version "1"; transcript_id "FBtr0072559"; transcript_version "1"; gene_name "CG3402"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG3402-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 652987 653053 . - . gene_id "FBgn0035149"; gene_version "1"; transcript_id "FBtr0072558"; transcript_version "1"; gene_name "MED30"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "MED30-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 657795 657971 . + . gene_id "FBgn0035150"; gene_version "1"; transcript_id "FBtr0072557"; transcript_version "1"; gene_name "Rev1"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Rev1-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 677879 677999 . - . gene_id "FBgn0035151"; gene_version "1"; transcript_id "FBtr0072612"; transcript_version "1"; gene_name "CG17129"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG17129-RC"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 680436 680857 . - . gene_id "FBgn0035152"; gene_version "1"; transcript_id "FBtr0333379"; transcript_version "1"; gene_name "CG3386"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG3386-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 682885 683065 . - . gene_id "FBgn0035153"; gene_version "1"; transcript_id "FBtr0072610"; transcript_version "1"; gene_name "ebd1"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "ebd1-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 685942 685990 . - . gene_id "FBgn0035154"; gene_version "1"; transcript_id "FBtr0072609"; transcript_version "1"; gene_name "CG3344"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG3344-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 688523 688584 . - . gene_id "FBgn0052483"; gene_version "1"; transcript_id "FBtr0072608"; transcript_version "1"; gene_name "CG32483"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG32483-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 691438 691951 . + . gene_id "FBgn0035155"; gene_version "1"; transcript_id "FBtr0330078"; transcript_version "1"; gene_name "RabX6"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "RabX6-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 691486 692021 . - . gene_id "FBgn0260862"; gene_version "1"; transcript_id "FBtr0072607"; transcript_version "1"; gene_name "Vti1"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Vti1-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 693083 693282 . - . gene_id "FBgn0035157"; gene_version "1"; transcript_id "FBtr0072606"; transcript_version "1"; gene_name "CG13894"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13894-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 705948 706134 . - . gene_id "FBgn0035158"; gene_version "1"; transcript_id "FBtr0072605"; transcript_version "1"; gene_name "CG13895"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13895-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 708601 708709 . - . gene_id "FBgn0025676"; gene_version "1"; transcript_id "FBtr0072604"; transcript_version "1"; gene_name "CkIIalpha-i3"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CkIIalpha-i3-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 710820 710941 . - . gene_id "FBgn0035159"; gene_version "1"; transcript_id "FBtr0331770"; transcript_version "1"; gene_name "CG13896"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13896-RC"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 737680 737863 . - . gene_id "FBgn0035160"; gene_version "1"; transcript_id "FBtr0072601"; transcript_version "1"; gene_name "hng3"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "hng3-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 752308 753492 . + . gene_id "FBgn0000575"; gene_version "1"; transcript_id "FBtr0072578"; transcript_version "1"; gene_name "emc"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "emc-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 803427 803517 . - . gene_id "FBgn0035161"; gene_version "1"; transcript_id "FBtr0072599"; transcript_version "1"; gene_name "CG13898"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13898-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 826720 827877 . - . gene_id "FBgn0035162"; gene_version "1"; transcript_id "FBtr0331404"; transcript_version "1"; gene_name "CG13900"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13900-RC"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 835465 835515 . + . gene_id "FBgn0260755"; gene_version "1"; transcript_id "FBtr0301252"; transcript_version "1"; gene_name "CG42553"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG42553-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 836554 836770 . + . gene_id "FBgn0260756"; gene_version "1"; transcript_id "FBtr0301253"; transcript_version "1"; gene_name "CG42554"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG42554-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 836667 836782 . - . gene_id "FBgn0035164"; gene_version "1"; transcript_id "FBtr0331403"; transcript_version "1"; gene_name "CG13901"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13901-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 839433 839931 . + . gene_id "FBgn0035165"; gene_version "1"; transcript_id "FBtr0345588"; transcript_version "1"; gene_name "CG13887"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13887-RE"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 839980 840057 . - . gene_id "FBgn0035166"; gene_version "1"; transcript_id "FBtr0331402"; transcript_version "1"; gene_name "CG13902"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13902-RC"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 843412 843701 . + . gene_id "FBgn0035167"; gene_version "1"; transcript_id "FBtr0072585"; transcript_version "1"; gene_name "Gr61a"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Gr61a-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 851017 851080 . + . gene_id "FBgn0035168"; gene_version "1"; transcript_id "FBtr0332752"; transcript_version "1"; gene_name "CG13889"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13889-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 852349 852673 . + . gene_id "FBgn0035169"; gene_version "1"; transcript_id "FBtr0072587"; transcript_version "1"; gene_name "CG13890"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13890-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 852720 853280 . - . gene_id "FBgn0035170"; gene_version "1"; transcript_id "FBtr0072594"; transcript_version "1"; gene_name "dpr20"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "dpr20-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 870597 871701 . + . gene_id "FBgn0035171"; gene_version "1"; transcript_id "FBtr0300345"; transcript_version "1"; gene_name "CG12502"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG12502-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 892176 895313 . + . gene_id "FBgn0052479"; gene_version "1"; transcript_id "FBtr0072589"; transcript_version "1"; gene_name "CG32479"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG32479-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; +3L FlyBase UTR 877291 878452 . - . gene_id "FBgn0035173"; gene_version "1"; transcript_id "FBtr0072593"; transcript_version "1"; gene_name "CG13907"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13907-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; diff -r 73b932244237 -r a5d8b08af089 test-data/breakpoint.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/breakpoint.bed Thu Oct 29 15:51:10 2015 -0400 @@ -0,0 +1,12 @@ +3L 3371595 3371596 FBgn0027616_control_breakpoint 0 - +3L 3371497 3371498 FBgn0027616_control_breakpoint 0 - +3L 3371403 3371404 FBgn0027616_control_breakpoint 0 - +3L 3371303 3371304 FBgn0027616_control_breakpoint 0 - +3L 3371189 3371190 FBgn0027616_control_breakpoint 0 - +3L 3370936 3370937 FBgn0027616_control_breakpoint 0 - +3L 3371400 3371401 FBgn0027616_treatment_breakpoint 0 - +3L 3371361 3371362 FBgn0027616_treatment_breakpoint 0 - +3L 3371337 3371338 FBgn0027616_treatment_breakpoint 0 - +3L 3371318 3371319 FBgn0027616_treatment_breakpoint 0 - +3L 3371310 3371311 FBgn0027616_treatment_breakpoint 0 - +3L 3371195 3371196 FBgn0027616_treatment_breakpoint 0 - diff -r 73b932244237 -r a5d8b08af089 test-data/c1.bam Binary file test-data/c1.bam has changed diff -r 73b932244237 -r a5d8b08af089 test-data/c2.bam Binary file test-data/c2.bam has changed diff -r 73b932244237 -r a5d8b08af089 test-data/c3.bam Binary file test-data/c3.bam has changed diff -r 73b932244237 -r a5d8b08af089 test-data/dapars.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/dapars.tab Thu Oct 29 15:51:10 2015 -0400 @@ -0,0 +1,13 @@ +#chr start end strand gene breakpoint breakpoint_type control_mean_percent treatment_mean_percent control_0_coverage_long control_1_coverage_long control_2_coverage_long treatment_0_coverage_long treatment_1_coverage_long control_0_coverage_short control_1_coverage_short control_2_coverage_short treatment_0_coverage_short treatment_1_coverage_short control_0_percent_long control_1_percent_long control_2_percent_long treatment_0_percent_long treatment_1_percent_long +3L 3370451 3371600 - FBgn0027616 3371595 control_breakpoint 0.60624805294465911 0.64703056230715239 24.737291739777078 24.785958478598335 24.72955959248966 24.77557161847185 24.75965846300016 19.115611620795107 10.209598416504893 20.530594574391877 12.110393819651023 15.022501270971024 0.56409701169337378 0.70826015293577238 0.54638699420483106 0.6716801722333523 0.62238095238095237 +3L 3370451 3371600 - FBgn0027616 3371497 control_breakpoint 0.53167446986389089 0.50722931382807834 25.17339084590076 24.83086675847875 25.108450107102474 25.34486540101996 24.121432697028514 21.00523960096197 23.72215396429967 21.520352063080335 19.645096770550406 29.349412490560265 0.54513073692966096 0.51141754702049802 0.53847512564151367 0.56334489245326924 0.45111373520288744 +3L 3370451 3371600 - FBgn0027616 3371403 control_breakpoint 0.42175237278876598 0.43968252706676059 21.43265545361876 23.24127468492176 23.840243530439217 24.668855460430212 21.796132549444817 36.722792421490546 30.085068034525737 27.88682196696674 24.845774122380487 35.388813029194914 0.36854080291232694 0.43583102646276056 0.46088528899121045 0.49821347081215273 0.38115158332136839 +3L 3370451 3371600 - FBgn0027616 3371303 control_breakpoint 0.39108916961365203 0.32605616375337437 18.144907864186806 22.269351498851368 22.18854207447699 20.899986765802232 15.578958905406301 38.471119193978524 29.819508472106396 29.989017803369812 32.691946952542786 43.853564316268944 0.32049065974808444 0.42752618335797105 0.42525066573490061 0.38998381502043045 0.2621285124863183 +3L 3370451 3371600 - FBgn0027616 3371189 control_breakpoint 0.36108879385216808 0.31348344456581922 16.42870568302662 19.697783365598664 19.006203838429983 18.29248418141787 13.92595176003268 34.958610236835646 30.910044445037187 31.76652614656724 32.650427132988575 38.05812786896196 0.31970351805583574 0.38922404334965222 0.37433882015101616 0.35907810742345286 0.26788878170818553 +3L 3370451 3371600 - FBgn0027616 3370936 control_breakpoint 0.28090300350568276 0.29742764206502253 11.46529404386468 11.683852531070386 11.717151025282648 13.766111724681437 11.199166362163194 29.811874585497957 29.72761107235841 29.714773098686205 28.924812347110773 29.914478029045522 0.27776357789494338 0.28214053584193971 0.28280489678016518 0.32245991446639399 0.27239536966365108 +3L 3370451 3371600 - FBgn0027616 3371400 treatment_breakpoint 0.42145916899796149 0.43851061373185951 21.401278818382607 23.215540467097224 23.801920635167825 24.63708242685255 21.731267864203808 36.60639625382263 30.075054318450007 27.96408571339583 24.957503263330818 35.418435688866296 0.36893874460135312 0.43564048328830879 0.45979827910422255 0.49676959861626907 0.38025162884745001 +3L 3370451 3371600 - FBgn0027616 3371361 treatment_breakpoint 0.41022686556727411 0.38864357642733532 20.50124466178987 22.696466576127253 23.216860024806003 23.358299973732187 19.390276970900924 36.68975893439792 30.434754232875903 28.951959510908406 28.548944426311216 39.85531959337436 0.3584697482587354 0.42717758467694211 0.44503326376614494 0.45000077048421888 0.32728638237045177 +3L 3370451 3371600 - FBgn0027616 3371337 treatment_breakpoint 0.40286144007500929 0.35591939407775497 19.63139192604695 22.507577656074748 22.83332278545971 22.312985873192638 17.50778335689563 37.385458192346604 30.200461900984376 29.386718364840192 30.686571411834358 42.69044233657633 0.34430860149732984 0.42702361623048329 0.4372521024972148 0.42100325014405965 0.29083553801145029 +3L 3370451 3371600 - FBgn0027616 3371318 treatment_breakpoint 0.39318546325416648 0.33313189900140355 18.647118432026687 22.269249189393072 22.414766533494532 21.35928000871836 16.143664556084484 38.416093760166575 30.221344032507872 29.892123941952075 32.28006863275775 44.079936217085454 0.32678003420525586 0.42425219115453905 0.42852416440270436 0.39820170359418766 0.26806209440861944 +3L 3370451 3371600 - FBgn0027616 3371310 treatment_breakpoint 0.39141424351796555 0.32838393490616724 18.343263676520557 22.241652724683444 22.284741160633022 21.08265781222581 15.796554450012511 38.530840978593275 30.061926839480794 29.968320926900674 32.579743373440486 44.06334722928318 0.32252399906345114 0.42524150182490184 0.42647722966554374 0.3928757816721995 0.26389208814013498 +3L 3370451 3371600 - FBgn0027616 3371195 treatment_breakpoint 0.36099881812575446 0.31267620725828044 16.37902538523203 19.824462944669694 19.116066163474617 18.379188290373293 13.918532544261875 35.29630105334693 30.91506563825953 31.81586648348289 32.752884025821615 38.4250759005065 0.31696026932118287 0.39071042830578101 0.37532575675029961 0.35944540203100878 0.2659070124855521 diff -r 73b932244237 -r a5d8b08af089 test-data/example.gtf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/example.gtf Thu Oct 29 15:51:10 2015 -0400 @@ -0,0 +1,1 @@ +3L FlyBase UTR 3370451 3371600 . - . gene_id "FBgn0027616"; gene_version "1"; transcript_id "FBtr0073078"; transcript_version "1"; gene_name "YT521-B"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "YT521-B-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding"; diff -r 73b932244237 -r a5d8b08af089 test-data/t1.bam Binary file test-data/t1.bam has changed diff -r 73b932244237 -r a5d8b08af089 test-data/t2.bam Binary file test-data/t2.bam has changed diff -r 73b932244237 -r a5d8b08af089 test-data/t3.bam Binary file test-data/t3.bam has changed diff -r 73b932244237 -r a5d8b08af089 tool_dependencies.xml --- a/tool_dependencies.xml Wed Oct 28 06:22:18 2015 -0400 +++ b/tool_dependencies.xml Thu Oct 29 15:51:10 2015 -0400 @@ -6,4 +6,13 @@ + + + + + + + + +