# HG changeset patch
# User mvdbeek
# Date 1446148270 14400
# Node ID a5d8b08af08984885ba79ef988c914377d5985ff
# Parent 73b932244237182286674051d50c246f8b1a72be
planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
diff -r 73b932244237 -r a5d8b08af089 dapars.py
--- a/dapars.py Wed Oct 28 06:22:18 2015 -0400
+++ b/dapars.py Thu Oct 29 15:51:10 2015 -0400
@@ -2,19 +2,27 @@
import os
import csv
import numpy as np
+from scipy import stats
from collections import OrderedDict, namedtuple
import filter_utr
import subprocess
from multiprocessing import Pool
import warnings
+import matplotlib.pyplot as plt
+import matplotlib.gridspec as gridspec
+from tabulate import tabulate
+def directory_path(str):
+ if os.path.exists(str):
+ return str
+ else:
+ os.mkdir(str)
+ return str
def parse_args():
"""
Returns floating point values except for input files.
My initial approach will not filter anything. (FDR. fold_change, PDUI, Num_least ...)
- :param argv:
- :return:
"""
parser = argparse.ArgumentParser(prog='DaPars', description='Determines the usage of proximal polyA usage')
parser.add_argument("-c", "--control_alignments", nargs="+", required=True,
@@ -33,7 +41,11 @@
help="minimum coverage in each aligment to be considered for determining breakpoints")
parser.add_argument("-b", "--breakpoint_bed", required=False, type=argparse.FileType('w'),
help="Write bedfile with coordinates of breakpoint positions to supplied path.")
- parser.add_argument("-v", "--version", action='version', version='%(prog)s 0.1.5')
+ parser.add_argument("-v", "--version", action='version', version='%(prog)s 0.2.0')
+ parser.add_argument("-p", "--plot_path", default=None, required=False, type=directory_path,
+ help="If plot_path is specified will write a coverage plot for every UTR in that directory.")
+ parser.add_argument("-html", "--html_file", default=None, required=False, type=argparse.FileType('w'),
+ help="Write an html file to the specified location. Only to be used within a galaxy wrapper")
return parser.parse_args()
@@ -48,6 +60,8 @@
self.n_cpus = args.cpu
self.search_start = args.search_start
self.coverage_threshold = args.coverage_threshold
+ self.plot_path = args.plot_path
+ self.html_file = args.html_file
self.utr = args.utr_bed_file
self.gtf_fields = filter_utr.get_gtf_fields()
self.result_file = args.output_file
@@ -67,7 +81,8 @@
if args.breakpoint_bed:
self.bed_output = args.breakpoint_bed
self.write_bed()
-
+ if self.plot_path:
+ self.write_html()
def dump_utr_dict_to_bedfile(self):
w = csv.writer(open("tmp_bedfile.bed", "w"), delimiter="\t")
@@ -110,6 +125,10 @@
return coverage_dict
def get_utr_dict(self, shift):
+ """
+ The utr end is extended by UTR length * shift, to discover novel distal polyA sites.
+ Set to 0 to disable.
+ """
utr_dict = OrderedDict()
for line in self.utr:
if not line.startswith("#"):
@@ -139,11 +158,11 @@
utr_coverage.append(np.sum(vector))
coverage_per_alignment.append(utr_coverage)
coverages = np.array([ sum(x) for x in zip(*coverage_per_alignment) ])
- coverage_weights = coverages / np.mean(coverages) # TODO: proabably median is better suited?
+ coverage_weights = coverages / np.mean(coverages) # TODO: probably median is better suited? Or even no normalization!
return coverage_weights
def get_result_tuple(self):
- static_desc = ["chr", "start", "end", "strand", "gene", "breakpoint",
+ static_desc = ["chr", "start", "end", "strand", "gene", "t_stat", "p_value", "breakpoint",
"breakpoint_type", "control_mean_percent", "treatment_mean_percent" ]
samples_desc = []
for statistic in ["coverage_long", "coverage_short", "percent_long"]:
@@ -162,18 +181,22 @@
"num_treatment":len(self.treatment_alignments),
"result_d":result_d}
pool = Pool(self.n_cpus)
- tasks = [ (self.utr_coverages[utr], utr, utr_d, self.result_tuple._fields, self.coverage_weights, self.num_samples,
- len(self.control_alignments), len(self.treatment_alignments), self.search_start,
- self.coverage_threshold) for utr, utr_d in self.utr_dict.iteritems() ]
+ tasks = [ (self.utr_coverages[utr], self.plot_path, utr, utr_d, self.coverage_weights, len(self.control_alignments),
+ len(self.treatment_alignments), self.search_start, self.coverage_threshold) \
+ for utr, utr_d in self.utr_dict.iteritems() ]
processed_tasks = [ pool.apply_async(calculate_all_utr, t) for t in tasks]
- result = [res.get() for res in processed_tasks]
- for res_control, res_treatment in result:
- if isinstance(res_control, dict):
- t = self.result_tuple(**res_control)
- result_d[res_control["gene"]+"_bp_control"] = t
- if isinstance(res_treatment, dict):
- t = self.result_tuple(**res_treatment)
- result_d[res_treatment["gene"]+"_bp_treatment"] = t
+ result_list = [res.get() for res in processed_tasks]
+ for res_control, res_treatment in result_list:
+ if not res_control:
+ continue
+ for i, result in enumerate(res_control):
+ if isinstance(result, dict):
+ t = self.result_tuple(**result)
+ result_d[result["gene"]+"_bp_control_{i}".format(i=i)] = t
+ for i, result in enumerate(res_treatment):
+ if isinstance(result, dict):
+ t = self.result_tuple(**result)
+ result_d[result["gene"]+"_bp_treatment_{i}".format(i=i)] = t
return result_d
def write_results(self):
@@ -183,51 +206,47 @@
w.writerow(header) # field header
w.writerows( self.result_d.values())
+ def write_html(self):
+ output_lines = [(gene_str_to_link(result.gene), result.breakpoint, result.breakpoint_type, result.p_value ) for result in self.result_d.itervalues()]
+ if self.html_file:
+ self.html_file.write(tabulate(output_lines, headers=["gene", "breakpoint", "breakpoint_type", "p_value"], tablefmt="html"))
+ else:
+ with open(os.path.join(self.plot_path, "index.html"), "w") as html_file:
+ html_file.write(tabulate(output_lines, headers=["gene", "breakpoint", "breakpoint_type", "p_value"], tablefmt="html"))
+
def write_bed(self):
w = csv.writer(self.bed_output, delimiter='\t')
bed = [(result.chr, result.breakpoint, int(result.breakpoint)+1, result.gene+"_"+result.breakpoint_type, 0, result.strand) for result in self.result_d.itervalues()]
w.writerows(bed)
-def calculate_all_utr(utr_coverage, utr, utr_d, result_tuple_fields, coverage_weights, num_samples, num_control,
- num_treatment, search_start, coverage_threshold):
- res_control = dict(zip(result_tuple_fields, result_tuple_fields))
- res_treatment = res_control.copy()
+def calculate_all_utr(utr_coverage, plot_path, utr, utr_d, coverage_weights, num_control, num_treatment, search_start, coverage_threshold):
if utr_d["strand"] == "+":
is_reverse = False
else:
is_reverse = True
- control_breakpoint, \
- control_abundance, \
- treatment_breakpoint, \
- treatment_abundance = optimize_breakpoint(utr_coverage, utr_d["new_start"], utr_d["new_end"], coverage_weights,
- search_start, coverage_threshold, num_control)
- if control_breakpoint:
- breakpoint_to_result(res_control, utr, utr_d, control_breakpoint, "control_breakpoint", control_abundance, is_reverse, num_samples,
+ control_breakpoints, control_abundances, treatment_breakpoints, treatment_abundances = \
+ optimize_breakpoint(plot_path, utr, utr_coverage, utr_d["new_start"], utr_d["new_end"], coverage_weights, search_start, coverage_threshold, num_control)
+ res_control = breakpoints_to_result(utr, utr_d, control_breakpoints, "control_breakpoint", control_abundances, is_reverse,
num_control, num_treatment)
- if treatment_breakpoint:
- breakpoint_to_result(res_treatment, utr, utr_d, treatment_breakpoint, "treatment_breakpoint", treatment_abundance, is_reverse,
- num_samples, num_control, num_treatment)
- if res_control == dict(zip(result_tuple_fields, result_tuple_fields)):
- res_control = False
- if res_treatment == dict(zip(result_tuple_fields, result_tuple_fields)):
- res_treatment == False
+ res_treatment = breakpoints_to_result(utr, utr_d, treatment_breakpoints, "treatment_breakpoint", treatment_abundances, is_reverse,
+ num_control, num_treatment)
return res_control, res_treatment
-def breakpoint_to_result(res, utr, utr_d, breakpoint, breakpoint_type,
- abundances, is_reverse, num_samples, num_control, num_treatment):
+def breakpoints_to_result(utr, utr_d, breakpoints, breakpoint_type,
+ abundances, is_reverse, num_control, num_treatment):
"""
Takes in a result dictionary res and fills the necessary fields
"""
- long_coverage_vector = abundances[0]
- short_coverage_vector = abundances[1]
- num_non_zero = sum((np.array(long_coverage_vector) + np.array(short_coverage_vector)) > 0) # TODO: This introduces bias
- if num_non_zero == num_samples:
- percentage_long = []
- for i in range(num_samples):
- ratio = float(long_coverage_vector[i]) / (long_coverage_vector[i] + short_coverage_vector[i]) # long 3'UTR percentage
- percentage_long.append(ratio)
+ if not breakpoints:
+ return False
+ result = []
+ for breakpoint, abundance in zip(breakpoints, abundances):
+ res = {}
+ long_coverage_vector = abundance[0]
+ short_coverage_vector = abundance[1]
+ percentage_long = long_coverage_vector/(long_coverage_vector+short_coverage_vector)
for i in range(num_control):
res["control_{i}_coverage_long".format(i=i)] = float(long_coverage_vector[i])
res["control_{i}_coverage_short".format(i=i)] = float(short_coverage_vector[i])
@@ -237,6 +256,7 @@
res["treatment_{i}_coverage_long".format(i=k)] = float(long_coverage_vector[i])
res["treatment_{i}_coverage_short".format(i=k)] = float(short_coverage_vector[i])
res["treatment_{i}_percent_long".format(i=k)] = percentage_long[i]
+ res["t_stat"], res["p_value"] = stat_test(percentage_long[:num_control], percentage_long[num_control:])
control_mean_percent = np.mean(np.array(percentage_long[:num_control]))
treatment_mean_percent = np.mean(np.array(percentage_long[num_control:]))
res["chr"] = utr_d["chr"]
@@ -252,43 +272,85 @@
res["control_mean_percent"] = control_mean_percent
res["treatment_mean_percent"] = treatment_mean_percent
res["gene"] = utr
+ result.append(res)
+ return result
-def optimize_breakpoint(utr_coverage, UTR_start, UTR_end, coverage_weigths, search_start, coverage_threshold, num_control):
+def optimize_breakpoint(plot_path, utr, utr_coverage, UTR_start, UTR_end, coverage_weigths, search_start, coverage_threshold, num_control):
"""
We are searching for a point within the UTR that minimizes the mean squared error, if the coverage vector was divided
at that point. utr_coverage is a list with items corresponding to numpy arrays of coverage for a sample.
"""
- search_point_end = int(abs((UTR_end - UTR_start)) * 0.1) # TODO: This is 10% of total UTR end. Why?
num_samples = len(utr_coverage)
- normalized_utr_coverage = np.array([coverage/ coverage_weigths[i] for i, coverage in enumerate( utr_coverage.values() )])
+ normalized_utr_coverage = np.array(utr_coverage.values())/np.expand_dims(coverage_weigths, axis=1)
start_coverage = [np.mean(coverage[0:99]) for coverage in utr_coverage.values()] # filters threshold on mean coverage over first 100 nt
is_above_threshold = sum(np.array(start_coverage) >= coverage_threshold) >= num_samples # This filters on the raw threshold. Why?
is_above_length = UTR_end - UTR_start >= 150
if (is_above_threshold) and (is_above_length):
- search_end = UTR_end - UTR_start - search_point_end
+ search_end = UTR_end - UTR_start
breakpoints = range(search_start, search_end + 1)
mse_list = [ estimate_mse(normalized_utr_coverage, bp, num_samples, num_control) for bp in breakpoints ]
+ mse_list = [mse_list[0] for i in xrange(search_start)] + mse_list
+ if plot_path:
+ plot_coverage_breakpoint(plot_path, utr, mse_list, normalized_utr_coverage, num_control)
if len(mse_list) > 0:
- return mse_to_breakpoint(mse_list, normalized_utr_coverage, breakpoints, num_samples)
+ return mse_to_breakpoint(mse_list, normalized_utr_coverage, num_samples)
return False, False, False, False
-def mse_to_breakpoint(mse_list, normalized_utr_coverage, breakpoints, num_samples):
+def plot_coverage_breakpoint(plot_path, utr, mse_list, normalized_utr_coverage, num_control):
"""
- Take in mse_list with control and treatment mse and return breakpoint and utr abundance
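+ Plot control (blue) and treatment (red) mean-squared error curves and normalized coverage for one UTR, mark the MSE minima as vertical lines on the coverage plot, and save the figure as "<utr>.svg" in plot_path.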
"""
- mse_control = [mse[0] for mse in mse_list]
- mse_treatment = [mse[1] for mse in mse_list]
- control_index = mse_control.index(min(mse_control))
- treatment_index = mse_treatment.index(min(mse_treatment))
- control_breakpoint = breakpoints[control_index]
- treatment_breakpoint = breakpoints[treatment_index]
- control_abundance = estimate_abundance(normalized_utr_coverage, control_breakpoint, num_samples)
- treatment_abundance = estimate_abundance(normalized_utr_coverage, treatment_breakpoint, num_samples)
- return control_breakpoint, control_abundance, treatment_breakpoint, treatment_abundance
+ fig = plt.figure(figsize=(8, 8))
+ gs = gridspec.GridSpec(2, 1)
+ ax1 = plt.subplot(gs[0, :])
+ ax2 = plt.subplot(gs[1, :])
+ ax1.set_title("mean-squared error plot")
+ ax1.set_ylabel("mean-squared error")
+ ax1.set_xlabel("nt after UTR start")
+ ax2.set_title("coverage plot")
+ ax2.set_xlabel("nt after UTR start")
+ ax2.set_ylabel("normalized nucleotide coverage")
+ mse_control = [ condition[0] for condition in mse_list]
+ mse_treatment = [ condition[1] for condition in mse_list]
+ minima_control = get_minima(np.array(mse_control))
+ minima_treatment = get_minima(np.array(mse_treatment))
+ control = normalized_utr_coverage[:num_control]
+ treatment = normalized_utr_coverage[num_control:]
+ ax1.plot(mse_control, "b-")
+ ax1.plot(mse_treatment, "r-")
+ [ax2.plot(cov, "b-") for cov in control]
+ [ax2.plot(cov, "r-") for cov in treatment]
+ [ax2.axvline(val, color="b", alpha=0.25) for val in minima_control]
+ ax2.axvline(mse_control.index(min(mse_control)), color="b", alpha=1)
+ [ax2.axvline(val, color="r", alpha=0.25) for val in minima_treatment]
+ ax2.axvline(mse_treatment.index(min(mse_treatment)), color="r", alpha=1)
+ fig.add_subplot(ax1)
+ fig.add_subplot(ax2)
+ gs.tight_layout(fig)
+ fig.savefig(os.path.join(plot_path, "{utr}.svg".format(utr=utr)))
+def mse_to_breakpoint(mse_list, normalized_utr_coverage, num_samples):
+ """
+ Take in mse_list with control and treatment mse and return breakpoint and utr abundance for all local minima
+ in mse_list
+ """
+ mse_control = np.array([mse[0] for mse in mse_list])
+ mse_treatment = np.array([mse[1] for mse in mse_list])
+ control_breakpoints = list(get_minima(mse_control))
+ treatment_breakpoints = list(get_minima(mse_treatment))
+ control_abundances = [estimate_abundance(normalized_utr_coverage, bp, num_samples) for bp in control_breakpoints]
+ treatment_abundances = [estimate_abundance(normalized_utr_coverage, bp, num_samples) for bp in treatment_breakpoints]
+ return control_breakpoints, control_abundances, treatment_breakpoints, treatment_abundances
+
+def get_minima(a):
+ """
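+ Return the positions (array index + 1) of all strict local minima of numpy array a; the first and last element count as minima when lower than their single neighbour.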
+ """
+ return np.where(np.r_[True, a[1:] < a[:-1]] & np.r_[a[:-1] < a[1:], True])[0]+1
+
def estimate_mse(cov, bp, num_samples, num_control):
"""
get abundance of long utr vs short utr with breakpoint specifying the position of long and short utr.
@@ -315,6 +377,11 @@
mean_short_utr = np.mean(short_utr_vector, 1)
return mean_long_utr, mean_short_utr
+def stat_test(a,b):
+ return stats.ttest_ind(a,b)
+
+def gene_str_to_link(str):
+ return "{str}".format(str=str)
if __name__ == '__main__':
args = parse_args()
diff -r 73b932244237 -r a5d8b08af089 dapars.xml
--- a/dapars.xml Wed Oct 28 06:22:18 2015 -0400
+++ b/dapars.xml Thu Oct 29 15:51:10 2015 -0400
@@ -1,17 +1,16 @@
-
+
infer de-novo alternative polyadenylation from rna-seq
numpy
- bedtools
+ scipy
+ matplotlib
+ tabulate
+
(make_breakpoint == True)
+
+ (make_html == True)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
10.1038/ncomms6274
diff -r 73b932244237 -r a5d8b08af089 test-data/100_3L.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/100_3L.gtf Thu Oct 29 15:51:10 2015 -0400
@@ -0,0 +1,100 @@
+3L FlyBase UTR 26079 26308 . + . gene_id "FBgn0052475"; gene_version "1"; transcript_id "FBtr0329896"; transcript_version "1"; gene_name "mthl8"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mthl8-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 53873 54480 . - . gene_id "FBgn0262679"; gene_version "1"; transcript_id "FBtr0305596"; transcript_version "1"; gene_name "CG43149"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG43149-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 68899 68995 . + . gene_id "FBgn0002564"; gene_version "1"; transcript_id "FBtr0072463"; transcript_version "1"; gene_name "Lsp1gamma"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Lsp1gamma-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 73945 74637 . - . gene_id "FBgn0035097"; gene_version "1"; transcript_id "FBtr0305595"; transcript_version "1"; gene_name "CG13405"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13405-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 97628 97922 . - . gene_id "FBgn0040688"; gene_version "1"; transcript_id "FBtr0072540"; transcript_version "1"; gene_name "CG12483"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG12483-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 143821 145895 . + . gene_id "FBgn0020386"; gene_version "1"; transcript_id "FBtr0072464"; transcript_version "1"; gene_name "Pdk1"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Pdk1-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 148971 149056 . + . gene_id "FBgn0035099"; gene_version "1"; transcript_id "FBtr0273352"; transcript_version "1"; gene_name "CG6845"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG6845-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 149679 149807 . - . gene_id "FBgn0263988"; gene_version "1"; transcript_id "FBtr0072538"; transcript_version "1"; gene_name "Dic61B"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Dic61B-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 173909 174517 . + . gene_id "FBgn0035101"; gene_version "1"; transcript_id "FBtr0299864"; transcript_version "1"; gene_name "p130CAS"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "p130CAS-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 175112 175192 . - . gene_id "FBgn0035102"; gene_version "1"; transcript_id "FBtr0072537"; transcript_version "1"; gene_name "CG7049"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG7049-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 176306 176689 . - . gene_id "FBgn0035103"; gene_version "1"; transcript_id "FBtr0301890"; transcript_version "1"; gene_name "Vdup1"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Vdup1-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 182417 182535 . + . gene_id "FBgn0035104"; gene_version "1"; transcript_id "FBtr0334068"; transcript_version "1"; gene_name "CG13875"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13875-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 182522 183272 . - . gene_id "FBgn0083976"; gene_version "1"; transcript_id "FBtr0110975"; transcript_version "1"; gene_name "CG34140"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34140-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 182522 182653 . - . gene_id "FBgn0083992"; gene_version "1"; transcript_id "FBtr0344094"; transcript_version "1"; gene_name "Mkp"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Mkp-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 185999 186310 . + . gene_id "FBgn0027786"; gene_version "1"; transcript_id "FBtr0305544"; transcript_version "1"; gene_name "Mtch"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Mtch-RE"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 187207 188548 . - . gene_id "FBgn0035106"; gene_version "1"; transcript_id "FBtr0072532"; transcript_version "1"; gene_name "rno"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "rno-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 202032 202152 . + . gene_id "FBgn0035107"; gene_version "1"; transcript_id "FBtr0346614"; transcript_version "1"; gene_name "mri"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mri-RE"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 202098 202213 . - . gene_id "FBgn0025592"; gene_version "1"; transcript_id "FBtr0072530"; transcript_version "1"; gene_name "Gyk"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Gyk-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 205243 205309 . + . gene_id "FBgn0024945"; gene_version "1"; transcript_id "FBtr0072483"; transcript_version "1"; gene_name "NitFhit"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "NitFhit-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 207113 207527 . + . gene_id "FBgn0035109"; gene_version "1"; transcript_id "FBtr0072484"; transcript_version "1"; gene_name "CG13876"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13876-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 207184 208113 . - . gene_id "FBgn0027587"; gene_version "1"; transcript_id "FBtr0344842"; transcript_version "1"; gene_name "CG7028"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG7028-RE"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 211636 211697 . - . gene_id "FBgn0035110"; gene_version "1"; transcript_id "FBtr0072527"; transcript_version "1"; gene_name "thoc7"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "thoc7-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 216110 216479 . + . gene_id "FBgn0035111"; gene_version "1"; transcript_id "FBtr0072487"; transcript_version "1"; gene_name "CG16940"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG16940-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 217893 218058 . + . gene_id "FBgn0063923"; gene_version "1"; transcript_id "FBtr0100851"; transcript_version "1"; gene_name "Kaz1-ORFB"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Kaz1-ORFB-RE"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 217932 218668 . - . gene_id "FBgn0035113"; gene_version "1"; transcript_id "FBtr0072525"; transcript_version "1"; gene_name "pyx"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "pyx-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 218591 218663 . + . gene_id "FBgn0035112"; gene_version "1"; transcript_id "FBtr0300710"; transcript_version "1"; gene_name "CG13877"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13877-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 224758 225605 . + . gene_id "FBgn0053229"; gene_version "1"; transcript_id "FBtr0345592"; transcript_version "1"; gene_name "CG33229"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG33229-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 230971 231015 . - . gene_id "FBgn0262035"; gene_version "1"; transcript_id "FBtr0303931"; transcript_version "1"; gene_name "CG42846"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG42846-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 231681 231833 . - . gene_id "FBgn0085483"; gene_version "1"; transcript_id "FBtr0112760"; transcript_version "1"; gene_name "CG34454"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34454-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 232480 232656 . - . gene_id "FBgn0085482"; gene_version "1"; transcript_id "FBtr0299518"; transcript_version "1"; gene_name "CG34453"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34453-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 233926 234298 . - . gene_id "FBgn0000541"; gene_version "1"; transcript_id "FBtr0301348"; transcript_version "1"; gene_name "E(bx)"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "E(bx)-RE"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 249965 250305 . + . gene_id "FBgn0052476"; gene_version "1"; transcript_id "FBtr0072501"; transcript_version "1"; gene_name "mthl14"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mthl14-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 250229 250509 . - . gene_id "FBgn0024806"; gene_version "1"; transcript_id "FBtr0332109"; transcript_version "1"; gene_name "DIP2"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "DIP2-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 258998 259097 . + . gene_id "FBgn0035120"; gene_version "1"; transcript_id "FBtr0332106"; transcript_version "1"; gene_name "wac"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "wac-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 259043 259502 . - . gene_id "FBgn0035121"; gene_version "1"; transcript_id "FBtr0332107"; transcript_version "1"; gene_name "Tudor-SN"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Tudor-SN-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 265445 265932 . + . gene_id "FBgn0035122"; gene_version "1"; transcript_id "FBtr0310415"; transcript_version "1"; gene_name "mRpL17"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mRpL17-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 269120 270336 . + . gene_id "FBgn0027111"; gene_version "1"; transcript_id "FBtr0310416"; transcript_version "1"; gene_name "miple"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "miple-RC"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 274890 275018 . + . gene_id "FBgn0029002"; gene_version "1"; transcript_id "FBtr0332839"; transcript_version "1"; gene_name "miple2"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "miple2-RG"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 271420 272209 . - . gene_id "FBgn0052845"; gene_version "1"; transcript_id "FBtr0072518"; transcript_version "1"; gene_name "CG32845"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG32845-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 276568 276667 . + . gene_id "FBgn0035124"; gene_version "1"; transcript_id "FBtr0072507"; transcript_version "1"; gene_name "ttm2"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "ttm2-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 303884 305292 . + . gene_id "FBgn0264707"; gene_version "1"; transcript_id "FBtr0333962"; transcript_version "1"; gene_name "RhoGEF3"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "RhoGEF3-RM"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 305393 306358 . - . gene_id "FBgn0004373"; gene_version "1"; transcript_id "FBtr0072516"; transcript_version "1"; gene_name "fwd"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "fwd-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 317622 317789 . + . gene_id "FBgn0085293"; gene_version "1"; transcript_id "FBtr0306849"; transcript_version "1"; gene_name "CG34264"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34264-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 321233 321928 . - . gene_id "FBgn0052344"; gene_version "1"; transcript_id "FBtr0072576"; transcript_version "1"; gene_name "CG32344"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG32344-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 324985 325235 . - . gene_id "FBgn0052343"; gene_version "1"; transcript_id "FBtr0299831"; transcript_version "1"; gene_name "Atac3"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Atac3-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 355528 356050 . + . gene_id "FBgn0261985"; gene_version "1"; transcript_id "FBtr0306563"; transcript_version "1"; gene_name "Ptpmeg"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Ptpmeg-RK"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 329822 330039 . - . gene_id "FBgn0035131"; gene_version "1"; transcript_id "FBtr0072574"; transcript_version "1"; gene_name "mthl9"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mthl9-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 332801 333474 . - . gene_id "FBgn0035132"; gene_version "1"; transcript_id "FBtr0333904"; transcript_version "1"; gene_name "mthl10"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mthl10-RG"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 343378 343543 . - . gene_id "FBgn0023000"; gene_version "1"; transcript_id "FBtr0072571"; transcript_version "1"; gene_name "mth"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mth-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 357460 357664 . + . gene_id "FBgn0035134"; gene_version "1"; transcript_id "FBtr0072546"; transcript_version "1"; gene_name "CG1231"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG1231-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 357851 358496 . - . gene_id "FBgn0265574"; gene_version "1"; transcript_id "FBtr0072569"; transcript_version "1"; gene_name "Cdc5"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Cdc5-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 358400 358486 . + . gene_id "FBgn0040291"; gene_version "1"; transcript_id "FBtr0072547"; transcript_version "1"; gene_name "Roc1b"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Roc1b-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 365247 365681 . + . gene_id "FBgn0035137"; gene_version "1"; transcript_id "FBtr0072549"; transcript_version "1"; gene_name "CG1233"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG1233-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 365664 365830 . - . gene_id "FBgn0035138"; gene_version "1"; transcript_id "FBtr0072568"; transcript_version "1"; gene_name "CG13884"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13884-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 366524 367389 . - . gene_id "FBgn0262139"; gene_version "1"; transcript_id "FBtr0330146"; transcript_version "1"; gene_name "trh"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "trh-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 433146 433229 . - . gene_id "FBgn0035139"; gene_version "1"; transcript_id "FBtr0072566"; transcript_version "1"; gene_name "CG13891"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13891-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 434083 435117 . - . gene_id "FBgn0001316"; gene_version "1"; transcript_id "FBtr0110818"; transcript_version "1"; gene_name "klar"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "klar-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 500014 500076 . - . gene_id "FBgn0085296"; gene_version "1"; transcript_id "FBtr0112462"; transcript_version "1"; gene_name "CG34267"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34267-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 501340 501400 . + . gene_id "FBgn0085297"; gene_version "1"; transcript_id "FBtr0112463"; transcript_version "1"; gene_name "CG34268"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34268-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 528995 528996 . + . gene_id "FBgn0085298"; gene_version "1"; transcript_id "FBtr0112464"; transcript_version "1"; gene_name "CG34269"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34269-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 542063 542147 . + . gene_id "FBgn0035140"; gene_version "1"; transcript_id "FBtr0072550"; transcript_version "1"; gene_name "CG17180"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG17180-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 542122 542229 . - . gene_id "FBgn0035141"; gene_version "1"; transcript_id "FBtr0072564"; transcript_version "1"; gene_name "Cypl"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Cypl-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 574202 581099 . + . gene_id "FBgn0035142"; gene_version "1"; transcript_id "FBtr0072551"; transcript_version "1"; gene_name "hipk"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "hipk-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 582622 582810 . + . gene_id "FBgn0035143"; gene_version "1"; transcript_id "FBtr0072553"; transcript_version "1"; gene_name "Ppm1"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Ppm1-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 582824 583983 . - . gene_id "FBgn0035144"; gene_version "1"; transcript_id "FBtr0072563"; transcript_version "1"; gene_name "Kah"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Kah-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 598575 599239 . + . gene_id "FBgn0035145"; gene_version "1"; transcript_id "FBtr0344912"; transcript_version "1"; gene_name "MED14"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "MED14-RC"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 599094 599483 . - . gene_id "FBgn0035146"; gene_version "1"; transcript_id "FBtr0072562"; transcript_version "1"; gene_name "CG13893"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13893-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 605165 605195 . - . gene_id "FBgn0016715"; gene_version "1"; transcript_id "FBtr0072561"; transcript_version "1"; gene_name "Reg-2"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Reg-2-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 620695 621645 . - . gene_id "FBgn0263042"; gene_version "1"; transcript_id "FBtr0306919"; transcript_version "1"; gene_name "CG43337"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG43337-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 651106 651311 . + . gene_id "FBgn0035147"; gene_version "1"; transcript_id "FBtr0072556"; transcript_version "1"; gene_name "Gale"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Gale-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 651542 652087 . - . gene_id "FBgn0035148"; gene_version "1"; transcript_id "FBtr0072559"; transcript_version "1"; gene_name "CG3402"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG3402-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 652987 653053 . - . gene_id "FBgn0035149"; gene_version "1"; transcript_id "FBtr0072558"; transcript_version "1"; gene_name "MED30"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "MED30-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 657795 657971 . + . gene_id "FBgn0035150"; gene_version "1"; transcript_id "FBtr0072557"; transcript_version "1"; gene_name "Rev1"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Rev1-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 677879 677999 . - . gene_id "FBgn0035151"; gene_version "1"; transcript_id "FBtr0072612"; transcript_version "1"; gene_name "CG17129"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG17129-RC"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 680436 680857 . - . gene_id "FBgn0035152"; gene_version "1"; transcript_id "FBtr0333379"; transcript_version "1"; gene_name "CG3386"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG3386-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 682885 683065 . - . gene_id "FBgn0035153"; gene_version "1"; transcript_id "FBtr0072610"; transcript_version "1"; gene_name "ebd1"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "ebd1-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 685942 685990 . - . gene_id "FBgn0035154"; gene_version "1"; transcript_id "FBtr0072609"; transcript_version "1"; gene_name "CG3344"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG3344-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 688523 688584 . - . gene_id "FBgn0052483"; gene_version "1"; transcript_id "FBtr0072608"; transcript_version "1"; gene_name "CG32483"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG32483-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 691438 691951 . + . gene_id "FBgn0035155"; gene_version "1"; transcript_id "FBtr0330078"; transcript_version "1"; gene_name "RabX6"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "RabX6-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 691486 692021 . - . gene_id "FBgn0260862"; gene_version "1"; transcript_id "FBtr0072607"; transcript_version "1"; gene_name "Vti1"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Vti1-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 693083 693282 . - . gene_id "FBgn0035157"; gene_version "1"; transcript_id "FBtr0072606"; transcript_version "1"; gene_name "CG13894"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13894-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 705948 706134 . - . gene_id "FBgn0035158"; gene_version "1"; transcript_id "FBtr0072605"; transcript_version "1"; gene_name "CG13895"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13895-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 708601 708709 . - . gene_id "FBgn0025676"; gene_version "1"; transcript_id "FBtr0072604"; transcript_version "1"; gene_name "CkIIalpha-i3"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CkIIalpha-i3-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 710820 710941 . - . gene_id "FBgn0035159"; gene_version "1"; transcript_id "FBtr0331770"; transcript_version "1"; gene_name "CG13896"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13896-RC"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 737680 737863 . - . gene_id "FBgn0035160"; gene_version "1"; transcript_id "FBtr0072601"; transcript_version "1"; gene_name "hng3"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "hng3-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 752308 753492 . + . gene_id "FBgn0000575"; gene_version "1"; transcript_id "FBtr0072578"; transcript_version "1"; gene_name "emc"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "emc-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 803427 803517 . - . gene_id "FBgn0035161"; gene_version "1"; transcript_id "FBtr0072599"; transcript_version "1"; gene_name "CG13898"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13898-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 826720 827877 . - . gene_id "FBgn0035162"; gene_version "1"; transcript_id "FBtr0331404"; transcript_version "1"; gene_name "CG13900"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13900-RC"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 835465 835515 . + . gene_id "FBgn0260755"; gene_version "1"; transcript_id "FBtr0301252"; transcript_version "1"; gene_name "CG42553"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG42553-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 836554 836770 . + . gene_id "FBgn0260756"; gene_version "1"; transcript_id "FBtr0301253"; transcript_version "1"; gene_name "CG42554"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG42554-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 836667 836782 . - . gene_id "FBgn0035164"; gene_version "1"; transcript_id "FBtr0331403"; transcript_version "1"; gene_name "CG13901"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13901-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 839433 839931 . + . gene_id "FBgn0035165"; gene_version "1"; transcript_id "FBtr0345588"; transcript_version "1"; gene_name "CG13887"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13887-RE"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 839980 840057 . - . gene_id "FBgn0035166"; gene_version "1"; transcript_id "FBtr0331402"; transcript_version "1"; gene_name "CG13902"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13902-RC"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 843412 843701 . + . gene_id "FBgn0035167"; gene_version "1"; transcript_id "FBtr0072585"; transcript_version "1"; gene_name "Gr61a"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Gr61a-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 851017 851080 . + . gene_id "FBgn0035168"; gene_version "1"; transcript_id "FBtr0332752"; transcript_version "1"; gene_name "CG13889"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13889-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 852349 852673 . + . gene_id "FBgn0035169"; gene_version "1"; transcript_id "FBtr0072587"; transcript_version "1"; gene_name "CG13890"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13890-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 852720 853280 . - . gene_id "FBgn0035170"; gene_version "1"; transcript_id "FBtr0072594"; transcript_version "1"; gene_name "dpr20"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "dpr20-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 870597 871701 . + . gene_id "FBgn0035171"; gene_version "1"; transcript_id "FBtr0300345"; transcript_version "1"; gene_name "CG12502"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG12502-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 892176 895313 . + . gene_id "FBgn0052479"; gene_version "1"; transcript_id "FBtr0072589"; transcript_version "1"; gene_name "CG32479"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG32479-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L FlyBase UTR 877291 878452 . - . gene_id "FBgn0035173"; gene_version "1"; transcript_id "FBtr0072593"; transcript_version "1"; gene_name "CG13907"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13907-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
diff -r 73b932244237 -r a5d8b08af089 test-data/breakpoint.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/breakpoint.bed Thu Oct 29 15:51:10 2015 -0400
@@ -0,0 +1,12 @@
+3L 3371595 3371596 FBgn0027616_control_breakpoint 0 -
+3L 3371497 3371498 FBgn0027616_control_breakpoint 0 -
+3L 3371403 3371404 FBgn0027616_control_breakpoint 0 -
+3L 3371303 3371304 FBgn0027616_control_breakpoint 0 -
+3L 3371189 3371190 FBgn0027616_control_breakpoint 0 -
+3L 3370936 3370937 FBgn0027616_control_breakpoint 0 -
+3L 3371400 3371401 FBgn0027616_treatment_breakpoint 0 -
+3L 3371361 3371362 FBgn0027616_treatment_breakpoint 0 -
+3L 3371337 3371338 FBgn0027616_treatment_breakpoint 0 -
+3L 3371318 3371319 FBgn0027616_treatment_breakpoint 0 -
+3L 3371310 3371311 FBgn0027616_treatment_breakpoint 0 -
+3L 3371195 3371196 FBgn0027616_treatment_breakpoint 0 -
diff -r 73b932244237 -r a5d8b08af089 test-data/c1.bam
Binary file test-data/c1.bam has changed
diff -r 73b932244237 -r a5d8b08af089 test-data/c2.bam
Binary file test-data/c2.bam has changed
diff -r 73b932244237 -r a5d8b08af089 test-data/c3.bam
Binary file test-data/c3.bam has changed
diff -r 73b932244237 -r a5d8b08af089 test-data/dapars.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dapars.tab Thu Oct 29 15:51:10 2015 -0400
@@ -0,0 +1,13 @@
+#chr start end strand gene breakpoint breakpoint_type control_mean_percent treatment_mean_percent control_0_coverage_long control_1_coverage_long control_2_coverage_long treatment_0_coverage_long treatment_1_coverage_long control_0_coverage_short control_1_coverage_short control_2_coverage_short treatment_0_coverage_short treatment_1_coverage_short control_0_percent_long control_1_percent_long control_2_percent_long treatment_0_percent_long treatment_1_percent_long
+3L 3370451 3371600 - FBgn0027616 3371595 control_breakpoint 0.60624805294465911 0.64703056230715239 24.737291739777078 24.785958478598335 24.72955959248966 24.77557161847185 24.75965846300016 19.115611620795107 10.209598416504893 20.530594574391877 12.110393819651023 15.022501270971024 0.56409701169337378 0.70826015293577238 0.54638699420483106 0.6716801722333523 0.62238095238095237
+3L 3370451 3371600 - FBgn0027616 3371497 control_breakpoint 0.53167446986389089 0.50722931382807834 25.17339084590076 24.83086675847875 25.108450107102474 25.34486540101996 24.121432697028514 21.00523960096197 23.72215396429967 21.520352063080335 19.645096770550406 29.349412490560265 0.54513073692966096 0.51141754702049802 0.53847512564151367 0.56334489245326924 0.45111373520288744
+3L 3370451 3371600 - FBgn0027616 3371403 control_breakpoint 0.42175237278876598 0.43968252706676059 21.43265545361876 23.24127468492176 23.840243530439217 24.668855460430212 21.796132549444817 36.722792421490546 30.085068034525737 27.88682196696674 24.845774122380487 35.388813029194914 0.36854080291232694 0.43583102646276056 0.46088528899121045 0.49821347081215273 0.38115158332136839
+3L 3370451 3371600 - FBgn0027616 3371303 control_breakpoint 0.39108916961365203 0.32605616375337437 18.144907864186806 22.269351498851368 22.18854207447699 20.899986765802232 15.578958905406301 38.471119193978524 29.819508472106396 29.989017803369812 32.691946952542786 43.853564316268944 0.32049065974808444 0.42752618335797105 0.42525066573490061 0.38998381502043045 0.2621285124863183
+3L 3370451 3371600 - FBgn0027616 3371189 control_breakpoint 0.36108879385216808 0.31348344456581922 16.42870568302662 19.697783365598664 19.006203838429983 18.29248418141787 13.92595176003268 34.958610236835646 30.910044445037187 31.76652614656724 32.650427132988575 38.05812786896196 0.31970351805583574 0.38922404334965222 0.37433882015101616 0.35907810742345286 0.26788878170818553
+3L 3370451 3371600 - FBgn0027616 3370936 control_breakpoint 0.28090300350568276 0.29742764206502253 11.46529404386468 11.683852531070386 11.717151025282648 13.766111724681437 11.199166362163194 29.811874585497957 29.72761107235841 29.714773098686205 28.924812347110773 29.914478029045522 0.27776357789494338 0.28214053584193971 0.28280489678016518 0.32245991446639399 0.27239536966365108
+3L 3370451 3371600 - FBgn0027616 3371400 treatment_breakpoint 0.42145916899796149 0.43851061373185951 21.401278818382607 23.215540467097224 23.801920635167825 24.63708242685255 21.731267864203808 36.60639625382263 30.075054318450007 27.96408571339583 24.957503263330818 35.418435688866296 0.36893874460135312 0.43564048328830879 0.45979827910422255 0.49676959861626907 0.38025162884745001
+3L 3370451 3371600 - FBgn0027616 3371361 treatment_breakpoint 0.41022686556727411 0.38864357642733532 20.50124466178987 22.696466576127253 23.216860024806003 23.358299973732187 19.390276970900924 36.68975893439792 30.434754232875903 28.951959510908406 28.548944426311216 39.85531959337436 0.3584697482587354 0.42717758467694211 0.44503326376614494 0.45000077048421888 0.32728638237045177
+3L 3370451 3371600 - FBgn0027616 3371337 treatment_breakpoint 0.40286144007500929 0.35591939407775497 19.63139192604695 22.507577656074748 22.83332278545971 22.312985873192638 17.50778335689563 37.385458192346604 30.200461900984376 29.386718364840192 30.686571411834358 42.69044233657633 0.34430860149732984 0.42702361623048329 0.4372521024972148 0.42100325014405965 0.29083553801145029
+3L 3370451 3371600 - FBgn0027616 3371318 treatment_breakpoint 0.39318546325416648 0.33313189900140355 18.647118432026687 22.269249189393072 22.414766533494532 21.35928000871836 16.143664556084484 38.416093760166575 30.221344032507872 29.892123941952075 32.28006863275775 44.079936217085454 0.32678003420525586 0.42425219115453905 0.42852416440270436 0.39820170359418766 0.26806209440861944
+3L 3370451 3371600 - FBgn0027616 3371310 treatment_breakpoint 0.39141424351796555 0.32838393490616724 18.343263676520557 22.241652724683444 22.284741160633022 21.08265781222581 15.796554450012511 38.530840978593275 30.061926839480794 29.968320926900674 32.579743373440486 44.06334722928318 0.32252399906345114 0.42524150182490184 0.42647722966554374 0.3928757816721995 0.26389208814013498
+3L 3370451 3371600 - FBgn0027616 3371195 treatment_breakpoint 0.36099881812575446 0.31267620725828044 16.37902538523203 19.824462944669694 19.116066163474617 18.379188290373293 13.918532544261875 35.29630105334693 30.91506563825953 31.81586648348289 32.752884025821615 38.4250759005065 0.31696026932118287 0.39071042830578101 0.37532575675029961 0.35944540203100878 0.2659070124855521
diff -r 73b932244237 -r a5d8b08af089 test-data/example.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/example.gtf Thu Oct 29 15:51:10 2015 -0400
@@ -0,0 +1,1 @@
+3L FlyBase UTR 3370451 3371600 . - . gene_id "FBgn0027616"; gene_version "1"; transcript_id "FBtr0073078"; transcript_version "1"; gene_name "YT521-B"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "YT521-B-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
diff -r 73b932244237 -r a5d8b08af089 test-data/t1.bam
Binary file test-data/t1.bam has changed
diff -r 73b932244237 -r a5d8b08af089 test-data/t2.bam
Binary file test-data/t2.bam has changed
diff -r 73b932244237 -r a5d8b08af089 test-data/t3.bam
Binary file test-data/t3.bam has changed
diff -r 73b932244237 -r a5d8b08af089 tool_dependencies.xml
--- a/tool_dependencies.xml Wed Oct 28 06:22:18 2015 -0400
+++ b/tool_dependencies.xml Thu Oct 29 15:51:10 2015 -0400
@@ -6,4 +6,13 @@
+
+
+
+
+
+
+
+
+