changeset 5:a5d8b08af089 draft

planemo upload for repository https://github.com/mvdbeek/dapars commit deab588a5d5ec7022de63a395fbd04e415ba0a42
author mvdbeek
date Thu, 29 Oct 2015 15:51:10 -0400
parents 73b932244237
children 1cc6c9e327fb
files dapars.py dapars.xml test-data/100_3L.gtf test-data/breakpoint.bed test-data/c1.bam test-data/c2.bam test-data/c3.bam test-data/dapars.tab test-data/example.gtf test-data/t1.bam test-data/t2.bam test-data/t3.bam tool_dependencies.xml
diffstat 13 files changed, 291 insertions(+), 68 deletions(-) [+]
line wrap: on
line diff
--- a/dapars.py	Wed Oct 28 06:22:18 2015 -0400
+++ b/dapars.py	Thu Oct 29 15:51:10 2015 -0400
@@ -2,19 +2,27 @@
 import os
 import csv
 import numpy as np
+from scipy import stats
 from collections import OrderedDict, namedtuple
 import filter_utr
 import subprocess
 from multiprocessing import Pool
 import warnings
+import matplotlib.pyplot as plt
+import matplotlib.gridspec as gridspec
+from tabulate import tabulate
 
+def directory_path(str):
+    if os.path.exists(str):
+        return str
+    else:
+        os.mkdir(str)
+        return str
 
 def parse_args():
     """
     Returns floating point values except for input files.
     My initial approach will not filter anything. (FDR. fold_change, PDUI, Num_least ...)
-    :param argv:
-    :return:
     """
     parser = argparse.ArgumentParser(prog='DaPars', description='Determines the usage of proximal polyA usage')
     parser.add_argument("-c", "--control_alignments", nargs="+", required=True,
@@ -33,7 +41,11 @@
                         help="minimum coverage in each aligment to be considered for determining breakpoints")
     parser.add_argument("-b", "--breakpoint_bed", required=False, type=argparse.FileType('w'),
                         help="Write bedfile with coordinates of breakpoint positions to supplied path.")
-    parser.add_argument("-v", "--version", action='version', version='%(prog)s 0.1.5')
+    parser.add_argument("-v", "--version", action='version', version='%(prog)s 0.2.0')
+    parser.add_argument("-p", "--plot_path", default=None, required=False, type=directory_path,
+                        help="If plot_path is specified will write a coverage plot for every UTR in that directory.")
+    parser.add_argument("-html", "--html_file", default=None, required=False, type=argparse.FileType('w'),
+                        help="Write an html file to the specified location. Only to be used within a galaxy wrapper")
     return parser.parse_args()
 
 
@@ -48,6 +60,8 @@
         self.n_cpus = args.cpu
         self.search_start = args.search_start
         self.coverage_threshold = args.coverage_threshold
+        self.plot_path = args.plot_path
+        self.html_file = args.html_file
         self.utr = args.utr_bed_file
         self.gtf_fields = filter_utr.get_gtf_fields()
         self.result_file = args.output_file
@@ -67,7 +81,8 @@
         if args.breakpoint_bed:
             self.bed_output = args.breakpoint_bed
             self.write_bed()
-
+        if self.plot_path:
+            self.write_html()
 
     def dump_utr_dict_to_bedfile(self):
         w = csv.writer(open("tmp_bedfile.bed", "w"), delimiter="\t")
@@ -110,6 +125,10 @@
         return coverage_dict
 
     def get_utr_dict(self, shift):
+        """
+        The utr end is extended by UTR length * shift, to discover novel distal polyA sites.
+        Set to 0 to disable.
+        """
         utr_dict = OrderedDict()
         for line in self.utr:
             if not line.startswith("#"):
@@ -139,11 +158,11 @@
                 utr_coverage.append(np.sum(vector))
             coverage_per_alignment.append(utr_coverage)
         coverages = np.array([ sum(x) for x in zip(*coverage_per_alignment) ])
-        coverage_weights = coverages / np.mean(coverages)  # TODO: proabably median is better suited?
+        coverage_weights = coverages / np.mean(coverages)  # TODO: probably median is better suited? Or even no normalization!
         return coverage_weights
 
     def get_result_tuple(self):
-        static_desc = ["chr", "start", "end", "strand", "gene", "breakpoint",
+        static_desc = ["chr", "start", "end", "strand", "gene", "t_stat", "p_value", "breakpoint",
                        "breakpoint_type", "control_mean_percent", "treatment_mean_percent" ]
         samples_desc = []
         for statistic in ["coverage_long", "coverage_short", "percent_long"]:
@@ -162,18 +181,22 @@
                  "num_treatment":len(self.treatment_alignments),
                  "result_d":result_d}
         pool = Pool(self.n_cpus)
-        tasks = [ (self.utr_coverages[utr], utr, utr_d, self.result_tuple._fields, self.coverage_weights, self.num_samples,
-                    len(self.control_alignments), len(self.treatment_alignments), self.search_start,
-                   self.coverage_threshold) for utr, utr_d in self.utr_dict.iteritems() ]
+        tasks = [ (self.utr_coverages[utr], self.plot_path, utr, utr_d, self.coverage_weights, len(self.control_alignments),
+                   len(self.treatment_alignments), self.search_start, self.coverage_threshold) \
+                  for utr, utr_d in self.utr_dict.iteritems() ]
         processed_tasks = [ pool.apply_async(calculate_all_utr, t) for t in tasks]
-        result = [res.get() for res in processed_tasks]
-        for res_control, res_treatment in result:
-            if isinstance(res_control, dict):
-                t = self.result_tuple(**res_control)
-                result_d[res_control["gene"]+"_bp_control"] = t
-            if isinstance(res_treatment, dict):
-                t = self.result_tuple(**res_treatment)
-                result_d[res_treatment["gene"]+"_bp_treatment"] = t
+        result_list = [res.get() for res in processed_tasks]
+        for res_control, res_treatment in result_list:
+            if not res_control:
+                continue
+            for i, result in enumerate(res_control):
+                if isinstance(result, dict):
+                    t = self.result_tuple(**result)
+                    result_d[result["gene"]+"_bp_control_{i}".format(i=i)] = t
+            for i, result in enumerate(res_treatment):
+                if isinstance(result, dict):
+                    t = self.result_tuple(**result)
+                    result_d[result["gene"]+"_bp_treatment_{i}".format(i=i)] = t
         return result_d
 
     def write_results(self):
@@ -183,51 +206,47 @@
         w.writerow(header)    # field header
         w.writerows( self.result_d.values())
 
+    def write_html(self):
+        output_lines = [(gene_str_to_link(result.gene), result.breakpoint, result.breakpoint_type, result.p_value ) for result in self.result_d.itervalues()]
+        if self.html_file:
+            self.html_file.write(tabulate(output_lines, headers=["gene", "breakpoint", "breakpoint_type", "p_value"], tablefmt="html"))
+        else:
+            with open(os.path.join(self.plot_path, "index.html"), "w") as html_file:
+                html_file.write(tabulate(output_lines, headers=["gene", "breakpoint", "breakpoint_type", "p_value"], tablefmt="html"))
+
     def write_bed(self):
         w = csv.writer(self.bed_output, delimiter='\t')
         bed = [(result.chr, result.breakpoint, int(result.breakpoint)+1, result.gene+"_"+result.breakpoint_type, 0, result.strand) for result in self.result_d.itervalues()]
         w.writerows(bed)
 
 
-def calculate_all_utr(utr_coverage, utr, utr_d, result_tuple_fields, coverage_weights, num_samples, num_control,
-                      num_treatment, search_start, coverage_threshold):
-    res_control = dict(zip(result_tuple_fields, result_tuple_fields))
-    res_treatment = res_control.copy()
+def calculate_all_utr(utr_coverage, plot_path, utr, utr_d, coverage_weights, num_control, num_treatment, search_start, coverage_threshold):
     if utr_d["strand"] == "+":
         is_reverse = False
     else:
         is_reverse = True
-    control_breakpoint, \
-    control_abundance, \
-    treatment_breakpoint, \
-    treatment_abundance  = optimize_breakpoint(utr_coverage, utr_d["new_start"], utr_d["new_end"], coverage_weights,
-                                                 search_start, coverage_threshold, num_control)
-    if control_breakpoint:
-        breakpoint_to_result(res_control, utr, utr_d, control_breakpoint, "control_breakpoint", control_abundance, is_reverse, num_samples,
+    control_breakpoints, control_abundances, treatment_breakpoints, treatment_abundances  = \
+        optimize_breakpoint(plot_path, utr, utr_coverage, utr_d["new_start"], utr_d["new_end"], coverage_weights, search_start, coverage_threshold, num_control)
+    res_control = breakpoints_to_result(utr, utr_d, control_breakpoints, "control_breakpoint", control_abundances, is_reverse,
                              num_control, num_treatment)
-    if treatment_breakpoint:
-        breakpoint_to_result(res_treatment, utr, utr_d, treatment_breakpoint, "treatment_breakpoint", treatment_abundance, is_reverse,
-                             num_samples, num_control, num_treatment)
-    if res_control == dict(zip(result_tuple_fields, result_tuple_fields)):
-        res_control = False
-    if res_treatment == dict(zip(result_tuple_fields, result_tuple_fields)):
-        res_treatment == False
+    res_treatment = breakpoints_to_result(utr, utr_d, treatment_breakpoints, "treatment_breakpoint", treatment_abundances, is_reverse,
+                             num_control, num_treatment)
     return res_control, res_treatment
 
 
-def breakpoint_to_result(res, utr, utr_d, breakpoint, breakpoint_type,
-                         abundances, is_reverse, num_samples, num_control, num_treatment):
+def breakpoints_to_result(utr, utr_d, breakpoints, breakpoint_type,
+                         abundances, is_reverse, num_control, num_treatment):
     """
     Takes in a result dictionary res and fills the necessary fields
     """
-    long_coverage_vector = abundances[0]
-    short_coverage_vector = abundances[1]
-    num_non_zero = sum((np.array(long_coverage_vector) + np.array(short_coverage_vector)) > 0)  # TODO: This introduces bias
-    if num_non_zero == num_samples:
-        percentage_long = []
-        for i in range(num_samples):
-            ratio = float(long_coverage_vector[i]) / (long_coverage_vector[i] + short_coverage_vector[i])  # long 3'UTR percentage
-            percentage_long.append(ratio)
+    if not breakpoints:
+        return False
+    result = []
+    for breakpoint, abundance in zip(breakpoints, abundances):
+        res = {}
+        long_coverage_vector = abundance[0]
+        short_coverage_vector = abundance[1]
+        percentage_long = long_coverage_vector/(long_coverage_vector+short_coverage_vector)
         for i in range(num_control):
             res["control_{i}_coverage_long".format(i=i)] = float(long_coverage_vector[i])
             res["control_{i}_coverage_short".format(i=i)] = float(short_coverage_vector[i])
@@ -237,6 +256,7 @@
             res["treatment_{i}_coverage_long".format(i=k)] = float(long_coverage_vector[i])
             res["treatment_{i}_coverage_short".format(i=k)] = float(short_coverage_vector[i])
             res["treatment_{i}_percent_long".format(i=k)] = percentage_long[i]
+        res["t_stat"], res["p_value"] = stat_test(percentage_long[:num_control], percentage_long[num_control:])
         control_mean_percent = np.mean(np.array(percentage_long[:num_control]))
         treatment_mean_percent = np.mean(np.array(percentage_long[num_control:]))
         res["chr"] = utr_d["chr"]
@@ -252,43 +272,85 @@
         res["control_mean_percent"] = control_mean_percent
         res["treatment_mean_percent"] = treatment_mean_percent
         res["gene"] = utr
+        result.append(res)
+    return result
 
 
-def optimize_breakpoint(utr_coverage, UTR_start, UTR_end, coverage_weigths, search_start, coverage_threshold, num_control):
+def optimize_breakpoint(plot_path, utr, utr_coverage, UTR_start, UTR_end, coverage_weigths, search_start, coverage_threshold, num_control):
     """
     We are searching for a point within the UTR that minimizes the mean squared error, if the coverage vector was divided
     at that point. utr_coverage is a list with items corresponding to numpy arrays of coverage for a sample.
     """
-    search_point_end = int(abs((UTR_end - UTR_start)) * 0.1)  # TODO: This is 10% of total UTR end. Why?
     num_samples = len(utr_coverage)
-    normalized_utr_coverage = np.array([coverage/ coverage_weigths[i] for i, coverage in enumerate( utr_coverage.values() )])
+    normalized_utr_coverage = np.array(utr_coverage.values())/np.expand_dims(coverage_weigths, axis=1)
     start_coverage = [np.mean(coverage[0:99]) for coverage in utr_coverage.values()]  # filters threshold on mean coverage over first 100 nt
     is_above_threshold = sum(np.array(start_coverage) >= coverage_threshold) >= num_samples  # This filters on the raw threshold. Why?
     is_above_length = UTR_end - UTR_start >= 150
     if (is_above_threshold) and (is_above_length):
-        search_end = UTR_end - UTR_start - search_point_end
+        search_end = UTR_end - UTR_start
         breakpoints = range(search_start, search_end + 1)
         mse_list = [ estimate_mse(normalized_utr_coverage, bp, num_samples, num_control) for bp in breakpoints ]
+        mse_list = [mse_list[0] for i in xrange(search_start)] + mse_list
+        if plot_path:
+            plot_coverage_breakpoint(plot_path, utr, mse_list, normalized_utr_coverage, num_control)
         if len(mse_list) > 0:
-            return mse_to_breakpoint(mse_list, normalized_utr_coverage, breakpoints, num_samples)
+            return mse_to_breakpoint(mse_list, normalized_utr_coverage, num_samples)
     return False, False, False, False
 
 
-def mse_to_breakpoint(mse_list, normalized_utr_coverage, breakpoints, num_samples):
+def plot_coverage_breakpoint(plot_path, utr, mse_list, normalized_utr_coverage, num_control):
     """
-    Take in mse_list with control and treatment mse and return breakpoint and utr abundance
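+    Plot the control/treatment MSE curves and the normalized coverage for one UTR, mark the MSE minima, and save the figure as <utr>.svg in plot_path.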
     """
-    mse_control = [mse[0] for mse in mse_list]
-    mse_treatment = [mse[1] for mse in mse_list]
-    control_index = mse_control.index(min(mse_control))
-    treatment_index = mse_treatment.index(min(mse_treatment))
-    control_breakpoint = breakpoints[control_index]
-    treatment_breakpoint = breakpoints[treatment_index]
-    control_abundance = estimate_abundance(normalized_utr_coverage, control_breakpoint, num_samples)
-    treatment_abundance = estimate_abundance(normalized_utr_coverage, treatment_breakpoint, num_samples)
-    return control_breakpoint, control_abundance, treatment_breakpoint, treatment_abundance
+    fig = plt.figure(figsize=(8, 8))
+    gs = gridspec.GridSpec(2, 1)
+    ax1 = plt.subplot(gs[0, :])
+    ax2 = plt.subplot(gs[1, :])
+    ax1.set_title("mean-squared error plot")
+    ax1.set_ylabel("mean-squared error")
+    ax1.set_xlabel("nt after UTR start")
+    ax2.set_title("coverage plot")
+    ax2.set_xlabel("nt after UTR start")
+    ax2.set_ylabel("normalized nucleotide coverage")
+    mse_control = [ condition[0] for condition in mse_list]
+    mse_treatment = [ condition[1] for condition in mse_list]
+    minima_control = get_minima(np.array(mse_control))
+    minima_treatment = get_minima(np.array(mse_treatment))
+    control = normalized_utr_coverage[:num_control]
+    treatment = normalized_utr_coverage[num_control:]
+    ax1.plot(mse_control, "b-")
+    ax1.plot(mse_treatment, "r-")
+    [ax2.plot(cov, "b-") for cov in control]
+    [ax2.plot(cov, "r-") for cov in treatment]
+    [ax2.axvline(val, color="b", alpha=0.25) for val in minima_control]
+    ax2.axvline(mse_control.index(min(mse_control)), color="b", alpha=1)
+    [ax2.axvline(val, color="r", alpha=0.25) for val in minima_treatment]
+    ax2.axvline(mse_treatment.index(min(mse_treatment)), color="r", alpha=1)
+    fig.add_subplot(ax1)
+    fig.add_subplot(ax2)
+    gs.tight_layout(fig)
+    fig.savefig(os.path.join(plot_path, "{utr}.svg".format(utr=utr)))
 
 
+def mse_to_breakpoint(mse_list, normalized_utr_coverage, num_samples):
+    """
+    Take in mse_list with control and treatment mse and return breakpoint and utr abundance for all local minima
+    in mse_list
+    """
+    mse_control = np.array([mse[0] for mse in mse_list])
+    mse_treatment = np.array([mse[1] for mse in mse_list])
+    control_breakpoints = list(get_minima(mse_control))
+    treatment_breakpoints = list(get_minima(mse_treatment))
+    control_abundances = [estimate_abundance(normalized_utr_coverage, bp, num_samples) for bp in control_breakpoints]
+    treatment_abundances = [estimate_abundance(normalized_utr_coverage, bp, num_samples) for bp in treatment_breakpoints]
+    return control_breakpoints, control_abundances, treatment_breakpoints, treatment_abundances
+
+def get_minima(a):
+    """
+    get minima for numpy array a
+    """
+    return np.where(np.r_[True, a[1:] < a[:-1]] & np.r_[a[:-1] < a[1:], True])[0]+1
+
 def estimate_mse(cov, bp, num_samples, num_control):
     """
     get abundance of long utr vs short utr with breakpoint specifying the position of long and short utr.
@@ -315,6 +377,11 @@
         mean_short_utr = np.mean(short_utr_vector, 1)
         return mean_long_utr, mean_short_utr
 
+def stat_test(a,b):
+    return stats.ttest_ind(a,b)
+
+def gene_str_to_link(str):
+    return "<a href=\"{str}.svg\" type=\"image/svg+xml\" target=\"_blank\">{str}</a>".format(str=str)
 
 if __name__ == '__main__':
     args = parse_args()
--- a/dapars.xml	Wed Oct 28 06:22:18 2015 -0400
+++ b/dapars.xml	Thu Oct 29 15:51:10 2015 -0400
@@ -1,17 +1,16 @@
-<tool id="dapars" name="dapars" version="0.1.5">
+<tool id="dapars" name="dapars" version="0.2.0">
     <description>infer de-novo alternative polyadenylation from rna-seq</description>
     <requirements>
         <requirement type="package" version="1.9">numpy</requirement>
-        <requirement type="package" version="2.22">bedtools</requirement>
+        <requirement type="package" version="0.14">scipy</requirement>
+        <requirement type="package" version="1.4">matplotlib</requirement>
+        <requirement type="package" version="0.7.5">tabulate</requirement>
     </requirements>
     <stdio>
         <exit_code range="1:" />
     </stdio>
     <command interpreter="python"><![CDATA[
-        dapars.py -c
-        #for $c in $controls:
-            "$c"
-        #end for
+        dapars.py -c $controls
         -t
         #for $t in $treatments:
             "$t"
@@ -24,6 +23,10 @@
         #if $make_breakpoint:
             -b "$breakpoint_bed"
         #end if
+        #if $make_html:
+            -p "$html_file.files_path"
+            -html "$html_file"
+        #end if
     ]]></command>
     <inputs>
         <param type="data" name="utr" format="gtf" label="GFF file containing 3prime UTRs" help="featureType of the UTRs
@@ -33,15 +36,33 @@
         <param type="integer" name="search_start" value="100" optional="False" min="1" label="Search start" help="Search start in nucleotides downstream of the start of the UTR. Necessary to correct for proximal drops in coverage. Select 200 for humans. Genomes with short UTRs may require more prpximal search start points."/>
         <param type="float" name="coverage_threshold" value="20" optional="False" label="Coverage threshold" help="Skip the analysis of UTRs whose mean coverage is below the Coverage Threshold in any of the alignment files."/>
         <param name="make_breakpoint" type="boolean" checked="False" label="Output bedfile with breakpoint positions?"/>
+        <param name="make_html" type="boolean" checked="False" label="Output HTML table with plot for every UTR?"/>
     </inputs>
     <outputs>
         <data name="apa_sites" format="tabular" />
         <data name="breakpoint_bed" format="bed6">
             <filter>(make_breakpoint == True)</filter>
         </data>
+        <data name="html_file" format="html">
+            <filter>(make_html == True)</filter>
+        </data>
     </outputs>
+    <tests>
+        <test>
+            <param name="utr" value="example.gtf"></param>
+            <param name="controls" value="c1.bam,c2.bam,c3.bam"></param>
+            <param name="treatments" value="t1.bam,t2.bam,t3.bam"></param>
+            <param name="coverage_threshold" value="5"></param>
+            <param name="search_start" value="1"></param>
+            <param name="make_breakpoint" value="True"></param>
+            <param name="make_html" value="True"></param>
+            <output name="apa_sites" file="dapars.tab"></output>
+            <output name="breakpoint_bed" file="breakpoint.bed"></output>
+        </test>
+    </tests>
     <help><![CDATA[
-        TODO: Fill in help.
+        DaPars works on RNAseq alignment files to find drops of coverage within UTRs. The coverage is then divided into
+        proximal and distal, and the ratio is calculated for each sample.
     ]]></help>
     <citations>
         <citation type="doi">10.1038/ncomms6274</citation>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/100_3L.gtf	Thu Oct 29 15:51:10 2015 -0400
@@ -0,0 +1,100 @@
+3L	FlyBase	UTR	26079	26308	.	+	.	gene_id "FBgn0052475"; gene_version "1"; transcript_id "FBtr0329896"; transcript_version "1"; gene_name "mthl8"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mthl8-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	53873	54480	.	-	.	gene_id "FBgn0262679"; gene_version "1"; transcript_id "FBtr0305596"; transcript_version "1"; gene_name "CG43149"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG43149-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	68899	68995	.	+	.	gene_id "FBgn0002564"; gene_version "1"; transcript_id "FBtr0072463"; transcript_version "1"; gene_name "Lsp1gamma"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Lsp1gamma-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	73945	74637	.	-	.	gene_id "FBgn0035097"; gene_version "1"; transcript_id "FBtr0305595"; transcript_version "1"; gene_name "CG13405"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13405-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	97628	97922	.	-	.	gene_id "FBgn0040688"; gene_version "1"; transcript_id "FBtr0072540"; transcript_version "1"; gene_name "CG12483"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG12483-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	143821	145895	.	+	.	gene_id "FBgn0020386"; gene_version "1"; transcript_id "FBtr0072464"; transcript_version "1"; gene_name "Pdk1"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Pdk1-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	148971	149056	.	+	.	gene_id "FBgn0035099"; gene_version "1"; transcript_id "FBtr0273352"; transcript_version "1"; gene_name "CG6845"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG6845-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	149679	149807	.	-	.	gene_id "FBgn0263988"; gene_version "1"; transcript_id "FBtr0072538"; transcript_version "1"; gene_name "Dic61B"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Dic61B-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	173909	174517	.	+	.	gene_id "FBgn0035101"; gene_version "1"; transcript_id "FBtr0299864"; transcript_version "1"; gene_name "p130CAS"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "p130CAS-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	175112	175192	.	-	.	gene_id "FBgn0035102"; gene_version "1"; transcript_id "FBtr0072537"; transcript_version "1"; gene_name "CG7049"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG7049-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	176306	176689	.	-	.	gene_id "FBgn0035103"; gene_version "1"; transcript_id "FBtr0301890"; transcript_version "1"; gene_name "Vdup1"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Vdup1-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	182417	182535	.	+	.	gene_id "FBgn0035104"; gene_version "1"; transcript_id "FBtr0334068"; transcript_version "1"; gene_name "CG13875"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13875-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	182522	183272	.	-	.	gene_id "FBgn0083976"; gene_version "1"; transcript_id "FBtr0110975"; transcript_version "1"; gene_name "CG34140"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34140-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	182522	182653	.	-	.	gene_id "FBgn0083992"; gene_version "1"; transcript_id "FBtr0344094"; transcript_version "1"; gene_name "Mkp"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Mkp-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	185999	186310	.	+	.	gene_id "FBgn0027786"; gene_version "1"; transcript_id "FBtr0305544"; transcript_version "1"; gene_name "Mtch"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Mtch-RE"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	187207	188548	.	-	.	gene_id "FBgn0035106"; gene_version "1"; transcript_id "FBtr0072532"; transcript_version "1"; gene_name "rno"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "rno-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	202032	202152	.	+	.	gene_id "FBgn0035107"; gene_version "1"; transcript_id "FBtr0346614"; transcript_version "1"; gene_name "mri"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mri-RE"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	202098	202213	.	-	.	gene_id "FBgn0025592"; gene_version "1"; transcript_id "FBtr0072530"; transcript_version "1"; gene_name "Gyk"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Gyk-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	205243	205309	.	+	.	gene_id "FBgn0024945"; gene_version "1"; transcript_id "FBtr0072483"; transcript_version "1"; gene_name "NitFhit"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "NitFhit-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	207113	207527	.	+	.	gene_id "FBgn0035109"; gene_version "1"; transcript_id "FBtr0072484"; transcript_version "1"; gene_name "CG13876"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13876-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	207184	208113	.	-	.	gene_id "FBgn0027587"; gene_version "1"; transcript_id "FBtr0344842"; transcript_version "1"; gene_name "CG7028"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG7028-RE"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	211636	211697	.	-	.	gene_id "FBgn0035110"; gene_version "1"; transcript_id "FBtr0072527"; transcript_version "1"; gene_name "thoc7"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "thoc7-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	216110	216479	.	+	.	gene_id "FBgn0035111"; gene_version "1"; transcript_id "FBtr0072487"; transcript_version "1"; gene_name "CG16940"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG16940-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	217893	218058	.	+	.	gene_id "FBgn0063923"; gene_version "1"; transcript_id "FBtr0100851"; transcript_version "1"; gene_name "Kaz1-ORFB"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Kaz1-ORFB-RE"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	217932	218668	.	-	.	gene_id "FBgn0035113"; gene_version "1"; transcript_id "FBtr0072525"; transcript_version "1"; gene_name "pyx"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "pyx-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	218591	218663	.	+	.	gene_id "FBgn0035112"; gene_version "1"; transcript_id "FBtr0300710"; transcript_version "1"; gene_name "CG13877"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13877-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	224758	225605	.	+	.	gene_id "FBgn0053229"; gene_version "1"; transcript_id "FBtr0345592"; transcript_version "1"; gene_name "CG33229"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG33229-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	230971	231015	.	-	.	gene_id "FBgn0262035"; gene_version "1"; transcript_id "FBtr0303931"; transcript_version "1"; gene_name "CG42846"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG42846-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	231681	231833	.	-	.	gene_id "FBgn0085483"; gene_version "1"; transcript_id "FBtr0112760"; transcript_version "1"; gene_name "CG34454"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34454-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	232480	232656	.	-	.	gene_id "FBgn0085482"; gene_version "1"; transcript_id "FBtr0299518"; transcript_version "1"; gene_name "CG34453"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34453-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	233926	234298	.	-	.	gene_id "FBgn0000541"; gene_version "1"; transcript_id "FBtr0301348"; transcript_version "1"; gene_name "E(bx)"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "E(bx)-RE"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	249965	250305	.	+	.	gene_id "FBgn0052476"; gene_version "1"; transcript_id "FBtr0072501"; transcript_version "1"; gene_name "mthl14"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mthl14-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	250229	250509	.	-	.	gene_id "FBgn0024806"; gene_version "1"; transcript_id "FBtr0332109"; transcript_version "1"; gene_name "DIP2"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "DIP2-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	258998	259097	.	+	.	gene_id "FBgn0035120"; gene_version "1"; transcript_id "FBtr0332106"; transcript_version "1"; gene_name "wac"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "wac-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	259043	259502	.	-	.	gene_id "FBgn0035121"; gene_version "1"; transcript_id "FBtr0332107"; transcript_version "1"; gene_name "Tudor-SN"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Tudor-SN-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	265445	265932	.	+	.	gene_id "FBgn0035122"; gene_version "1"; transcript_id "FBtr0310415"; transcript_version "1"; gene_name "mRpL17"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mRpL17-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	269120	270336	.	+	.	gene_id "FBgn0027111"; gene_version "1"; transcript_id "FBtr0310416"; transcript_version "1"; gene_name "miple"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "miple-RC"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	274890	275018	.	+	.	gene_id "FBgn0029002"; gene_version "1"; transcript_id "FBtr0332839"; transcript_version "1"; gene_name "miple2"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "miple2-RG"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	271420	272209	.	-	.	gene_id "FBgn0052845"; gene_version "1"; transcript_id "FBtr0072518"; transcript_version "1"; gene_name "CG32845"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG32845-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	276568	276667	.	+	.	gene_id "FBgn0035124"; gene_version "1"; transcript_id "FBtr0072507"; transcript_version "1"; gene_name "ttm2"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "ttm2-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	303884	305292	.	+	.	gene_id "FBgn0264707"; gene_version "1"; transcript_id "FBtr0333962"; transcript_version "1"; gene_name "RhoGEF3"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "RhoGEF3-RM"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	305393	306358	.	-	.	gene_id "FBgn0004373"; gene_version "1"; transcript_id "FBtr0072516"; transcript_version "1"; gene_name "fwd"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "fwd-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	317622	317789	.	+	.	gene_id "FBgn0085293"; gene_version "1"; transcript_id "FBtr0306849"; transcript_version "1"; gene_name "CG34264"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34264-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	321233	321928	.	-	.	gene_id "FBgn0052344"; gene_version "1"; transcript_id "FBtr0072576"; transcript_version "1"; gene_name "CG32344"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG32344-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	324985	325235	.	-	.	gene_id "FBgn0052343"; gene_version "1"; transcript_id "FBtr0299831"; transcript_version "1"; gene_name "Atac3"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Atac3-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	355528	356050	.	+	.	gene_id "FBgn0261985"; gene_version "1"; transcript_id "FBtr0306563"; transcript_version "1"; gene_name "Ptpmeg"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Ptpmeg-RK"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	329822	330039	.	-	.	gene_id "FBgn0035131"; gene_version "1"; transcript_id "FBtr0072574"; transcript_version "1"; gene_name "mthl9"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mthl9-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	332801	333474	.	-	.	gene_id "FBgn0035132"; gene_version "1"; transcript_id "FBtr0333904"; transcript_version "1"; gene_name "mthl10"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mthl10-RG"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	343378	343543	.	-	.	gene_id "FBgn0023000"; gene_version "1"; transcript_id "FBtr0072571"; transcript_version "1"; gene_name "mth"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "mth-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	357460	357664	.	+	.	gene_id "FBgn0035134"; gene_version "1"; transcript_id "FBtr0072546"; transcript_version "1"; gene_name "CG1231"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG1231-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	357851	358496	.	-	.	gene_id "FBgn0265574"; gene_version "1"; transcript_id "FBtr0072569"; transcript_version "1"; gene_name "Cdc5"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Cdc5-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	358400	358486	.	+	.	gene_id "FBgn0040291"; gene_version "1"; transcript_id "FBtr0072547"; transcript_version "1"; gene_name "Roc1b"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Roc1b-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	365247	365681	.	+	.	gene_id "FBgn0035137"; gene_version "1"; transcript_id "FBtr0072549"; transcript_version "1"; gene_name "CG1233"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG1233-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	365664	365830	.	-	.	gene_id "FBgn0035138"; gene_version "1"; transcript_id "FBtr0072568"; transcript_version "1"; gene_name "CG13884"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13884-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	366524	367389	.	-	.	gene_id "FBgn0262139"; gene_version "1"; transcript_id "FBtr0330146"; transcript_version "1"; gene_name "trh"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "trh-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	433146	433229	.	-	.	gene_id "FBgn0035139"; gene_version "1"; transcript_id "FBtr0072566"; transcript_version "1"; gene_name "CG13891"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13891-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	434083	435117	.	-	.	gene_id "FBgn0001316"; gene_version "1"; transcript_id "FBtr0110818"; transcript_version "1"; gene_name "klar"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "klar-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	500014	500076	.	-	.	gene_id "FBgn0085296"; gene_version "1"; transcript_id "FBtr0112462"; transcript_version "1"; gene_name "CG34267"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34267-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	501340	501400	.	+	.	gene_id "FBgn0085297"; gene_version "1"; transcript_id "FBtr0112463"; transcript_version "1"; gene_name "CG34268"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34268-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	528995	528996	.	+	.	gene_id "FBgn0085298"; gene_version "1"; transcript_id "FBtr0112464"; transcript_version "1"; gene_name "CG34269"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG34269-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	542063	542147	.	+	.	gene_id "FBgn0035140"; gene_version "1"; transcript_id "FBtr0072550"; transcript_version "1"; gene_name "CG17180"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG17180-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	542122	542229	.	-	.	gene_id "FBgn0035141"; gene_version "1"; transcript_id "FBtr0072564"; transcript_version "1"; gene_name "Cypl"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Cypl-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	574202	581099	.	+	.	gene_id "FBgn0035142"; gene_version "1"; transcript_id "FBtr0072551"; transcript_version "1"; gene_name "hipk"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "hipk-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	582622	582810	.	+	.	gene_id "FBgn0035143"; gene_version "1"; transcript_id "FBtr0072553"; transcript_version "1"; gene_name "Ppm1"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Ppm1-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	582824	583983	.	-	.	gene_id "FBgn0035144"; gene_version "1"; transcript_id "FBtr0072563"; transcript_version "1"; gene_name "Kah"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Kah-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	598575	599239	.	+	.	gene_id "FBgn0035145"; gene_version "1"; transcript_id "FBtr0344912"; transcript_version "1"; gene_name "MED14"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "MED14-RC"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	599094	599483	.	-	.	gene_id "FBgn0035146"; gene_version "1"; transcript_id "FBtr0072562"; transcript_version "1"; gene_name "CG13893"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13893-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	605165	605195	.	-	.	gene_id "FBgn0016715"; gene_version "1"; transcript_id "FBtr0072561"; transcript_version "1"; gene_name "Reg-2"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Reg-2-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	620695	621645	.	-	.	gene_id "FBgn0263042"; gene_version "1"; transcript_id "FBtr0306919"; transcript_version "1"; gene_name "CG43337"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG43337-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	651106	651311	.	+	.	gene_id "FBgn0035147"; gene_version "1"; transcript_id "FBtr0072556"; transcript_version "1"; gene_name "Gale"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Gale-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	651542	652087	.	-	.	gene_id "FBgn0035148"; gene_version "1"; transcript_id "FBtr0072559"; transcript_version "1"; gene_name "CG3402"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG3402-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	652987	653053	.	-	.	gene_id "FBgn0035149"; gene_version "1"; transcript_id "FBtr0072558"; transcript_version "1"; gene_name "MED30"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "MED30-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	657795	657971	.	+	.	gene_id "FBgn0035150"; gene_version "1"; transcript_id "FBtr0072557"; transcript_version "1"; gene_name "Rev1"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Rev1-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	677879	677999	.	-	.	gene_id "FBgn0035151"; gene_version "1"; transcript_id "FBtr0072612"; transcript_version "1"; gene_name "CG17129"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG17129-RC"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	680436	680857	.	-	.	gene_id "FBgn0035152"; gene_version "1"; transcript_id "FBtr0333379"; transcript_version "1"; gene_name "CG3386"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG3386-RD"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	682885	683065	.	-	.	gene_id "FBgn0035153"; gene_version "1"; transcript_id "FBtr0072610"; transcript_version "1"; gene_name "ebd1"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "ebd1-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	685942	685990	.	-	.	gene_id "FBgn0035154"; gene_version "1"; transcript_id "FBtr0072609"; transcript_version "1"; gene_name "CG3344"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG3344-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	688523	688584	.	-	.	gene_id "FBgn0052483"; gene_version "1"; transcript_id "FBtr0072608"; transcript_version "1"; gene_name "CG32483"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG32483-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	691438	691951	.	+	.	gene_id "FBgn0035155"; gene_version "1"; transcript_id "FBtr0330078"; transcript_version "1"; gene_name "RabX6"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "RabX6-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	691486	692021	.	-	.	gene_id "FBgn0260862"; gene_version "1"; transcript_id "FBtr0072607"; transcript_version "1"; gene_name "Vti1"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Vti1-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	693083	693282	.	-	.	gene_id "FBgn0035157"; gene_version "1"; transcript_id "FBtr0072606"; transcript_version "1"; gene_name "CG13894"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13894-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	705948	706134	.	-	.	gene_id "FBgn0035158"; gene_version "1"; transcript_id "FBtr0072605"; transcript_version "1"; gene_name "CG13895"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13895-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	708601	708709	.	-	.	gene_id "FBgn0025676"; gene_version "1"; transcript_id "FBtr0072604"; transcript_version "1"; gene_name "CkIIalpha-i3"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CkIIalpha-i3-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	710820	710941	.	-	.	gene_id "FBgn0035159"; gene_version "1"; transcript_id "FBtr0331770"; transcript_version "1"; gene_name "CG13896"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13896-RC"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	737680	737863	.	-	.	gene_id "FBgn0035160"; gene_version "1"; transcript_id "FBtr0072601"; transcript_version "1"; gene_name "hng3"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "hng3-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	752308	753492	.	+	.	gene_id "FBgn0000575"; gene_version "1"; transcript_id "FBtr0072578"; transcript_version "1"; gene_name "emc"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "emc-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	803427	803517	.	-	.	gene_id "FBgn0035161"; gene_version "1"; transcript_id "FBtr0072599"; transcript_version "1"; gene_name "CG13898"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13898-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	826720	827877	.	-	.	gene_id "FBgn0035162"; gene_version "1"; transcript_id "FBtr0331404"; transcript_version "1"; gene_name "CG13900"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13900-RC"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	835465	835515	.	+	.	gene_id "FBgn0260755"; gene_version "1"; transcript_id "FBtr0301252"; transcript_version "1"; gene_name "CG42553"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG42553-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	836554	836770	.	+	.	gene_id "FBgn0260756"; gene_version "1"; transcript_id "FBtr0301253"; transcript_version "1"; gene_name "CG42554"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG42554-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	836667	836782	.	-	.	gene_id "FBgn0035164"; gene_version "1"; transcript_id "FBtr0331403"; transcript_version "1"; gene_name "CG13901"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13901-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	839433	839931	.	+	.	gene_id "FBgn0035165"; gene_version "1"; transcript_id "FBtr0345588"; transcript_version "1"; gene_name "CG13887"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13887-RE"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	839980	840057	.	-	.	gene_id "FBgn0035166"; gene_version "1"; transcript_id "FBtr0331402"; transcript_version "1"; gene_name "CG13902"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13902-RC"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	843412	843701	.	+	.	gene_id "FBgn0035167"; gene_version "1"; transcript_id "FBtr0072585"; transcript_version "1"; gene_name "Gr61a"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "Gr61a-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	851017	851080	.	+	.	gene_id "FBgn0035168"; gene_version "1"; transcript_id "FBtr0332752"; transcript_version "1"; gene_name "CG13889"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13889-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	852349	852673	.	+	.	gene_id "FBgn0035169"; gene_version "1"; transcript_id "FBtr0072587"; transcript_version "1"; gene_name "CG13890"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13890-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	852720	853280	.	-	.	gene_id "FBgn0035170"; gene_version "1"; transcript_id "FBtr0072594"; transcript_version "1"; gene_name "dpr20"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "dpr20-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	870597	871701	.	+	.	gene_id "FBgn0035171"; gene_version "1"; transcript_id "FBtr0300345"; transcript_version "1"; gene_name "CG12502"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG12502-RB"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	892176	895313	.	+	.	gene_id "FBgn0052479"; gene_version "1"; transcript_id "FBtr0072589"; transcript_version "1"; gene_name "CG32479"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG32479-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
+3L	FlyBase	UTR	877291	878452	.	-	.	gene_id "FBgn0035173"; gene_version "1"; transcript_id "FBtr0072593"; transcript_version "1"; gene_name "CG13907"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "CG13907-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/breakpoint.bed	Thu Oct 29 15:51:10 2015 -0400
@@ -0,0 +1,12 @@
+3L	3371595	3371596	FBgn0027616_control_breakpoint	0	-
+3L	3371497	3371498	FBgn0027616_control_breakpoint	0	-
+3L	3371403	3371404	FBgn0027616_control_breakpoint	0	-
+3L	3371303	3371304	FBgn0027616_control_breakpoint	0	-
+3L	3371189	3371190	FBgn0027616_control_breakpoint	0	-
+3L	3370936	3370937	FBgn0027616_control_breakpoint	0	-
+3L	3371400	3371401	FBgn0027616_treatment_breakpoint	0	-
+3L	3371361	3371362	FBgn0027616_treatment_breakpoint	0	-
+3L	3371337	3371338	FBgn0027616_treatment_breakpoint	0	-
+3L	3371318	3371319	FBgn0027616_treatment_breakpoint	0	-
+3L	3371310	3371311	FBgn0027616_treatment_breakpoint	0	-
+3L	3371195	3371196	FBgn0027616_treatment_breakpoint	0	-
Binary file test-data/c1.bam has changed
Binary file test-data/c2.bam has changed
Binary file test-data/c3.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dapars.tab	Thu Oct 29 15:51:10 2015 -0400
@@ -0,0 +1,13 @@
+#chr	start	end	strand	gene	breakpoint	breakpoint_type	control_mean_percent	treatment_mean_percent	control_0_coverage_long	control_1_coverage_long	control_2_coverage_long	treatment_0_coverage_long	treatment_1_coverage_long	control_0_coverage_short	control_1_coverage_short	control_2_coverage_short	treatment_0_coverage_short	treatment_1_coverage_short	control_0_percent_long	control_1_percent_long	control_2_percent_long	treatment_0_percent_long	treatment_1_percent_long
+3L	3370451	3371600	-	FBgn0027616	3371595	control_breakpoint	0.60624805294465911	0.64703056230715239	24.737291739777078	24.785958478598335	24.72955959248966	24.77557161847185	24.75965846300016	19.115611620795107	10.209598416504893	20.530594574391877	12.110393819651023	15.022501270971024	0.56409701169337378	0.70826015293577238	0.54638699420483106	0.6716801722333523	0.62238095238095237
+3L	3370451	3371600	-	FBgn0027616	3371497	control_breakpoint	0.53167446986389089	0.50722931382807834	25.17339084590076	24.83086675847875	25.108450107102474	25.34486540101996	24.121432697028514	21.00523960096197	23.72215396429967	21.520352063080335	19.645096770550406	29.349412490560265	0.54513073692966096	0.51141754702049802	0.53847512564151367	0.56334489245326924	0.45111373520288744
+3L	3370451	3371600	-	FBgn0027616	3371403	control_breakpoint	0.42175237278876598	0.43968252706676059	21.43265545361876	23.24127468492176	23.840243530439217	24.668855460430212	21.796132549444817	36.722792421490546	30.085068034525737	27.88682196696674	24.845774122380487	35.388813029194914	0.36854080291232694	0.43583102646276056	0.46088528899121045	0.49821347081215273	0.38115158332136839
+3L	3370451	3371600	-	FBgn0027616	3371303	control_breakpoint	0.39108916961365203	0.32605616375337437	18.144907864186806	22.269351498851368	22.18854207447699	20.899986765802232	15.578958905406301	38.471119193978524	29.819508472106396	29.989017803369812	32.691946952542786	43.853564316268944	0.32049065974808444	0.42752618335797105	0.42525066573490061	0.38998381502043045	0.2621285124863183
+3L	3370451	3371600	-	FBgn0027616	3371189	control_breakpoint	0.36108879385216808	0.31348344456581922	16.42870568302662	19.697783365598664	19.006203838429983	18.29248418141787	13.92595176003268	34.958610236835646	30.910044445037187	31.76652614656724	32.650427132988575	38.05812786896196	0.31970351805583574	0.38922404334965222	0.37433882015101616	0.35907810742345286	0.26788878170818553
+3L	3370451	3371600	-	FBgn0027616	3370936	control_breakpoint	0.28090300350568276	0.29742764206502253	11.46529404386468	11.683852531070386	11.717151025282648	13.766111724681437	11.199166362163194	29.811874585497957	29.72761107235841	29.714773098686205	28.924812347110773	29.914478029045522	0.27776357789494338	0.28214053584193971	0.28280489678016518	0.32245991446639399	0.27239536966365108
+3L	3370451	3371600	-	FBgn0027616	3371400	treatment_breakpoint	0.42145916899796149	0.43851061373185951	21.401278818382607	23.215540467097224	23.801920635167825	24.63708242685255	21.731267864203808	36.60639625382263	30.075054318450007	27.96408571339583	24.957503263330818	35.418435688866296	0.36893874460135312	0.43564048328830879	0.45979827910422255	0.49676959861626907	0.38025162884745001
+3L	3370451	3371600	-	FBgn0027616	3371361	treatment_breakpoint	0.41022686556727411	0.38864357642733532	20.50124466178987	22.696466576127253	23.216860024806003	23.358299973732187	19.390276970900924	36.68975893439792	30.434754232875903	28.951959510908406	28.548944426311216	39.85531959337436	0.3584697482587354	0.42717758467694211	0.44503326376614494	0.45000077048421888	0.32728638237045177
+3L	3370451	3371600	-	FBgn0027616	3371337	treatment_breakpoint	0.40286144007500929	0.35591939407775497	19.63139192604695	22.507577656074748	22.83332278545971	22.312985873192638	17.50778335689563	37.385458192346604	30.200461900984376	29.386718364840192	30.686571411834358	42.69044233657633	0.34430860149732984	0.42702361623048329	0.4372521024972148	0.42100325014405965	0.29083553801145029
+3L	3370451	3371600	-	FBgn0027616	3371318	treatment_breakpoint	0.39318546325416648	0.33313189900140355	18.647118432026687	22.269249189393072	22.414766533494532	21.35928000871836	16.143664556084484	38.416093760166575	30.221344032507872	29.892123941952075	32.28006863275775	44.079936217085454	0.32678003420525586	0.42425219115453905	0.42852416440270436	0.39820170359418766	0.26806209440861944
+3L	3370451	3371600	-	FBgn0027616	3371310	treatment_breakpoint	0.39141424351796555	0.32838393490616724	18.343263676520557	22.241652724683444	22.284741160633022	21.08265781222581	15.796554450012511	38.530840978593275	30.061926839480794	29.968320926900674	32.579743373440486	44.06334722928318	0.32252399906345114	0.42524150182490184	0.42647722966554374	0.3928757816721995	0.26389208814013498
+3L	3370451	3371600	-	FBgn0027616	3371195	treatment_breakpoint	0.36099881812575446	0.31267620725828044	16.37902538523203	19.824462944669694	19.116066163474617	18.379188290373293	13.918532544261875	35.29630105334693	30.91506563825953	31.81586648348289	32.752884025821615	38.4250759005065	0.31696026932118287	0.39071042830578101	0.37532575675029961	0.35944540203100878	0.2659070124855521
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/example.gtf	Thu Oct 29 15:51:10 2015 -0400
@@ -0,0 +1,1 @@
+3L	FlyBase	UTR	3370451	3371600	.	-	.	gene_id "FBgn0027616"; gene_version "1"; transcript_id "FBtr0073078"; transcript_version "1"; gene_name "YT521-B"; gene_source "FlyBase"; gene_biotype "protein_coding"; transcript_name "YT521-B-RA"; transcript_source "FlyBase"; transcript_biotype "protein_coding";
Binary file test-data/t1.bam has changed
Binary file test-data/t2.bam has changed
Binary file test-data/t3.bam has changed
--- a/tool_dependencies.xml	Wed Oct 28 06:22:18 2015 -0400
+++ b/tool_dependencies.xml	Thu Oct 29 15:51:10 2015 -0400
@@ -6,4 +6,13 @@
     <package name="numpy" version="1.9">
         <repository changeset_revision="816d3480b0b1" name="package_numpy_1_9" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
     </package>
+    <package name="scipy" version="0.14">
+        <repository changeset_revision="01f0415771cf" name="package_scipy_0_14" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="matplotlib" version="1.4">
+        <repository changeset_revision="fb53242ebfbe" name="package_matplotlib_1_4" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="tabulate" version="0.7.5">
+        <repository changeset_revision="78951742defa" name="package_tabulate_0_7_5" owner="mvdbeek" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    </package>
 </tool_dependency>