changeset 0:ff6683f8e9a1 draft

planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
author mvdbeek
date Mon, 23 Jan 2017 10:05:02 -0500
parents
children 540a84c471e9
files flip_deletions.py macros.xml merge_deletions.py tepid_merge_deletions.xml test-data/1.bed test-data/2.bed test-data/merged.bed
diffstat 7 files changed, 208 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/flip_deletions.py	Mon Jan 23 10:05:02 2017 -0500
@@ -0,0 +1,37 @@
+#! /usr/bin/python
+
+from argparse import ArgumentParser
+import sys
+
+parser = ArgumentParser(description='Invert the TE deletion calls to give a consistent data format between TE insertions and deletions')
+parser.add_argument('-s', '--samples', help='list of all sample names', nargs="+", required=True)  # one or more names
+parser.add_argument('-d', '--deletions', help='merged TEPID deletions', required=True)  # path that filter_del opens for reading
+parser.add_argument('-r', '--reference', help='reference sample name, eg Col-0', required=True)  # always first in each output row's sample list
+parser.add_argument('-o', '--output', help='output file name', required=True)  # path that filter_del writes
+options = parser.parse_args()  # NOTE: runs at import time, so merely importing this module parses sys.argv
+
+
+def filter_del(options):
+    """Invert merged deletion calls so each row lists the samples WITHOUT the deletion.
+
+    Reads options.deletions (tab-separated: first three fields, strand, TE name,
+    comma-joined accessions) and writes the first three fields, TE name and the inverted
+    sample list to options.output. The list always starts with options.reference.
+    """
+    sample_names = options.samples
+    with open(options.deletions, 'r') as dels, open(options.output, 'w+') as outfile:
+        for line in dels:
+            fields = line.strip().split('\t')
+            if len(fields) < 6:
+                continue  # blank or truncated row; indexing fields[5] used to raise IndexError
+            sys.stderr.write(fields[5])  # debug trace, emitted exactly as before
+            sys.stderr.write(",".join(sample_names))
+            carriers = set(fields[5].split(','))
+            kept = [options.reference] + [name for name in sample_names if name not in carriers]
+            # fields[:3] drops the strand column (index 3); the TE name is index 4.
+            row = fields[:3] + [fields[4], ','.join(kept)]
+            outfile.write('\t'.join(row) + '\n')
+
+
+if __name__ == "__main__":
+    filter_del(options)  # options was parsed from sys.argv at module level
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Mon Jan 23 10:05:02 2017 -0500
@@ -0,0 +1,29 @@
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="0.8.0">tepid</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <token name="@WRAPPER_VERSION@">0.8.0</token>
+    <token name="@REFERENCES@">
+<![CDATA[
+------
+This tool is part of the `TEPID`_ pipeline from the `Lister laboratory`_.
+.. _TEPID: https://github.com/ListerLab/TEPID
+.. _Lister laboratory: http://listerlab.org/
+]]>
+    </token>
+    <token name="@PROC@">-p \$GALAXY_SLOTS</token>
+    <token name="@NAME@">-n '$bowtie2_bam.element_identifier'</token>
+    <token name="@LINK_CONC@"><![CDATA[
+        ln -f -s '$bowtie2_bam' conc.bam &&
+        ln -f -s '$bowtie2_bam.metadata.bam_index' conc.bam.bai &&
+    ]]></token>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.7554/eLife.20777</citation>
+            <yield />
+        </citations>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/merge_deletions.py	Mon Jan 23 10:05:02 2017 -0500
@@ -0,0 +1,59 @@
+#! /usr/bin/env python
+
+import os
+from argparse import ArgumentParser
+
+def create_master_dict(sample, fname):
+    """Seed the master dict from fname: {row index: {'coords': 5 tab-joined fields, 'accessions': [sample]}}."""
+    master_dict = {}
+    with open(fname, 'r') as masterfile:
+        for line in masterfile:
+            field = line.rsplit()
+            # Test the first *field*: line[0] is one character, so the 'ins_chr' header was never skipped.
+            if field and field[0] != 'ins_chr':
+                coords = '\t'.join(field[:5])
+                master_dict[len(master_dict)] = {'coords': coords, 'accessions': [sample]}
+    return master_dict
+
+
+def merge_deletions(master, fname, sample):
+    """Fold fname's rows into master in place: matching coords gain sample, new coords get the next key."""
+    # coords -> first key holding them, so each row is one lookup instead of a rescan of master.
+    index = {}
+    for key in range(len(master)):
+        index.setdefault(master[key]['coords'], key)
+    with open(fname, 'r') as infile:
+        for line in infile:
+            field = line.rsplit()
+            if not field or field[0] == 'ins_chr':
+                continue  # blank or 'ins_chr' header row
+            coords = '\t'.join(field[:5])
+            # setdefault also covers an empty master, which the old while-loop silently skipped.
+            key = index.setdefault(coords, len(master))
+            entry = master.setdefault(key, {'coords': coords, 'accessions': []})
+            entry['accessions'].append(sample)
+
+
+def save_deletions(master, outf):
+    """Write each master entry to outf as 'coords<TAB>comma-joined unique accessions', in key order."""
+    with open(outf, 'w+') as outfile:
+        for key in sorted(master):  # dict.iteritems() is Python 2 only; sorted keys also fix row order
+            outfile.write('{c}\t{a}\n'.format(c=master[key]['coords'], a=','.join(sorted(set(master[key]['accessions'])))))
+
+def get_name_from_filename(filename):  # sample name = base name minus its final '.ext', if any
+    return os.path.split(filename)[1].rsplit('.', 1)[0]
+
+if __name__ == "__main__":
+    # Command-line entry point: seed the master dict from the first input, merge the rest, save.
+    parser = ArgumentParser(description='Merge TE deletions calls')
+    parser.add_argument('-o', '--output', help="File to write merged deletions to.", required=True)
+    parser.add_argument('-i', '--input', help='all files that should be merged', nargs="+", required=True)
+    options = parser.parse_args()
+
+    # Each sample is named after its input file via get_name_from_filename.
+    seed_path, other_paths = options.input[0], options.input[1:]
+    master_dictionary = create_master_dict(get_name_from_filename(seed_path), seed_path)
+    for path in other_paths:
+        merge_deletions(master_dictionary, path, get_name_from_filename(path))
+    # Write the combined table to the requested output path.
+    save_deletions(master_dictionary, options.output)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tepid_merge_deletions.xml	Mon Jan 23 10:05:02 2017 -0500
@@ -0,0 +1,50 @@
+<tool id="tepid_merge_deletions" name="tepid-merge-deletions" version="@WRAPPER_VERSION@">
+    <description>merge discovered TE deletions</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <version_command>tepid-discover --version</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+        #for $input in $deletions:
+            ln -s -f '$input' '$input.element_identifier' &&
+        #end for
+        python '$__tool_directory__'/merge_deletions.py -i
+        #for $input in $deletions:
+            '$input.element_identifier'
+        #end for
+        -o merged.bed && cat merged.bed && python '$__tool_directory__'/flip_deletions.py
+        -s
+        #for $sample in $deletions:
+            '$sample.element_identifier'
+        #end for
+        -d merged.bed
+        -r '$reference_name'
+        -o '$merged_out'
+    ]]></command>
+    <inputs>
+        <param name="deletions" label="TEPID deletions" argument="--input" type="data_collection" collection_type="list" format="bed"/>
+        <param name="reference_name" label="Reference sample name" argument="--reference" type="text" value="ref"/>
+    </inputs>
+    <outputs>
+        <data name="merged_out" format="bed" label="tepid_discover merged deletions on ${on_string}">
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="deletions">
+                <collection type="list">
+                    <element name="1" value="1.bed" />
+                    <element name="2" value="2.bed" />
+                </collection>
+            </param>
+            <param name="reference_name" value="ref"/>
+            <output name="merged_out" file="merged.bed"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+        This step merges deletions found by tepid discover.
+        The output can be used as input in the tepid-refine step.
+    ]]></help>
+<expand macro="citations"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/1.bed	Mon Jan 23 10:05:02 2017 -0500
@@ -0,0 +1,10 @@
+2L	15826914	15829287	+	FBti0019180	90
+2L	15838500	15839906	-	FBti0019181	91
+2L	15956081	15957487	+	FBti0019182	92
+2L	16141217	16146366	+	FBti0019183	93
+2L	16153790	16160787	-	FBti0019184	94
+2L	16280312	16281417	-	FBti0019770	95
+2L	16512017	16519074	+	FBti0019185	96
+2L	16858765	16859500	-	FBti0061428	97
+2L	16958165	16962888	+	FBti0019186	98
+2L	17144384	17151999	-	FBti0019187	99
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/2.bed	Mon Jan 23 10:05:02 2017 -0500
@@ -0,0 +1,10 @@
+2L	15956081	15957487	+	FBti0019182	90
+2L	16141217	16146366	+	FBti0019183	91
+2L	16153790	16160787	-	FBti0019184	92
+2L	16280312	16281417	-	FBti0019770	93
+2L	16512017	16519074	+	FBti0019185	94
+2L	16958165	16962888	+	FBti0019186	95
+2L	17144384	17151999	-	FBti0019187	96
+2L	17167617	17167803	+	FBti0019188	97
+2L	17199216	17200622	-	FBti0019189	98
+2L	17273262	17277961	-	FBti0019190	99
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/merged.bed	Mon Jan 23 10:05:02 2017 -0500
@@ -0,0 +1,13 @@
+2L	15826914	15829287	FBti0019180	ref,2
+2L	15838500	15839906	FBti0019181	ref,2
+2L	15956081	15957487	FBti0019182	ref
+2L	16141217	16146366	FBti0019183	ref
+2L	16153790	16160787	FBti0019184	ref
+2L	16280312	16281417	FBti0019770	ref
+2L	16512017	16519074	FBti0019185	ref
+2L	16858765	16859500	FBti0061428	ref,2
+2L	16958165	16962888	FBti0019186	ref
+2L	17144384	17151999	FBti0019187	ref
+2L	17167617	17167803	FBti0019188	ref,1
+2L	17199216	17200622	FBti0019189	ref,1
+2L	17273262	17277961	FBti0019190	ref,1