Mercurial > repos > mvdbeek > tepid_merge_deletions
changeset 0:ff6683f8e9a1 draft
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
author | mvdbeek |
---|---|
date | Mon, 23 Jan 2017 10:05:02 -0500 |
parents | |
children | 540a84c471e9 |
files | flip_deletions.py macros.xml merge_deletions.py tepid_merge_deletions.xml test-data/1.bed test-data/2.bed test-data/merged.bed |
diffstat | 7 files changed, 208 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# File: flip_deletions.py
"""Invert merged TEPID deletion calls.

The merged deletions file lists, per TE, the samples in its sixth column.
This script rewrites each record so that it lists the reference sample plus
every sample that is NOT named in that column, and drops the strand column,
giving insertions and deletions a consistent layout.
"""

import sys
from argparse import ArgumentParser

# Building the parser has no side effects; the command line itself is only
# parsed in main(), so importing this module never touches sys.argv.
parser = ArgumentParser(description='Invert the TE deletion calls to give a consistent data format between TE insertions and deletions')
parser.add_argument('-s', '--samples', help='list of all sample names', nargs="+", required=True)
parser.add_argument('-d', '--deletions', help='merged TEPID deletions', required=True)
parser.add_argument('-r', '--reference', help='reference sample name, eg Col-0', required=True)
parser.add_argument('-o', '--output', help='output file name', required=True)


def filter_del(options):
    """Write the inverted deletion calls described by *options*.

    Args:
        options: object with the attributes ``samples`` (list of all sample
            names), ``deletions`` (path of the merged, tab-separated input),
            ``reference`` (reference sample name) and ``output`` (path of
            the file to write).

    Each input record must have at least six tab-separated columns:
    chromosome, start, end, strand, TE name and a comma-separated sample
    list. Each output record is ``chrom, start, end, TE, names`` where
    ``names`` is the reference followed by the samples (in the order given
    in ``options.samples``) that are absent from the input sample list.
    Blank lines are skipped.

    Raises:
        IndexError: if a non-blank record has fewer than six columns.
    """
    sample_names = options.samples
    with open(options.deletions, 'r') as dels, open(options.output, 'w+') as outfile:
        for line in dels:
            fields = line.strip().split('\t')
            if fields == ['']:
                # Blank line (e.g. trailing newline at end of file).
                continue
            te = fields[4]
            # A set gives O(1) membership tests for the loop below.
            listed = set(fields[5].split(','))
            names = [options.reference]
            names.extend(sample for sample in sample_names if sample not in listed)
            # fields[:3] keeps chrom/start/end and leaves out the strand.
            record = fields[:3] + [te, ','.join(names)]
            outfile.write('\t'.join(record) + '\n')


def main(argv=None):
    """Parse the command line (``sys.argv`` when *argv* is None) and run."""
    filter_del(parser.parse_args(argv))


if __name__ == "__main__":
    sys.exit(main())
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Mon Jan 23 10:05:02 2017 -0500 @@ -0,0 +1,29 @@ +<macros> + <xml name="requirements"> + <requirements> + <requirement type="package" version="0.8.0">tepid</requirement> + <yield/> + </requirements> + </xml> + <token name="@WRAPPER_VERSION@">0.8.0</token> + <token name="@REFERENCES@"> +<![CDATA[ +------ +This tool is part of the `TEPID`_ pipeline from the `Lister laboratory`_. +.. _TEPID: https://github.com/ListerLab/TEPID +.. _Lister laboratory: http://listerlab.org/ +]]> + </token> + <token name="@PROC@">-p \$GALAXY_SLOTS</token> + <token name="@NAME@">-n '$bowtie2_bam.element_identifier'</token> + <token name="@LINK_CONC@"><![CDATA[ + ln -f -s '$bowtie2_bam' conc.bam && + ln -f -s '$bowtie2_bam.metadata.bam_index' conc.bam.bai && + ]]></token> + <xml name="citations"> + <citations> + <citation type="doi">10.7554/eLife.20777</citation> + <yield /> + </citations> + </xml> +</macros>
#!/usr/bin/env python
# File: merge_deletions.py
"""Merge per-sample TE deletion calls into one table.

Every input file contributes its records (identified by their first five
whitespace-separated columns); the output lists each distinct record once,
followed by the comma-separated names of the samples it was seen in.
"""

import os
from argparse import ArgumentParser

# First column of a header line; such lines carry no record.
HEADER_TAG = 'ins_chr'


def _parse_coords(line):
    """Return the record key (first five columns, tab-joined) of *line*.

    Returns None for blank lines and for header lines, i.e. lines whose
    first column is ``ins_chr``.
    """
    field = line.split()
    if not field or field[0] == HEADER_TAG:
        return None
    return '\t'.join(field[:5])


def _unique_in_order(items):
    """Return *items* without duplicates, keeping first-seen order."""
    seen = set()
    ordered = []
    for item in items:
        if item not in seen:
            seen.add(item)
            ordered.append(item)
    return ordered


def create_master_dict(sample, fname):
    """Build the master table from the first sample's file.

    Args:
        sample: name recorded for every record read from *fname*.
        fname: path of the deletions file to read.

    Returns:
        dict mapping consecutive integers (starting at 0, in file order) to
        ``{'coords': <first five columns, tab-joined>, 'accessions': [sample]}``.
        Blank and header lines are skipped.
    """
    master_dict = {}
    with open(fname, 'r') as masterfile:
        for line in masterfile:
            coords = _parse_coords(line)
            if coords is None:
                continue
            master_dict[len(master_dict)] = {'coords': coords, 'accessions': [sample]}
    return master_dict


def merge_deletions(master, fname, sample):
    """Add the records of *fname* to *master* in place.

    A record already present in *master* gets *sample* appended to its
    ``accessions`` list; an unseen record is stored under the next integer
    key. This also works when *master* is empty. Blank and header lines
    are skipped.

    Args:
        master: table as returned by :func:`create_master_dict`.
        fname: path of the deletions file to merge in.
        sample: name recorded for the records of *fname*.
    """
    # Index coords -> key once so each line is an O(1) lookup instead of a
    # scan over the whole table. Walking keys in ascending order with
    # setdefault keeps the lowest key when coords are duplicated.
    index = {}
    for key in sorted(master):
        index.setdefault(master[key]['coords'], key)

    with open(fname, 'r') as infile:
        for line in infile:
            coords = _parse_coords(line)
            if coords is None:
                continue
            if coords in index:
                master[index[coords]]['accessions'].append(sample)
            else:
                new_key = len(master)
                master[new_key] = {'coords': coords, 'accessions': [sample]}
                index[coords] = new_key


def save_deletions(master, outf):
    """Write *master* to *outf*, one tab-separated record per line.

    Records are written in ascending key order. Each line is the record's
    coords followed by its sample names, de-duplicated and comma-joined in
    the order they were first added.
    """
    with open(outf, 'w+') as outfile:
        for key in sorted(master):
            value = master[key]
            accessions = _unique_in_order(value['accessions'])
            outfile.write('{c}\t{a}\n'.format(c=value['coords'], a=','.join(accessions)))


def get_name_from_filename(filename):
    """Return the base name of *filename* without its last extension.

    Everything after the final dot of the base name is dropped, so a name
    such as ``sample.1`` yields ``sample``.
    """
    return os.path.basename(filename).rsplit('.', 1)[0]


def main(argv=None):
    """Parse the command line (``sys.argv`` when *argv* is None) and merge."""
    parser = ArgumentParser(description='Merge TE deletions calls')
    parser.add_argument('-o', '--output', help="File to write merged deletions to.", required=True)
    parser.add_argument('-i', '--input', help='all files that should be merged', nargs="+", required=True)
    options = parser.parse_args(argv)

    first_file = options.input[0]
    master_dictionary = create_master_dict(get_name_from_filename(first_file), first_file)
    for filename in options.input[1:]:
        merge_deletions(master_dictionary, filename, get_name_from_filename(filename))
    save_deletions(master_dictionary, options.output)


if __name__ == "__main__":
    main()
<!-- File: tepid_merge_deletions.xml
     Galaxy wrapper: merges the per-sample deletion files of a list collection
     with merge_deletions.py, then inverts the merged calls with
     flip_deletions.py. -->
<tool id="tepid_merge_deletions" name="tepid-merge-deletions" version="@WRAPPER_VERSION@">
    <description>merge discovered TE deletions</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <version_command>tepid-discover --version</version_command>
    <!-- Each dataset is symlinked under its element identifier because
         merge_deletions.py derives the sample name from the file name.
         Both script paths are quoted as a whole so that a tool directory
         containing spaces does not split the argument. -->
    <command detect_errors="exit_code"><![CDATA[
        #for $input in $deletions:
            ln -s -f '$input' '$input.element_identifier' &&
        #end for
        python '$__tool_directory__/merge_deletions.py' -i
        #for $input in $deletions:
            '$input.element_identifier'
        #end for
        -o merged.bed && cat merged.bed && python '$__tool_directory__/flip_deletions.py'
        -s
        #for $sample in $deletions:
            '$sample.element_identifier'
        #end for
        -d merged.bed
        -r '$reference_name'
        -o '$merged_out'
    ]]></command>
    <inputs>
        <param name="deletions" label="TEPID deletions" argument="--input" type="data_collection" collection_type="list" format="bed"/>
        <param name="reference_name" label="Reference sample name" argument="--reference" type="text" value="ref"/>
    </inputs>
    <outputs>
        <data name="merged_out" format="bed" label="tepid_discover merged deletions on ${on_string}">
        </data>
    </outputs>
    <tests>
        <test>
            <param name="deletions">
                <collection type="list">
                    <element name="1" value="1.bed" />
                    <element name="2" value="2.bed" />
                </collection>
            </param>
            <param name="reference_name" value="ref"/>
            <output name="merged_out" file="merged.bed"/>
        </test>
    </tests>
    <help><![CDATA[
        This step merges deletions found by tepid discover.
        The output can be used as input in the tepid-refine step.
    ]]></help>
    <expand macro="citations"/>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/1.bed Mon Jan 23 10:05:02 2017 -0500 @@ -0,0 +1,10 @@ +2L 15826914 15829287 + FBti0019180 90 +2L 15838500 15839906 - FBti0019181 91 +2L 15956081 15957487 + FBti0019182 92 +2L 16141217 16146366 + FBti0019183 93 +2L 16153790 16160787 - FBti0019184 94 +2L 16280312 16281417 - FBti0019770 95 +2L 16512017 16519074 + FBti0019185 96 +2L 16858765 16859500 - FBti0061428 97 +2L 16958165 16962888 + FBti0019186 98 +2L 17144384 17151999 - FBti0019187 99
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/2.bed Mon Jan 23 10:05:02 2017 -0500 @@ -0,0 +1,10 @@ +2L 15956081 15957487 + FBti0019182 90 +2L 16141217 16146366 + FBti0019183 91 +2L 16153790 16160787 - FBti0019184 92 +2L 16280312 16281417 - FBti0019770 93 +2L 16512017 16519074 + FBti0019185 94 +2L 16958165 16962888 + FBti0019186 95 +2L 17144384 17151999 - FBti0019187 96 +2L 17167617 17167803 + FBti0019188 97 +2L 17199216 17200622 - FBti0019189 98 +2L 17273262 17277961 - FBti0019190 99
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/merged.bed Mon Jan 23 10:05:02 2017 -0500 @@ -0,0 +1,13 @@ +2L 15826914 15829287 FBti0019180 ref,2 +2L 15838500 15839906 FBti0019181 ref,2 +2L 15956081 15957487 FBti0019182 ref +2L 16141217 16146366 FBti0019183 ref +2L 16153790 16160787 FBti0019184 ref +2L 16280312 16281417 FBti0019770 ref +2L 16512017 16519074 FBti0019185 ref +2L 16858765 16859500 FBti0061428 ref,2 +2L 16958165 16962888 FBti0019186 ref +2L 17144384 17151999 FBti0019187 ref +2L 17167617 17167803 FBti0019188 ref,1 +2L 17199216 17200622 FBti0019189 ref,1 +2L 17273262 17277961 FBti0019190 ref,1