#! /usr/bin/python
# flip_deletions.py -- recovered from HG changeset ff6683f8e9a1
# (planemo upload for repository https://github.com/ListerLab/TEPID).
"""Invert merged TEPID deletion calls.

Each input row lists the samples in which a TE deletion was called.  The
output lists, for the same TE, the reference sample plus every sample in
which the deletion was NOT called, which gives deletions the same layout
as TE insertion calls.
"""

from argparse import ArgumentParser
import sys

# Building the parser has no side effects, so it stays at module level;
# the actual parse_args() call happens only when run as a script.
parser = ArgumentParser(description='Invert the TE deletion calls to give a consistent data format between TE insertions and deletions')
parser.add_argument('-s', '--samples', help='list of all sample names', nargs="+", required=True)
parser.add_argument('-d', '--deletions', help='merged TEPID deletions', required=True)
parser.add_argument('-r', '--reference', help='reference sample name, eg Col-0', required=True)
parser.add_argument('-o', '--output', help='output file name', required=True)


def filter_del(options):
    """Write the inverted deletion table described by *options*.

    Parameters
    ----------
    options : object
        Must expose the attributes ``deletions`` (path of the merged,
        tab-separated deletions file), ``output`` (path to write),
        ``samples`` (iterable of all sample names) and ``reference``
        (name of the reference sample).

    Input rows are read as ``chrom, start, end, strand, TE, accessions``
    where ``accessions`` is a comma-separated list.  Each output row is
    ``chrom, start, end, TE, <reference>,<samples absent from accessions>``
    (the strand column is dropped).  Blank lines are skipped.

    Raises
    ------
    IndexError
        If a non-blank row has fewer than six tab-separated columns.
    """
    sample_names = options.samples
    # Loop-invariant: the joined sample list never changes between rows.
    joined_samples = ",".join(sample_names)
    with open(options.deletions, 'r') as dels, open(options.output, 'w') as outfile:
        for raw in dels:
            stripped = raw.strip()
            if not stripped:
                # A blank (e.g. trailing) line carries no call; indexing
                # it would raise IndexError.
                continue
            fields = stripped.split('\t')
            accessions = fields[5]
            # NOTE(review): these two writes look like leftover debugging
            # output (no separator or newline); kept so stderr is unchanged.
            sys.stderr.write(accessions)
            sys.stderr.write(joined_samples)
            called = set(accessions.split(','))
            # The reference always carries the TE; add every sample in
            # which the deletion was not called, in the given order.
            carriers = [options.reference]
            carriers.extend(s for s in sample_names if s not in called)
            # fields[:3] is chrom/start/end; fields[3] (strand) is dropped.
            row = fields[:3] + [fields[4], ','.join(carriers)]
            outfile.write('\t'.join(row) + '\n')


if __name__ == "__main__":
    options = parser.parse_args()
    filter_del(options)
#! /usr/bin/env python
# merge_deletions.py -- recovered from HG changeset ff6683f8e9a1
# (planemo upload for repository https://github.com/ListerLab/TEPID).
"""Merge per-sample TE deletion calls into one table.

Every input file contributes rows keyed by their first five
whitespace-separated columns; the merged table records, for each distinct
key, the names of all samples whose file contained it.
"""

import os
from argparse import ArgumentParser

# First column of a header row; such rows are not deletion calls.
_HEADER_TOKEN = 'ins_chr'


def _iter_coords(fname):
    """Yield the tab-joined first five columns of each data row in *fname*.

    Blank lines and rows whose first column is the header token are
    skipped.
    """
    with open(fname, 'r') as infile:
        for line in infile:
            field = line.split()
            if not field or field[0] == _HEADER_TOKEN:
                continue
            yield '\t'.join(field[:5])


def create_master_dict(sample, fname):
    """Build the initial merge table from the first sample's file.

    Parameters
    ----------
    sample : str
        Name recorded as the accession for every row of *fname*.
    fname : str
        Path of the sample's deletion file.

    Returns
    -------
    dict
        Maps consecutive integers starting at 0 to
        ``{'coords': <tab-joined columns>, 'accessions': [sample]}``,
        one entry per data row in file order.
    """
    master_dict = {}
    for coords in _iter_coords(fname):
        master_dict[len(master_dict)] = {'coords': coords, 'accessions': [sample]}
    return master_dict


def merge_deletions(master, fname, sample):
    """Fold the rows of *fname* into *master* in place.

    For each data row, *sample* is appended to the accessions of the
    first entry with the same coords; if there is none, a new entry is
    added under the next integer key (``len(master)``).  This also works
    when *master* starts out empty.

    Parameters
    ----------
    master : dict
        Table as returned by ``create_master_dict``; modified in place.
    fname : str
        Path of the sample's deletion file.
    sample : str
        Sample name to record for the rows of *fname*.
    """
    # Index the table once per file instead of rescanning it for every
    # row.  setdefault keeps the lowest-keyed entry when coords repeat.
    by_coords = {}
    for key in sorted(master):
        by_coords.setdefault(master[key]['coords'], master[key])

    for coords in _iter_coords(fname):
        entry = by_coords.get(coords)
        if entry is None:
            entry = {'coords': coords, 'accessions': [sample]}
            master[len(master)] = entry
            by_coords[coords] = entry
        else:
            entry['accessions'].append(sample)


def save_deletions(master, outf):
    """Write *master* to *outf* as ``<coords>\\t<comma-joined accessions>``.

    Rows are written in ascending key order and each row's accessions are
    de-duplicated and sorted, so the output is reproducible.
    """
    with open(outf, 'w') as outfile:
        for key in sorted(master):
            value = master[key]
            accessions = sorted(set(value['accessions']))
            outfile.write('{c}\t{a}\n'.format(c=value['coords'], a=','.join(accessions)))


def get_name_from_filename(filename):
    """Return the base name of *filename* without its last extension."""
    return os.path.basename(filename).rsplit('.', 1)[0]


def main():
    """Parse the command line, merge all input files, write the result."""
    parser = ArgumentParser(description='Merge TE deletions calls')
    parser.add_argument('-o', '--output', help="File to write merged deletions to.", required=True)
    parser.add_argument('-i', '--input', help='all files that should be merged', nargs="+", required=True)
    options = parser.parse_args()

    # The first file seeds the table; the sample name is the file's base
    # name without its extension.
    first_file = options.input[0]
    master_dictionary = create_master_dict(get_name_from_filename(first_file), first_file)
    for filename in options.input[1:]:
        merge_deletions(master_dictionary, filename, get_name_from_filename(filename))
    save_deletions(master_dictionary, options.output)


if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tepid_merge_deletions.xml Mon Jan 23 10:05:02 2017 -0500 @@ -0,0 +1,50 @@ + + merge discovered TE deletions + + macros.xml + + + tepid-discover --version + + + + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r ff6683f8e9a1 test-data/1.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/1.bed Mon Jan 23 10:05:02 2017 -0500 @@ -0,0 +1,10 @@ +2L 15826914 15829287 + FBti0019180 90 +2L 15838500 15839906 - FBti0019181 91 +2L 15956081 15957487 + FBti0019182 92 +2L 16141217 16146366 + FBti0019183 93 +2L 16153790 16160787 - FBti0019184 94 +2L 16280312 16281417 - FBti0019770 95 +2L 16512017 16519074 + FBti0019185 96 +2L 16858765 16859500 - FBti0061428 97 +2L 16958165 16962888 + FBti0019186 98 +2L 17144384 17151999 - FBti0019187 99 diff -r 000000000000 -r ff6683f8e9a1 test-data/2.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/2.bed Mon Jan 23 10:05:02 2017 -0500 @@ -0,0 +1,10 @@ +2L 15956081 15957487 + FBti0019182 90 +2L 16141217 16146366 + FBti0019183 91 +2L 16153790 16160787 - FBti0019184 92 +2L 16280312 16281417 - FBti0019770 93 +2L 16512017 16519074 + FBti0019185 94 +2L 16958165 16962888 + FBti0019186 95 +2L 17144384 17151999 - FBti0019187 96 +2L 17167617 17167803 + FBti0019188 97 +2L 17199216 17200622 - FBti0019189 98 +2L 17273262 17277961 - FBti0019190 99 diff -r 000000000000 -r ff6683f8e9a1 test-data/merged.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/merged.bed Mon Jan 23 10:05:02 2017 -0500 @@ -0,0 +1,13 @@ +2L 15826914 15829287 FBti0019180 ref,2 +2L 15838500 15839906 FBti0019181 ref,2 +2L 15956081 15957487 FBti0019182 ref +2L 16141217 16146366 FBti0019183 ref +2L 16153790 16160787 FBti0019184 ref +2L 16280312 16281417 FBti0019770 ref +2L 16512017 16519074 FBti0019185 ref +2L 16858765 16859500 FBti0061428 ref,2 +2L 16958165 16962888 FBti0019186 ref +2L 17144384 17151999 FBti0019187 ref +2L 17167617 17167803 FBti0019188 ref,1 +2L 17199216 17200622 
FBti0019189 ref,1 +2L 17273262 17277961 FBti0019190 ref,1