view merge_deletions.py @ 3:08de71d3ea76 draft default tip

planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7-dirty
author mvdbeek
date Fri, 27 Jan 2017 08:16:01 -0500
parents ff6683f8e9a1
children
line wrap: on
line source

#! /usr/bin/env python

import os
from argparse import ArgumentParser

def create_master_dict(sample, fname):
    """Build the initial master dictionary from one deletion-call file.

    Each non-header, non-blank line of ``fname`` becomes one record. Records
    are keyed by consecutive integers starting at 0, in file order.

    Args:
        sample: Name recorded as the (only) accession of every record.
        fname: Path of the whitespace-delimited file to read.

    Returns:
        A dict mapping ``int`` -> ``{'coords': str, 'accessions': [sample]}``
        where ``coords`` is the first five fields of the line joined by tabs.
    """
    master_dict = {}
    with open(fname, 'r') as masterfile:
        for line in masterfile:
            field = line.split()
            # Skip blank lines and the header row. The header is recognised by
            # its first *field*; comparing the first character of the line
            # (as was done before) can never equal 'ins_chr'.
            if not field or field[0] == 'ins_chr':
                continue
            coords = '\t'.join(field[:5])
            # Keys stay contiguous (0..n-1) because skipped lines never
            # consume an index.
            master_dict[len(master_dict)] = {'coords': coords, 'accessions': [sample]}
    return master_dict


def merge_deletions(master, fname, sample):
    """Merge the deletion calls of one more sample into ``master`` in place.

    For every data line of ``fname`` the first five fields are joined by tabs
    to form the coordinate string. If a record with the same coordinates
    already exists, ``sample`` is appended to its accession list; otherwise a
    new record is added under the next free integer key (``len(master)``).

    Args:
        master: Dict mapping ``int`` -> ``{'coords': str, 'accessions': list}``
            with contiguous keys starting at 0. Modified in place.
        fname: Path of the whitespace-delimited file to merge.
        sample: Name to record for every call found in ``fname``.

    Returns:
        None.
    """
    # Index coordinates -> first key holding them, so each line costs one
    # dict lookup instead of a scan over every existing record.
    key_by_coords = {}
    for key in sorted(master):
        key_by_coords.setdefault(master[key]['coords'], key)

    with open(fname, 'r') as infile:
        for line in infile:
            field = line.split()
            # Blank lines and the 'ins_chr' header row carry no call.
            if not field or field[0] == 'ins_chr':
                continue
            coords = '\t'.join(field[:5])
            key = key_by_coords.get(coords)
            if key is None:
                # Unseen coordinates: append a new record. This also works
                # when ``master`` starts out empty, which the previous
                # scan-based loop silently ignored.
                key = len(master)
                master[key] = {'coords': coords, 'accessions': [sample]}
                key_by_coords[coords] = key
            else:
                master[key]['accessions'].append(sample)

def save_deletions(master, outf):
    """Write the merged records to ``outf`` as tab-separated text.

    One line is written per record: the record's coordinate string, a tab,
    and the comma-separated list of its distinct accessions.

    Args:
        master: Dict mapping key -> ``{'coords': str, 'accessions': list}``.
        outf: Path of the output file; created or truncated.

    Returns:
        None.
    """
    with open(outf, 'w') as outfile:
        # Iterate over sorted keys rather than ``iteritems()``: that method
        # does not exist on Python 3, and sorting makes the row order
        # reproducible on every interpreter.
        for key in sorted(master):
            value = master[key]
            # De-duplicate, then sort so the accession order is deterministic
            # (plain set iteration order is arbitrary).
            accessions = sorted(set(value['accessions']))
            outfile.write('{c}\t{a}\n'.format(c=value['coords'], a=','.join(accessions)))

def get_name_from_filename(filename):
    """Return the final path component of ``filename`` up to its last dot.

    If the final component contains no dot it is returned unchanged.
    """
    base = os.path.split(filename)[1]
    last_dot = base.rfind('.')
    if last_dot < 0:
        return base
    return base[:last_dot]

if __name__ == "__main__":
    # Command-line entry point: the first input file seeds the master
    # dictionary, every further input file is merged into it, and the
    # result is written to the output path.
    arg_parser = ArgumentParser(description='Merge TE deletions calls')
    arg_parser.add_argument(
        '-o', '--output',
        required=True,
        help="File to write merged deletions to.",
    )
    arg_parser.add_argument(
        '-i', '--input',
        required=True,
        nargs="+",
        help='all files that should be merged',
    )
    args = arg_parser.parse_args()

    input_files = args.input
    seed_file = input_files[0]
    # Sample names are derived from the input file names.
    merged = create_master_dict(get_name_from_filename(seed_file), seed_file)
    for path in input_files[1:]:
        name = get_name_from_filename(path)
        merge_deletions(merged, path, name)
    save_deletions(merged, args.output)