Mercurial > repos > mvdbeek > tepid_merge_deletions
diff flip_deletions.py @ 0:ff6683f8e9a1 draft
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
author | mvdbeek |
---|---|
date | Mon, 23 Jan 2017 10:05:02 -0500 |
parents | |
children |
line wrap: on
line diff
#! /usr/bin/python
"""Invert merged TEPID TE deletion calls.

Each input record lists the samples in which a TE deletion was called.
This script rewrites each record so that it lists the reference sample
plus every sample that is NOT in that list, dropping the strand column,
so that deletions share the same layout as TE insertion calls.
"""

from argparse import ArgumentParser
import sys

# The parser is built at import time (cheap, no side effects); the actual
# parsing of sys.argv is deferred to main() so the module can be imported.
parser = ArgumentParser(description='Invert the TE deletion calls to give a consistent data format between TE insertions and deletions')
parser.add_argument('-s', '--samples', help='list of all sample names', nargs="+", required=True)
parser.add_argument('-d', '--deletions', help='merged TEPID deletions', required=True)
parser.add_argument('-r', '--reference', help='reference sample name, eg Col-0', required=True)
parser.add_argument('-o', '--output', help='output file name', required=True)


def filter_del(options):
    """Write the inverted deletion calls described by *options*.

    Args:
        options: object exposing the attributes
            ``deletions`` (path of the tab-separated input file),
            ``output`` (path of the file to create/overwrite),
            ``samples`` (iterable of all sample names) and
            ``reference`` (name of the reference sample).

    Each non-blank input line must have at least six tab-separated
    fields: three leading columns that are copied through unchanged
    (presumably chromosome, start and end), the strand (dropped), the TE
    name, and a comma-separated list of samples carrying the deletion.
    The output line holds the three leading columns, the TE name, and a
    comma-separated list made of the reference followed by every sample
    (in ``options.samples`` order) absent from the input list.

    Raises:
        IndexError: if a non-blank line has fewer than six fields.
    """
    sample_names = options.samples
    reference = options.reference
    with open(options.deletions, 'r') as dels, open(options.output, 'w') as outfile:
        for line in dels:
            stripped = line.strip()
            if not stripped:
                # Tolerate blank lines (e.g. a trailing newline at EOF)
                # instead of failing on the field lookup below.
                continue
            fields = stripped.split('\t')
            # A set makes the per-sample membership test O(1).
            with_deletion = set(fields[5].split(','))
            te = fields[4]
            coords = fields[:3]  # fields[3] is the strand, which is dropped
            without_deletion = [reference]
            without_deletion.extend(
                sample for sample in sample_names if sample not in with_deletion
            )
            outfile.write(
                '\t'.join(coords) + '\t' + te + '\t' + ','.join(without_deletion) + '\n'
            )


def main(argv=None):
    """Parse command-line arguments (``sys.argv[1:]`` by default) and run."""
    filter_del(parser.parse_args(argv))
    return 0


if __name__ == "__main__":
    sys.exit(main())