Mercurial > repos > mvdbeek > tepid_merge_deletions
annotate merge_deletions.py @ 0:ff6683f8e9a1 draft
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
author | mvdbeek |
---|---|
date | Mon, 23 Jan 2017 10:05:02 -0500 |
parents | |
children |
rev | line source |
---|---|
0
ff6683f8e9a1
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
1 #! /usr/bin/env python |
ff6683f8e9a1
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
2 |
ff6683f8e9a1
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
3 import os |
ff6683f8e9a1
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
4 from argparse import ArgumentParser |
ff6683f8e9a1
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
5 |
ff6683f8e9a1
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
def create_master_dict(sample, fname):
    """Build the initial master dictionary from one deletions file.

    Each data line of ``fname`` is split on whitespace and its first five
    fields are joined with tabs to form the record's coordinate string.

    Args:
        sample: Sample/accession name recorded against every record.
        fname: Path of the whitespace-delimited input file.

    Returns:
        A dict keyed by consecutive integers starting at 0, in file order.
        Each value is ``{'coords': <tab-joined first five fields>,
        'accessions': [sample]}``.
    """
    master_dict = {}
    with open(fname, 'r') as masterfile:
        for line in masterfile:
            field = line.split()
            # Compare the first *field*, not the first character of the
            # line: a single character can never equal 'ins_chr', so the
            # header row would otherwise be stored as a record. Blank
            # lines carry no coordinates and are skipped as well.
            if not field or field[0] == 'ins_chr':
                continue
            master_dict[len(master_dict)] = {
                'coords': '\t'.join(field[:5]),
                'accessions': [sample],
            }
    return master_dict
ff6683f8e9a1
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
17 |
ff6683f8e9a1
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
18 |
ff6683f8e9a1
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
def merge_deletions(master, fname, sample):
    """Merge the records of one deletions file into ``master`` in place.

    For every data line in ``fname`` the first five whitespace-separated
    fields are joined with tabs into a coordinate string. If a record with
    the same coordinates already exists in ``master``, ``sample`` is
    appended to that record's accession list; otherwise a new record is
    added under the next integer key.

    Args:
        master: Dict as produced by ``create_master_dict``; assumed to be
            keyed by the consecutive integers ``0 .. len(master) - 1``.
        fname: Path of the whitespace-delimited input file.
        sample: Sample/accession name to record for this file.

    Returns:
        None. ``master`` is modified in place.
    """
    # Index coordinates once so each input line is a dict lookup instead of
    # a scan over the whole master. Visiting keys in ascending order and
    # using setdefault keeps the lowest key when coordinates repeat, which
    # is the record a front-to-back scan would have matched.
    key_by_coords = {}
    for key in sorted(master):
        key_by_coords.setdefault(master[key]['coords'], key)

    with open(fname, 'r') as infile:
        for line in infile:
            field = line.split()
            # Blank lines and a header row are not records.
            if not field or field[0] == 'ins_chr':
                continue
            coords = '\t'.join(field[:5])
            if coords in key_by_coords:
                master[key_by_coords[coords]]['accessions'].append(sample)
            else:
                # Also covers an empty master, where a scan-based loop
                # would never execute and the record would be lost.
                new_key = len(master)
                master[new_key] = {'coords': coords, 'accessions': [sample]}
                key_by_coords[coords] = new_key
ff6683f8e9a1
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
35 |
ff6683f8e9a1
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
36 |
ff6683f8e9a1
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
def save_deletions(master, outf):
    """Write the merged records to ``outf``, one tab-separated line each.

    Every output line is the record's coordinate string, a tab, and the
    comma-separated list of distinct accessions for that record.

    Args:
        master: Dict whose values are ``{'coords': str, 'accessions': list}``.
        outf: Path of the output file; it is created or truncated.
    """
    with open(outf, 'w') as outfile:
        # values() exists on both Python 2 and 3 (iteritems() is Python 2
        # only), and the keys are not needed for the output.
        for value in master.values():
            # Deduplicate, then sort so the accession column is
            # reproducible rather than following set iteration order.
            accessions = sorted(set(value['accessions']))
            outfile.write('{c}\t{a}\n'.format(c=value['coords'], a=','.join(accessions)))
ff6683f8e9a1
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
42 |
ff6683f8e9a1
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
def get_name_from_filename(filename):
    """Return the sample name for a path: its base name minus the last extension.

    Args:
        filename: Path to an input file.

    Returns:
        The final path component with its last ``.ext`` suffix removed.
        A leading dot is treated as part of the name rather than as an
        extension separator, so a dot-file such as ``.sample`` keeps its
        name instead of yielding an empty string.
    """
    return os.path.splitext(os.path.basename(filename))[0]
ff6683f8e9a1
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
45 |
ff6683f8e9a1
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
if __name__ == "__main__":
    # Command-line entry point: seed the master dictionary from the first
    # input file, fold every remaining input file into it, then write the
    # merged result to the requested output path.
    arg_parser = ArgumentParser(description='Merge TE deletions calls')
    arg_parser.add_argument(
        '-o', '--output',
        required=True,
        help="File to write merged deletions to.",
    )
    arg_parser.add_argument(
        '-i', '--input',
        required=True,
        nargs="+",
        help='all files that should be merged',
    )
    options = arg_parser.parse_args()

    # nargs="+" guarantees at least one input file.
    first_file, remaining_files = options.input[0], options.input[1:]
    master_dictionary = create_master_dict(
        get_name_from_filename(first_file), first_file
    )
    for filename in remaining_files:
        merge_deletions(
            master_dictionary, filename, get_name_from_filename(filename)
        )
    save_deletions(master_dictionary, options.output)