Mercurial > repos > mvdbeek > tepid_merge_insertions
annotate merge_insertions.py @ 0:6e4b5319cb89 draft default tip
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
author | mvdbeek |
---|---|
date | Mon, 23 Jan 2017 10:05:12 -0500 |
parents | |
children |
rev | line source |
---|---|
0
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
1 from argparse import ArgumentParser |
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
2 import os |
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
3 import tempfile |
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
4 import pandas as pd |
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
5 import pybedtools |
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
6 |
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
# Column names applied to the sorted and clustered insertion table.
# The first three describe the insertion site, the next three the reference
# element, followed by the agi list, the sample ("accession") column and the
# cluster id appended by ``bedtools cluster``.
COLUMNS = [
    'ins_chrom', 'ins_start', 'ins_end',
    'ref_chrom', 'ref_start', 'ref_end',
    'agi',
    'accession',
    'cluster',
]
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
16 |
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
def overlap(start1, stop1, start2, stop2, d=50):
    """
    Tell whether two intervals overlap or lie within ``d`` of each other.

    Both intervals are assumed to be on the same chromosome. The second
    interval is widened by ``d`` on either side before it is compared with
    the first one.
    """
    padded_start = start2 - d
    padded_stop = stop2 + d
    return start1 <= padded_stop and stop1 >= padded_start
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
20 |
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
21 |
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
def merge(i, insertion, result):
    """
    Fold ``insertion`` into ``result`` (a dict of kept insertions) in place.

    The insertion is stored under key ``i`` when ``result`` is still empty,
    or when ``can_merge`` reports that no kept insertion absorbed it.
    """
    # Short-circuit: can_merge is only consulted once something is kept.
    if not result or not can_merge(insertion, result):
        result[i] = insertion
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
28 |
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
29 |
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
def can_merge(insertion, result):
    """
    Try to absorb ``insertion`` into one of the insertions kept in ``result``.

    Kept insertions are visited in dict iteration order. A kept insertion
    qualifies when it shares at least one agi with ``insertion``, their
    insertion coordinates pass ``overlap``, both chromosome fields are equal
    and the reference start check passes. The first one that qualifies gets
    the accession and agi sets of ``insertion`` unioned into it and True is
    returned. If none qualifies, ``result`` is left untouched and False is
    returned.
    """
    for kept in result.values():
        if not (insertion['agi'] & kept['agi']):
            continue
        if not overlap(kept['ins_start'], kept['ins_end'],
                       insertion['ins_start'], insertion['ins_end']):
            continue
        # When the incoming insertion has fewer agi values than the kept one,
        # the kept ref_start is used and the comparison below is trivially
        # true; otherwise the two ref_start values have to be equal.
        if len(insertion['agi']) < len(kept['agi']):
            expected_ref_start = kept['ref_start']
        else:
            expected_ref_start = insertion['ref_start']
        same_site = (kept['ins_chrom'] == insertion['ins_chrom']
                     and insertion['ref_chrom'] == kept['ref_chrom']
                     and expected_ref_start == kept['ref_start'])
        if same_site:
            kept['accession'] = kept['accession'] | insertion['accession']
            kept['agi'] = kept['agi'] | insertion['agi']
            return True
    return False
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
47 |
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
48 |
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
def inner_merge(s):
    """
    Collapse the insertions of a single cluster.

    ``s`` maps row keys to insertion dicts. Every entry is handed to
    ``merge`` in iteration order, and the values view of the dict of kept
    insertions is returned.
    """
    kept = {}
    for key in s:
        merge(key, s[key], kept)
    return kept.values()
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
54 |
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
55 |
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
def reduce_and_cluster(inputfiles):
    """
    Read in inputfiles using pandas, write additional column with sample identifier,
    sort and cluster using pybedtools and return dataframe.

    Only the first seven columns of each input file are read; the sample
    identifier (the file's base name up to its first dot) is stored as
    column 7. The concatenated table is written to a temporary file, sorted
    and clustered with ``bedtools cluster -d 50``, and read back.

    Returns a DataFrame whose columns are ``COLUMNS``, with the ``agi`` and
    ``accession`` values converted from comma separated strings to sets.
    """
    usecols = [0, 1, 2, 3, 4, 5, 6]  # skip col 7, which contains the read support id
    # Actually apply usecols: extra trailing columns in an input file would
    # otherwise survive and break the COLUMNS assignment below.
    tables = [pd.read_table(f, header=None, usecols=usecols) for f in inputfiles]
    sample_ids = [os.path.basename(f).split('.', 1)[0] for f in inputfiles]
    for sample_id, table in zip(sample_ids, tables):
        table[7] = sample_id
    merged_table = pd.concat(tables)
    # Text mode, because to_csv writes str. The context manager removes the
    # temporary file once bedtools has produced its own output files and the
    # result has been loaded.
    with tempfile.NamedTemporaryFile(mode='w') as tfile:
        merged_table.to_csv(tfile, sep="\t", header=False, index=False)
        tfile.flush()  # make sure bedtools sees the complete table on disk
        bedfile = pybedtools.BedTool(tfile.name).sort().cluster(d=50)
        df = bedfile.to_dataframe()
    df.columns = COLUMNS
    # Split comma separated agi values and make set
    df['agi'] = [set(v.split(',')) for v in df['agi'].values]
    # str() because the sample identifier may have been parsed as a number
    df['accession'] = [set(str(v).split(',')) for v in df['accession'].values]
    return df
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
77 |
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
78 |
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
def split_clusters(df):
    """
    clusters as defined by bedtools allow for 50 nt distance. This means that
    clusters can be many kb large, so we check each individual insertion in
    the cluster against the other insertions. We split the clusters based on
    whether the overlap and TE identity criteria are fulfilled (so a
    different TE would lead to a split in the clusters)

    ``df`` must contain a ``cluster`` column; each group of rows sharing a
    cluster id is reduced with ``inner_merge``.

    Returns a new DataFrame with the columns in ``COLUMNS``. When ``df`` has
    no rows the result is an empty DataFrame with those columns.
    """
    merged_rows = []
    for _, group in df.groupby('cluster'):
        # transpose().to_dict() yields {row index: {column name: value}}
        merged_rows.extend(inner_merge(group.transpose().to_dict()))
    # Passing columns= (instead of selecting [COLUMNS] afterwards) keeps the
    # column order and avoids a KeyError when there are no rows at all.
    return pd.DataFrame(merged_rows, columns=COLUMNS)
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
90 |
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
91 |
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
def write_output(df, output):
    """
    Write the merged insertions as a tab separated table without header.

    ``df`` needs the columns in ``COLUMNS``, with ``agi`` and ``accession``
    holding collections of strings. ``output`` is handed to
    ``DataFrame.to_csv`` unchanged.

    The caller's DataFrame is not modified, so the function can safely be
    called more than once on the same frame. Set members are sorted before
    joining so that the written file does not depend on set iteration order.
    """
    # Turn sets back to comma-separated values (on a new frame, not on df)
    flattened = df.assign(
        agi=[",".join(sorted(agi)) for agi in df['agi']],
        accession=[",".join(sorted(acc)) for acc in df['accession']]
    )
    # write out result without last column
    flattened.to_csv(output, sep="\t", header=False, index=False, columns=COLUMNS[:-1])
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
98 |
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
99 |
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
def main(inputfiles, output):
    """
    Run the whole pipeline: load and cluster ``inputfiles``, split the
    clusters into merged insertions and write the result to ``output``.
    """
    clustered = reduce_and_cluster(inputfiles)
    merged = split_clusters(clustered)
    write_output(merged, output)
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
104 |
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
105 |
6e4b5319cb89
planemo upload for repository https://github.com/ListerLab/TEPID commit 82fd0448ff5baa9822a388aee78753e4b1cd94d7
mvdbeek
parents:
diff
changeset
|
if __name__ == "__main__":
    # Command line entry point: -o/--output names the result file and
    # -i/--input takes one or more insertion files; both are required.
    arg_parser = ArgumentParser(description='Merge TE insertions calls')
    arg_parser.add_argument(
        '-o', '--output',
        help='output file',
        required=True)
    arg_parser.add_argument(
        '-i', '--input',
        help='Insertion files to merge',
        nargs="+",
        required=True)
    args = arg_parser.parse_args()

    main(inputfiles=args.input, output=args.output)