annotate FlapjackProject.py @ 80:7f0f361efcc0 draft

Uploaded
author cropgeeks
date Wed, 07 Mar 2018 05:14:57 -0500
parents df76e58799e7
children 819fdece454a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
65
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
1 #!/usr/bin/env python
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
2 # encoding: utf-8
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
3 '''
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
4 DNASampleSplitter -- split sample and genotype files by sample group and build Flapjack projects
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
5
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
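6 DNASampleSplitter splits a tab-delimited sample file and genotype file into one temporary file per sample group (and cycle), then runs the Flapjack header and project-creation jars on each group.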
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
7
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
8 It defines splitfile, splitData, createProjectFile, createHeader and main
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
9
76
84ce7c332dc4 Uploaded
cropgeeks
parents: 65
diff changeset
10 @author: John Carlos Ignacio, Milcah Kigoni, Yaw Nti-Addae
65
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
11
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
12 @copyright: 2017 Cornell University. All rights reserved.
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
13
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
14 @license: MIT License
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
15
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
16 @contact: yn259@cornell.edu
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
17 @deffield updated: Updated
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
18 '''
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
19
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
20 import sys
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
21 import os
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
22 import math
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
23 import pandas as pd
76
84ce7c332dc4 Uploaded
cropgeeks
parents: 65
diff changeset
24 import tempfile
65
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
25
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
26 from optparse import OptionParser
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
27 from __builtin__ import str
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
28 from subprocess import call
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
29
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
30 __all__ = []
76
84ce7c332dc4 Uploaded
cropgeeks
parents: 65
diff changeset
31 __version__ = 0.2
65
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
32 __date__ = '2017-06-20'
76
84ce7c332dc4 Uploaded
cropgeeks
parents: 65
diff changeset
33 __updated__ = '2017-06-27'
65
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
34
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
35 DEBUG = 1
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
36 TESTRUN = 0
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
37 PROFILE = 0
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
38
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
# Maps a group name ("<group>" or "<group>_<cycle>") to the list of dnarun
# names of that group's parents; filled in by splitData().
parents = {}

# Maps "<input file path>_<group name>" to the path of the temporary file that
# holds the rows of that input belonging to that group.
filenames = {}

# "# fjFav..." header lines collected from the input files, in first-seen order.
favAlleleHeaders = []


def _is_missing(value):
    '''Return True when *value* is a float NaN (how pandas reports an empty cell).'''
    return isinstance(value, float) and math.isnan(value)


def _append_split_line(key, header, line):
    '''Append *line* to the split file registered under *key*.

    On first use of *key* a temporary file is created, registered in
    ``filenames`` and started with *header*. Files are always closed before
    returning so their content is on disk for later readers.
    '''
    path = filenames.get(key)
    if path is None:
        # Write through the handle we were given instead of re-opening by name,
        # and only create a temp file when one is actually needed.
        with tempfile.NamedTemporaryFile(mode="w", delete=False) as out:
            filenames[key] = out.name
            out.write(header)
            out.write(line)
    else:
        with open(path, "a") as out:
            out.write(line)


def splitfile(my_file, sample_data, isSample):
    '''Split a tab-delimited file into one temporary file per sample group.

    Lines of *my_file* are handled as follows:

    * lines starting with "# fjfav" (case-insensitive) are recorded in
      ``favAlleleHeaders`` and otherwise skipped;
    * other lines starting with "# " are accumulated as the Flapjack header
      (a "# fjFile = PHENOTYPE" header is assumed when there is none);
    * the first remaining line is the column header; its first cell is blanked;
    * every later line is a data row whose first field is looked up in
      ``sample_data.dnarun_name`` to find its group and cycle.

    The function is meant to be called twice per input:

    * ``isSample`` falsy  -- only rows that are parents of a group (according to
      ``parents``) are written, to every group they are a parent of;
    * ``isSample == 1``   -- every row with a group is written to the file of
      its own "<group>" or "<group>_<cycle>".

    Output paths are registered in ``filenames`` under
    ``my_file + "_" + <group name>``.

    :param my_file: path of the tab-delimited input file.
    :param sample_data: pandas DataFrame with the columns ``dnarun_name``,
        ``dnasample_sample_group`` and ``dnasample_sample_group_cycle``.
    :param isSample: 0 for the parent pass, 1 for the sample pass.
    :raises IndexError: if a data row's first field is not present in
        ``sample_data.dnarun_name``.
    '''
    header = ''
    fj_header = ''
    with open(my_file) as infile:
        for line in infile:
            if line.lower().startswith("# fjfav"):
                # The same file is scanned once per pass; keep each header once
                # so it is not written out twice later on.
                if line not in favAlleleHeaders:
                    favAlleleHeaders.append(line)
                continue

            if line[:2] == '# ':
                fj_header += line
                continue

            if header == '':
                if fj_header == '':
                    fj_header = '# fjFile = PHENOTYPE\n'
                header_list = line.split('\t')
                # Flapjack expects the first header cell to be empty
                # NOTE(review): presumably a format requirement -- confirm
                header_list[0] = ''
                header = fj_header + "\t".join(header_list)
                continue

            dnarun = line.split('\t')[0]
            dnarun_data = sample_data[sample_data.dnarun_name == dnarun]
            group = list(dnarun_data.dnasample_sample_group)[0]
            cycle = list(dnarun_data.dnasample_sample_group_cycle)[0]

            if not isSample:
                # Parent pass: copy the row into every group it is a parent of.
                for key in parents:
                    if dnarun in parents[key]:
                        _append_split_line(my_file + "_" + key, header, line)
                continue

            if isSample != 1 or _is_missing(group):
                continue

            # Sample pass: the row goes to its own group (and cycle, if any).
            key = my_file + "_" + group
            if not _is_missing(cycle):
                key += '_' + cycle
            _append_split_line(key, header, line)
65
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
119
76
84ce7c332dc4 Uploaded
cropgeeks
parents: 65
diff changeset
120
84ce7c332dc4 Uploaded
cropgeeks
parents: 65
diff changeset
121
65
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
def splitData(samplefile, genofile):
    '''Record each group's parents, then split the sample and genotype files.

    For every distinct, non-empty ``dnasample_sample_group`` in *samplefile*
    the dnarun names of its parents (rows whose ``germplasm_name`` appears in
    the group's ``germplasm_par1`` / ``germplasm_par2`` columns) are stored in
    the module-level ``parents`` dict, keyed by "<group>" when the group has
    rows without a cycle and by "<group>_<cycle>" for each cycle. Groups
    without any parent dnaruns are not recorded, and existing keys are never
    overwritten.

    Both input files are then split with ``splitfile``: a parent pass (0)
    followed by a sample pass (1) for each file.

    :param samplefile: path of the tab-delimited sample file.
    :param genofile: path of the tab-delimited genotype file.
    '''
    sample_data = pd.read_table(samplefile, dtype='str')

    # Iterate the Series values directly: the index labels are not needed and
    # Series.iteritems() no longer exists in pandas 2.x.
    for item in sample_data.dnasample_sample_group.drop_duplicates():
        if isinstance(item, float):
            if math.isnan(item):
                continue
        elif isinstance(item, str):
            if not item:
                continue
        df = sample_data[sample_data.dnasample_sample_group == item]

        # Collect the dnaruns of this group's parents
        par1 = list(set(x for x in df.germplasm_par1 if str(x) != 'nan'))
        par2 = list(set(x for x in df.germplasm_par2 if str(x) != 'nan'))
        lst1 = list(sample_data.loc[sample_data.germplasm_name.isin(par1), 'dnarun_name'])
        lst2 = list(sample_data.loc[sample_data.germplasm_name.isin(par2), 'dnarun_name'])
        mergedlst = lst1 + lst2

        for sub in df.dnasample_sample_group_cycle.drop_duplicates():
            if isinstance(sub, float):
                if math.isnan(sub):
                    # Rows without a cycle are filed under the bare group name
                    if item not in parents and mergedlst:
                        parents[item] = mergedlst
                    continue
            elif isinstance(sub, str):
                if not sub:
                    continue

            subkey = item + '_' + sub
            if subkey not in parents and mergedlst:
                parents[subkey] = lst1 + lst2

    # Parent pass first so that parents end up at the top of each split file,
    # then the sample pass appends the group's own rows.
    splitfile(samplefile, sample_data, 0)
    splitfile(samplefile, sample_data, 1)
    splitfile(genofile, sample_data, 0)
    splitfile(genofile, sample_data, 1)
65
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
166
76
84ce7c332dc4 Uploaded
cropgeeks
parents: 65
diff changeset
def createProjectFile(samplefile, genofile, jarfile, separator, missing, qtlfile, mapfile, project):
    '''Run Flapjack's CreateProject once for every sample group / cycle.

    The group names are derived from *samplefile* ("<group>" when the cycle is
    empty, otherwise "<group>_<cycle>"). For each name the split sample and
    genotype files are looked up in the module-level ``filenames`` dict, the
    collected ``favAlleleHeaders`` are appended to the split genotype file, and
    ``jhi.flapjack.io.cmd.CreateProject`` is invoked on the split sample file
    and the genotype file's ".tmp" companion.

    :param samplefile: path of the original sample file (also the key prefix
        used in ``filenames``).
    :param genofile: path of the original genotype file (key prefix as above).
    :param jarfile: class path entry containing the Flapjack classes.
    :param separator: value passed to CreateProject's ``-S`` option.
    :param missing: value passed to CreateProject's ``-M`` option.
    :param qtlfile: optional QTL file passed with ``-q`` when truthy.
    :param mapfile: optional map file passed with ``-m`` when truthy.
    :param project: value passed to CreateProject's ``-p`` option.
    '''
    sample_data = pd.read_table(samplefile, dtype='str')

    # Iterate the Series values directly: the index labels are not needed and
    # Series.iteritems() no longer exists in pandas 2.x.
    for key in sample_data.dnasample_sample_group.drop_duplicates():
        if isinstance(key, float) and math.isnan(key):
            continue
        df = sample_data[sample_data.dnasample_sample_group == key]
        for sub in df.dnasample_sample_group_cycle.drop_duplicates():
            if isinstance(sub, float) and math.isnan(sub):
                name = key
            elif isinstance(sub, str) and not sub:
                name = key
            else:
                name = key + '_' + sub
            name = str(name)

            sfile = filenames.get(samplefile + "_" + name)
            gfile = filenames.get(genofile + "_" + name)
            if sfile is None or gfile is None:
                # Nothing was split out for this group (e.g. no genotype rows);
                # skip it instead of aborting every remaining group.
                sys.stderr.write("No split files found for %s, skipping\n" % name)
                continue

            # Close the file before the subprocess starts so the data is on disk.
            # NOTE(review): the headers are appended to the split genotype file,
            # but the command below reads its ".tmp" companion -- confirm that
            # this is the intended target.
            with open(gfile, "a+") as f:
                f.writelines(favAlleleHeaders)

            gfile += '.tmp'
            cmd = ['java', '-cp', jarfile, 'jhi.flapjack.io.cmd.CreateProject',
                   '-A', '-g', gfile, '-t', sfile, '-p', project, '-n', name,
                   '-S', separator, '-M', missing, '-C']
            if qtlfile:
                cmd += ['-q', qtlfile]
            if mapfile:
                cmd += ['-m', mapfile]
            print(cmd)
            call(cmd)
65
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
196
76
84ce7c332dc4 Uploaded
cropgeeks
parents: 65
diff changeset
def createHeader(samplefile, genofile, headerjar):
    '''Run the header-creator jar once for every sample group / cycle.

    The group names are derived from *samplefile* ("<group>" when the cycle is
    empty, otherwise "<group>_<cycle>"). For each name the split sample and
    genotype files are looked up in the module-level ``filenames`` dict and
    ``java -jar <headerjar> <sfile> <gfile> <gfile>.tmp`` is executed.

    :param samplefile: path of the original sample file (also the key prefix
        used in ``filenames``).
    :param genofile: path of the original genotype file (key prefix as above).
    :param headerjar: path of the header-creator jar.
    '''
    sample_data = pd.read_table(samplefile, dtype='str')

    # Iterate the Series values directly: the index labels are not needed and
    # Series.iteritems() no longer exists in pandas 2.x.
    for key in sample_data.dnasample_sample_group.drop_duplicates():
        if isinstance(key, float) and math.isnan(key):
            continue
        df = sample_data[sample_data.dnasample_sample_group == key]
        for sub in df.dnasample_sample_group_cycle.drop_duplicates():
            if isinstance(sub, float) and math.isnan(sub):
                name = key
            elif isinstance(sub, str) and not sub:
                name = key
            else:
                name = key + '_' + sub
            name = str(name)

            sfile = filenames.get(samplefile + "_" + name)
            gfile = filenames.get(genofile + "_" + name)
            if sfile is None or gfile is None:
                # Nothing was split out for this group; a None path cannot be
                # handed to the subprocess, so skip the group.
                sys.stderr.write("No split files found for %s, skipping\n" % name)
                continue

            cmd = ['java', '-jar', headerjar, sfile, gfile, gfile + '.tmp']
            call(cmd)
65
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
218
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
def main(argv=None):
    '''Parse the command line and run the split / header / project pipeline.

    :param argv: list of command-line arguments without the program name;
        defaults to ``sys.argv[1:]``.
    :returns: ``2`` when the pipeline raised an exception, otherwise ``None``.
    :raises SystemExit: with status 2 when the genotype or sample file option
        is missing (and by optparse for ``--help`` / ``--version`` / bad options).
    '''
    program_name = os.path.basename(sys.argv[0])
    # Report the module's own version rather than a separate hard-coded string
    program_version = "v%s" % __version__
    program_build_date = "%s" % __updated__

    program_version_string = '%%prog %s (%s)' % (program_version, program_build_date)
    program_longdesc = ''  # optional - give further explanation about what the program does
    # NOTE(review): template placeholder text; the module docstring names
    # Cornell University and the MIT License -- confirm which one is correct.
    program_license = ("Copyright 2017 user_name (organization_name) "
                       "Licensed under the Apache License 2.0\nhttp://www.apache.org/licenses/LICENSE-2.0")

    if argv is None:
        argv = sys.argv[1:]
    try:
        # setup option parser
        parser = OptionParser(version=program_version_string, epilog=program_longdesc, description=program_license)
        parser.add_option("-g", "--geno", dest="genofile", help="set input genotype file path [default: %default]", metavar="FILE")
        parser.add_option("-s", "--sample", dest="samplefile", help="set input sample file path [default: %default]", metavar="FILE")
        parser.add_option("-m", "--mapfile", dest="mapfile", help="set input map file path [default: %default]", metavar="FILE")
        parser.add_option("-q", "--qtlfile", dest="qtlfile", help="set input QTL file path [default: %default]", metavar="FILE")
        parser.add_option("-j", "--jar", dest="jarfile", help="set Flapjack project creator jar file path [default: %default]", metavar="FILE", default='jars/flapjack.jar')
        parser.add_option("-J", "--headerjar", dest="headerjar", help="set Flapjack header creator jar file path [default: %default]", metavar="FILE", default='jars/pedigreeheader.jar')
        parser.add_option("-S", "--separator", dest="separator", help="declare separator for genotypes, \"\" for no separator [default: \"\"]", metavar="STRING", default='')
        parser.add_option("-M", "--missingGenotype", dest="missing", help="set missing genotype string [default: %default]", metavar="STRING", default='NN')
        parser.add_option("-v", "--verbose", dest="verbose", action="count", help="set verbosity level [default: %default]")
        parser.add_option("-p", "--project", dest="project", help="name of output file [default: %default]")

        # process options
        (opts, args) = parser.parse_args(argv)

        # verbose is None when -v was not given; a truth test avoids comparing
        # None with an int, which is an error on Python 3.
        if opts.verbose:
            print("verbosity level = %d" % opts.verbose)

        # The genotype and sample files are required: exit with a non-zero
        # status so callers can detect the failure.
        if opts.genofile:
            print("genofile = %s" % opts.genofile)
        else:
            sys.stderr.write("No genotype file detected!\n")
            sys.exit(2)
        if opts.samplefile:
            print("samplefile = %s" % opts.samplefile)
        else:
            sys.stderr.write("No sample file detected!\n")
            sys.exit(2)

        # The remaining inputs are optional; only report whether they are set
        if opts.mapfile:
            print("mapfile = %s" % opts.mapfile)
        else:
            sys.stderr.write("No map file detected!\n")
        if opts.qtlfile:
            print("qtlfile = %s" % opts.qtlfile)
        else:
            sys.stderr.write("No QTL file detected!\n")
        if opts.jarfile:
            print("jarfile = %s" % opts.jarfile)
        else:
            sys.stderr.write("No Flapjack project creator jar file detected!\n")
        if opts.headerjar:
            print("headerjar = %s" % opts.headerjar)
        else:
            sys.stderr.write("No Flapjack header creator jar file detected!\n")

        # MAIN BODY #
        splitData(samplefile=opts.samplefile, genofile=opts.genofile)
        createHeader(samplefile=opts.samplefile, genofile=opts.genofile, headerjar=opts.headerjar)
        createProjectFile(samplefile=opts.samplefile, genofile=opts.genofile, jarfile=opts.jarfile, separator=opts.separator, missing=opts.missing, qtlfile=opts.qtlfile, mapfile=opts.mapfile, project=opts.project)

    except Exception as e:  # "as" form is valid on Python 2.6+ and Python 3
        indent = len(program_name) * " "
        sys.stderr.write(program_name + ": " + repr(e) + "\n")
        sys.stderr.write(indent + " for help use --help")
        return 2
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
291
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
292
d7e91a614582 Uploaded
cropgeeks
parents:
diff changeset
if __name__ == "__main__":
    # Script entry point: optionally run doctests and/or a profiled run,
    # otherwise run main() and use its return value as the exit status.
    if TESTRUN:
        import doctest
        doctest.testmod()
    if PROFILE:
        import cProfile
        import pstats
        profile_filename = 'DNASampleSplitter_profile.txt'
        cProfile.run('main()', profile_filename)
        # pstats prints text, so open the report in text mode; the context
        # manager closes it even if formatting the statistics fails.
        with open("profile_stats.txt", "w") as statsfile:
            p = pstats.Stats(profile_filename, stream=statsfile)
            stats = p.strip_dirs().sort_stats('cumulative')
            stats.print_stats()
        sys.exit(0)
    sys.exit(main())