1
2
3
4
5
6 """
7 This module allows to control fdist.
8
9 This will allow to call fdist and associated program (cplot, datacal, pv).
10
11 http://www.rubic.rdg.ac.uk/~mab/software.html
12 """
13
14 import os
15 import sys
16 from random import randint
17 from time import strftime, clock
18
19
20 if sys.version_info[0] == 3:
21 maxint = sys.maxsize
22 else:
23 maxint = sys.maxint
24
25
27
28 if f=="-nan":
29 f="nan"
30 return float(f)
31
32
34 - def __init__(self, fdist_dir = '', ext = None):
35 """Initializes the controller.
36
37 fdist_dir is the directory where fdist2 is.
38 ext is the extension of binaries (.exe on windows,
39 none on Unix)
40
41 """
42 self.tmp_idx = 0
43 self.fdist_dir = fdist_dir
44 self.os_name = os.name
45 if sys.platform=='win32':
46 py_ext = '.exe'
47 else:
48 py_ext = ''
49 if ext is None:
50 self.ext = py_ext
51 else:
52 self.ext = ext
53 exec_counts = 0
54
56 """Returns the path to an fdist application.
57
58 Includes Path where fdist can be found plus executable extension.
59 """
60 if self.fdist_dir == '':
61 return app + self.ext
62 else:
63 return os.sep.join([self.fdist_dir, app]) + self.ext
64
66 """Gets a temporary file name.
67
68 Returns a temporary file name, if executing inside jython
69 tries to replace unexisting tempfile.mkstemp().
70 """
71 self.tmp_idx += 1
72 return strftime("%H%M%S") + str(int(clock()*100)) + str(randint(0,1000)) + str(self.tmp_idx)
73
74 - def run_datacal(self, data_dir='.', version=1,
75 crit_freq = 0.99, p = 0.5, beta= (0.25, 0.25)):
76 """Executes datacal.
77
78 data_dir - Where the data is found.
79 """
80 in_name = self._get_temp_file()
81 out_name = self._get_temp_file()
82 f = open(data_dir + os.sep + in_name, 'w')
83 if version==1:
84 f.write('a\n')
85 datacal_name = "datacal"
86 else:
87 f.write('%f\n%f\n%f %f\na\n' % (crit_freq, p, beta[0], beta[1]))
88 datacal_name = "Ddatacal"
89 f.close()
90 curr_dir = os.getcwd()
91 os.system('cd ' + data_dir + ' && ' +
92 self._get_path(datacal_name) + ' < ' + in_name + ' > ' + out_name)
93 f = open(data_dir + os.sep + out_name)
94 if version == 1:
95 fst_line = f.readline().rstrip().split(' ')
96 fst = my_float(fst_line[4])
97 sample_line = f.readline().rstrip().split(' ')
98 sample = int(sample_line[9])
99 else:
100 l = f.readline().rstrip().split(" ")
101 loci, pops = int(l[-5]), int(l[-2])
102 fst_line = f.readline().rstrip().split(' ')
103 fst = my_float(fst_line[4])
104 sample_line = f.readline().rstrip().split(' ')
105 sample = int(sample_line[9])
106 F_line = f.readline().rstrip().split(' ')
107 F, obs = my_float(F_line[5]), int(F_line[8])
108 f.close()
109 os.remove(data_dir + os.sep + in_name)
110 os.remove(data_dir + os.sep + out_name)
111 if version==1:
112 return fst, sample
113 else:
114 return fst, sample, loci, pops, F, obs
115
117 """Generates an INTFILE.
118
119 Parameter:
120 data_dir - data directory
121 """
122 inf = open(data_dir + os.sep + 'INTFILE', 'w')
123 for i in range(98):
124 inf.write(str(randint(-maxint+1,maxint-1)) + '\n')
125 inf.write('8\n')
126 inf.close()
127
128 - def run_fdist(self, npops, nsamples, fst, sample_size,
129 mut = 0, num_sims = 50000, data_dir='.',
130 is_dominant = False, theta = 0.06, beta = (0.25, 0.25),
131 max_freq = 0.99):
132 """Executes (d)fdist.
133
134 Parameters:
135 npops - Number of populations
136 nsamples - Number of populations sampled
137 fst - expected Fst
138 sample_size - Sample size per population
139 For dfdist: if zero a sample size file has to be provided
140 mut - 1=Stepwise, 0=Infinite allele
141 num_sims - number of simulations
142 data_dir - Where the data is found
143 is_dominant - If true executes dfdist
144 theta - Theta (=2Nmu)
145 beta - Parameters for the beta prior
146 max_freq - Maximum allowed frequency of the commonest allele
147
148 Returns:
149 fst - Average Fst
150
151 Important Note: This can take quite a while to run!
152 """
153 if fst >= 0.9:
154
155 fst = 0.899
156 if fst <= 0.0:
157
158 fst = 0.001
159 in_name = 'input.fd'
160 out_name = 'output.fd'
161
162 f = open(data_dir + os.sep + in_name, 'w')
163 f.write('y\n\n')
164 f.close()
165 if is_dominant:
166 config_name = "Dfdist_params"
167 else:
168 config_name = "fdist_params2.dat"
169
170 f = open(data_dir + os.sep + config_name, 'w')
171 f.write(str(npops) + '\n')
172 f.write(str(nsamples) + '\n')
173 f.write(str(fst) + '\n')
174 f.write(str(sample_size) + '\n')
175 if is_dominant:
176 f.write(str(theta) + '\n')
177 else:
178 f.write(str(mut) + '\n')
179 f.write(str(num_sims) + '\n')
180 if is_dominant:
181 f.write("%f %f\n" % beta)
182 f.write("%f\n" % max_freq)
183 f.close()
184 self._generate_intfile(data_dir)
185
186 if is_dominant:
187 bin_name = "Dfdist"
188 else:
189 bin_name = "fdist2"
190 os.system('cd ' + data_dir + ' && ' +
191 self._get_path(bin_name) + ' < ' + in_name + ' > ' + out_name)
192 f = open(data_dir + os.sep + out_name)
193 lines = f.readlines()
194 f.close()
195 for line in lines:
196 if line.startswith('average Fst'):
197 fst = my_float(line.rstrip().split(' ')[-1])
198 os.remove(data_dir + os.sep + in_name)
199 os.remove(data_dir + os.sep + out_name)
200 return fst
201
202 - def run_fdist_force_fst(self, npops, nsamples, fst, sample_size,
203 mut = 0, num_sims = 50000, data_dir='.',
204 try_runs = 5000, limit=0.001,is_dominant = False,
205 theta = 0.06, beta = (0.25, 0.25),
206 max_freq = 0.99):
207 """Executes fdist trying to force Fst.
208
209 Parameters:
210 try_runs - Number of simulations on the part trying to get
211 Fst correct
212 limit - Interval limit
213 Other parameters can be seen on run_fdist.
214 """
215 max_run_fst = 1
216 min_run_fst = 0
217 current_run_fst = fst
218 old_fst = fst
219 while True:
220
221 real_fst = self.run_fdist(npops, nsamples,
222 current_run_fst, sample_size,
223 mut, try_runs, data_dir,
224 is_dominant, theta, beta, max_freq)
225
226 if abs(real_fst - fst) < limit:
227
228 return self.run_fdist(npops, nsamples, current_run_fst,
229 sample_size,
230 mut, num_sims, data_dir,
231 is_dominant, theta, beta, max_freq)
232 old_fst = current_run_fst
233 if real_fst > fst:
234 max_run_fst = current_run_fst
235 if current_run_fst < min_run_fst + limit:
236
237
238 return self.run_fdist(npops, nsamples, current_run_fst,
239 sample_size, mut, num_sims, data_dir)
240 current_run_fst = (min_run_fst + current_run_fst)/2
241 else:
242 min_run_fst = current_run_fst
243 if current_run_fst > max_run_fst - limit:
244
245
246 return self.run_fdist(npops, nsamples, current_run_fst,
247 sample_size, mut, num_sims, data_dir,
248 is_dominant, theta, beta, max_freq)
249 current_run_fst = (max_run_fst + current_run_fst)/2
250
251 - def run_cplot(self, ci= 0.95, data_dir='.', version = 1, smooth=0.04):
252 """Executes cplot.
253
254 ci - Confidence interval.
255 data_dir - Where the data is found.
256 """
257 in_name = self._get_temp_file()
258 out_name = self._get_temp_file()
259 f = open(data_dir + os.sep + in_name, 'w')
260 if version == 1:
261 f.write('out.dat out.cpl\n' + str(ci) + '\n')
262 else:
263 f.write("\n".join([
264 "data_fst_outfile out.cpl out.dat",
265 str(ci), str(smooth)]))
266 f.close()
267 curr_dir = os.getcwd()
268 self._generate_intfile(data_dir)
269 if version == 1:
270 cplot_name = "cplot"
271 else:
272 cplot_name = "cplot2"
273 os.system('cd ' + data_dir + ' && ' +
274 self._get_path(cplot_name) + ' < ' + in_name + ' > ' + out_name)
275 os.remove(data_dir + os.sep + in_name)
276 os.remove(data_dir + os.sep + out_name)
277 f = open(data_dir + os.sep + 'out.cpl')
278 conf_lines = []
279 l = f.readline()
280 try:
281 while l!='':
282 conf_lines.append(
283 tuple(map(lambda x : my_float(x), l.rstrip().split(' ')))
284 )
285 l = f.readline()
286 except ValueError:
287 f.close()
288 return []
289 f.close()
290 return conf_lines
291
292 - def run_pv(self, out_file='probs.dat', data_dir='.',
293 version = 1, smooth=0.04):
294 """Executes pv.
295
296 out_file - Name of output file.
297 data_dir - Where the data is found.
298 """
299 in_name = self._get_temp_file()
300 out_name = self._get_temp_file()
301 f = open(data_dir + os.sep + in_name, 'w')
302 f.write('data_fst_outfile ' + out_file + ' out.dat\n')
303 f.write(str(smooth) + '\n')
304 f.close()
305 self._generate_intfile(data_dir)
306 if version == 1:
307 pv_name = "pv"
308 else:
309 pv_name = "pv2"
310 os.system('cd ' + data_dir + ' && ' +
311 self._get_path(pv_name) + ' < ' + in_name + ' > ' + out_name)
312 pvf = open(data_dir + os.sep + out_file, 'r')
313 result = map(lambda x: tuple(map(lambda y: my_float(y), x.rstrip().split(' '))),
314 pvf.readlines())
315 pvf.close()
316 os.remove(data_dir + os.sep + in_name)
317 os.remove(data_dir + os.sep + out_name)
318 return result
319