Package Bio :: Package PopGen :: Package FDist :: Module Controller
[hide private]
[frames] | no frames]

Source Code for Module Bio.PopGen.FDist.Controller

  1  # Copyright 2007 by Tiago Antao <tiagoantao@gmail.com>.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """This module allows you to control fdist. 
  7   
  8  This will allow you to call fdist and associated programs (cplot, 
  9  datacal, pv) by Mark Beaumont. 
 10   
 11  http://www.rubic.rdg.ac.uk/~mab/software.html (old) 
 12  http://www.maths.bris.ac.uk/~mamab/ (new) 
 13  """ 
 14   
 15  import os 
 16  import subprocess 
 17  import sys 
 18  from random import randint 
 19  from time import strftime, clock 
 20  # from logging import debug 
 21   
 22  __docformat__ = "restructuredtext en" 
 23   
 24   
25 -def my_float(f):
26 # Because of Jython, mostly 27 if f == "-nan": 28 f = "nan" 29 return float(f)
30 31
32 -class FDistController(object):
33 - def __init__(self, fdist_dir='', ext=None):
34 """Initializes the controller. 35 36 fdist_dir is the directory where fdist2 is. 37 ext is the extension of binaries (.exe on windows, 38 none on Unix) 39 40 """ 41 self.tmp_idx = 0 42 self.fdist_dir = fdist_dir 43 self.os_name = os.name 44 if sys.platform == 'win32': 45 py_ext = '.exe' 46 else: 47 py_ext = '' 48 if ext is None: 49 self.ext = py_ext 50 else: 51 self.ext = ext
52
53 - def _get_path(self, app):
54 """Returns the path to an fdist application. 55 56 Includes Path where fdist can be found plus executable extension. 57 """ 58 if self.fdist_dir == '': 59 return app + self.ext 60 else: 61 return os.sep.join([self.fdist_dir, app]) + self.ext
62
63 - def _get_temp_file(self):
64 """Gets a temporary file name. 65 66 Returns a temporary file name, if executing inside jython 67 tries to replace unexisting tempfile.mkstemp(). 68 """ 69 self.tmp_idx += 1 70 return strftime("%H%M%S") + str(int(clock() * 100)) + str(randint(0, 1000)) + str(self.tmp_idx)
71
72 - def run_datacal(self, data_dir='.', version=1, 73 crit_freq=0.99, p=0.5, beta=(0.25, 0.25)):
74 """Executes datacal. 75 76 data_dir - Where the data is found. 77 """ 78 if version == 1: 79 datacal_name = "datacal" 80 else: 81 datacal_name = "Ddatacal" 82 proc = subprocess.Popen([self._get_path(datacal_name)], 83 universal_newlines=True, 84 stdin=subprocess.PIPE, 85 shell=(sys.platform != "win32"), 86 stdout=subprocess.PIPE, cwd=data_dir) 87 if version == 1: 88 out, err = proc.communicate('a\n') 89 lines = out.split("\n") 90 fst_line = lines[0].rstrip().split(' ') 91 fst = my_float(fst_line[4]) 92 sample_line = lines[1].rstrip().split(' ') 93 sample = int(sample_line[9]) 94 else: 95 out, err = proc.communicate('%f\n%f\n%f %f\na\n' % ( 96 crit_freq, p, beta[0], beta[1])) 97 lines = out.split("\n") 98 l = lines[0].rstrip().split(" ") 99 loci, pops = int(l[-5]), int(l[-2]) 100 fst_line = lines[1].rstrip().split(' ') 101 fst = my_float(fst_line[4]) 102 sample_line = lines[2].rstrip().split(' ') 103 sample = int(sample_line[9]) 104 F_line = lines[3].rstrip().split(' ') 105 F, obs = my_float(F_line[5]), int(F_line[8]) 106 if version == 1: 107 return fst, sample 108 else: 109 return fst, sample, loci, pops, F, obs
110
111 - def _generate_intfile(self, data_dir):
112 """Generates an INTFILE. 113 114 Parameter: 115 data_dir - data directory 116 """ 117 inf = open(data_dir + os.sep + 'INTFILE', 'w') 118 for i in range(98): 119 inf.write(str(randint(-2 ** 31 + 1, 2 ** 31 - 1)) + '\n') 120 inf.write('8\n') 121 inf.close()
122
123 - def run_fdist(self, npops, nsamples, fst, sample_size, 124 mut=0, num_sims=50000, data_dir='.', 125 is_dominant=False, theta=0.06, beta=(0.25, 0.25), 126 max_freq=0.99):
127 """Executes (d)fdist. 128 129 Parameters: 130 131 - npops - Number of populations 132 - nsamples - Number of populations sampled 133 - fst - expected Fst 134 - sample_size - Sample size per population 135 For dfdist: if zero a sample size file has to be provided 136 - mut - 1=Stepwise, 0=Infinite allele 137 - num_sims - number of simulations 138 - data_dir - Where the data is found 139 - is_dominant - If true executes dfdist 140 - theta - Theta (=2Nmu) 141 - beta - Parameters for the beta prior 142 - max_freq - Maximum allowed frequency of the commonest allele 143 144 Returns: 145 146 - fst - Average Fst 147 148 Important Note: This can take quite a while to run! 149 """ 150 if fst >= 0.9: 151 # Lets not joke 152 fst = 0.899 153 if fst <= 0.0: 154 # 0 will make fdist run forever 155 fst = 0.001 156 if is_dominant: 157 config_name = "Dfdist_params" 158 else: 159 config_name = "fdist_params2.dat" 160 161 f = open(data_dir + os.sep + config_name, 'w') 162 f.write(str(npops) + '\n') 163 f.write(str(nsamples) + '\n') 164 f.write(str(fst) + '\n') 165 f.write(str(sample_size) + '\n') 166 if is_dominant: 167 f.write(str(theta) + '\n') 168 else: 169 f.write(str(mut) + '\n') 170 f.write(str(num_sims) + '\n') 171 if is_dominant: 172 f.write("%f %f\n" % beta) 173 f.write("%f\n" % max_freq) 174 f.close() 175 176 self._generate_intfile(data_dir) 177 178 if is_dominant: 179 bin_name = "Dfdist" 180 else: 181 bin_name = "fdist2" 182 proc = subprocess.Popen([self._get_path(bin_name)], cwd=data_dir, 183 universal_newlines=True, 184 stdin=subprocess.PIPE, stdout=subprocess.PIPE, 185 shell=(sys.platform != "win32")) 186 out, err = proc.communicate('y\n\n') 187 lines = out.split("\n") 188 for line in lines: 189 if line.startswith('average Fst'): 190 fst = my_float(line.rstrip().split(' ')[-1]) 191 return fst
192
193 - def run_fdist_force_fst(self, npops, nsamples, fst, sample_size, 194 mut=0, num_sims=50000, data_dir='.', 195 try_runs=5000, limit=0.001, is_dominant=False, 196 theta=0.06, beta=(0.25, 0.25), 197 max_freq=0.99):
198 """Executes fdist trying to force Fst. 199 200 Parameters: 201 202 - try_runs - Number of simulations on the part trying to get 203 Fst correct 204 - limit - Interval limit 205 206 Other parameters can be seen on run_fdist. 207 """ 208 max_run_fst = 1 209 min_run_fst = 0 210 current_run_fst = fst 211 while True: 212 real_fst = self.run_fdist(npops, nsamples, current_run_fst, 213 sample_size, mut, try_runs, data_dir, 214 is_dominant, theta, beta, max_freq) 215 if abs(real_fst - fst) < limit: 216 return self.run_fdist(npops, nsamples, current_run_fst, 217 sample_size, mut, num_sims, data_dir, 218 is_dominant, theta, beta, max_freq) 219 if real_fst > fst: 220 max_run_fst = current_run_fst 221 if current_run_fst < min_run_fst + limit: 222 # we can do no better 223 # debug('Lower limit is ' + str(min_run_fst)) 224 return self.run_fdist(npops, nsamples, current_run_fst, 225 sample_size, mut, num_sims, 226 data_dir) 227 current_run_fst = (min_run_fst + current_run_fst) / 2 228 else: 229 min_run_fst = current_run_fst 230 if current_run_fst > max_run_fst - limit: 231 return self.run_fdist(npops, nsamples, current_run_fst, 232 sample_size, mut, num_sims, 233 data_dir, is_dominant, theta, 234 beta, max_freq) 235 current_run_fst = (max_run_fst + current_run_fst) / 2
236
237 - def run_cplot(self, ci=0.95, data_dir='.', version=1, smooth=0.04):
238 """Executes cplot. 239 240 ci - Confidence interval. 241 data_dir - Where the data is found. 242 """ 243 244 self._generate_intfile(data_dir) 245 if version == 1: 246 cplot_name = "cplot" 247 else: 248 cplot_name = "cplot2" 249 proc = subprocess.Popen([self._get_path(cplot_name)], cwd=data_dir, 250 stdin=subprocess.PIPE, stdout=subprocess.PIPE, 251 shell=(sys.platform != "win32"), 252 universal_newlines=True) 253 if version == 1: 254 proc.communicate('out.dat out.cpl\n' + str(ci) + '\n') 255 else: 256 proc.communicate("\n".join([ 257 "data_fst_outfile out.cpl out.dat", 258 str(ci), str(smooth)])) 259 260 f = open(data_dir + os.sep + 'out.cpl') 261 conf_lines = [] 262 l = f.readline() 263 try: 264 while l != '': 265 conf_lines.append( 266 tuple(my_float(x) for x in l.rstrip().split(' '))) 267 l = f.readline() 268 except ValueError: 269 f.close() 270 return [] 271 f.close() 272 return conf_lines
273
274 - def run_pv(self, out_file='probs.dat', data_dir='.', 275 version=1, smooth=0.04):
276 """Executes pv. 277 278 out_file - Name of output file. 279 data_dir - Where the data is found. 280 """ 281 282 self._generate_intfile(data_dir) 283 284 if version == 1: 285 pv_name = "pv" 286 else: 287 pv_name = "pv2" 288 289 proc = subprocess.Popen([self._get_path(pv_name)], cwd=data_dir, 290 shell=(sys.platform != "win32"), 291 stdin=subprocess.PIPE, 292 stdout=subprocess.PIPE, 293 universal_newlines=True) 294 proc.communicate('data_fst_outfile ' + out_file + 295 ' out.dat\n' + str(smooth) + '\n') 296 pvf = open(data_dir + os.sep + out_file, 'r') 297 result = [tuple(my_float(y) for y in x.rstrip().split(' ')) for x in pvf.readlines()] 298 pvf.close() 299 return result
300