Package Bio :: Package PopGen :: Package FDist :: Module Controller
[hide private]
[frames] | no frames]

Source Code for Module Bio.PopGen.FDist.Controller

  1  # Copyright 2007 by Tiago Antao <tiagoantao@gmail.com>.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """This module allows you to control fdist. 
  7   
  8  This will allow you to call fdist and associated programs (cplot, 
  9  datacal, pv) by Mark Beaumont. 
 10   
 11  http://www.rubic.rdg.ac.uk/~mab/software.html (old) 
 12  http://www.maths.bris.ac.uk/~mamab/ (new) 
 13  """ 
 14   
 15  import os 
 16  import subprocess 
 17  import sys 
 18  from random import randint 
 19  from time import strftime, clock 
 20  # from logging import debug 
 21   
 22  __docformat__ = "restructuredtext en" 
 23   
24 -def my_float(f):
25 # Because of Jython, mostly 26 if f == "-nan": 27 f = "nan" 28 return float(f)
29 30
31 -class FDistController(object):
32 - def __init__(self, fdist_dir='', ext=None):
33 """Initializes the controller. 34 35 fdist_dir is the directory where fdist2 is. 36 ext is the extension of binaries (.exe on windows, 37 none on Unix) 38 39 """ 40 self.tmp_idx = 0 41 self.fdist_dir = fdist_dir 42 self.os_name = os.name 43 if sys.platform == 'win32': 44 py_ext = '.exe' 45 else: 46 py_ext = '' 47 if ext is None: 48 self.ext = py_ext 49 else: 50 self.ext = ext
51
52 - def _get_path(self, app):
53 """Returns the path to an fdist application. 54 55 Includes Path where fdist can be found plus executable extension. 56 """ 57 if self.fdist_dir == '': 58 return app + self.ext 59 else: 60 return os.sep.join([self.fdist_dir, app]) + self.ext
61
62 - def _get_temp_file(self):
63 """Gets a temporary file name. 64 65 Returns a temporary file name, if executing inside jython 66 tries to replace unexisting tempfile.mkstemp(). 67 """ 68 self.tmp_idx += 1 69 return strftime("%H%M%S") + str(int(clock() * 100)) + str(randint(0, 1000)) + str(self.tmp_idx)
70
71 - def run_datacal(self, data_dir='.', version=1, 72 crit_freq=0.99, p=0.5, beta=(0.25, 0.25)):
73 """Executes datacal. 74 75 data_dir - Where the data is found. 76 """ 77 if version == 1: 78 datacal_name = "datacal" 79 else: 80 datacal_name = "Ddatacal" 81 proc = subprocess.Popen([self._get_path(datacal_name)], 82 universal_newlines=True, 83 stdin=subprocess.PIPE, 84 shell=(sys.platform != "win32"), 85 stdout=subprocess.PIPE, cwd=data_dir) 86 if version == 1: 87 out, err = proc.communicate('a\n') 88 lines = out.split("\n") 89 fst_line = lines[0].rstrip().split(' ') 90 fst = my_float(fst_line[4]) 91 sample_line = lines[1].rstrip().split(' ') 92 sample = int(sample_line[9]) 93 else: 94 out, err = proc.communicate('%f\n%f\n%f %f\na\n' % ( 95 crit_freq, p, beta[0], beta[1])) 96 lines = out.split("\n") 97 l = lines[0].rstrip().split(" ") 98 loci, pops = int(l[-5]), int(l[-2]) 99 fst_line = lines[1].rstrip().split(' ') 100 fst = my_float(fst_line[4]) 101 sample_line = lines[2].rstrip().split(' ') 102 sample = int(sample_line[9]) 103 F_line = lines[3].rstrip().split(' ') 104 F, obs = my_float(F_line[5]), int(F_line[8]) 105 if version == 1: 106 return fst, sample 107 else: 108 return fst, sample, loci, pops, F, obs
109
110 - def _generate_intfile(self, data_dir):
111 """Generates an INTFILE. 112 113 Parameter: 114 data_dir - data directory 115 """ 116 inf = open(data_dir + os.sep + 'INTFILE', 'w') 117 for i in range(98): 118 inf.write(str(randint(-2**31 + 1, 2**31 - 1)) + '\n') 119 inf.write('8\n') 120 inf.close()
121
122 - def run_fdist(self, npops, nsamples, fst, sample_size, 123 mut=0, num_sims=50000, data_dir='.', 124 is_dominant=False, theta=0.06, beta=(0.25, 0.25), 125 max_freq=0.99):
126 """Executes (d)fdist. 127 128 Parameters: 129 130 - npops - Number of populations 131 - nsamples - Number of populations sampled 132 - fst - expected Fst 133 - sample_size - Sample size per population 134 For dfdist: if zero a sample size file has to be provided 135 - mut - 1=Stepwise, 0=Infinite allele 136 - num_sims - number of simulations 137 - data_dir - Where the data is found 138 - is_dominant - If true executes dfdist 139 - theta - Theta (=2Nmu) 140 - beta - Parameters for the beta prior 141 - max_freq - Maximum allowed frequency of the commonest allele 142 143 Returns: 144 145 - fst - Average Fst 146 147 Important Note: This can take quite a while to run! 148 """ 149 if fst >= 0.9: 150 # Lets not joke 151 fst = 0.899 152 if fst <= 0.0: 153 # 0 will make fdist run forever 154 fst = 0.001 155 if is_dominant: 156 config_name = "Dfdist_params" 157 else: 158 config_name = "fdist_params2.dat" 159 160 f = open(data_dir + os.sep + config_name, 'w') 161 f.write(str(npops) + '\n') 162 f.write(str(nsamples) + '\n') 163 f.write(str(fst) + '\n') 164 f.write(str(sample_size) + '\n') 165 if is_dominant: 166 f.write(str(theta) + '\n') 167 else: 168 f.write(str(mut) + '\n') 169 f.write(str(num_sims) + '\n') 170 if is_dominant: 171 f.write("%f %f\n" % beta) 172 f.write("%f\n" % max_freq) 173 f.close() 174 175 self._generate_intfile(data_dir) 176 177 if is_dominant: 178 bin_name = "Dfdist" 179 else: 180 bin_name = "fdist2" 181 proc = subprocess.Popen([self._get_path(bin_name)], cwd=data_dir, 182 universal_newlines=True, 183 stdin=subprocess.PIPE, stdout=subprocess.PIPE, 184 shell=(sys.platform != "win32")) 185 out, err = proc.communicate('y\n\n') 186 lines = out.split("\n") 187 for line in lines: 188 if line.startswith('average Fst'): 189 fst = my_float(line.rstrip().split(' ')[-1]) 190 return fst
191
192 - def run_fdist_force_fst(self, npops, nsamples, fst, sample_size, 193 mut=0, num_sims=50000, data_dir='.', 194 try_runs=5000, limit=0.001, is_dominant=False, 195 theta=0.06, beta=(0.25, 0.25), 196 max_freq=0.99):
197 """Executes fdist trying to force Fst. 198 199 Parameters: 200 201 - try_runs - Number of simulations on the part trying to get 202 Fst correct 203 - limit - Interval limit 204 205 Other parameters can be seen on run_fdist. 206 """ 207 max_run_fst = 1 208 min_run_fst = 0 209 current_run_fst = fst 210 while True: 211 real_fst = self.run_fdist(npops, nsamples, current_run_fst, 212 sample_size, mut, try_runs, data_dir, 213 is_dominant, theta, beta, max_freq) 214 if abs(real_fst - fst) < limit: 215 return self.run_fdist(npops, nsamples, current_run_fst, 216 sample_size, mut, num_sims, data_dir, 217 is_dominant, theta, beta, max_freq) 218 if real_fst > fst: 219 max_run_fst = current_run_fst 220 if current_run_fst < min_run_fst + limit: 221 # we can do no better 222 # debug('Lower limit is ' + str(min_run_fst)) 223 return self.run_fdist(npops, nsamples, current_run_fst, 224 sample_size, mut, num_sims, 225 data_dir) 226 current_run_fst = (min_run_fst + current_run_fst) / 2 227 else: 228 min_run_fst = current_run_fst 229 if current_run_fst > max_run_fst - limit: 230 return self.run_fdist(npops, nsamples, current_run_fst, 231 sample_size, mut, num_sims, 232 data_dir, is_dominant, theta, 233 beta, max_freq) 234 current_run_fst = (max_run_fst + current_run_fst) / 2
235
236 - def run_cplot(self, ci=0.95, data_dir='.', version=1, smooth=0.04):
237 """Executes cplot. 238 239 ci - Confidence interval. 240 data_dir - Where the data is found. 241 """ 242 243 self._generate_intfile(data_dir) 244 if version == 1: 245 cplot_name = "cplot" 246 else: 247 cplot_name = "cplot2" 248 proc = subprocess.Popen([self._get_path(cplot_name)], cwd=data_dir, 249 stdin=subprocess.PIPE, stdout=subprocess.PIPE, 250 shell=(sys.platform != "win32"), 251 universal_newlines=True) 252 if version == 1: 253 proc.communicate('out.dat out.cpl\n' + str(ci) + '\n') 254 else: 255 proc.communicate("\n".join([ 256 "data_fst_outfile out.cpl out.dat", 257 str(ci), str(smooth)])) 258 259 f = open(data_dir + os.sep + 'out.cpl') 260 conf_lines = [] 261 l = f.readline() 262 try: 263 while l != '': 264 conf_lines.append( 265 tuple(my_float(x) for x in l.rstrip().split(' '))) 266 l = f.readline() 267 except ValueError: 268 f.close() 269 return [] 270 f.close() 271 return conf_lines
272
273 - def run_pv(self, out_file='probs.dat', data_dir='.', 274 version=1, smooth=0.04):
275 """Executes pv. 276 277 out_file - Name of output file. 278 data_dir - Where the data is found. 279 """ 280 281 self._generate_intfile(data_dir) 282 283 if version == 1: 284 pv_name = "pv" 285 else: 286 pv_name = "pv2" 287 288 proc = subprocess.Popen([self._get_path(pv_name)], cwd=data_dir, 289 shell=(sys.platform != "win32"), 290 stdin=subprocess.PIPE, 291 stdout=subprocess.PIPE, 292 universal_newlines=True) 293 proc.communicate('data_fst_outfile ' + out_file + 294 ' out.dat\n' + str(smooth) + '\n') 295 pvf = open(data_dir + os.sep + out_file, 'r') 296 result = [tuple(my_float(y) for y in x.rstrip().split(' ')) for x in pvf.readlines()] 297 pvf.close() 298 return result
299