Package Bio :: Package PopGen :: Package GenePop :: Module Controller
[hide private]
[frames] | no frames]

Source Code for Module Bio.PopGen.GenePop.Controller

  1  # Copyright 2009 by Tiago Antao <tiagoantao@gmail.com>.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """ 
  7  This module allows to control GenePop. 
  8   
  9  """ 
 10   
 11  import os 
 12  import re 
 13  import shutil 
 14  import sys 
 15  import tempfile 
 16   
 17  from Bio.Application import AbstractCommandline, _Argument 
 18   
 19   
20 -def _gp_float(tok):
21 """Gets a float from a token, if it fails, returns the string. 22 """ 23 try: 24 return float(tok) 25 except ValueError: 26 return str(tok)
27 28
29 -def _gp_int(tok):
30 """Gets a int from a token, if it fails, returns the string. 31 """ 32 try: 33 return int(tok) 34 except ValueError: 35 return str(tok)
36 37
38 -def _read_allele_freq_table(f):
39 l = f.readline() 40 while ' --' not in l: 41 if l == "": 42 raise StopIteration 43 if 'No data' in l: 44 return None, None 45 l = f.readline() 46 alleles = [x for x in f.readline().rstrip().split(" ") if x != ''] 47 alleles = [_gp_int(x) for x in alleles] 48 l = f.readline().rstrip() 49 table = [] 50 while l != "": 51 line = [x for x in l.split(" ") if x != ''] 52 try: 53 table.append( 54 (line[0], [_gp_float(x) for x in line[1: -1]], 55 _gp_int(line[-1]))) 56 except ValueError: 57 table.append( 58 (line[0], [None] * len(alleles), 0)) 59 l = f.readline().rstrip() 60 return alleles, table
61 62
63 -def _read_table(f, funs):
64 table = [] 65 l = f.readline().rstrip() 66 while '---' not in l: 67 l = f.readline().rstrip() 68 l = f.readline().rstrip() 69 while '===' not in l and '---' not in l and l != "": 70 toks = [x for x in l.split(" ") if x != ""] 71 line = [] 72 for i in range(len(toks)): 73 try: 74 line.append(funs[i](toks[i])) 75 except ValueError: 76 line.append(toks[i]) # Could not cast 77 table.append(tuple(line)) 78 l = f.readline().rstrip() 79 return table
80 81
82 -def _read_triangle_matrix(f):
83 matrix = [] 84 l = f.readline().rstrip() 85 while l != "": 86 matrix.append( 87 [_gp_float(x) for x in [y for y in l.split(" ") if y != ""]]) 88 l = f.readline().rstrip() 89 return matrix
90 91
92 -def _read_headed_triangle_matrix(f):
93 matrix = {} 94 header = f.readline().rstrip() 95 if '---' in header or '===' in header: 96 header = f.readline().rstrip() 97 nlines = len([x for x in header.split(' ') if x != '']) - 1 98 for line_pop in range(nlines): 99 l = f.readline().rstrip() 100 vals = [x for x in l.split(' ')[1:] if x != ''] 101 clean_vals = [] 102 for val in vals: 103 try: 104 clean_vals.append(_gp_float(val)) 105 except ValueError: 106 clean_vals.append(None) 107 for col_pop in range(len(clean_vals)): 108 matrix[(line_pop + 1, col_pop)] = clean_vals[col_pop] 109 return matrix
110 111
112 -def _hw_func(stream, is_locus, has_fisher=False):
113 l = stream.readline() 114 if is_locus: 115 hook = "Locus " 116 else: 117 hook = " Pop : " 118 while l != "": 119 if l.startswith(hook): 120 stream.readline() 121 stream.readline() 122 stream.readline() 123 table = _read_table(stream, [str, _gp_float, _gp_float, _gp_float, _gp_float, _gp_int, str]) 124 #loci might mean pop if hook="Locus " 125 loci = {} 126 for entry in table: 127 if len(entry) < 3: 128 loci[entry[0]] = None 129 else: 130 locus, p, se, fis_wc, fis_rh, steps = entry[:-1] 131 if se == "-": 132 se = None 133 loci[locus] = p, se, fis_wc, fis_rh, steps 134 return loci 135 l = stream.readline() 136 #self.done = True 137 raise StopIteration
138 139
140 -class _FileIterator:
141 """Iterator which crawls over a stream of lines with a function. 142 143 The generator function is expected to yield a tuple, while 144 consuming input 145 """
146 - def __init__(self, func, fname, handle=None):
147 self.func = func 148 if handle is None: 149 self.stream = open(fname) 150 else: 151 # For special cases where calling code wants to 152 # seek into the file before starting: 153 self.stream = handle 154 self.fname = fname 155 self.done = False
156
157 - def __iter__(self):
158 if self.done: 159 self.done = True 160 raise StopIteration 161 return self
162
163 - def __next__(self):
164 return self.func(self)
165 166 if sys.version_info[0] < 3:
167 - def next(self):
168 """Python 2 style alias for Python 3 style __next__ method.""" 169 return self.__next__()
170
171 - def __del__(self):
172 self.stream.close() 173 try: 174 os.remove(self.fname) 175 except OSError: 176 #Jython seems to call the iterator twice 177 pass
178 179
180 -class _GenePopCommandline(AbstractCommandline):
181 """ Command Line Wrapper for GenePop. 182 """
183 - def __init__(self, genepop_dir=None, cmd='Genepop', **kwargs):
184 self.parameters = [ 185 _Argument(["command"], "GenePop option to be called", 186 is_required=True), 187 _Argument(["mode"], "Should allways be batch", is_required=True), 188 _Argument(["input"], "Input file", is_required=True), 189 _Argument(["Dememorization"], "Dememorization step"), 190 _Argument(["BatchNumber"], "Number of MCMC batches"), 191 _Argument(["BatchLength"], "Length of MCMC chains"), 192 _Argument(["HWtests"], "Enumeration or MCMC"), 193 _Argument(["IsolBDstatistic"], "IBD statistic (a or e)"), 194 _Argument(["MinimalDistance"], "Minimal IBD distance"), 195 _Argument(["GeographicScale"], "Log or Linear"), 196 ] 197 AbstractCommandline.__init__(self, cmd, **kwargs) 198 self.set_parameter("mode", "Mode=Batch")
199
200 - def set_menu(self, option_list):
201 """Sets the menu option. 202 203 Example set_menu([6,1]) = get all F statistics (menu 6.1) 204 """ 205 self.set_parameter("command", "MenuOptions=" + 206 ".".join(str(x) for x in option_list))
207
208 - def set_input(self, fname):
209 """Sets the input file name. 210 """ 211 self.set_parameter("input", "InputFile=" + fname)
212 213
214 -class GenePopController(object):
215 - def __init__(self, genepop_dir=None):
216 """Initializes the controller. 217 218 genepop_dir is the directory where GenePop is. 219 220 The binary should be called Genepop (capital G) 221 """ 222 self.controller = _GenePopCommandline(genepop_dir)
223
224 - def _get_opts(self, dememorization, batches, iterations, enum_test=None):
225 opts = {} 226 opts["Dememorization"] = dememorization 227 opts["BatchNumber"] = batches 228 opts["BatchLength"] = iterations 229 if enum_test is not None: 230 if enum_test is True: 231 opts["HWtests"] = "Enumeration" 232 else: 233 opts["HWtests"] = "MCMC" 234 return opts
235
236 - def _run_genepop(self, extensions, option, fname, opts={}):
237 cwd = os.getcwd() 238 temp_dir = tempfile.mkdtemp() 239 os.chdir(temp_dir) 240 self.controller.set_menu(option) 241 if os.path.isabs(fname): 242 self.controller.set_input(fname) 243 else: 244 self.controller.set_input(cwd + os.sep + fname) 245 for opt in opts: 246 self.controller.set_parameter(opt, opt + "=" + str(opts[opt])) 247 self.controller() # checks error level is zero 248 os.chdir(cwd) 249 shutil.rmtree(temp_dir) 250 return
251
252 - def _test_pop_hz_both(self, fname, type, ext, enum_test=True, 253 dememorization=10000, batches=20, 254 iterations=5000):
255 """Hardy-Weinberg test for heterozygote deficiency/excess. 256 257 Returns a population iterator containg 258 A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 259 Some loci have a None if the info is not available 260 SE might be none (for enumerations) 261 """ 262 opts = self._get_opts(dememorization, batches, iterations, enum_test) 263 self._run_genepop([ext], [1, type], fname, opts) 264 265 def hw_func(self): 266 return _hw_func(self.stream, False)
267 268 return _FileIterator(hw_func, fname + ext)
269
270 - def _test_global_hz_both(self, fname, type, ext, enum_test=True, 271 dememorization=10000, batches=20, 272 iterations=5000):
273 """Global Hardy-Weinberg test for heterozygote deficiency/excess. 274 275 Returns a triple with: 276 A list per population containg 277 (pop_name, P-val, SE, switches) 278 Some pops have a None if the info is not available 279 SE might be none (for enumerations) 280 A list per loci containg 281 (locus_name, P-val, SE, switches) 282 Some loci have a None if the info is not available 283 SE might be none (for enumerations) 284 Overall results (P-val, SE, switches) 285 286 """ 287 opts = self._get_opts(dememorization, batches, iterations, enum_test) 288 self._run_genepop([ext], [1, type], fname, opts) 289 290 def hw_pop_func(self): 291 return _read_table(self.stream, [str, _gp_float, _gp_float, _gp_float])
292 293 with open(fname + ext) as f1: 294 l = f1.readline() 295 while "by population" not in l: 296 l = f1.readline() 297 pop_p = _read_table(f1, [str, _gp_float, _gp_float, _gp_float]) 298 with open(fname + ext) as f2: 299 l = f2.readline() 300 while "by locus" not in l: 301 l = f2.readline() 302 loc_p = _read_table(f2, [str, _gp_float, _gp_float, _gp_float]) 303 with open(fname + ext) as f: 304 l = f.readline() 305 while "all locus" not in l: 306 l = f.readline() 307 f.readline() 308 f.readline() 309 f.readline() 310 f.readline() 311 l = f.readline().rstrip() 312 p, se, switches = tuple(_gp_float(x) for x in [y for y in l.split(" ") if y != ""]) 313 return pop_p, loc_p, (p, se, switches) 314 315 #1.1
316 - def test_pop_hz_deficiency(self, fname, enum_test=True, 317 dememorization=10000, batches=20, 318 iterations=5000):
319 """Hardy-Weinberg test for heterozygote deficiency. 320 321 Returns a population iterator containg 322 A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 323 Some loci have a None if the info is not available 324 SE might be none (for enumerations) 325 """ 326 return self._test_pop_hz_both(fname, 1, ".D", enum_test, 327 dememorization, batches, iterations)
328 329 #1.2
330 - def test_pop_hz_excess(self, fname, enum_test=True, 331 dememorization=10000, batches=20, 332 iterations=5000):
333 """Hardy-Weinberg test for heterozygote deficiency. 334 335 Returns a population iterator containg 336 A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 337 Some loci have a None if the info is not available 338 SE might be none (for enumerations) 339 """ 340 return self._test_pop_hz_both(fname, 2, ".E", enum_test, 341 dememorization, batches, iterations)
342 343 #1.3 P file
344 - def test_pop_hz_prob(self, fname, ext, enum_test=False, 345 dememorization=10000, batches=20, 346 iterations=5000):
347 """Hardy-Weinberg test based on probability. 348 349 Returns 2 iterators and a final tuple: 350 351 1. Returns a loci iterator containing 352 b. A dictionary[pop_pos]=(P-val, SE, Fis-WC, Fis-RH, steps) 353 Some pops have a None if the info is not available 354 SE might be none (for enumerations) 355 c. Result of Fisher's test (Chi2, deg freedom, prob) 356 2. Returns a population iterator containg 357 a. A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 358 Some loci have a None if the info is not available 359 SE might be none (for enumerations) 360 b. Result of Fisher's test (Chi2, deg freedom, prob) 361 3. (Chi2, deg freedom, prob) 362 """ 363 opts = self._get_opts(dememorization, batches, iterations, enum_test) 364 self._run_genepop([ext], [1, 3], fname, opts) 365 366 def hw_prob_loci_func(self): 367 return _hw_func(self.stream, True, True)
368 369 def hw_prob_pop_func(self): 370 return _hw_func(self.stream, False, True) 371 372 shutil.copyfile(fname + ".P", fname + ".P2") 373 374 return _FileIterator(hw_prob_loci_func, fname + ".P"), _FileIterator(hw_prob_pop_func, fname + ".P2") 375 376 #1.4
377 - def test_global_hz_deficiency(self, fname, enum_test=True, 378 dememorization=10000, batches=20, 379 iterations=5000):
380 """Global Hardy-Weinberg test for heterozygote deficiency. 381 382 Returns a triple with: 383 An list per population containg 384 (pop_name, P-val, SE, switches) 385 Some pops have a None if the info is not available 386 SE might be none (for enumerations) 387 An list per loci containg 388 (locus_name, P-val, SE, switches) 389 Some loci have a None if the info is not available 390 SE might be none (for enumerations) 391 Overall results (P-val, SE, switches) 392 """ 393 return self._test_global_hz_both(fname, 4, ".DG", enum_test, 394 dememorization, batches, iterations)
395 396 #1.5
397 - def test_global_hz_excess(self, fname, enum_test=True, 398 dememorization=10000, batches=20, 399 iterations=5000):
400 """Global Hardy-Weinberg test for heterozygote excess. 401 402 Returns a triple with: 403 An list per population containg 404 (pop_name, P-val, SE, switches) 405 Some pops have a None if the info is not available 406 SE might be none (for enumerations) 407 An list per loci containg 408 (locus_name, P-val, SE, switches) 409 Some loci have a None if the info is not available 410 SE might be none (for enumerations) 411 Overall results (P-val, SE, switches) 412 """ 413 return self._test_global_hz_both(fname, 5, ".EG", enum_test, 414 dememorization, batches, iterations)
415 416 #2.1
417 - def test_ld(self, fname, dememorization=10000, 418 batches=20, iterations=5000):
419 opts = self._get_opts(dememorization, batches, iterations) 420 self._run_genepop([".DIS"], [2, 1], fname, opts) 421 422 def ld_pop_func(self): 423 current_pop = None 424 l = self.stream.readline().rstrip() 425 if l == "": 426 self.done = True 427 raise StopIteration 428 toks = [x for x in l.split(" ") if x != ""] 429 pop, locus1, locus2 = toks[0], toks[1], toks[2] 430 if not hasattr(self, "start_locus1"): 431 start_locus1, start_locus2 = locus1, locus2 432 current_pop = -1 433 if locus1 == start_locus1 and locus2 == start_locus2: 434 current_pop += 1 435 if toks[3] == "No": 436 return current_pop, pop, (locus1, locus2), None 437 p, se, switches = _gp_float(toks[3]), _gp_float(toks[4]), _gp_int(toks[5]) 438 return current_pop, pop, (locus1, locus2), (p, se, switches)
439 440 def ld_func(self): 441 l = self.stream.readline().rstrip() 442 if l == "": 443 self.done = True 444 raise StopIteration 445 toks = [x for x in l.split(" ") if x != ""] 446 locus1, locus2 = toks[0], toks[2] 447 try: 448 chi2, df, p = _gp_float(toks[3]), _gp_int(toks[4]), _gp_float(toks[5]) 449 except ValueError: 450 return (locus1, locus2), None 451 return (locus1, locus2), (chi2, df, p) 452 453 f1 = open(fname + ".DIS") 454 l = f1.readline() 455 while "----" not in l: 456 l = f1.readline() 457 shutil.copyfile(fname + ".DIS", fname + ".DI2") 458 f2 = open(fname + ".DI2") 459 l = f2.readline() 460 while "Locus pair" not in l: 461 l = f2.readline() 462 while "----" not in l: 463 l = f2.readline() 464 return (_FileIterator(ld_pop_func, fname + ".DIS", f1), 465 _FileIterator(ld_func, fname + ".DI2", f2)) 466 467 #2.2
468 - def create_contingency_tables(self, fname):
469 raise NotImplementedError
470 471 #3.1 PR/GE files
472 - def test_genic_diff_all(self, fname, dememorization=10000, 473 batches=20, iterations=5000):
474 raise NotImplementedError
475 476 #3.2 PR2/GE2 files
477 - def test_genic_diff_pair(self, fname, dememorization=10000, 478 batches=20, iterations=5000):
479 raise NotImplementedError
480 481 #3.3 G files
482 - def test_genotypic_diff_all(self, fname, dememorization=10000, 483 batches=20, iterations=5000):
484 raise NotImplementedError
485 486 #3.4 2G2 files
487 - def test_genotypic_diff_pair(self, fname, dememorization=10000, 488 batches=20, iterations=5000):
489 raise NotImplementedError
490 491 #4
492 - def estimate_nm(self, fname):
493 self._run_genepop(["PRI"], [4], fname) 494 with open(fname + ".PRI") as f: 495 lines = f.readlines() # Small file, it is ok 496 for line in lines: 497 m = re.search("Mean sample size: ([.0-9]+)", line) 498 if m is not None: 499 mean_sample_size = _gp_float(m.group(1)) 500 m = re.search("Mean frequency of private alleles p\(1\)= ([.0-9]+)", line) 501 if m is not None: 502 mean_priv_alleles = _gp_float(m.group(1)) 503 m = re.search("N=10: ([.0-9]+)", line) 504 if m is not None: 505 mig10 = _gp_float(m.group(1)) 506 m = re.search("N=25: ([.0-9]+)", line) 507 if m is not None: 508 mig25 = _gp_float(m.group(1)) 509 m = re.search("N=50: ([.0-9]+)", line) 510 if m is not None: 511 mig50 = _gp_float(m.group(1)) 512 m = re.search("for size= ([.0-9]+)", line) 513 if m is not None: 514 mig_corrected = _gp_float(m.group(1)) 515 os.remove(fname + ".PRI") 516 return mean_sample_size, mean_priv_alleles, mig10, mig25, mig50, mig_corrected
517 518 #5.1
519 - def calc_allele_genotype_freqs(self, fname):
520 """Calculates allele and genotype frequencies per locus and per sample. 521 522 Parameters: 523 fname - file name 524 525 Returns tuple with 2 elements: 526 Population iterator with 527 population name 528 Locus dictionary with key = locus name and content tuple as 529 Genotype List with 530 (Allele1, Allele2, observed, expected) 531 (expected homozygotes, observed hm, 532 expected heterozygotes, observed ht) 533 Allele frequency/Fis dictionary with allele as key and 534 (count, frequency, Fis Weir & Cockerham) 535 Totals as a pair 536 count 537 Fis Weir & Cockerham, 538 Fis Robertson & Hill 539 Locus iterator with 540 Locus name 541 allele list 542 Population list with a triple 543 population name 544 list of allele frequencies in the same order as allele list above 545 number of genes 546 547 Will create a file called fname.INF 548 """ 549 self._run_genepop(["INF"], [5, 1], fname) 550 #First pass, general information 551 #num_loci = None 552 #num_pops = None 553 #with open(fname + ".INF") as f: 554 #l = f.readline() 555 #while (num_loci is None or num_pops is None) and l != '': 556 #m = re.search("Number of populations detected : ([0-9+])", l) 557 #if m is not None: 558 #num_pops = _gp_int(m.group(1)) 559 #m = re.search("Number of loci detected : ([0-9+])", l) 560 #if m is not None: 561 #num_loci = _gp_int(m.group(1)) 562 #l = f.readline() 563 564 def pop_parser(self): 565 if hasattr(self, "old_line"): 566 l = self.old_line 567 del self.old_line 568 else: 569 l = self.stream.readline() 570 loci_content = {} 571 while l != '': 572 l = l.rstrip() 573 if "Tables of allelic frequencies for each locus" in l: 574 return self.curr_pop, loci_content 575 match = re.match(".*Pop: (.+) Locus: (.+)", l) 576 if match is not None: 577 pop = match.group(1) 578 locus = match.group(2) 579 if not hasattr(self, "first_locus"): 580 self.first_locus = locus 581 if hasattr(self, "curr_pop"): 582 if self.first_locus == locus: 583 old_pop = self.curr_pop 584 #self.curr_pop = pop 585 self.old_line = l 586 del self.first_locus 587 del self.curr_pop 588 return old_pop, loci_content 589 self.curr_pop = pop 590 else: 591 l = self.stream.readline() 592 continue 593 geno_list = [] 594 l = self.stream.readline() 595 if "No data" in l: 596 continue 597 598 while "Genotypes Obs." not in l: 599 l = self.stream.readline() 600 601 while l != "\n": 602 m2 = re.match(" +([0-9]+) , ([0-9]+) *([0-9]+) *(.+)", l) 603 if m2 is not None: 604 geno_list.append((_gp_int(m2.group(1)), 605 _gp_int(m2.group(2)), 606 _gp_int(m2.group(3)), 607 _gp_float(m2.group(4)))) 608 else: 609 l = self.stream.readline() 610 continue 611 l = self.stream.readline() 612 613 while "Expected number of ho" not in l: 614 l = self.stream.readline() 615 expHo = _gp_float(l[38:]) 616 l = self.stream.readline() 617 obsHo = _gp_int(l[38:]) 618 l = self.stream.readline() 619 expHe = _gp_float(l[38:]) 620 l = self.stream.readline() 621 obsHe = _gp_int(l[38:]) 622 l = self.stream.readline() 623 624 while "Sample count" not in l: 625 l = self.stream.readline() 626 l = self.stream.readline() 627 freq_fis = {} 628 overall_fis = None 629 while "----" not in l: 630 vals = [x for x in l.rstrip().split(' ') if x != ''] 631 if vals[0] == "Tot": 632 overall_fis = (_gp_int(vals[1]), 633 _gp_float(vals[2]), 634 _gp_float(vals[3])) 635 else: 636 freq_fis[_gp_int(vals[0])] = (_gp_int(vals[1]), 637 _gp_float(vals[2]), 638 _gp_float(vals[3])) 639 l = self.stream.readline() 640 loci_content[locus] = (geno_list, 641 (expHo, obsHo, expHe, obsHe), 642 freq_fis, overall_fis) 643 self.done = True 644 raise StopIteration
645 646 def locus_parser(self): 647 l = self.stream.readline() 648 while l != "": 649 l = l.rstrip() 650 match = re.match(" Locus: (.+)", l) 651 if match is not None: 652 locus = match.group(1) 653 alleles, table = _read_allele_freq_table(self.stream) 654 return locus, alleles, table 655 l = self.stream.readline() 656 self.done = True 657 raise StopIteration 658 659 shutil.copyfile(fname + ".INF", fname + ".IN2") 660 pop_iter = _FileIterator(pop_parser, fname + ".INF") 661 locus_iter = _FileIterator(locus_parser, fname + ".IN2") 662 return (pop_iter, locus_iter) 663
664 - def _calc_diversities_fis(self, fname, ext):
665 self._run_genepop([ext], [5, 2], fname) 666 with open(fname + ext) as f: 667 l = f.readline() 668 while l != "": 669 l = l.rstrip() 670 if l.startswith("Statistics per sample over all loci with at least two individuals typed"): 671 avg_fis = _read_table(f, [str, _gp_float, _gp_float, _gp_float]) 672 avg_Qintra = _read_table(f, [str, _gp_float]) 673 l = f.readline() 674 675 def fis_func(self): 676 l = self.stream.readline() 677 while l != "": 678 l = l.rstrip() 679 m = re.search("Locus: (.+)", l) 680 if m is not None: 681 locus = m.group(1) 682 self.stream.readline() 683 if "No complete" in self.stream.readline(): 684 return locus, None 685 self.stream.readline() 686 fis_table = _read_table(self.stream, [str, _gp_float, _gp_float, _gp_float]) 687 self.stream.readline() 688 avg_qinter, avg_fis = tuple(_gp_float(x) for x in 689 [y for y in self.stream.readline().split(" ") if y != ""]) 690 return locus, fis_table, avg_qinter, avg_fis 691 l = self.stream.readline() 692 self.done = True 693 raise StopIteration
694 695 return _FileIterator(fis_func, fname + ext), avg_fis, avg_Qintra 696 697 #5.2
698 - def calc_diversities_fis_with_identity(self, fname):
699 return self._calc_diversities_fis(fname, ".DIV")
700 701 #5.3
702 - def calc_diversities_fis_with_size(self, fname):
703 raise NotImplementedError
704 705 #6.1 Less genotype frequencies
706 - def calc_fst_all(self, fname):
707 """Executes GenePop and gets Fst/Fis/Fit (all populations) 708 709 Parameters: 710 fname - file name 711 712 Returns: 713 (multiLocusFis, multiLocusFst, multiLocus Fit), 714 Iterator of tuples 715 (Locus name, Fis, Fst, Fit, Qintra, Qinter) 716 717 Will create a file called fname.FST . 718 719 This does not return the genotype frequencies. 720 """ 721 self._run_genepop([".FST"], [6, 1], fname) 722 with open(fname + ".FST") as f: 723 l = f.readline() 724 while l != '': 725 if l.startswith(' All:'): 726 toks = [x for x in l.rstrip().split(' ') if x != ""] 727 try: 728 allFis = _gp_float(toks[1]) 729 except ValueError: 730 allFis = None 731 try: 732 allFst = _gp_float(toks[2]) 733 except ValueError: 734 allFst = None 735 try: 736 allFit = _gp_float(toks[3]) 737 except ValueError: 738 allFit = None 739 l = f.readline() 740 741 def proc(self): 742 if hasattr(self, "last_line"): 743 l = self.last_line 744 del self.last_line 745 else: 746 l = self.stream.readline() 747 locus = None 748 fis = None 749 fst = None 750 fit = None 751 qintra = None 752 qinter = None 753 while l != '': 754 l = l.rstrip() 755 if l.startswith(' Locus:'): 756 if locus is not None: 757 self.last_line = l 758 return locus, fis, fst, fit, qintra, qinter 759 else: 760 locus = l.split(':')[1].lstrip() 761 elif l.startswith('Fis^='): 762 fis = _gp_float(l.split(' ')[1]) 763 elif l.startswith('Fst^='): 764 fst = _gp_float(l.split(' ')[1]) 765 elif l.startswith('Fit^='): 766 fit = _gp_float(l.split(' ')[1]) 767 elif l.startswith('1-Qintra^='): 768 qintra = _gp_float(l.split(' ')[1]) 769 elif l.startswith('1-Qinter^='): 770 qinter = _gp_float(l.split(' ')[1]) 771 return locus, fis, fst, fit, qintra, qinter 772 l = self.stream.readline() 773 if locus is not None: 774 return locus, fis, fst, fit, qintra, qinter 775 self.stream.close() 776 self.done = True 777 raise StopIteration
778 return (allFis, allFst, allFit), _FileIterator(proc, fname + ".FST") 779 780 #6.2
781 - def calc_fst_pair(self, fname):
782 self._run_genepop([".ST2", ".MIG"], [6, 2], fname) 783 with open(fname + ".ST2") as f: 784 l = f.readline() 785 while l != "": 786 l = l.rstrip() 787 if l.startswith("Estimates for all loci"): 788 avg_fst = _read_headed_triangle_matrix(f) 789 l = f.readline() 790 791 def loci_func(self): 792 l = self.stream.readline() 793 while l != "": 794 l = l.rstrip() 795 m = re.search(" Locus: (.+)", l) 796 if m is not None: 797 locus = m.group(1) 798 matrix = _read_headed_triangle_matrix(self.stream) 799 return locus, matrix 800 l = self.stream.readline() 801 self.done = True 802 raise StopIteration
803 804 os.remove(fname + ".MIG") 805 return _FileIterator(loci_func, fname + ".ST2"), avg_fst 806 807 #6.3
808 - def calc_rho_all(self, fname):
809 raise NotImplementedError
810 811 #6.4
812 - def calc_rho_pair(self, fname):
813 raise NotImplementedError
814
815 - def _calc_ibd(self, fname, sub, stat="a", scale="Log", min_dist=0.00001):
816 """Calculates isolation by distance statistics 817 """ 818 self._run_genepop([".GRA", ".MIG", ".ISO"], [6, sub], 819 fname, opts={ 820 "MinimalDistance": min_dist, 821 "GeographicScale": scale, 822 "IsolBDstatistic": stat}) 823 with open(fname + ".ISO") as f: 824 f.readline() 825 f.readline() 826 f.readline() 827 f.readline() 828 estimate = _read_triangle_matrix(f) 829 f.readline() 830 f.readline() 831 distance = _read_triangle_matrix(f) 832 f.readline() 833 match = re.match("a = (.+), b = (.+)", f.readline().rstrip()) 834 a = _gp_float(match.group(1)) 835 b = _gp_float(match.group(2)) 836 f.readline() 837 f.readline() 838 match = re.match(" b=(.+)", f.readline().rstrip()) 839 bb = _gp_float(match.group(1)) 840 match = re.match(".*\[(.+) ; (.+)\]", f.readline().rstrip()) 841 bblow = _gp_float(match.group(1)) 842 bbhigh = _gp_float(match.group(2)) 843 os.remove(fname + ".MIG") 844 os.remove(fname + ".GRA") 845 os.remove(fname + ".ISO") 846 return estimate, distance, (a, b), (bb, bblow, bbhigh)
847 848 #6.5
849 - def calc_ibd_diplo(self, fname, stat="a", scale="Log", min_dist=0.00001):
850 """Calculates isolation by distance statistics for diploid data. 851 852 See _calc_ibd for parameter details. 853 Note that each pop can only have a single individual and 854 the individual name has to be the sample coordinates. 855 """ 856 return self._calc_ibd(fname, 5, stat, scale, min_dist)
857 858 #6.6
859 - def calc_ibd_haplo(self, fname, stat="a", scale="Log", min_dist=0.00001):
860 """Calculates isolation by distance statistics for haploid data. 861 862 See _calc_ibd for parameter details. 863 Note that each pop can only have a single individual and 864 the individual name has to be the sample coordinates. 865 """ 866 return self._calc_ibd(fname, 6, stat, scale, min_dist)
867