Package Bio :: Package PopGen :: Package GenePop
[hide private]
[frames] | no frames]

Source Code for Package Bio.PopGen.GenePop

  1  # Copyright 2007 by Tiago Antao.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """ 
  7  This module provides code to work with GenePop. 
  8   
  9  See http://wbiomed.curtin.edu.au/genepop/ , the format is documented 
 10  here: http://wbiomed.curtin.edu.au/genepop/help_input.html . 
 11   
 12  Classes: 
 13  Record           Holds GenePop data. 
 14   
 15  Functions: 
 16  read             Parses a GenePop record (file) into a Record object. 
 17   
 18   
 19  Partially inspired by the MedLine code. 
 20   
 21  """ 
 22  from copy import deepcopy 
 23   
 24  __docformat__ = "restructuredtext en" 
 25   
def get_indiv(line):
    """Parse one individual line of a GenePop population section.

    line is expected to look like ``name, marker marker ...`` where the
    markers are separated by spaces and/or tabs.

    Returns a tuple ``(indiv_name, allele_list, marker_len)``:

     - indiv_name   The text before the comma, returned as-is.
     - allele_list  One tuple per marker: two alleles for diploid data,
                    one allele for haploid data. An allele coded as
                    zero is returned as None.
     - marker_len   Digits per allele (2 or 3), inferred from the first
                    marker only.

    Raises ValueError if the line does not contain exactly one comma or
    a marker is not numeric, and IndexError if no marker follows the
    comma.
    """
    def _allele(digits):
        # A zero code becomes None. int('') raises ValueError, which is
        # what signals haploid data in the try block below.
        return int(digits) or None

    indiv_name, marker_line = line.split(',')
    # Tabs count as separators; runs of separators yield empty tokens
    # that are dropped.
    tokens = [tok for tok in marker_line.replace('\t', ' ').split(' ')
              if tok]
    # A first marker of 2 or 4 characters means 2 digits per allele
    # (haploid or diploid respectively); anything else is read as 3.
    marker_len = 2 if len(tokens[0]) in (2, 4) else 3
    try:
        allele_list = [(_allele(tok[:marker_len]),
                        _allele(tok[marker_len:]))
                       for tok in tokens]
    except ValueError:
        # Haploid: there is no second allele after the first marker_len
        # digits, so keep a single allele per marker.
        allele_list = [(_allele(tok[:marker_len]),) for tok in tokens]
    return indiv_name, allele_list, marker_len
def read(handle):
    """Parse a handle containing a GenePop file and return a Record.

    handle is a file-like object (an iterator over text lines) that
    contains a GenePop record.

    The first line is stored as the comment line. Loci names are read
    from the following lines up to the first ``POP`` line (matched
    case-insensitively); every later ``POP`` line starts a new
    population and all other lines are parsed with get_indiv().

    Raises ValueError if no ``POP`` line is found. Errors raised by
    get_indiv() for malformed individual lines are propagated, as is
    IndexError for a population without individuals or an individual
    with fewer markers than there are loci.
    """
    record = Record()
    record.comment_line = str(next(handle)).rstrip()

    # Loci may come one per line or all on a single line, separated by
    # spaces or by comma+space. Commas are simply dropped from the
    # first loci line before it is split on spaces.
    first_loci_line = str(next(handle)).rstrip().replace(',', '')
    record.loci_list.extend(first_loci_line.split(' '))
    for line in handle:
        line = line.rstrip()
        if line.upper() == 'POP':
            break
        record.loci_list.append(line)
    else:
        # The handle ran out without ever reaching a POP marker.
        raise ValueError('No population data found, file probably not GenePop related')

    # The POP line consumed above opens the first population.
    current_pop = []
    record.populations.append(current_pop)
    for line in handle:
        line = line.rstrip()
        if line.upper() == 'POP':
            current_pop = []
            record.populations.append(current_pop)
            continue
        indiv_name, allele_list, record.marker_len = get_indiv(line)
        current_pop.append((indiv_name, allele_list))

    locus_count = len(record.loci_list)
    for pop in record.populations:
        # The population is named after its last individual.
        record.pop_list.append(pop[-1][0])
        for _name, markers in pop:
            # Rebuild each marker tuple, mapping any zero allele to
            # None. Indexing by locus also fails loudly when an
            # individual has fewer markers than there are loci.
            for idx in range(locus_count):
                markers[idx] = tuple(None if al == 0 else al
                                     for al in markers[idx])
    return record
91 92
class Record(object):
    """Holds information from a GenePop record.

    Members:

        - marker_len   The marker length (2 or 3 digit code per allele).

        - comment_line Comment line.

        - loci_list    List of loci names.

        - pop_list     List of population names.

        - populations  List of population data.

    In most genepop files, the population name is not trustable.
    It is strongly recommended that populations are referred by index.

    populations has one element per population. Each element is itself
    a list of individuals, each individual is a pair composed by individual
    name and a list of alleles (2 per marker or 1 for haploids):
    Example::

        [
            [
                ('Ind1', [(1,2),    (3,3), (200,201)]),
                ('Ind2', [(2,None), (3,3), (None,None)]),
            ],
            [
                ('Other1', [(1,1),  (4,3), (200,200)]),
            ]
        ]

    """

    def __init__(self):
        """Create an empty record (no loci, no populations)."""
        self.marker_len = 0
        self.comment_line = ""
        self.loci_list = []
        self.pop_list = []
        self.populations = []

    def __str__(self):
        """Return (reconstruct) a GenePop textual representation.

        The output is the comment line, one locus name per line, and
        then for each population a ``Pop`` line followed by one line
        per individual. Alleles are left-padded with zeros up to
        marker_len digits and a None allele is written as zero.
        """
        rep = [self.comment_line + '\n']
        rep.append('\n'.join(self.loci_list) + '\n')
        for pop in self.populations:
            rep.append('Pop\n')
            for name, markers in pop:
                rep.append(name)
                rep.append(',')
                for marker in markers:
                    rep.append(' ')
                    for al in marker:
                        code = '0' if al is None else str(al)
                        rep.append(code.rjust(self.marker_len, '0'))
                rep.append('\n')
        return "".join(rep)

    def split_in_pops(self, pop_names):
        """Split a GP record in a dictionary with 1 pop per entry.

        Given a record with n pops and m loci returns a dictionary
        of records (key pop_name) where each item is a record
        with a single pop and m loci.

        Parameters:
        pop_names - Population names, indexed like self.populations.
                    An IndexError is raised if there are fewer names
                    than populations.

        The returned records hold deep copies of the loci list and of
        the population data; their pop_list is left empty.
        """
        gp_pops = {}
        for i, pop in enumerate(self.populations):
            gp_pop = Record()
            gp_pop.marker_len = self.marker_len
            gp_pop.comment_line = self.comment_line
            gp_pop.loci_list = deepcopy(self.loci_list)
            gp_pop.populations = [deepcopy(pop)]
            gp_pops[pop_names[i]] = gp_pop
        return gp_pops

    def split_in_loci(self, gp=None):
        """Split a GP record in a dictionary with 1 locus per entry.

        Given a record with n pops and m loci returns a dictionary
        of records (key locus name) where each item is a record
        with a single locus and n pops.

        Parameters:
        gp - Unused. Kept (now optional) so that existing callers that
             pass a value keep working.

        The pop_list of the returned records is left empty.
        """
        gp_loci = {}
        for i, locus in enumerate(self.loci_list):
            gp_pop = Record()
            gp_pop.marker_len = self.marker_len
            gp_pop.comment_line = self.comment_line
            gp_pop.loci_list = [locus]
            gp_pop.populations = [
                [(name, [markers[i]]) for name, markers in pop]
                for pop in self.populations
            ]
            gp_loci[locus] = gp_pop
        return gp_loci

    def remove_population(self, pos):
        """Remove a population (by position).

        When pop_list has one name per population, the matching name is
        removed as well so that both lists stay aligned by index. A
        pop_list of any other length (for instance the empty one of a
        hand-built record) is left untouched.
        """
        names_tracked = len(self.pop_list) == len(self.populations)
        del self.populations[pos]
        if names_tracked:
            del self.pop_list[pos]

    def remove_locus_by_position(self, pos):
        """Remove a locus by position.

        The locus name is dropped from loci_list and the marker at that
        position is deleted, in place, from every individual.
        """
        del self.loci_list[pos]
        for pop in self.populations:
            for _name, loci in pop:
                del loci[pos]

    def remove_locus_by_name(self, name):
        """Remove a locus by name.

        Only the first locus with that name is removed. An unknown
        name is silently ignored.
        """
        try:
            pos = self.loci_list.index(name)
        except ValueError:
            # The locus does not exist. This is deliberately not an
            # error; callers get no indication either way.
            return
        self.remove_locus_by_position(pos)