Package Bio :: Package PopGen :: Package GenePop
[hide private]
[frames] | no frames]

Source Code for Package Bio.PopGen.GenePop

  1  # Copyright 2007 by Tiago Antao.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """ 
  7  This module provides code to work with GenePop. 
  8   
  9  See http://wbiomed.curtin.edu.au/genepop/ , the format is documented 
 10  here: http://wbiomed.curtin.edu.au/genepop/help_input.html . 
 11   
 12  Classes: 
 13  Record           Holds GenePop data. 
 14   
 15  Functions: 
 16  read             Parses a GenePop record (file) into a Record object. 
 17   
 18   
 19  Partially inspired by the MedLine code. 
 20   
 21  """ 
 22  from copy import deepcopy 
 23   
 24  __docformat__ = "restructuredtext en" 
 25   
 26   
def get_indiv(line):
    """Parse the line describing one individual of a GenePop file.

    The line is expected to look like ``name, 0101 0202 ...``: the text
    before the comma is the individual name, the text after it is a list
    of markers separated by spaces and/or tabs.

    Returns a tuple ``(indiv_name, allele_list, marker_len)`` where
    allele_list holds one tuple per marker (two alleles for diploid data,
    one for haploid data) and marker_len is the number of digits used per
    allele (2 or 3).  An allele coded as zero is returned as None.

    Raises ValueError if the line does not contain exactly one comma and
    IndexError if no marker follows the comma.
    """
    def to_allele(digits):
        """Convert a digit string to an int, mapping zero to None."""
        number = int(digits)
        return None if number == 0 else number

    indiv_name, genotype_text = line.split(',')
    # Tabs and spaces are both accepted as separators; runs of separators
    # produce empty tokens, which are dropped.
    tokens = [tok
              for tok in genotype_text.replace('\t', ' ').split(' ')
              if tok != '']
    # The first marker decides the allele width: 2 characters (haploid) or
    # 4 characters (diploid) mean 2 digits per allele, anything else 3.
    marker_len = 2 if len(tokens[0]) in [2, 4] else 3
    try:
        allele_list = []
        for tok in tokens:
            first = to_allele(tok[:marker_len])
            second = to_allele(tok[marker_len:])
            allele_list.append((first, second))
    except ValueError:
        # Converting the second half failed, so the data is taken to be
        # haploid: a single allele per marker.
        allele_list = [(to_allele(tok[:marker_len]),) for tok in tokens]
    return indiv_name, allele_list, marker_len
def read(handle):
    """Parse a handle containing a GenePop file into a Record.

    handle is a file-like object (any iterator over lines) that contains
    a GenePop record.

    The first line is stored as the comment line.  The following lines, up
    to the first ``POP`` line (case-insensitive), are loci names.  After
    that, every line is either another ``POP`` separator or the data of
    one individual.

    Raises ValueError if no ``POP`` line is found.
    """
    rec = Record()
    rec.comment_line = str(next(handle)).rstrip()
    # Loci may come one per line or all on a single line separated by
    # either space or comma+space.  Commas are simply removed from the
    # first loci line before it is split on spaces.
    first_loci_line = str(next(handle)).rstrip()
    rec.loci_list.extend(first_loci_line.replace(',', '').split(' '))
    for raw in handle:
        text = raw.rstrip()
        if text.upper() == 'POP':
            break
        rec.loci_list.append(text)
    else:
        # The handle was exhausted without ever meeting a POP line.
        raise ValueError('No population data found, file probably not GenePop related')

    current_pop = []
    rec.populations.append(current_pop)
    for raw in handle:
        text = raw.rstrip()
        if text.upper() == 'POP':
            # Start collecting individuals for the next population.
            current_pop = []
            rec.populations.append(current_pop)
            continue
        name, alleles, rec.marker_len = get_indiv(text)
        current_pop.append((name, alleles))

    n_loci = len(rec.loci_list)
    for pop in rec.populations:
        # The population is named after its last individual.
        rec.pop_list.append(pop[-1][0])
        for _, markers in pop:
            for idx in range(n_loci):
                # Normalise each marker to a tuple with zeros as None.
                markers[idx] = tuple(None if al == 0 else al
                                     for al in markers[idx])
    return rec
92 93
class Record(object):
    """Holds information from a GenePop record.

    Members:

        - marker_len         The marker length (2 or 3 digit code per allele).

        - comment_line       Comment line.

        - loci_list          List of loci names.

        - pop_list           List of population names.

        - populations        List of population data.

    In most genepop files, the population name is not trustable.
    It is strongly recommended that populations are referred by index.

    populations has one element per population. Each element is itself
    a list of individuals, each individual is a pair composed by individual
    name and a list of alleles (2 per marker or 1 for haploids):
    Example::

        [
            [
                ('Ind1', [(1,2),    (3,3), (200,201)]),
                ('Ind2', [(2,None), (3,3), (None,None)]),
            ],
            [
                ('Other1', [(1,1),  (4,3), (200,200)]),
            ]
        ]

    """

    def __init__(self):
        """Create an empty record."""
        self.marker_len = 0
        self.comment_line = ""
        self.loci_list = []
        self.pop_list = []
        self.populations = []

    def __str__(self):
        """Returns (reconstructs) a GenePop textual representation.

        The output has the comment line, one locus name per line, and then
        for every population a ``Pop`` line followed by one line per
        individual.  Alleles are left-padded with zeros to marker_len
        digits; a None allele is written as zero.
        """
        width = self.marker_len
        rep = [self.comment_line + '\n',
               '\n'.join(self.loci_list) + '\n']
        for pop in self.populations:
            rep.append('Pop\n')
            for name, markers in pop:
                rep.append(name)
                rep.append(',')
                for marker in markers:
                    rep.append(' ')
                    for al in marker:
                        # rjust pads on the left with '0' exactly like
                        # prepending zeros one at a time.
                        rep.append(str(0 if al is None else al).rjust(width, '0'))
                rep.append('\n')
        return "".join(rep)

    def split_in_pops(self, pop_names):
        """Splits a GP record in a dictionary with 1 pop per entry.

        Given a record with n pops and m loci returns a dictionary
        of records (key pop_name) where each item is a record
        with a single pop and m loci.

        Parameters:
            pop_names - Population names, indexed like self.populations.

        The returned records hold deep copies of the loci list and of the
        population data; their pop_list is left empty.
        """
        gp_pops = {}
        for i, pop in enumerate(self.populations):
            gp_pop = Record()
            gp_pop.marker_len = self.marker_len
            gp_pop.comment_line = self.comment_line
            gp_pop.loci_list = deepcopy(self.loci_list)
            gp_pop.populations = [deepcopy(pop)]
            gp_pops[pop_names[i]] = gp_pop
        return gp_pops

    def split_in_loci(self, gp=None):
        """Splits a GP record in a dictionary with 1 locus per entry.

        Given a record with n pops and m loci returns a dictionary
        of records (key locus name) where each item is a record
        with a single locus and n pops.

        Parameters:
            gp - Unused; accepted (and now optional) only so that existing
                 callers that pass an argument keep working.
        """
        gp_loci = {}
        for i, locus in enumerate(self.loci_list):
            gp_pop = Record()
            gp_pop.marker_len = self.marker_len
            gp_pop.comment_line = self.comment_line
            gp_pop.loci_list = [locus]
            # Keep every individual, but only its marker for this locus.
            gp_pop.populations = [
                [(indiv[0], [indiv[1][i]]) for indiv in pop]
                for pop in self.populations
            ]
            gp_loci[locus] = gp_pop
        return gp_loci

    def remove_population(self, pos):
        """Removes a population (by position).

        Only self.populations is modified; self.pop_list is left as is.
        """
        del self.populations[pos]

    def remove_locus_by_position(self, pos):
        """Removes a locus by position.

        The locus name is dropped from loci_list and the marker at the
        same position is dropped from every individual.
        """
        del self.loci_list[pos]
        for pop in self.populations:
            for _name, loci in pop:
                del loci[pos]

    def remove_locus_by_name(self, name):
        """Removes a locus by name.

        Only the first locus with that name is removed.  If no locus has
        that name the record is left untouched and no error is raised.
        """
        for i, locus in enumerate(self.loci_list):
            if locus == name:
                self.remove_locus_by_position(i)
                return
        # If here then the locus does not exist... Maybe raise exception?
        # Although it should be Ok... Just a boolean return, maybe?