Package Bio :: Package PopGen :: Package GenePop
[hide private]
[frames | no frames]

Source Code for Package Bio.PopGen.GenePop

  1  # Copyright 2007 by Tiago Antao.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """ 
  7  This module provides code to work with GenePop. 
  8   
  9  See http://wbiomed.curtin.edu.au/genepop/ , the format is documented 
 10  here: http://wbiomed.curtin.edu.au/genepop/help_input.html . 
 11   
 12  Classes: 
 13  Record           Holds GenePop data. 
 14   
 15  Functions: 
 16  read             Parses a GenePop record (file) into a Record object. 
 17   
 18   
 19  Partially inspired by the MedLine code. 
 20   
 21  """ 
 22  from copy import deepcopy 
 23   
 24   
def get_indiv(line):
    """Parse one individual (genotype) line of a GenePop file.

    The line has the form ``<name>,<genotypes>`` where the genotypes are
    whitespace separated digit strings, one per locus.

    Parameters:
    line - The text line to parse.

    Returns a tuple ``(indiv_name, allele_list, marker_len)`` where
    indiv_name is the text before the last comma (not stripped),
    allele_list holds one tuple per genotype (2 alleles for diploid data,
    1 for haploid data; a zero allele code is returned as None) and
    marker_len is the number of digits per allele (2 or 3).

    Raises ValueError if the line has no comma or a genotype is not
    numeric, and IndexError if no genotype follows the comma.
    """

    def int_no_zero(val):
        """Return int(val), or None when the value is zero."""
        v = int(val)
        if v == 0:
            return None
        return v

    # Split on the LAST comma only: the genotype part is made of digits, so
    # any earlier comma belongs to the individual name.
    indiv_name, marker_line = line.rsplit(',', 1)
    # split() handles tabs, repeated blanks and a trailing newline; the
    # latter would otherwise stay attached to the last genotype and corrupt
    # the digit count used below.
    markers = marker_line.split()
    # The allele width is inferred from the first genotype only: 2 or 4
    # characters mean 2 digits per allele (haploid/diploid), anything else
    # is taken as 3 digits per allele.
    if len(markers[0]) in (2, 4):
        marker_len = 2
    else:
        marker_len = 3
    try:
        allele_list = [(int_no_zero(marker[:marker_len]),
                        int_no_zero(marker[marker_len:]))
                       for marker in markers]
    except ValueError:  # Haploid: the second allele slice is empty
        allele_list = [(int_no_zero(marker[:marker_len]),)
                       for marker in markers]
    return indiv_name, allele_list, marker_len
def read(handle):
    """Parses a handle containing a GenePop file.

    handle is a file-like object (any iterator over text lines) that
    contains a GenePop record.

    Returns a Record whose comment_line, loci_list, populations, pop_list
    and marker_len have been filled in. Each population is named after its
    last individual.

    Raises ValueError if no POP line is found, if a population has no
    individuals, or if an individual line cannot be parsed. The first two
    lines are fetched with next(), so a handle with fewer than two lines
    raises StopIteration.
    """
    record = Record()
    record.comment_line = str(next(handle)).rstrip()
    # We can have one locus per line, or all loci on a single line separated
    # by spaces and/or commas. Commas are turned into spaces (rather than
    # dropped) so that "a,b" yields two loci, and split() without arguments
    # discards the empty names that repeated blanks would otherwise create.
    sample_loci_line = str(next(handle)).rstrip().replace(',', ' ')
    record.loci_list.extend(sample_loci_line.split())
    for line in handle:
        line = line.rstrip()
        if line.strip().upper() == 'POP':
            break
        if line:  # a blank line is not a locus name
            record.loci_list.append(line)
    else:
        raise ValueError('No population data found, file probably not GenePop related')
    record.populations.append([])
    for line in handle:
        line = line.rstrip()
        if not line:
            # Blank lines (typically at the end of the file) carry no data.
            continue
        if line.strip().upper() == 'POP':
            record.populations.append([])
        else:
            # marker_len is overwritten for every individual, so the value
            # kept is the one derived from the last individual read.
            indiv_name, allele_list, record.marker_len = get_indiv(line)
            record.populations[-1].append((indiv_name, allele_list))
    n_loci = len(record.loci_list)
    for pop_i, pop in enumerate(record.populations):
        if not pop:
            raise ValueError('Population %d has no individuals' % (pop_i + 1))
        # The population is named after its last individual.
        record.pop_list.append(pop[-1][0])
        for _name, markers in pop:
            # Normalise a zero allele to None for every declared locus.
            # An individual with fewer genotypes than loci fails here with
            # IndexError.
            for mk_i in range(n_loci):
                markers[mk_i] = tuple(None if al == 0 else al
                                      for al in markers[mk_i])
    return record
class Record(object):
    """Holds information from a GenePop record.

    Members:
    marker_len         The marker length (2 or 3 digit code per allele).

    comment_line       Comment line.

    loci_list          List of loci names.

    pop_list           List of population names.

    populations        List of population data.

    In most genepop files, the population name is not trustable.
    It is strongly recommended that populations are referred by index.

    populations has one element per population. Each element is itself
    a list of individuals, each individual is a pair composed by individual
    name and a list of alleles (2 per marker or 1 for haploids): Example
    [
        [
            ('Ind1', [(1,2),    (3,3), (200,201)]),
            ('Ind2', [(2,None), (3,3), (None,None)]),
        ],
        [
            ('Other1', [(1,1),  (4,3), (200,200)]),
        ]
    ]

    """

    def __init__(self):
        """Create an empty record."""
        self.marker_len = 0
        self.comment_line = ""
        self.loci_list = []
        self.pop_list = []
        self.populations = []

    def __str__(self):
        """Returns (reconstructs) a GenePop textual representation.

        Loci are written one per line; every population starts with a
        'Pop' line and missing alleles (None) are written as zero.
        """
        rep = [self.comment_line + '\n']
        rep.append('\n'.join(self.loci_list) + '\n')
        for pop in self.populations:
            rep.append('Pop\n')
            for name, markers in pop:
                rep.append(name)
                rep.append(',')
                for marker in markers:
                    rep.append(' ')
                    for al in marker:
                        if al is None:
                            al = '0'
                        # Left-pad with zeros up to the allele width.
                        rep.append(str(al).rjust(self.marker_len, '0'))
                rep.append('\n')
        return "".join(rep)

    def split_in_pops(self, pop_names):
        """Splits a GP record in a dictionary with 1 pop per entry.

        Given a record with n pops and m loci returns a dictionary
        of records (key pop_name) where each item is a record
        with a single pop and m loci.

        Parameters:
        pop_names - Population names, one per population and in the same
                    order as self.populations.

        The loci list and the population data of each new record are deep
        copies. Raises IndexError if pop_names is shorter than the number
        of populations.
        """
        gp_pops = {}
        for i, pop in enumerate(self.populations):
            gp_pop = Record()
            gp_pop.marker_len = self.marker_len
            gp_pop.comment_line = self.comment_line
            gp_pop.loci_list = deepcopy(self.loci_list)
            gp_pop.populations = [deepcopy(pop)]
            gp_pops[pop_names[i]] = gp_pop
        return gp_pops

    def split_in_loci(self, gp=None):
        """Splits a GP record in a dictionary with 1 locus per entry.

        Given a record with n pops and m loci returns a dictionary
        of records (key locus name) where each item is a record
        with a single locus and n pops.

        Parameters:
        gp - Unused; accepted (and now optional) only so that existing
             callers that pass an argument keep working.
        """
        gp_loci = {}
        for i, locus in enumerate(self.loci_list):
            gp_pop = Record()
            gp_pop.marker_len = self.marker_len
            gp_pop.comment_line = self.comment_line
            gp_pop.loci_list = [locus]
            # Keep, for every individual, only the genotype of locus i.
            gp_pop.populations = [
                [(indiv[0], [indiv[1][i]]) for indiv in pop]
                for pop in self.populations
            ]
            gp_loci[locus] = gp_pop
        return gp_loci

    def remove_population(self, pos):
        """Removes a population (by position).

        When pop_list has one name per population, the matching name is
        removed as well so that names and data stay aligned. If the two
        lists differ in length, pop_list is left untouched.
        """
        # Evaluate before deleting: afterwards the lengths always differ.
        names_aligned = len(self.pop_list) == len(self.populations)
        del self.populations[pos]
        if names_aligned:
            del self.pop_list[pos]

    def remove_locus_by_position(self, pos):
        """Removes a locus by position.

        The locus name is dropped from loci_list and the genotype at the
        same position is dropped from every individual.
        """
        del self.loci_list[pos]
        for pop in self.populations:
            for _name, loci in pop:
                del loci[pos]

    def remove_locus_by_name(self, name):
        """Removes a locus by name.

        Only the first locus with that name is removed. An unknown name is
        silently ignored.
        """
        try:
            pos = self.loci_list.index(name)
        except ValueError:
            #If here than locus not existent... Maybe raise exception?
            #   Although it should be Ok... Just a boolean return, maybe?
            return
        self.remove_locus_by_position(pos)