Package Bio :: Package GA :: Module Organism
[hide private]
[frames | no frames]

Source Code for Module Bio.GA.Organism

  1  # This code is part of the Biopython distribution and governed by its 
  2  # license.  Please see the LICENSE file that should have been included 
  3  # as part of this package. 
  4  # 
  5   
  6  """Deal with an Organism in a Genetic Algorithm population. 
  7  """ 
  8  # standard modules 
  9  import sys  # for Python 3 hack 
 10  import random 
 11  import array 
 12   
 13  # Sequence objects from Biopython 
 14  from Bio.Seq import MutableSeq 
 15   
 16   
def function_population(new_genome, num_organisms, fitness_calculator):
    """Build a population whose genomes come from a factory callable.

    Arguments:

    o new_genome -- A function or callable object, called with no
    arguments once per organism, that returns a genome for a new
    organism. This new genome should be a MutableSeq object with a
    specified alphabet.

    o num_organisms -- The number of individuals we want in the population.

    o fitness_calculator -- A function that will calculate the fitness
    of the organism when given the organisms genome.

    Returns a list holding the newly created Organism objects, in the
    order in which they were created.
    """
    # One fresh genome per organism; the counter itself is never needed.
    return [Organism(new_genome(), fitness_calculator)
            for _ in range(num_organisms)]
38 39
def random_population(genome_alphabet, genome_size, num_organisms,
                      fitness_calculator):
    """Generate a population of individuals with randomly set genomes.

    Arguments:

    o genome_alphabet -- An Alphabet object describing all of the
    possible letters that could potentially be in the genome of an
    organism. Its letters attribute must be an indexable collection
    whose first element is a str, an int or a float.

    o genome_size -- The size of each organisms genome.

    o num_organisms -- The number of organisms we want in the population.

    o fitness_calculator -- A function that will calculate the fitness
    of the organism when given the organisms genome.

    Returns a list of the newly created Organism objects.

    Raises ValueError if the first letter of the alphabet is not a str,
    int or float.
    """
    letters = genome_alphabet.letters

    # figure out what type of characters are in the alphabet; only the
    # first letter is inspected and taken as representative of the rest
    first_letter = letters[0]
    if isinstance(first_letter, str):
        if sys.version_info[0] == 3:
            alphabet_type = "u"  # Use unicode string on Python 3
        else:
            alphabet_type = "c"  # Use byte string on Python 2
    elif isinstance(first_letter, int):
        alphabet_type = "i"
    elif isinstance(first_letter, float):
        alphabet_type = "d"
    else:
        # Wrap the letters in a one-element tuple: if the letters are
        # themselves a tuple, a bare "%" would unpack them as separate
        # format arguments and raise TypeError instead of this ValueError.
        raise ValueError(
            "Alphabet type is unsupported: %s" % (letters,))

    # a random number generator to get letters for the genome
    letter_rand = random.Random()

    all_orgs = []
    for _org_num in range(num_organisms):
        new_genome = MutableSeq(array.array(alphabet_type), genome_alphabet)

        # generate the genome randomly, one letter at a time
        for _gene_num in range(genome_size):
            new_genome.append(letter_rand.choice(letters))

        # add the new organism with this genome
        all_orgs.append(Organism(new_genome, fitness_calculator))

    return all_orgs
88 89
class Organism(object):
    """Represent a single individual in a population.

    Attributes:

    o genome -- The genome of the organism. This is a Bio.MutableSeq
    object that has the sequence of the genome, and the alphabet
    describing all elements that can be a part of the genome.

    o fitness -- The calculate fitness of the organism. This fitness is
    based on the last time it was calculated using the fitness_calculator.
    So... the fitness could potentially be out of date with the real genome
    if you are not careful to recalculate it after changes with
    recalculate_fitness()

    Organisms are compared by the string form of their genomes only; the
    fitness plays no part in comparisons. Comparing an organism with an
    object that has no genome attribute yields NotImplemented, so that
    Python falls back to its default handling.
    """

    def __init__(self, genome, fitness_calculator, start_fitness=None):
        """Initialize an organism

        Arguments:

        o genome -- A MutableSeq object representing the sequence of the
        genome.

        o fitness_calculator -- A function that will calculate the fitness
        of the organism when given the organisms genome.

        o start_fitness - the starting fitness corresponding with the
        given genome. If not supplied, the fitness will be calculated
        using fitness_calculator.
        """
        assert isinstance(genome, MutableSeq), "Genome must be a MutableSeq"

        self.genome = genome
        self._fitness_calc = fitness_calculator

        # calculate the fitness of the genome, unless the caller already
        # knows it (None means "not supplied")
        if start_fitness is None:
            self.fitness = self._fitness_calc(self.genome)
        else:
            self.fitness = start_fitness

    def __str__(self):
        """Provide a string output for debugging.
        """
        return "Genome: %s; Fitness %s" % (str(self.genome), self.fitness)

    @staticmethod
    def _genome_text(candidate):
        """Return the genome of candidate as a string, or None if it has none.

        None is a safe marker here because str() never returns None.
        """
        try:
            genome = candidate.genome
        except AttributeError:
            return None
        return str(genome)

    def __eq__(self, other):
        """Compare organisms by their genomes (as strings of letters).
        """
        # See Bio/Seq.py and the comments there about shifting to
        # using simple string equality. Previously Seq objects used
        # object equality, while MutableSeq objects used alphabet
        # aware string equality.
        other_text = self._genome_text(other)
        if other_text is None:
            # other is not organism-like; let Python decide
            return NotImplemented
        return str(self.genome) == other_text

    def __ne__(self, other):
        """Compare organisms by their genomes (as strings of letters).
        """
        other_text = self._genome_text(other)
        if other_text is None:
            return NotImplemented
        return str(self.genome) != other_text

    def __lt__(self, other):
        """Compare organisms by their genomes (as strings of letters).
        """
        other_text = self._genome_text(other)
        if other_text is None:
            return NotImplemented
        return str(self.genome) < other_text

    def __le__(self, other):
        """Compare organisms by their genomes (as strings of letters).
        """
        other_text = self._genome_text(other)
        if other_text is None:
            return NotImplemented
        return str(self.genome) <= other_text

    def __gt__(self, other):
        """Compare organisms by their genomes (as strings of letters).
        """
        other_text = self._genome_text(other)
        if other_text is None:
            return NotImplemented
        return str(self.genome) > other_text

    def __ge__(self, other):
        """Compare organisms by their genomes (as strings of letters).
        """
        other_text = self._genome_text(other)
        if other_text is None:
            return NotImplemented
        return str(self.genome) >= other_text

    def copy(self):
        """Return a copy of the organism.

        This makes it easy to duplicate an organism before changing it.
        The genome is copied by slicing; the fitness calculator is shared
        and the current fitness is carried over without being recalculated.
        """
        copy_genome = self.genome[:]
        return Organism(copy_genome, self._fitness_calc, self.fitness)

    def recalculate_fitness(self):
        """Calculate and reset the fitness of the current genome

        This should be called after the genome is updated to ensure that
        fitness always stays in sync with the current genome.
        """
        self.fitness = self._fitness_calc(self.genome)
185