1
2
3
4
5
6 """
7 Large file parsing of Genepop files
8
9 The standard parser loads the whole file into memory. This parser
10 provides an iterator over data.
11
12 Classes:
13 LargeRecord Holds GenePop data.
14
15 Functions:
16 read Parses a GenePop record (file) into a Record object.
17
18 """
19
20
def get_indiv(line):
    """Parse one individual line of a GenePop file.

    An individual line has the form ``name, marker marker ...``. Each
    marker is a run of digits encoding either one allele (haploid data)
    or two alleles (diploid data), using 2 or 3 digits per allele.

    Args:
        line: Text of a single individual line.

    Returns:
        A tuple ``(indiv_name, allele_list, marker_len)`` where
        ``indiv_name`` is the text before the separating comma (returned
        unchanged), ``allele_list`` holds one tuple of ints per marker
        (two ints for diploid data, one for haploid data) and
        ``marker_len`` is the number of digits per allele (2 or 3).

    Raises:
        ValueError: If the line has no comma, has no marker after the
            comma, or contains a marker that is not numeric.
    """
    # Split on the LAST comma: markers are digit strings and never contain
    # one, so any earlier comma belongs to the individual's name.
    indiv_name, marker_line = line.rsplit(',', 1)
    # split() without arguments breaks on any run of whitespace (spaces,
    # tabs, a trailing newline) and never produces empty fields.
    markers = marker_line.split()
    if not markers:
        raise ValueError("No marker data found in line: %r" % (line,))
    # The width of the first marker decides the allele width: 2 or 4
    # characters mean 2 digits per allele, anything else means 3.
    marker_len = 2 if len(markers[0]) in (2, 4) else 3
    try:
        allele_list = [(int(marker[:marker_len]), int(marker[marker_len:]))
                       for marker in markers]
    except ValueError:
        # Haploid data: nothing follows the first allele, so int('') above
        # raised. Re-parse every marker as a single allele.
        allele_list = [(int(marker[:marker_len]),) for marker in markers]
    return indiv_name, allele_list, marker_len
37
38
64
65
67 """Holds information from a GenePop record.
68
69 Members:
70 marker_len The marker length (2 or 3 digit code per allele).
71
72 comment_line Comment line.
73
74 loci_list List of loci names.
75
76 data_generator Iterates over population data.
77
78 The generator will only work once. If you want to read a handle
79 twice you have to re-open it!
80
81 Each item produced by data_generator is either () - an empty tuple -
82 marking the start of a new population, or an individual. An individual
83 is something like ('Ind1', [(1,1), (3,None), (200,201)]).
84 In the case above the individual is called Ind1 and
85 has three diploid loci. For the second locus, one of the alleles
86 is unknown.
87
88 """
# Initial state of a new record. The enclosing definition line is not
# visible in this excerpt; the assignments below are left untouched.
# Source of the remaining data lines; the population iterator reads it
# line by line after the stack has been drained.
90 self.handle = handle
# Digits per allele (2 or 3 according to the class docstring). 0 is a
# placeholder; presumably filled in by the reader function - verify.
91 self.marker_len = 0
# Comment line of the file.
92 self.comment_line = ""
# Names of the loci.
93 self.loci_list = []
# Not read or written by any code visible in this excerpt.
94 self.populations = []
# NOTE(review): the class docstring describes data_generator as the
# iterator over population data. If a method with this name is defined
# on the class, this instance attribute shadows it and calling
# record.data_generator() fails with "'NoneType' object is not callable".
# Confirm, and drop this assignment if so.
95 self.data_generator = None
# Lines queued here are yielded from before any line of self.handle.
96 self.stack = []
97
def data_generator(self):
    """Iterate over the population data of the record.

    Lines are taken first from ``self.stack`` and then from
    ``self.handle``; trailing whitespace is stripped from each line.
    Because the handle is consumed while iterating, the generator can
    only be run once per handle.

    Yields:
        ``()`` for every ``POP`` line (matched case-insensitively),
        marking the start of a new population. For any other line,
        a tuple ``(indiv_name, allele_list)`` as parsed by ``get_indiv``,
        with every allele equal to 0 replaced by ``None`` (unknown).
    """
    for source in (self.stack, self.handle):
        for line in source:
            line = line.rstrip()
            if line.upper() == 'POP':
                yield ()
                continue
            # The allele width returned by get_indiv is not needed here.
            indiv_name, allele_list, _ = get_indiv(line)
            # 0 is the GenePop code for a missing allele; expose it as None.
            yield indiv_name, [
                tuple(None if allele == 0 else allele for allele in locus)
                for locus in allele_list
            ]
    # Falling off the end finishes the generator. An explicit
    # ``raise StopIteration()`` here would be turned into RuntimeError
    # by PEP 479 (Python 3.7+).
117