Coverage for /builds/kinetik161/ase/ase/ga/population.py: 26.18%
531 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-12-10 11:04 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-12-10 11:04 +0000
1""" Implementation of a population for maintaining a GA population and
2proposing structures to pair. """
3from math import exp, sqrt, tanh
4from operator import itemgetter
6import numpy as np
8from ase.db.core import now
9from ase.ga import get_raw_score
def count_looks_like(a, all_cand, comp):
    """Count how many members of *all_cand* the comparator *comp*
    considers similar to *a*.

    Candidates sharing *a*'s 'confid' are excluded so a structure is
    never counted as looking like itself.
    """
    return sum(1 for other in all_cand
               if other.info['confid'] != a.info['confid']
               and comp.looks_like(a, other))
class Population:
    """Population class which maintains the current population
    and proposes which candidates to pair together.

    Parameters:

    data_connection: DataConnection object
        Interface to the GA database; used here to fetch relaxed
        candidates, pairing participation, and to kill candidates.

    population_size: int
        The number of candidates in the population.

    comparator: Comparator object
        this will tell if two configurations are equal.
        Default compare atoms objects directly.

    logfile: str
        Text file that contains information about the population
        The format is::

            timestamp: generation(if available): id1,id2,id3...

        Using this file greatly speeds up convergence checks.
        Default None meaning that no file is written.

    use_extinct: boolean
        Set this to True if mass extinction and the extinct key
        are going to be used. Default is False.

    rng: Random number generator
        By default numpy.random.
    """

    def __init__(self, data_connection, population_size,
                 comparator=None, logfile=None, use_extinct=False,
                 rng=np.random):
        self.dc = data_connection
        self.pop_size = population_size
        if comparator is None:
            # Imported lazily; presumably avoids a circular import at
            # module load time -- TODO confirm.
            from ase.ga.standard_comparators import AtomsComparator
            comparator = AtomsComparator()
        self.comparator = comparator
        self.logfile = logfile
        self.use_extinct = use_extinct
        self.rng = rng
        self.pop = []          # current population, kept best-first
        self.pairs = None      # pairs already used, set by __calc_participation__
        self.all_cand = None   # all relaxed candidates from the database
        self.__initialize_pop__()

    def __initialize_pop__(self):
        """ Private method that initializes the population when
        the population is created. """

        # Get all relaxed candidates from the database
        ue = self.use_extinct
        all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)
        all_cand.sort(key=lambda x: x.info['key_value_pairs']['raw_score'],
                      reverse=True)
        # all_cand.sort(key=lambda x: x.get_potential_energy())

        # Fill up the population with the self.pop_size most stable
        # unique candidates.
        i = 0
        while i < len(all_cand) and len(self.pop) < self.pop_size:
            c = all_cand[i]
            i += 1
            eq = False
            for a in self.pop:
                if self.comparator.looks_like(a, c):
                    eq = True
                    break
            if not eq:
                self.pop.append(c)

        # Record, per population member, how many other candidates it
        # resembles; used later to penalize over-represented motifs.
        for a in self.pop:
            a.info['looks_like'] = count_looks_like(a, all_cand,
                                                    self.comparator)

        self.all_cand = all_cand
        self.__calc_participation__()

    def __calc_participation__(self):
        """ Determines, from the database, how many times each
        candidate has been used to generate new candidates. """
        (participation, pairs) = self.dc.get_participation_in_pairing()
        for a in self.pop:
            if a.info['confid'] in participation.keys():
                a.info['n_paired'] = participation[a.info['confid']]
            else:
                a.info['n_paired'] = 0
        self.pairs = pairs

    def update(self, new_cand=None):
        """ New candidates can be added to the database
        after the population object has been created.
        This method extracts these new candidates from the
        database and includes them in the population. """

        if len(self.pop) == 0:
            self.__initialize_pop__()

        if new_cand is None:
            ue = self.use_extinct
            new_cand = self.dc.get_all_relaxed_candidates(only_new=True,
                                                          use_extinct=ue)

        for a in new_cand:
            self.__add_candidate__(a)
            self.all_cand.append(a)
        self.__calc_participation__()
        self._write_log()

    def get_current_population(self):
        """ Returns a copy of the current population. """
        self.update()
        return [a.copy() for a in self.pop]

    def get_population_after_generation(self, gen):
        """ Returns a copy of the population as it were
        after generation gen"""
        if self.logfile is not None:
            fd = open(self.logfile)
            gens = {}
            # NOTE(review): lines are split on every ':'; if the
            # timestamp written by _write_log contains colons this
            # unpacking would fail -- verify the logfile format.
            for line in fd:
                _, no, popul = line.split(':')
                gens[int(no)] = [int(i) for i in popul.split(',')]
            fd.close()
            return [c.copy() for c in self.all_cand[::-1]
                    if c.info['relax_id'] in gens[gen]]

        # No logfile: reconstruct from all candidates up to generation
        # gen, keeping only mutually dissimilar ones (best-first).
        all_candidates = [c for c in self.all_cand
                          if c.info['key_value_pairs']['generation'] <= gen]
        cands = [all_candidates[0]]
        for b in all_candidates:
            if b not in cands:
                for a in cands:
                    if self.comparator.looks_like(a, b):
                        break
                else:
                    cands.append(b)
        pop = cands[:self.pop_size]
        return [a.copy() for a in pop]

    def __add_candidate__(self, a):
        """ Adds a single candidate to the population. """
        # NOTE(review): assumes self.pop is non-empty; self.pop[-1]
        # would raise IndexError on an empty population -- confirm
        # callers guarantee this.

        # check if the structure is too low in raw score
        raw_score_a = get_raw_score(a)
        raw_score_worst = get_raw_score(self.pop[-1])
        if raw_score_a < raw_score_worst \
                and len(self.pop) == self.pop_size:
            return

        # check if the new candidate should
        # replace a similar structure in the population
        for (i, b) in enumerate(self.pop):
            if self.comparator.looks_like(a, b):
                # Replace only if the new candidate scores better;
                # either way a similar structure exists, so stop here.
                if get_raw_score(b) < raw_score_a:
                    del self.pop[i]
                    a.info['looks_like'] = count_looks_like(a,
                                                            self.all_cand,
                                                            self.comparator)
                    self.pop.append(a)
                    self.pop.sort(key=get_raw_score,
                                  reverse=True)
                return

        # the new candidate needs to be added, so remove the highest
        # energy one
        if len(self.pop) == self.pop_size:
            del self.pop[-1]

        # add the new candidate
        a.info['looks_like'] = count_looks_like(a,
                                                self.all_cand,
                                                self.comparator)
        self.pop.append(a)
        self.pop.sort(key=get_raw_score, reverse=True)

    def __get_fitness__(self, indecies, with_history=True):
        """Calculates the fitness using the formula from
        L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816

        Sign change on the fitness compared to the formulation in the
        abovementioned paper due to maximizing raw_score instead of
        minimizing energy. (Set raw_score=-energy to optimize the energy)
        """

        scores = [get_raw_score(x) for x in self.pop]
        min_s = min(scores)
        max_s = max(scores)
        # NOTE(review): T is 0 when every member has the same raw
        # score, making the division below a ZeroDivisionError --
        # confirm this cannot occur in practice.
        T = min_s - max_s
        if isinstance(indecies, int):
            indecies = [indecies]

        f = [0.5 * (1. - tanh(2. * (scores[i] - max_s) / T - 1.))
             for i in indecies]
        if with_history:
            # Penalize candidates that have been paired often (M) or
            # that resemble many others (L).
            M = [float(self.pop[i].info['n_paired']) for i in indecies]
            L = [float(self.pop[i].info['looks_like']) for i in indecies]
            f = [f[i] * 1. / sqrt(1. + M[i]) * 1. / sqrt(1. + L[i])
                 for i in range(len(f))]
        return f

    def get_two_candidates(self, with_history=True):
        """ Returns two candidates for pairing employing the
        fitness criteria from
        L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816
        and the roulette wheel selection scheme described in
        R.L. Johnston Dalton Transactions,
        Vol. 22, No. 22. (2003), pp. 4193-4207

        Returns None if fewer than two candidates are available.
        """

        if len(self.pop) < 2:
            self.update()

        if len(self.pop) < 2:
            return None

        fit = self.__get_fitness__(range(len(self.pop)), with_history)
        fmax = max(fit)
        c1 = self.pop[0]
        c2 = self.pop[0]
        used_before = False
        # Roulette wheel: keep drawing until the two picks differ.
        # NOTE(review): the loop also exits as soon as the drawn pair
        # has been used before, i.e. a previously used pair may be
        # returned -- confirm this is intended.
        while c1.info['confid'] == c2.info['confid'] and not used_before:
            nnf = True
            while nnf:
                t = self.rng.randint(len(self.pop))
                if fit[t] > self.rng.random() * fmax:
                    c1 = self.pop[t]
                    nnf = False
            nnf = True
            while nnf:
                t = self.rng.randint(len(self.pop))
                if fit[t] > self.rng.random() * fmax:
                    c2 = self.pop[t]
                    nnf = False

            c1id = c1.info['confid']
            c2id = c2.info['confid']
            used_before = (min([c1id, c2id]), max([c1id, c2id])) in self.pairs
        return (c1.copy(), c2.copy())

    def get_one_candidate(self, with_history=True):
        """Returns one candidate for mutation employing the
        fitness criteria from
        L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816
        and the roulette wheel selection scheme described in
        R.L. Johnston Dalton Transactions,
        Vol. 22, No. 22. (2003), pp. 4193-4207

        Returns None if the population is empty.
        """
        if len(self.pop) < 1:
            self.update()

        if len(self.pop) < 1:
            return None

        fit = self.__get_fitness__(range(len(self.pop)), with_history)
        fmax = max(fit)
        nnf = True
        while nnf:
            t = self.rng.randint(len(self.pop))
            if fit[t] > self.rng.random() * fmax:
                c1 = self.pop[t]
                nnf = False

        return c1.copy()

    def _write_log(self):
        """Writes the population to a logfile.

        The format is::

            timestamp: generation(if available): id1,id2,id3..."""
        if self.logfile is not None:
            ids = [str(a.info['relax_id']) for a in self.pop]
            if ids != []:
                try:
                    gen_nums = [c.info['key_value_pairs']['generation']
                                for c in self.all_cand]
                    max_gen = max(gen_nums)
                except KeyError:
                    # Generation info is optional; fall back to a blank.
                    max_gen = ' '
                fd = open(self.logfile, 'a')
                fd.write('{time}: {gen}: {pop}\n'.format(time=now(),
                                                         pop=','.join(ids),
                                                         gen=max_gen))
                fd.close()

    def is_uniform(self, func, min_std, pop=None):
        """Tests whether the current population is uniform or diverse.
        Returns True if uniform, False otherwise.

        Parameters:

        func: function
            that takes one argument an atoms object and returns a value that
            will be used for testing against the rest of the population.

        min_std: int or float
            The minimum standard deviation, if the population has a lower
            std dev it is uniform.

        pop: list, optional
            use this list of Atoms objects instead of the current population.
        """
        if pop is None:
            pop = self.pop
        vals = [func(a) for a in pop]
        stddev = np.std(vals)
        if stddev < min_std:
            return True
        return False

    def mass_extinction(self, ids):
        """Kills every candidate in the database with gaid in the
        supplied list of ids. Typically used on the main part of the current
        population if the diversity is too small.

        Parameters:

        ids: list
            list of ids of candidates to be killed.

        """
        for confid in ids:
            self.dc.kill_candidate(confid)
        # Empty the in-memory population; it is rebuilt on next update().
        self.pop = []
class RandomPopulation(Population):
    """Population variant that can reserve slots for randomly rated
    ('bad') candidates and optionally refuse to propose pairs that
    have already been used.

    Parameters (in addition to Population's):

    exclude_used_pairs: boolean
        If True, get_two_candidates will not return a pair already
        present in self.pairs. Default False.

    bad_candidates: int
        Number of population slots filled by randomly rated candidates
        instead of the most stable ones. Default 0.
    """

    def __init__(self, data_connection, population_size,
                 comparator=None, logfile=None, exclude_used_pairs=False,
                 bad_candidates=0, use_extinct=False):
        self.exclude_used_pairs = exclude_used_pairs
        self.bad_candidates = bad_candidates
        # Population.__init__ ends by calling __initialize_pop__, which
        # resolves to the override below (no name mangling: the name has
        # trailing underscores).
        Population.__init__(self, data_connection, population_size,
                            comparator, logfile, use_extinct)

    def __initialize_pop__(self):
        """ Private method that initializes the population when
        the population is created. """

        # Get all relaxed candidates from the database
        ue = self.use_extinct
        all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)
        all_cand.sort(key=get_raw_score, reverse=True)
        # all_cand.sort(key=lambda x: x.get_potential_energy())

        if len(all_cand) > 0:
            # Fill up the population with the self.pop_size most stable
            # unique candidates.
            ratings = []
            best_raw = get_raw_score(all_cand[0])
            i = 0
            while i < len(all_cand):
                c = all_cand[i]
                i += 1
                eq = False
                for a in self.pop:
                    if self.comparator.looks_like(a, c):
                        eq = True
                        break
                if not eq:
                    if len(self.pop) < self.pop_size - self.bad_candidates:
                        self.pop.append(c)
                    else:
                        # Leftover candidates get a randomized rating
                        # weighted by their score relative to the best.
                        exp_fact = exp(get_raw_score(c) / best_raw)
                        ratings.append([c, (exp_fact - 1) * self.rng.random()])
            ratings.sort(key=itemgetter(1), reverse=True)

            # Fill the reserved slots with the best-rated 'bad' ones.
            for i in range(self.bad_candidates):
                self.pop.append(ratings[i][0])

        for a in self.pop:
            a.info['looks_like'] = count_looks_like(a, all_cand,
                                                    self.comparator)

        self.all_cand = all_cand
        self.__calc_participation__()

    def update(self):
        """ The update method in Population will add to the end of
        the population, that can't be used here since we might have
        bad candidates that need to stay in the population, therefore
        just recalc the population every time. """

        self.pop = []
        self.__initialize_pop__()

        self._write_log()

    def get_one_candidate(self):
        """Returns one candidate at random, or None if the
        population is empty."""
        if len(self.pop) < 1:
            self.update()

        if len(self.pop) < 1:
            return None

        t = self.rng.randint(len(self.pop))
        c = self.pop[t]

        return c.copy()

    def get_two_candidates(self):
        """Returns two candidates at random, or None if fewer than
        two are available."""
        if len(self.pop) < 2:
            self.update()

        if len(self.pop) < 2:
            return None

        c1 = self.pop[0]
        c2 = self.pop[0]
        used_before = False
        # Redraw while the two picks are identical; 'used_before' only
        # forces further redraws when exclude_used_pairs is set.
        while c1.info['confid'] == c2.info['confid'] and not used_before:
            t = self.rng.randint(len(self.pop))
            c1 = self.pop[t]
            t = self.rng.randint(len(self.pop))
            c2 = self.pop[t]

            c1id = c1.info['confid']
            c2id = c2.info['confid']
            used_before = (tuple(sorted([c1id, c2id])) in self.pairs and
                           self.exclude_used_pairs)
        return (c1.copy(), c2.copy())
class FitnessSharingPopulation(Population):
    """ Fitness sharing population that penalizes structures if they are
    too similar. This is determined by a distance measure.

    Parameters:

    comp_key: string
        Key where the distance measure can be found in the
        atoms.info['key_value_pairs'] dictionary.

    threshold: float or int
        Value above which no penalization of the fitness takes place

    alpha_sh: float or int
        Determines the shape of the sharing function.
        Default is 1, which gives a linear sharing function.

    """

    def __init__(self, data_connection, population_size,
                 comp_key, threshold, alpha_sh=1.,
                 comparator=None, logfile=None, use_extinct=False):
        self.comp_key = comp_key
        self.dt = threshold  # dissimilarity threshold
        self.alpha_sh = alpha_sh
        self.fit_scaling = 1.  # exponent applied to the objective fitness

        # Cache of pairwise sharing values keyed by sorted confid pairs;
        # avoids recomputing |ck - ok| for pairs seen before.
        self.sh_cache = {}

        Population.__init__(self, data_connection, population_size,
                            comparator, logfile, use_extinct)

    def __get_fitness__(self, candidates):
        """Input should be sorted according to raw_score.

        Returns a list of shared fitness values, one per candidate:
        the tanh-scaled objective fitness divided by the niche count m.
        """
        max_s = get_raw_score(candidates[0])
        min_s = get_raw_score(candidates[-1])
        # NOTE(review): T is 0 when best and worst raw scores coincide,
        # which would make the division below fail -- confirm callers
        # avoid that case.
        T = min_s - max_s

        shared_fit = []
        for c in candidates:
            sc = get_raw_score(c)
            obj_fit = 0.5 * (1. - tanh(2. * (sc - max_s) / T - 1.))
            m = 1.
            ck = c.info['key_value_pairs'][self.comp_key]
            for other in candidates:
                if other != c:
                    name = tuple(sorted([c.info['confid'],
                                         other.info['confid']]))
                    if name not in self.sh_cache:
                        ok = other.info['key_value_pairs'][self.comp_key]
                        d = abs(ck - ok)
                        if d < self.dt:
                            # Inside the sharing radius: penalty grows as
                            # the distance shrinks (linear for alpha_sh=1).
                            v = 1 - (d / self.dt)**self.alpha_sh
                            self.sh_cache[name] = v
                        else:
                            self.sh_cache[name] = 0
                    m += self.sh_cache[name]

            shf = (obj_fit ** self.fit_scaling) / m
            shared_fit.append(shf)
        return shared_fit

    def update(self):
        """ The update method in Population will add to the end of
        the population, that can't be used here since the shared fitness
        will change for all candidates when new are added, therefore
        just recalc the population every time. """

        self.pop = []
        self.__initialize_pop__()

        self._write_log()

    def __initialize_pop__(self):
        # Get all relaxed candidates from the database
        ue = self.use_extinct
        all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)
        all_cand.sort(key=get_raw_score, reverse=True)

        if len(all_cand) > 0:
            # Re-sort candidates by their shared fitness before filling
            # the population.
            shared_fit = self.__get_fitness__(all_cand)
            all_sorted = list(zip(*sorted(zip(shared_fit, all_cand),
                                          reverse=True)))[1]

            # Fill up the population with the self.pop_size most stable
            # unique candidates.
            i = 0
            while i < len(all_sorted) and len(self.pop) < self.pop_size:
                c = all_sorted[i]
                i += 1
                eq = False
                for a in self.pop:
                    if self.comparator.looks_like(a, c):
                        eq = True
                        break
                if not eq:
                    self.pop.append(c)

            for a in self.pop:
                a.info['looks_like'] = count_looks_like(a, all_cand,
                                                        self.comparator)
        self.all_cand = all_cand

    def get_two_candidates(self):
        """ Returns two candidates for pairing employing the
        fitness criteria from
        L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816
        and the roulette wheel selection scheme described in
        R.L. Johnston Dalton Transactions,
        Vol. 22, No. 22. (2003), pp. 4193-4207

        Returns None if fewer than two candidates are available.
        """

        if len(self.pop) < 2:
            self.update()

        if len(self.pop) < 2:
            return None

        fit = self.__get_fitness__(self.pop)
        fmax = max(fit)
        c1 = self.pop[0]
        c2 = self.pop[0]
        # Roulette wheel draws until two distinct candidates are found.
        while c1.info['confid'] == c2.info['confid']:
            nnf = True
            while nnf:
                t = self.rng.randint(len(self.pop))
                if fit[t] > self.rng.random() * fmax:
                    c1 = self.pop[t]
                    nnf = False
            nnf = True
            while nnf:
                t = self.rng.randint(len(self.pop))
                if fit[t] > self.rng.random() * fmax:
                    c2 = self.pop[t]
                    nnf = False

        return (c1.copy(), c2.copy())
class RankFitnessPopulation(Population):
    """ Ranks the fitness relative to set variable to flatten the surface
    in a certain direction such that mating across variable is equally
    likely irrespective of raw_score.

    Parameters:

    variable_function: function
        A function that takes as input an Atoms object and returns
        the variable that differentiates the ranks.

    exp_function: boolean
        If True use an exponential function for ranking the fitness.
        If False use the same as in Population. Default True.

    exp_prefactor: float
        The prefactor used in the exponential fitness scaling function.
        Default 0.5
    """

    def __init__(self, data_connection, population_size, variable_function,
                 comparator=None, logfile=None, use_extinct=False,
                 exp_function=True, exp_prefactor=0.5):
        self.exp_function = exp_function
        self.exp_prefactor = exp_prefactor
        self.vf = variable_function
        # The current fitness is set at each update of the population
        self.current_fitness = None

        Population.__init__(self, data_connection, population_size,
                            comparator, logfile, use_extinct)

    def get_rank(self, rcand, key=None):
        """Return an array of (negative) within-niche ranks for *rcand*,
        in the original candidate order.

        Candidates are grouped into niches by equal variable_function
        value; inside each niche they are ranked -1, -2, ... by
        descending key_value_pairs[key].
        """
        # Set the initial order of the candidates, will need to
        # be returned in this order at the end of ranking.
        ordered = list(zip(range(len(rcand)), rcand))

        # Niche and rank candidates. O(n^2) pairwise niching.
        rec_nic = []
        rank_fit = []
        for o, c in ordered:
            if o not in rec_nic:
                ntr = []
                ce1 = self.vf(c)
                rec_nic.append(o)
                ntr.append([o, c])
                for oother, cother in ordered:
                    if oother not in rec_nic:
                        ce2 = self.vf(cother)
                        if ce1 == ce2:
                            # put the now processed in oother
                            # in rec_nic as well
                            rec_nic.append(oother)
                            ntr.append([oother, cother])
                # Each niche is sorted according to raw_score and
                # assigned a fitness according to the ranking of
                # the candidates
                ntr.sort(key=lambda x: x[1].info['key_value_pairs'][key],
                         reverse=True)
                start_rank = -1
                cor = 0
                for on, cn in ntr:
                    rank = start_rank - cor
                    rank_fit.append([on, cn, rank])
                    cor += 1
        # The original order is reformed
        rank_fit.sort(key=itemgetter(0), reverse=False)
        return np.array(list(zip(*rank_fit))[2])

    def __get_fitness__(self, candidates):
        """Map within-niche ranks to fitness values (np.ndarray)."""
        expf = self.exp_function
        rfit = self.get_rank(candidates, key='raw_score')

        if not expf:
            rmax = max(rfit)
            rmin = min(rfit)
            T = rmin - rmax
            # If using obj_rank probability, must have non-zero T val.
            # pop_size must be greater than number of permutations.
            # We test for this here
            # NOTE(review): assert is stripped under -O; this check
            # would then be skipped.
            msg = "Equal fitness for best and worst candidate in the "
            msg += "population! Fitness scaling is impossible! "
            msg += "Try with a larger population."
            assert T != 0., msg
            return 0.5 * (1. - np.tanh(2. * (rfit - rmax) / T - 1.))
        else:
            # Rank -1 maps to exp_prefactor**0 = 1, rank -2 to
            # exp_prefactor**1, etc.
            return self.exp_prefactor ** (-rfit - 1)

    def update(self):
        """ The update method in Population will add to the end of
        the population, that can't be used here since the fitness
        will potentially change for all candidates when new are added,
        therefore just recalc the population every time. """

        self.pop = []
        self.__initialize_pop__()
        self.current_fitness = self.__get_fitness__(self.pop)

        self._write_log()

    def __initialize_pop__(self):
        # Get all relaxed candidates from the database
        ue = self.use_extinct
        all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)
        all_cand.sort(key=get_raw_score, reverse=True)

        if len(all_cand) > 0:
            # Re-sort candidates by rank fitness before filling.
            fitf = self.__get_fitness__(all_cand)
            all_sorted = list(zip(fitf, all_cand))
            all_sorted.sort(key=itemgetter(0), reverse=True)
            sort_cand = []
            for _, t2 in all_sorted:
                sort_cand.append(t2)
            all_sorted = sort_cand

            # Fill up the population with the self.pop_size most stable
            # unique candidates.
            i = 0
            while i < len(all_sorted) and len(self.pop) < self.pop_size:
                c = all_sorted[i]
                c_vf = self.vf(c)
                i += 1
                eq = False
                for a in self.pop:
                    a_vf = self.vf(a)
                    # Only run comparator if the variable_function (self.vf)
                    # returns the same. If it returns something different the
                    # candidates are inherently different.
                    # This is done to speed up.
                    if a_vf == c_vf:
                        if self.comparator.looks_like(a, c):
                            eq = True
                            break
                if not eq:
                    self.pop.append(c)
        self.all_cand = all_cand

    def get_two_candidates(self):
        """ Returns two candidates for pairing employing the
        roulette wheel selection scheme described in
        R.L. Johnston Dalton Transactions,
        Vol. 22, No. 22. (2003), pp. 4193-4207

        Returns None if fewer than two candidates are available.
        """

        if len(self.pop) < 2:
            self.update()

        if len(self.pop) < 2:
            return None

        # Use saved fitness
        fit = self.current_fitness
        fmax = max(fit)
        c1 = self.pop[0]
        c2 = self.pop[0]
        while c1.info['confid'] == c2.info['confid']:
            nnf = True
            while nnf:
                t = self.rng.randint(len(self.pop))
                if fit[t] > self.rng.random() * fmax:
                    c1 = self.pop[t]
                    nnf = False
            nnf = True
            while nnf:
                t = self.rng.randint(len(self.pop))
                if fit[t] > self.rng.random() * fmax:
                    c2 = self.pop[t]
                    nnf = False

        return (c1.copy(), c2.copy())
class MultiObjectivePopulation(RankFitnessPopulation):
    """ Allows for assignment of fitness based on a set of two variables
    such that fitness is ranked according to a Pareto-front of
    non-dominated candidates.

    Parameters
    ----------
    abs_data: list
        Set of key_value_pairs in atoms object for which fitness should
        be assigned based on absolute value.

    rank_data: list
        Set of key_value_pairs in atoms object for which data should
        be ranked in order to ascribe fitness.

    variable_function: function
        A function that takes as input an Atoms object and returns
        the variable that differentiates the ranks. Only use if
        data is ranked.

    exp_function: boolean
        If True use an exponential function for ranking the fitness.
        If False use the same as in Population. Default True.

    exp_prefactor: float
        The prefactor used in the exponential fitness scaling function.
        Default 0.5

    """

    def __init__(self, data_connection, population_size,
                 variable_function=None, comparator=None, logfile=None,
                 use_extinct=False, abs_data=None, rank_data=None,
                 exp_function=True, exp_prefactor=0.5):
        # The current fitness is set at each update of the population
        self.current_fitness = None

        if rank_data is None:
            rank_data = []
        self.rank_data = rank_data

        if abs_data is None:
            abs_data = []
        self.abs_data = abs_data

        RankFitnessPopulation.__init__(self, data_connection, population_size,
                                       variable_function, comparator, logfile,
                                       use_extinct, exp_function,
                                       exp_prefactor)

    def get_nonrank(self, nrcand, key=None):
        """Returns a list of fitness values taken directly from
        key_value_pairs[key] of each candidate (no ranking)."""
        nrc_list = []
        for nrc in nrcand:
            nrc_list.append(nrc.info['key_value_pairs'][key])
        return nrc_list

    def __get_fitness__(self, candidates):
        """Assign Pareto-front-based fitness from the combined
        rank_data and abs_data objectives (returns np.ndarray)."""
        # There are no defaults set for the datasets to be
        # used in this function, as such we test that the
        # user has specified at least two here.
        # NOTE(review): assert is stripped under -O.
        msg = "This is a multi-objective fitness function"
        msg += " so there must be at least two datasets"
        msg += " stated in the rank_data and abs_data variables"
        assert len(self.rank_data) + len(self.abs_data) >= 2, msg

        expf = self.exp_function

        all_fitnesses = []
        used = set()
        for rd in self.rank_data:
            used.add(rd)
            # Build ranked fitness based on rd
            all_fitnesses.append(self.get_rank(candidates, key=rd))

        for d in self.abs_data:
            if d not in used:
                used.add(d)
                # Build fitness based on d
                all_fitnesses.append(self.get_nonrank(candidates, key=d))

        # Set the initial order of the ranks, will need to
        # be returned in this order at the end.
        fordered = list(zip(range(len(all_fitnesses[0])), *all_fitnesses))
        mvf_rank = -1  # Start multi variable rank at -1.
        rec_vrc = []  # A record of already ranked candidates.
        mvf_list = []  # A list for all candidate ranks.
        # Sort by raw_score_1 in case this is different from
        # the stored raw_score() variable that all_cands are
        # sorted by.
        fordered.sort(key=itemgetter(1), reverse=True)
        # Niche candidates with equal or better raw_score to
        # current candidate.
        for a in fordered:
            order, rest = a[0], a[1:]
            if order not in rec_vrc:
                pff = []
                pff2 = []
                rec_vrc.append(order)
                pff.append((order, rest))
                for b in fordered:
                    border, brest = b[0], b[1:]
                    if border not in rec_vrc:
                        if np.any(np.array(brest) >= np.array(rest)):
                            pff.append((border, brest))
                # Remove any candidate from pff list that is dominated
                # by another in the list.
                # NOTE(review): na[1:] here yields a 1-tuple wrapping the
                # objective tuple (pff entries are 2-tuples), so the
                # comparisons operate on nested arrays; this appears
                # harmless since only the rank (index 1 of mvf_list
                # entries) is used below -- confirm.
                for na in pff:
                    norder, nrest = na[0], na[1:]
                    dom = False
                    for nb in pff:
                        nborder, nbrest = nb[0], nb[1:]
                        if norder != nborder:
                            if np.all(np.array(nbrest) > np.array(nrest)):
                                dom = True
                    if not dom:
                        pff2.append((norder, nrest))
                # Assign pareto rank from -1 to -N niches.
                for ffa in pff2:
                    fforder, ffrest = ffa[0], ffa[1:]
                    rec_vrc.append(fforder)
                    mvf_list.append((fforder, mvf_rank, ffrest))
                mvf_rank = mvf_rank - 1
        # The original order is reformed
        mvf_list.sort(key=itemgetter(0), reverse=False)
        rfro = np.array(list(zip(*mvf_list))[1])

        if not expf:
            rmax = max(rfro)
            rmin = min(rfro)
            T = rmin - rmax
            # If using obj_rank probability, must have non-zero T val.
            # pop_size must be greater than number of permutations.
            # We test for this here
            msg = "Equal fitness for best and worst candidate in the "
            msg += "population! Fitness scaling is impossible! "
            msg += "Try with a larger population."
            assert T != 0., msg
            return 0.5 * (1. - np.tanh(2. * (rfro - rmax) / T - 1.))
        else:
            return self.exp_prefactor ** (-rfro - 1)

    def __initialize_pop__(self):
        # Get all relaxed candidates from the database
        ue = self.use_extinct
        all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)
        all_cand.sort(key=get_raw_score, reverse=True)

        if len(all_cand) > 0:
            # Re-sort candidates by multi-objective fitness.
            fitf = self.__get_fitness__(all_cand)
            all_sorted = list(zip(fitf, all_cand))
            all_sorted.sort(key=itemgetter(0), reverse=True)
            sort_cand = []
            for _, t2 in all_sorted:
                sort_cand.append(t2)
            all_sorted = sort_cand

            # Fill up the population with the self.pop_size most stable
            # unique candidates.
            i = 0
            while i < len(all_sorted) and len(self.pop) < self.pop_size:
                c = all_sorted[i]
                # Use variable_function to decide whether to run comparator
                # if the function has been defined by the user. This does not
                # need to be dependent on using the rank_data function.
                if self.vf is not None:
                    c_vf = self.vf(c)
                i += 1
                eq = False
                for a in self.pop:
                    if self.vf is not None:
                        a_vf = self.vf(a)
                        # Only run comparator if the variable_function
                        # (self.vf) returns the same. If it returns something
                        # different the candidates are inherently different.
                        # This is done to speed up.
                        if a_vf == c_vf:
                            if self.comparator.looks_like(a, c):
                                eq = True
                                break
                    else:
                        if self.comparator.looks_like(a, c):
                            eq = True
                            break
                if not eq:
                    self.pop.append(c)
        self.all_cand = all_cand