Coverage for /builds/kinetik161/ase/ase/ga/population.py: 26.18%

531 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-12-10 11:04 +0000

1""" Implementation of a population for maintaining a GA population and 

2proposing structures to pair. """ 

3from math import exp, sqrt, tanh 

4from operator import itemgetter 

5 

6import numpy as np 

7 

8from ase.db.core import now 

9from ase.ga import get_raw_score 

10 

11 

12def count_looks_like(a, all_cand, comp): 

13 """Utility method for counting occurrences.""" 

14 n = 0 

15 for b in all_cand: 

16 if a.info['confid'] == b.info['confid']: 

17 continue 

18 if comp.looks_like(a, b): 

19 n += 1 

20 return n 

21 

22 

23class Population: 

24 """Population class which maintains the current population 

25 and proposes which candidates to pair together. 

26 

27 Parameters: 

28 

29 data_connection: DataConnection object 

30 Bla bla bla. 

31 

32 population_size: int 

33 The number of candidates in the population. 

34 

35 comparator: Comparator object 

36 this will tell if two configurations are equal. 

37 Default compare atoms objects directly. 

38 

39 logfile: str 

40 Text file that contains information about the population 

41 The format is:: 

42 

43 timestamp: generation(if available): id1,id2,id3... 

44 

45 Using this file greatly speeds up convergence checks. 

46 Default None meaning that no file is written. 

47 

48 use_extinct: boolean 

49 Set this to True if mass extinction and the extinct key 

50 are going to be used. Default is False. 

51 

52 rng: Random number generator 

53 By default numpy.random. 

54 """ 

55 

56 def __init__(self, data_connection, population_size, 

57 comparator=None, logfile=None, use_extinct=False, 

58 rng=np.random): 

59 self.dc = data_connection 

60 self.pop_size = population_size 

61 if comparator is None: 

62 from ase.ga.standard_comparators import AtomsComparator 

63 comparator = AtomsComparator() 

64 self.comparator = comparator 

65 self.logfile = logfile 

66 self.use_extinct = use_extinct 

67 self.rng = rng 

68 self.pop = [] 

69 self.pairs = None 

70 self.all_cand = None 

71 self.__initialize_pop__() 

72 

73 def __initialize_pop__(self): 

74 """ Private method that initializes the population when 

75 the population is created. """ 

76 

77 # Get all relaxed candidates from the database 

78 ue = self.use_extinct 

79 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue) 

80 all_cand.sort(key=lambda x: x.info['key_value_pairs']['raw_score'], 

81 reverse=True) 

82 # all_cand.sort(key=lambda x: x.get_potential_energy()) 

83 

84 # Fill up the population with the self.pop_size most stable 

85 # unique candidates. 

86 i = 0 

87 while i < len(all_cand) and len(self.pop) < self.pop_size: 

88 c = all_cand[i] 

89 i += 1 

90 eq = False 

91 for a in self.pop: 

92 if self.comparator.looks_like(a, c): 

93 eq = True 

94 break 

95 if not eq: 

96 self.pop.append(c) 

97 

98 for a in self.pop: 

99 a.info['looks_like'] = count_looks_like(a, all_cand, 

100 self.comparator) 

101 

102 self.all_cand = all_cand 

103 self.__calc_participation__() 

104 

105 def __calc_participation__(self): 

106 """ Determines, from the database, how many times each 

107 candidate has been used to generate new candidates. """ 

108 (participation, pairs) = self.dc.get_participation_in_pairing() 

109 for a in self.pop: 

110 if a.info['confid'] in participation.keys(): 

111 a.info['n_paired'] = participation[a.info['confid']] 

112 else: 

113 a.info['n_paired'] = 0 

114 self.pairs = pairs 

115 

116 def update(self, new_cand=None): 

117 """ New candidates can be added to the database 

118 after the population object has been created. 

119 This method extracts these new candidates from the 

120 database and includes them in the population. """ 

121 

122 if len(self.pop) == 0: 

123 self.__initialize_pop__() 

124 

125 if new_cand is None: 

126 ue = self.use_extinct 

127 new_cand = self.dc.get_all_relaxed_candidates(only_new=True, 

128 use_extinct=ue) 

129 

130 for a in new_cand: 

131 self.__add_candidate__(a) 

132 self.all_cand.append(a) 

133 self.__calc_participation__() 

134 self._write_log() 

135 

136 def get_current_population(self): 

137 """ Returns a copy of the current population. """ 

138 self.update() 

139 return [a.copy() for a in self.pop] 

140 

141 def get_population_after_generation(self, gen): 

142 """ Returns a copy of the population as it where 

143 after generation gen""" 

144 if self.logfile is not None: 

145 fd = open(self.logfile) 

146 gens = {} 

147 for line in fd: 

148 _, no, popul = line.split(':') 

149 gens[int(no)] = [int(i) for i in popul.split(',')] 

150 fd.close() 

151 return [c.copy() for c in self.all_cand[::-1] 

152 if c.info['relax_id'] in gens[gen]] 

153 

154 all_candidates = [c for c in self.all_cand 

155 if c.info['key_value_pairs']['generation'] <= gen] 

156 cands = [all_candidates[0]] 

157 for b in all_candidates: 

158 if b not in cands: 

159 for a in cands: 

160 if self.comparator.looks_like(a, b): 

161 break 

162 else: 

163 cands.append(b) 

164 pop = cands[:self.pop_size] 

165 return [a.copy() for a in pop] 

166 

167 def __add_candidate__(self, a): 

168 """ Adds a single candidate to the population. """ 

169 

170 # check if the structure is too low in raw score 

171 raw_score_a = get_raw_score(a) 

172 raw_score_worst = get_raw_score(self.pop[-1]) 

173 if raw_score_a < raw_score_worst \ 

174 and len(self.pop) == self.pop_size: 

175 return 

176 

177 # check if the new candidate should 

178 # replace a similar structure in the population 

179 for (i, b) in enumerate(self.pop): 

180 if self.comparator.looks_like(a, b): 

181 if get_raw_score(b) < raw_score_a: 

182 del self.pop[i] 

183 a.info['looks_like'] = count_looks_like(a, 

184 self.all_cand, 

185 self.comparator) 

186 self.pop.append(a) 

187 self.pop.sort(key=get_raw_score, 

188 reverse=True) 

189 return 

190 

191 # the new candidate needs to be added, so remove the highest 

192 # energy one 

193 if len(self.pop) == self.pop_size: 

194 del self.pop[-1] 

195 

196 # add the new candidate 

197 a.info['looks_like'] = count_looks_like(a, 

198 self.all_cand, 

199 self.comparator) 

200 self.pop.append(a) 

201 self.pop.sort(key=get_raw_score, reverse=True) 

202 

203 def __get_fitness__(self, indecies, with_history=True): 

204 """Calculates the fitness using the formula from 

205 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816 

206 

207 Sign change on the fitness compared to the formulation in the 

208 abovementioned paper due to maximizing raw_score instead of 

209 minimizing energy. (Set raw_score=-energy to optimize the energy) 

210 """ 

211 

212 scores = [get_raw_score(x) for x in self.pop] 

213 min_s = min(scores) 

214 max_s = max(scores) 

215 T = min_s - max_s 

216 if isinstance(indecies, int): 

217 indecies = [indecies] 

218 

219 f = [0.5 * (1. - tanh(2. * (scores[i] - max_s) / T - 1.)) 

220 for i in indecies] 

221 if with_history: 

222 M = [float(self.pop[i].info['n_paired']) for i in indecies] 

223 L = [float(self.pop[i].info['looks_like']) for i in indecies] 

224 f = [f[i] * 1. / sqrt(1. + M[i]) * 1. / sqrt(1. + L[i]) 

225 for i in range(len(f))] 

226 return f 

227 

228 def get_two_candidates(self, with_history=True): 

229 """ Returns two candidates for pairing employing the 

230 fitness criteria from 

231 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816 

232 and the roulete wheel selection scheme described in 

233 R.L. Johnston Dalton Transactions, 

234 Vol. 22, No. 22. (2003), pp. 4193-4207 

235 """ 

236 

237 if len(self.pop) < 2: 

238 self.update() 

239 

240 if len(self.pop) < 2: 

241 return None 

242 

243 fit = self.__get_fitness__(range(len(self.pop)), with_history) 

244 fmax = max(fit) 

245 c1 = self.pop[0] 

246 c2 = self.pop[0] 

247 used_before = False 

248 while c1.info['confid'] == c2.info['confid'] and not used_before: 

249 nnf = True 

250 while nnf: 

251 t = self.rng.randint(len(self.pop)) 

252 if fit[t] > self.rng.random() * fmax: 

253 c1 = self.pop[t] 

254 nnf = False 

255 nnf = True 

256 while nnf: 

257 t = self.rng.randint(len(self.pop)) 

258 if fit[t] > self.rng.random() * fmax: 

259 c2 = self.pop[t] 

260 nnf = False 

261 

262 c1id = c1.info['confid'] 

263 c2id = c2.info['confid'] 

264 used_before = (min([c1id, c2id]), max([c1id, c2id])) in self.pairs 

265 return (c1.copy(), c2.copy()) 

266 

267 def get_one_candidate(self, with_history=True): 

268 """Returns one candidate for mutation employing the 

269 fitness criteria from 

270 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816 

271 and the roulete wheel selection scheme described in 

272 R.L. Johnston Dalton Transactions, 

273 Vol. 22, No. 22. (2003), pp. 4193-4207 

274 """ 

275 if len(self.pop) < 1: 

276 self.update() 

277 

278 if len(self.pop) < 1: 

279 return None 

280 

281 fit = self.__get_fitness__(range(len(self.pop)), with_history) 

282 fmax = max(fit) 

283 nnf = True 

284 while nnf: 

285 t = self.rng.randint(len(self.pop)) 

286 if fit[t] > self.rng.random() * fmax: 

287 c1 = self.pop[t] 

288 nnf = False 

289 

290 return c1.copy() 

291 

292 def _write_log(self): 

293 """Writes the population to a logfile. 

294 

295 The format is:: 

296 

297 timestamp: generation(if available): id1,id2,id3...""" 

298 if self.logfile is not None: 

299 ids = [str(a.info['relax_id']) for a in self.pop] 

300 if ids != []: 

301 try: 

302 gen_nums = [c.info['key_value_pairs']['generation'] 

303 for c in self.all_cand] 

304 max_gen = max(gen_nums) 

305 except KeyError: 

306 max_gen = ' ' 

307 fd = open(self.logfile, 'a') 

308 fd.write('{time}: {gen}: {pop}\n'.format(time=now(), 

309 pop=','.join(ids), 

310 gen=max_gen)) 

311 fd.close() 

312 

313 def is_uniform(self, func, min_std, pop=None): 

314 """Tests whether the current population is uniform or diverse. 

315 Returns True if uniform, False otherwise. 

316 

317 Parameters: 

318 

319 func: function 

320 that takes one argument an atoms object and returns a value that 

321 will be used for testing against the rest of the population. 

322 

323 min_std: int or float 

324 The minimum standard deviation, if the population has a lower 

325 std dev it is uniform. 

326 

327 pop: list, optional 

328 use this list of Atoms objects instead of the current population. 

329 """ 

330 if pop is None: 

331 pop = self.pop 

332 vals = [func(a) for a in pop] 

333 stddev = np.std(vals) 

334 if stddev < min_std: 

335 return True 

336 return False 

337 

338 def mass_extinction(self, ids): 

339 """Kills every candidate in the database with gaid in the 

340 supplied list of ids. Typically used on the main part of the current 

341 population if the diversity is to small. 

342 

343 Parameters: 

344 

345 ids: list 

346 list of ids of candidates to be killed. 

347 

348 """ 

349 for confid in ids: 

350 self.dc.kill_candidate(confid) 

351 self.pop = [] 

352 

353 

354class RandomPopulation(Population): 

355 def __init__(self, data_connection, population_size, 

356 comparator=None, logfile=None, exclude_used_pairs=False, 

357 bad_candidates=0, use_extinct=False): 

358 self.exclude_used_pairs = exclude_used_pairs 

359 self.bad_candidates = bad_candidates 

360 Population.__init__(self, data_connection, population_size, 

361 comparator, logfile, use_extinct) 

362 

363 def __initialize_pop__(self): 

364 """ Private method that initializes the population when 

365 the population is created. """ 

366 

367 # Get all relaxed candidates from the database 

368 ue = self.use_extinct 

369 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue) 

370 all_cand.sort(key=get_raw_score, reverse=True) 

371 # all_cand.sort(key=lambda x: x.get_potential_energy()) 

372 

373 if len(all_cand) > 0: 

374 # Fill up the population with the self.pop_size most stable 

375 # unique candidates. 

376 ratings = [] 

377 best_raw = get_raw_score(all_cand[0]) 

378 i = 0 

379 while i < len(all_cand): 

380 c = all_cand[i] 

381 i += 1 

382 eq = False 

383 for a in self.pop: 

384 if self.comparator.looks_like(a, c): 

385 eq = True 

386 break 

387 if not eq: 

388 if len(self.pop) < self.pop_size - self.bad_candidates: 

389 self.pop.append(c) 

390 else: 

391 exp_fact = exp(get_raw_score(c) / best_raw) 

392 ratings.append([c, (exp_fact - 1) * self.rng.random()]) 

393 ratings.sort(key=itemgetter(1), reverse=True) 

394 

395 for i in range(self.bad_candidates): 

396 self.pop.append(ratings[i][0]) 

397 

398 for a in self.pop: 

399 a.info['looks_like'] = count_looks_like(a, all_cand, 

400 self.comparator) 

401 

402 self.all_cand = all_cand 

403 self.__calc_participation__() 

404 

405 def update(self): 

406 """ The update method in Population will add to the end of 

407 the population, that can't be used here since we might have 

408 bad candidates that need to stay in the population, therefore 

409 just recalc the population every time. """ 

410 

411 self.pop = [] 

412 self.__initialize_pop__() 

413 

414 self._write_log() 

415 

416 def get_one_candidate(self): 

417 """Returns one candidates at random.""" 

418 if len(self.pop) < 1: 

419 self.update() 

420 

421 if len(self.pop) < 1: 

422 return None 

423 

424 t = self.rng.randint(len(self.pop)) 

425 c = self.pop[t] 

426 

427 return c.copy() 

428 

429 def get_two_candidates(self): 

430 """Returns two candidates at random.""" 

431 if len(self.pop) < 2: 

432 self.update() 

433 

434 if len(self.pop) < 2: 

435 return None 

436 

437 c1 = self.pop[0] 

438 c2 = self.pop[0] 

439 used_before = False 

440 while c1.info['confid'] == c2.info['confid'] and not used_before: 

441 t = self.rng.randint(len(self.pop)) 

442 c1 = self.pop[t] 

443 t = self.rng.randint(len(self.pop)) 

444 c2 = self.pop[t] 

445 

446 c1id = c1.info['confid'] 

447 c2id = c2.info['confid'] 

448 used_before = (tuple(sorted([c1id, c2id])) in self.pairs and 

449 self.exclude_used_pairs) 

450 return (c1.copy(), c2.copy()) 

451 

452 

453class FitnessSharingPopulation(Population): 

454 """ Fitness sharing population that penalizes structures if they are 

455 too similar. This is determined by a distance measure 

456 

457 Parameters: 

458 

459 comp_key: string 

460 Key where the distance measure can be found in the 

461 atoms.info['key_value_pairs'] dictionary. 

462 

463 threshold: float or int 

464 Value above which no penalization of the fitness takes place 

465 

466 alpha_sh: float or int 

467 Determines the shape of the sharing function. 

468 Default is 1, which gives a linear sharing function. 

469 

470 """ 

471 

472 def __init__(self, data_connection, population_size, 

473 comp_key, threshold, alpha_sh=1., 

474 comparator=None, logfile=None, use_extinct=False): 

475 self.comp_key = comp_key 

476 self.dt = threshold # dissimilarity threshold 

477 self.alpha_sh = alpha_sh 

478 self.fit_scaling = 1. 

479 

480 self.sh_cache = {} 

481 

482 Population.__init__(self, data_connection, population_size, 

483 comparator, logfile, use_extinct) 

484 

485 def __get_fitness__(self, candidates): 

486 """Input should be sorted according to raw_score.""" 

487 max_s = get_raw_score(candidates[0]) 

488 min_s = get_raw_score(candidates[-1]) 

489 T = min_s - max_s 

490 

491 shared_fit = [] 

492 for c in candidates: 

493 sc = get_raw_score(c) 

494 obj_fit = 0.5 * (1. - tanh(2. * (sc - max_s) / T - 1.)) 

495 m = 1. 

496 ck = c.info['key_value_pairs'][self.comp_key] 

497 for other in candidates: 

498 if other != c: 

499 name = tuple(sorted([c.info['confid'], 

500 other.info['confid']])) 

501 if name not in self.sh_cache: 

502 ok = other.info['key_value_pairs'][self.comp_key] 

503 d = abs(ck - ok) 

504 if d < self.dt: 

505 v = 1 - (d / self.dt)**self.alpha_sh 

506 self.sh_cache[name] = v 

507 else: 

508 self.sh_cache[name] = 0 

509 m += self.sh_cache[name] 

510 

511 shf = (obj_fit ** self.fit_scaling) / m 

512 shared_fit.append(shf) 

513 return shared_fit 

514 

515 def update(self): 

516 """ The update method in Population will add to the end of 

517 the population, that can't be used here since the shared fitness 

518 will change for all candidates when new are added, therefore 

519 just recalc the population every time. """ 

520 

521 self.pop = [] 

522 self.__initialize_pop__() 

523 

524 self._write_log() 

525 

526 def __initialize_pop__(self): 

527 # Get all relaxed candidates from the database 

528 ue = self.use_extinct 

529 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue) 

530 all_cand.sort(key=get_raw_score, reverse=True) 

531 

532 if len(all_cand) > 0: 

533 shared_fit = self.__get_fitness__(all_cand) 

534 all_sorted = list(zip(*sorted(zip(shared_fit, all_cand), 

535 reverse=True)))[1] 

536 

537 # Fill up the population with the self.pop_size most stable 

538 # unique candidates. 

539 i = 0 

540 while i < len(all_sorted) and len(self.pop) < self.pop_size: 

541 c = all_sorted[i] 

542 i += 1 

543 eq = False 

544 for a in self.pop: 

545 if self.comparator.looks_like(a, c): 

546 eq = True 

547 break 

548 if not eq: 

549 self.pop.append(c) 

550 

551 for a in self.pop: 

552 a.info['looks_like'] = count_looks_like(a, all_cand, 

553 self.comparator) 

554 self.all_cand = all_cand 

555 

556 def get_two_candidates(self): 

557 """ Returns two candidates for pairing employing the 

558 fitness criteria from 

559 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816 

560 and the roulete wheel selection scheme described in 

561 R.L. Johnston Dalton Transactions, 

562 Vol. 22, No. 22. (2003), pp. 4193-4207 

563 """ 

564 

565 if len(self.pop) < 2: 

566 self.update() 

567 

568 if len(self.pop) < 2: 

569 return None 

570 

571 fit = self.__get_fitness__(self.pop) 

572 fmax = max(fit) 

573 c1 = self.pop[0] 

574 c2 = self.pop[0] 

575 while c1.info['confid'] == c2.info['confid']: 

576 nnf = True 

577 while nnf: 

578 t = self.rng.randint(len(self.pop)) 

579 if fit[t] > self.rng.random() * fmax: 

580 c1 = self.pop[t] 

581 nnf = False 

582 nnf = True 

583 while nnf: 

584 t = self.rng.randint(len(self.pop)) 

585 if fit[t] > self.rng.random() * fmax: 

586 c2 = self.pop[t] 

587 nnf = False 

588 

589 return (c1.copy(), c2.copy()) 

590 

591 

592class RankFitnessPopulation(Population): 

593 """ Ranks the fitness relative to set variable to flatten the surface 

594 in a certain direction such that mating across variable is equally 

595 likely irrespective of raw_score. 

596 

597 Parameters: 

598 

599 variable_function: function 

600 A function that takes as input an Atoms object and returns 

601 the variable that differentiates the ranks. 

602 

603 exp_function: boolean 

604 If True use an exponential function for ranking the fitness. 

605 If False use the same as in Population. Default True. 

606 

607 exp_prefactor: float 

608 The prefactor used in the exponential fitness scaling function. 

609 Default 0.5 

610 """ 

611 

612 def __init__(self, data_connection, population_size, variable_function, 

613 comparator=None, logfile=None, use_extinct=False, 

614 exp_function=True, exp_prefactor=0.5): 

615 self.exp_function = exp_function 

616 self.exp_prefactor = exp_prefactor 

617 self.vf = variable_function 

618 # The current fitness is set at each update of the population 

619 self.current_fitness = None 

620 

621 Population.__init__(self, data_connection, population_size, 

622 comparator, logfile, use_extinct) 

623 

624 def get_rank(self, rcand, key=None): 

625 # Set the initial order of the candidates, will need to 

626 # be returned in this order at the end of ranking. 

627 ordered = list(zip(range(len(rcand)), rcand)) 

628 

629 # Niche and rank candidates. 

630 rec_nic = [] 

631 rank_fit = [] 

632 for o, c in ordered: 

633 if o not in rec_nic: 

634 ntr = [] 

635 ce1 = self.vf(c) 

636 rec_nic.append(o) 

637 ntr.append([o, c]) 

638 for oother, cother in ordered: 

639 if oother not in rec_nic: 

640 ce2 = self.vf(cother) 

641 if ce1 == ce2: 

642 # put the now processed in oother 

643 # in rec_nic as well 

644 rec_nic.append(oother) 

645 ntr.append([oother, cother]) 

646 # Each niche is sorted according to raw_score and 

647 # assigned a fitness according to the ranking of 

648 # the candidates 

649 ntr.sort(key=lambda x: x[1].info['key_value_pairs'][key], 

650 reverse=True) 

651 start_rank = -1 

652 cor = 0 

653 for on, cn in ntr: 

654 rank = start_rank - cor 

655 rank_fit.append([on, cn, rank]) 

656 cor += 1 

657 # The original order is reformed 

658 rank_fit.sort(key=itemgetter(0), reverse=False) 

659 return np.array(list(zip(*rank_fit))[2]) 

660 

661 def __get_fitness__(self, candidates): 

662 expf = self.exp_function 

663 rfit = self.get_rank(candidates, key='raw_score') 

664 

665 if not expf: 

666 rmax = max(rfit) 

667 rmin = min(rfit) 

668 T = rmin - rmax 

669 # If using obj_rank probability, must have non-zero T val. 

670 # pop_size must be greater than number of permutations. 

671 # We test for this here 

672 msg = "Equal fitness for best and worst candidate in the " 

673 msg += "population! Fitness scaling is impossible! " 

674 msg += "Try with a larger population." 

675 assert T != 0., msg 

676 return 0.5 * (1. - np.tanh(2. * (rfit - rmax) / T - 1.)) 

677 else: 

678 return self.exp_prefactor ** (-rfit - 1) 

679 

680 def update(self): 

681 """ The update method in Population will add to the end of 

682 the population, that can't be used here since the fitness 

683 will potentially change for all candidates when new are added, 

684 therefore just recalc the population every time. """ 

685 

686 self.pop = [] 

687 self.__initialize_pop__() 

688 self.current_fitness = self.__get_fitness__(self.pop) 

689 

690 self._write_log() 

691 

692 def __initialize_pop__(self): 

693 # Get all relaxed candidates from the database 

694 ue = self.use_extinct 

695 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue) 

696 all_cand.sort(key=get_raw_score, reverse=True) 

697 

698 if len(all_cand) > 0: 

699 fitf = self.__get_fitness__(all_cand) 

700 all_sorted = list(zip(fitf, all_cand)) 

701 all_sorted.sort(key=itemgetter(0), reverse=True) 

702 sort_cand = [] 

703 for _, t2 in all_sorted: 

704 sort_cand.append(t2) 

705 all_sorted = sort_cand 

706 

707 # Fill up the population with the self.pop_size most stable 

708 # unique candidates. 

709 i = 0 

710 while i < len(all_sorted) and len(self.pop) < self.pop_size: 

711 c = all_sorted[i] 

712 c_vf = self.vf(c) 

713 i += 1 

714 eq = False 

715 for a in self.pop: 

716 a_vf = self.vf(a) 

717 # Only run comparator if the variable_function (self.vf) 

718 # returns the same. If it returns something different the 

719 # candidates are inherently different. 

720 # This is done to speed up. 

721 if a_vf == c_vf: 

722 if self.comparator.looks_like(a, c): 

723 eq = True 

724 break 

725 if not eq: 

726 self.pop.append(c) 

727 self.all_cand = all_cand 

728 

729 def get_two_candidates(self): 

730 """ Returns two candidates for pairing employing the 

731 roulete wheel selection scheme described in 

732 R.L. Johnston Dalton Transactions, 

733 Vol. 22, No. 22. (2003), pp. 4193-4207 

734 """ 

735 

736 if len(self.pop) < 2: 

737 self.update() 

738 

739 if len(self.pop) < 2: 

740 return None 

741 

742 # Use saved fitness 

743 fit = self.current_fitness 

744 fmax = max(fit) 

745 c1 = self.pop[0] 

746 c2 = self.pop[0] 

747 while c1.info['confid'] == c2.info['confid']: 

748 nnf = True 

749 while nnf: 

750 t = self.rng.randint(len(self.pop)) 

751 if fit[t] > self.rng.random() * fmax: 

752 c1 = self.pop[t] 

753 nnf = False 

754 nnf = True 

755 while nnf: 

756 t = self.rng.randint(len(self.pop)) 

757 if fit[t] > self.rng.random() * fmax: 

758 c2 = self.pop[t] 

759 nnf = False 

760 

761 return (c1.copy(), c2.copy()) 

762 

763 

764class MultiObjectivePopulation(RankFitnessPopulation): 

765 """ Allows for assignment of fitness based on a set of two variables 

766 such that fitness is ranked according to a Pareto-front of 

767 non-dominated candidates. 

768 

769 Parameters 

770 ---------- 

771 abs_data: list 

772 Set of key_value_pairs in atoms object for which fitness should 

773 be assigned based on absolute value. 

774 

775 rank_data: list 

776 Set of key_value_pairs in atoms object for which data should 

777 be ranked in order to ascribe fitness. 

778 

779 variable_function: function 

780 A function that takes as input an Atoms object and returns 

781 the variable that differentiates the ranks. Only use if 

782 data is ranked. 

783 

784 exp_function: boolean 

785 If True use an exponential function for ranking the fitness. 

786 If False use the same as in Population. Default True. 

787 

788 exp_prefactor: float 

789 The prefactor used in the exponential fitness scaling function. 

790 Default 0.5 

791 

792 """ 

793 

794 def __init__(self, data_connection, population_size, 

795 variable_function=None, comparator=None, logfile=None, 

796 use_extinct=False, abs_data=None, rank_data=None, 

797 exp_function=True, exp_prefactor=0.5): 

798 # The current fitness is set at each update of the population 

799 self.current_fitness = None 

800 

801 if rank_data is None: 

802 rank_data = [] 

803 self.rank_data = rank_data 

804 

805 if abs_data is None: 

806 abs_data = [] 

807 self.abs_data = abs_data 

808 

809 RankFitnessPopulation.__init__(self, data_connection, population_size, 

810 variable_function, comparator, logfile, 

811 use_extinct, exp_function, 

812 exp_prefactor) 

813 

814 def get_nonrank(self, nrcand, key=None): 

815 """"Returns a list of fitness values.""" 

816 nrc_list = [] 

817 for nrc in nrcand: 

818 nrc_list.append(nrc.info['key_value_pairs'][key]) 

819 return nrc_list 

820 

821 def __get_fitness__(self, candidates): 

822 # There are no defaults set for the datasets to be 

823 # used in this function, as such we test that the 

824 # user has specified at least two here. 

825 msg = "This is a multi-objective fitness function" 

826 msg += " so there must be at least two datasets" 

827 msg += " stated in the rank_data and abs_data variables" 

828 assert len(self.rank_data) + len(self.abs_data) >= 2, msg 

829 

830 expf = self.exp_function 

831 

832 all_fitnesses = [] 

833 used = set() 

834 for rd in self.rank_data: 

835 used.add(rd) 

836 # Build ranked fitness based on rd 

837 all_fitnesses.append(self.get_rank(candidates, key=rd)) 

838 

839 for d in self.abs_data: 

840 if d not in used: 

841 used.add(d) 

842 # Build fitness based on d 

843 all_fitnesses.append(self.get_nonrank(candidates, key=d)) 

844 

845 # Set the initial order of the ranks, will need to 

846 # be returned in this order at the end. 

847 fordered = list(zip(range(len(all_fitnesses[0])), *all_fitnesses)) 

848 mvf_rank = -1 # Start multi variable rank at -1. 

849 rec_vrc = [] # A record of already ranked candidates. 

850 mvf_list = [] # A list for all candidate ranks. 

851 # Sort by raw_score_1 in case this is different from 

852 # the stored raw_score() variable that all_cands are 

853 # sorted by. 

854 fordered.sort(key=itemgetter(1), reverse=True) 

855 # Niche candidates with equal or better raw_score to 

856 # current candidate. 

857 for a in fordered: 

858 order, rest = a[0], a[1:] 

859 if order not in rec_vrc: 

860 pff = [] 

861 pff2 = [] 

862 rec_vrc.append(order) 

863 pff.append((order, rest)) 

864 for b in fordered: 

865 border, brest = b[0], b[1:] 

866 if border not in rec_vrc: 

867 if np.any(np.array(brest) >= np.array(rest)): 

868 pff.append((border, brest)) 

869 # Remove any candidate from pff list that is dominated 

870 # by another in the list. 

871 for na in pff: 

872 norder, nrest = na[0], na[1:] 

873 dom = False 

874 for nb in pff: 

875 nborder, nbrest = nb[0], nb[1:] 

876 if norder != nborder: 

877 if np.all(np.array(nbrest) > np.array(nrest)): 

878 dom = True 

879 if not dom: 

880 pff2.append((norder, nrest)) 

881 # Assign pareto rank from -1 to -N niches. 

882 for ffa in pff2: 

883 fforder, ffrest = ffa[0], ffa[1:] 

884 rec_vrc.append(fforder) 

885 mvf_list.append((fforder, mvf_rank, ffrest)) 

886 mvf_rank = mvf_rank - 1 

887 # The original order is reformed 

888 mvf_list.sort(key=itemgetter(0), reverse=False) 

889 rfro = np.array(list(zip(*mvf_list))[1]) 

890 

891 if not expf: 

892 rmax = max(rfro) 

893 rmin = min(rfro) 

894 T = rmin - rmax 

895 # If using obj_rank probability, must have non-zero T val. 

896 # pop_size must be greater than number of permutations. 

897 # We test for this here 

898 msg = "Equal fitness for best and worst candidate in the " 

899 msg += "population! Fitness scaling is impossible! " 

900 msg += "Try with a larger population." 

901 assert T != 0., msg 

902 return 0.5 * (1. - np.tanh(2. * (rfro - rmax) / T - 1.)) 

903 else: 

904 return self.exp_prefactor ** (-rfro - 1) 

905 

906 def __initialize_pop__(self): 

907 # Get all relaxed candidates from the database 

908 ue = self.use_extinct 

909 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue) 

910 all_cand.sort(key=get_raw_score, reverse=True) 

911 

912 if len(all_cand) > 0: 

913 fitf = self.__get_fitness__(all_cand) 

914 all_sorted = list(zip(fitf, all_cand)) 

915 all_sorted.sort(key=itemgetter(0), reverse=True) 

916 sort_cand = [] 

917 for _, t2 in all_sorted: 

918 sort_cand.append(t2) 

919 all_sorted = sort_cand 

920 

921 # Fill up the population with the self.pop_size most stable 

922 # unique candidates. 

923 i = 0 

924 while i < len(all_sorted) and len(self.pop) < self.pop_size: 

925 c = all_sorted[i] 

926 # Use variable_function to decide whether to run comparator 

927 # if the function has been defined by the user. This does not 

928 # need to be dependent on using the rank_data function. 

929 if self.vf is not None: 

930 c_vf = self.vf(c) 

931 i += 1 

932 eq = False 

933 for a in self.pop: 

934 if self.vf is not None: 

935 a_vf = self.vf(a) 

936 # Only run comparator if the variable_function 

937 # (self.vf) returns the same. If it returns something 

938 # different the candidates are inherently different. 

939 # This is done to speed up. 

940 if a_vf == c_vf: 

941 if self.comparator.looks_like(a, c): 

942 eq = True 

943 break 

944 else: 

945 if self.comparator.looks_like(a, c): 

946 eq = True 

947 break 

948 if not eq: 

949 self.pop.append(c) 

950 self.all_cand = all_cand