Coverage for /builds/kinetik161/ase/ase/ga/population.py: 26.18%

1""" Implementation of a population for maintaining a GA population and

2proposing structures to pair. """

3from math import exp, sqrt, tanh

4from operator import itemgetter

6import numpy as np

8from ase.db.core import now

9from ase.ga import get_raw_score

12def count_looks_like(a, all_cand, comp):

13 """Utility method for counting occurrences."""

14 n = 0

15 for b in all_cand:

16 if a.info['confid'] == b.info['confid']:

17 continue

18 if comp.looks_like(a, b):

19 n += 1

20 return n

23class Population:

24 """Population class which maintains the current population

25 and proposes which candidates to pair together.

27 Parameters:

29 data_connection: DataConnection object

30 Bla bla bla.

32 population_size: int

33 The number of candidates in the population.

35 comparator: Comparator object

36 this will tell if two configurations are equal.

37 Default compare atoms objects directly.

39 logfile: str

40 Text file that contains information about the population

41 The format is::

43 timestamp: generation(if available): id1,id2,id3...

45 Using this file greatly speeds up convergence checks.

46 Default None meaning that no file is written.

48 use_extinct: boolean

49 Set this to True if mass extinction and the extinct key

50 are going to be used. Default is False.

52 rng: Random number generator

53 By default numpy.random.

54 """

56 def __init__(self, data_connection, population_size,

57 comparator=None, logfile=None, use_extinct=False,

58 rng=np.random):

59 self.dc = data_connection

60 self.pop_size = population_size

61 if comparator is None:

62 from ase.ga.standard_comparators import AtomsComparator

63 comparator = AtomsComparator()

64 self.comparator = comparator

65 self.logfile = logfile

66 self.use_extinct = use_extinct

67 self.rng = rng

68 self.pop = []

69 self.pairs = None

70 self.all_cand = None

71 self.__initialize_pop__()

73 def __initialize_pop__(self):

74 """ Private method that initializes the population when

75 the population is created. """

77 # Get all relaxed candidates from the database

78 ue = self.use_extinct

79 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)

80 all_cand.sort(key=lambda x: x.info['key_value_pairs']['raw_score'],

81 reverse=True)

82 # all_cand.sort(key=lambda x: x.get_potential_energy())

84 # Fill up the population with the self.pop_size most stable

85 # unique candidates.

86 i = 0

87 while i < len(all_cand) and len(self.pop) < self.pop_size:

88 c = all_cand[i]

89 i += 1

90 eq = False

91 for a in self.pop:

92 if self.comparator.looks_like(a, c):

93 eq = True

94 break

95 if not eq:

96 self.pop.append(c)

98 for a in self.pop:

99 a.info['looks_like'] = count_looks_like(a, all_cand,

100 self.comparator)

101

102 self.all_cand = all_cand

103 self.__calc_participation__()

104

105 def __calc_participation__(self):

106 """ Determines, from the database, how many times each

107 candidate has been used to generate new candidates. """

108 (participation, pairs) = self.dc.get_participation_in_pairing()

109 for a in self.pop:

110 if a.info['confid'] in participation.keys():

111 a.info['n_paired'] = participation[a.info['confid']]

112 else:

113 a.info['n_paired'] = 0

114 self.pairs = pairs

115

116 def update(self, new_cand=None):

117 """ New candidates can be added to the database

118 after the population object has been created.

119 This method extracts these new candidates from the

120 database and includes them in the population. """

121

122 if len(self.pop) == 0:

123 self.__initialize_pop__()

124

125 if new_cand is None:

126 ue = self.use_extinct

127 new_cand = self.dc.get_all_relaxed_candidates(only_new=True,

128 use_extinct=ue)

129

130 for a in new_cand:

131 self.__add_candidate__(a)

132 self.all_cand.append(a)

133 self.__calc_participation__()

134 self._write_log()

135

136 def get_current_population(self):

137 """ Returns a copy of the current population. """

138 self.update()

139 return [a.copy() for a in self.pop]

140

141 def get_population_after_generation(self, gen):

142 """ Returns a copy of the population as it where

143 after generation gen"""

144 if self.logfile is not None:

145 fd = open(self.logfile)

146 gens = {}

147 for line in fd:

148 _, no, popul = line.split(':')

149 gens[int(no)] = [int(i) for i in popul.split(',')]

150 fd.close()

151 return [c.copy() for c in self.all_cand[::-1]

152 if c.info['relax_id'] in gens[gen]]

153

154 all_candidates = [c for c in self.all_cand

155 if c.info['key_value_pairs']['generation'] <= gen]

156 cands = [all_candidates[0]]

157 for b in all_candidates:

158 if b not in cands:

159 for a in cands:

160 if self.comparator.looks_like(a, b):

161 break

162 else:

163 cands.append(b)

164 pop = cands[:self.pop_size]

165 return [a.copy() for a in pop]

166

167 def __add_candidate__(self, a):

168 """ Adds a single candidate to the population. """

169

170 # check if the structure is too low in raw score

171 raw_score_a = get_raw_score(a)

172 raw_score_worst = get_raw_score(self.pop[-1])

173 if raw_score_a < raw_score_worst \

174 and len(self.pop) == self.pop_size:

175 return

176

177 # check if the new candidate should

178 # replace a similar structure in the population

179 for (i, b) in enumerate(self.pop):

180 if self.comparator.looks_like(a, b):

181 if get_raw_score(b) < raw_score_a:

182 del self.pop[i]

183 a.info['looks_like'] = count_looks_like(a,

184 self.all_cand,

185 self.comparator)

186 self.pop.append(a)

187 self.pop.sort(key=get_raw_score,

188 reverse=True)

189 return

190

191 # the new candidate needs to be added, so remove the highest

192 # energy one

193 if len(self.pop) == self.pop_size:

194 del self.pop[-1]

195

196 # add the new candidate

197 a.info['looks_like'] = count_looks_like(a,

198 self.all_cand,

199 self.comparator)

200 self.pop.append(a)

201 self.pop.sort(key=get_raw_score, reverse=True)

202

203 def __get_fitness__(self, indecies, with_history=True):

204 """Calculates the fitness using the formula from

205 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816

206

207 Sign change on the fitness compared to the formulation in the

208 abovementioned paper due to maximizing raw_score instead of

209 minimizing energy. (Set raw_score=-energy to optimize the energy)

210 """

211

212 scores = [get_raw_score(x) for x in self.pop]

213 min_s = min(scores)

214 max_s = max(scores)

215 T = min_s - max_s

216 if isinstance(indecies, int):

217 indecies = [indecies]

218

219 f = [0.5 * (1. - tanh(2. * (scores[i] - max_s) / T - 1.))

220 for i in indecies]

221 if with_history:

222 M = [float(self.pop[i].info['n_paired']) for i in indecies]

223 L = [float(self.pop[i].info['looks_like']) for i in indecies]

224 f = [f[i] * 1. / sqrt(1. + M[i]) * 1. / sqrt(1. + L[i])

225 for i in range(len(f))]

226 return f

227

228 def get_two_candidates(self, with_history=True):

229 """ Returns two candidates for pairing employing the

230 fitness criteria from

231 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816

232 and the roulete wheel selection scheme described in

233 R.L. Johnston Dalton Transactions,

234 Vol. 22, No. 22. (2003), pp. 4193-4207

235 """

236

237 if len(self.pop) < 2:

238 self.update()

239

240 if len(self.pop) < 2:

241 return None

242

243 fit = self.__get_fitness__(range(len(self.pop)), with_history)

244 fmax = max(fit)

245 c1 = self.pop[0]

246 c2 = self.pop[0]

247 used_before = False

248 while c1.info['confid'] == c2.info['confid'] and not used_before:

249 nnf = True

250 while nnf:

251 t = self.rng.randint(len(self.pop))

252 if fit[t] > self.rng.random() * fmax:

253 c1 = self.pop[t]

254 nnf = False

255 nnf = True

256 while nnf:

257 t = self.rng.randint(len(self.pop))

258 if fit[t] > self.rng.random() * fmax:

259 c2 = self.pop[t]

260 nnf = False

261

262 c1id = c1.info['confid']

263 c2id = c2.info['confid']

264 used_before = (min([c1id, c2id]), max([c1id, c2id])) in self.pairs

265 return (c1.copy(), c2.copy())

266

267 def get_one_candidate(self, with_history=True):

268 """Returns one candidate for mutation employing the

269 fitness criteria from

270 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816

271 and the roulete wheel selection scheme described in

272 R.L. Johnston Dalton Transactions,

273 Vol. 22, No. 22. (2003), pp. 4193-4207

274 """

275 if len(self.pop) < 1:

276 self.update()

277

278 if len(self.pop) < 1:

279 return None

280

281 fit = self.__get_fitness__(range(len(self.pop)), with_history)

282 fmax = max(fit)

283 nnf = True

284 while nnf:

285 t = self.rng.randint(len(self.pop))

286 if fit[t] > self.rng.random() * fmax:

287 c1 = self.pop[t]

288 nnf = False

289

290 return c1.copy()

291

292 def _write_log(self):

293 """Writes the population to a logfile.

294

295 The format is::

296

297 timestamp: generation(if available): id1,id2,id3..."""

298 if self.logfile is not None:

299 ids = [str(a.info['relax_id']) for a in self.pop]

300 if ids != []:

301 try:

302 gen_nums = [c.info['key_value_pairs']['generation']

303 for c in self.all_cand]

304 max_gen = max(gen_nums)

305 except KeyError:

306 max_gen = ' '

307 fd = open(self.logfile, 'a')

308 fd.write('{time}: {gen}: {pop}\n'.format(time=now(),

309 pop=','.join(ids),

310 gen=max_gen))

311 fd.close()

312

313 def is_uniform(self, func, min_std, pop=None):

314 """Tests whether the current population is uniform or diverse.

315 Returns True if uniform, False otherwise.

316

317 Parameters:

318

319 func: function

320 that takes one argument an atoms object and returns a value that

321 will be used for testing against the rest of the population.

322

323 min_std: int or float

324 The minimum standard deviation, if the population has a lower

325 std dev it is uniform.

326

327 pop: list, optional

328 use this list of Atoms objects instead of the current population.

329 """

330 if pop is None:

331 pop = self.pop

332 vals = [func(a) for a in pop]

333 stddev = np.std(vals)

334 if stddev < min_std:

335 return True

336 return False

337

338 def mass_extinction(self, ids):

339 """Kills every candidate in the database with gaid in the

340 supplied list of ids. Typically used on the main part of the current

341 population if the diversity is to small.

342

343 Parameters:

344

345 ids: list

346 list of ids of candidates to be killed.

347

348 """

349 for confid in ids:

350 self.dc.kill_candidate(confid)

351 self.pop = []

352

353

354class RandomPopulation(Population):

355 def __init__(self, data_connection, population_size,

356 comparator=None, logfile=None, exclude_used_pairs=False,

357 bad_candidates=0, use_extinct=False):

358 self.exclude_used_pairs = exclude_used_pairs

359 self.bad_candidates = bad_candidates

360 Population.__init__(self, data_connection, population_size,

361 comparator, logfile, use_extinct)

362

363 def __initialize_pop__(self):

364 """ Private method that initializes the population when

365 the population is created. """

366

367 # Get all relaxed candidates from the database

368 ue = self.use_extinct

369 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)

370 all_cand.sort(key=get_raw_score, reverse=True)

371 # all_cand.sort(key=lambda x: x.get_potential_energy())

372

373 if len(all_cand) > 0:

374 # Fill up the population with the self.pop_size most stable

375 # unique candidates.

376 ratings = []

377 best_raw = get_raw_score(all_cand[0])

378 i = 0

379 while i < len(all_cand):

380 c = all_cand[i]

381 i += 1

382 eq = False

383 for a in self.pop:

384 if self.comparator.looks_like(a, c):

385 eq = True

386 break

387 if not eq:

388 if len(self.pop) < self.pop_size - self.bad_candidates:

389 self.pop.append(c)

390 else:

391 exp_fact = exp(get_raw_score(c) / best_raw)

392 ratings.append([c, (exp_fact - 1) * self.rng.random()])

393 ratings.sort(key=itemgetter(1), reverse=True)

394

395 for i in range(self.bad_candidates):

396 self.pop.append(ratings[i][0])

397

398 for a in self.pop:

399 a.info['looks_like'] = count_looks_like(a, all_cand,

400 self.comparator)

401

402 self.all_cand = all_cand

403 self.__calc_participation__()

404

405 def update(self):

406 """ The update method in Population will add to the end of

407 the population, that can't be used here since we might have

408 bad candidates that need to stay in the population, therefore

409 just recalc the population every time. """

410

411 self.pop = []

412 self.__initialize_pop__()

413

414 self._write_log()

415

416 def get_one_candidate(self):

417 """Returns one candidates at random."""

418 if len(self.pop) < 1:

419 self.update()

420

421 if len(self.pop) < 1:

422 return None

423

424 t = self.rng.randint(len(self.pop))

425 c = self.pop[t]

426

427 return c.copy()

428

429 def get_two_candidates(self):

430 """Returns two candidates at random."""

431 if len(self.pop) < 2:

432 self.update()

433

434 if len(self.pop) < 2:

435 return None

436

437 c1 = self.pop[0]

438 c2 = self.pop[0]

439 used_before = False

440 while c1.info['confid'] == c2.info['confid'] and not used_before:

441 t = self.rng.randint(len(self.pop))

442 c1 = self.pop[t]

443 t = self.rng.randint(len(self.pop))

444 c2 = self.pop[t]

445

446 c1id = c1.info['confid']

447 c2id = c2.info['confid']

448 used_before = (tuple(sorted([c1id, c2id])) in self.pairs and

449 self.exclude_used_pairs)

450 return (c1.copy(), c2.copy())

451

452

453class FitnessSharingPopulation(Population):

454 """ Fitness sharing population that penalizes structures if they are

455 too similar. This is determined by a distance measure

456

457 Parameters:

458

459 comp_key: string

460 Key where the distance measure can be found in the

461 atoms.info['key_value_pairs'] dictionary.

462

463 threshold: float or int

464 Value above which no penalization of the fitness takes place

465

466 alpha_sh: float or int

467 Determines the shape of the sharing function.

468 Default is 1, which gives a linear sharing function.

469

470 """

471

472 def __init__(self, data_connection, population_size,

473 comp_key, threshold, alpha_sh=1.,

474 comparator=None, logfile=None, use_extinct=False):

475 self.comp_key = comp_key

476 self.dt = threshold # dissimilarity threshold

477 self.alpha_sh = alpha_sh

478 self.fit_scaling = 1.

479

480 self.sh_cache = {}

481

482 Population.__init__(self, data_connection, population_size,

483 comparator, logfile, use_extinct)

484

485 def __get_fitness__(self, candidates):

486 """Input should be sorted according to raw_score."""

487 max_s = get_raw_score(candidates[0])

488 min_s = get_raw_score(candidates[-1])

489 T = min_s - max_s

490

491 shared_fit = []

492 for c in candidates:

493 sc = get_raw_score(c)

494 obj_fit = 0.5 * (1. - tanh(2. * (sc - max_s) / T - 1.))

495 m = 1.

496 ck = c.info['key_value_pairs'][self.comp_key]

497 for other in candidates:

498 if other != c:

499 name = tuple(sorted([c.info['confid'],

500 other.info['confid']]))

501 if name not in self.sh_cache:

502 ok = other.info['key_value_pairs'][self.comp_key]

503 d = abs(ck - ok)

504 if d < self.dt:

505 v = 1 - (d / self.dt)**self.alpha_sh

506 self.sh_cache[name] = v

507 else:

508 self.sh_cache[name] = 0

509 m += self.sh_cache[name]

510

511 shf = (obj_fit ** self.fit_scaling) / m

512 shared_fit.append(shf)

513 return shared_fit

514

515 def update(self):

516 """ The update method in Population will add to the end of

517 the population, that can't be used here since the shared fitness

518 will change for all candidates when new are added, therefore

519 just recalc the population every time. """

520

521 self.pop = []

522 self.__initialize_pop__()

523

524 self._write_log()

525

526 def __initialize_pop__(self):

527 # Get all relaxed candidates from the database

528 ue = self.use_extinct

529 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)

530 all_cand.sort(key=get_raw_score, reverse=True)

531

532 if len(all_cand) > 0:

533 shared_fit = self.__get_fitness__(all_cand)

534 all_sorted = list(zip(*sorted(zip(shared_fit, all_cand),

535 reverse=True)))[1]

536

537 # Fill up the population with the self.pop_size most stable

538 # unique candidates.

539 i = 0

540 while i < len(all_sorted) and len(self.pop) < self.pop_size:

541 c = all_sorted[i]

542 i += 1

543 eq = False

544 for a in self.pop:

545 if self.comparator.looks_like(a, c):

546 eq = True

547 break

548 if not eq:

549 self.pop.append(c)

550

551 for a in self.pop:

552 a.info['looks_like'] = count_looks_like(a, all_cand,

553 self.comparator)

554 self.all_cand = all_cand

555

556 def get_two_candidates(self):

557 """ Returns two candidates for pairing employing the

558 fitness criteria from

559 L.B. Vilhelmsen et al., JACS, 2012, 134 (30), pp 12807-12816

560 and the roulete wheel selection scheme described in

561 R.L. Johnston Dalton Transactions,

562 Vol. 22, No. 22. (2003), pp. 4193-4207

563 """

564

565 if len(self.pop) < 2:

566 self.update()

567

568 if len(self.pop) < 2:

569 return None

570

571 fit = self.__get_fitness__(self.pop)

572 fmax = max(fit)

573 c1 = self.pop[0]

574 c2 = self.pop[0]

575 while c1.info['confid'] == c2.info['confid']:

576 nnf = True

577 while nnf:

578 t = self.rng.randint(len(self.pop))

579 if fit[t] > self.rng.random() * fmax:

580 c1 = self.pop[t]

581 nnf = False

582 nnf = True

583 while nnf:

584 t = self.rng.randint(len(self.pop))

585 if fit[t] > self.rng.random() * fmax:

586 c2 = self.pop[t]

587 nnf = False

588

589 return (c1.copy(), c2.copy())

590

591

592class RankFitnessPopulation(Population):

593 """ Ranks the fitness relative to set variable to flatten the surface

594 in a certain direction such that mating across variable is equally

595 likely irrespective of raw_score.

596

597 Parameters:

598

599 variable_function: function

600 A function that takes as input an Atoms object and returns

601 the variable that differentiates the ranks.

602

603 exp_function: boolean

604 If True use an exponential function for ranking the fitness.

605 If False use the same as in Population. Default True.

606

607 exp_prefactor: float

608 The prefactor used in the exponential fitness scaling function.

609 Default 0.5

610 """

611

612 def __init__(self, data_connection, population_size, variable_function,

613 comparator=None, logfile=None, use_extinct=False,

614 exp_function=True, exp_prefactor=0.5):

615 self.exp_function = exp_function

616 self.exp_prefactor = exp_prefactor

617 self.vf = variable_function

618 # The current fitness is set at each update of the population

619 self.current_fitness = None

620

621 Population.__init__(self, data_connection, population_size,

622 comparator, logfile, use_extinct)

623

624 def get_rank(self, rcand, key=None):

625 # Set the initial order of the candidates, will need to

626 # be returned in this order at the end of ranking.

627 ordered = list(zip(range(len(rcand)), rcand))

628

629 # Niche and rank candidates.

630 rec_nic = []

631 rank_fit = []

632 for o, c in ordered:

633 if o not in rec_nic:

634 ntr = []

635 ce1 = self.vf(c)

636 rec_nic.append(o)

637 ntr.append([o, c])

638 for oother, cother in ordered:

639 if oother not in rec_nic:

640 ce2 = self.vf(cother)

641 if ce1 == ce2:

642 # put the now processed in oother

643 # in rec_nic as well

644 rec_nic.append(oother)

645 ntr.append([oother, cother])

646 # Each niche is sorted according to raw_score and

647 # assigned a fitness according to the ranking of

648 # the candidates

649 ntr.sort(key=lambda x: x[1].info['key_value_pairs'][key],

650 reverse=True)

651 start_rank = -1

652 cor = 0

653 for on, cn in ntr:

654 rank = start_rank - cor

655 rank_fit.append([on, cn, rank])

656 cor += 1

657 # The original order is reformed

658 rank_fit.sort(key=itemgetter(0), reverse=False)

659 return np.array(list(zip(*rank_fit))[2])

660

661 def __get_fitness__(self, candidates):

662 expf = self.exp_function

663 rfit = self.get_rank(candidates, key='raw_score')

664

665 if not expf:

666 rmax = max(rfit)

667 rmin = min(rfit)

668 T = rmin - rmax

669 # If using obj_rank probability, must have non-zero T val.

670 # pop_size must be greater than number of permutations.

671 # We test for this here

672 msg = "Equal fitness for best and worst candidate in the "

673 msg += "population! Fitness scaling is impossible! "

674 msg += "Try with a larger population."

675 assert T != 0., msg

676 return 0.5 * (1. - np.tanh(2. * (rfit - rmax) / T - 1.))

677 else:

678 return self.exp_prefactor ** (-rfit - 1)

679

680 def update(self):

681 """ The update method in Population will add to the end of

682 the population, that can't be used here since the fitness

683 will potentially change for all candidates when new are added,

684 therefore just recalc the population every time. """

685

686 self.pop = []

687 self.__initialize_pop__()

688 self.current_fitness = self.__get_fitness__(self.pop)

689

690 self._write_log()

691

692 def __initialize_pop__(self):

693 # Get all relaxed candidates from the database

694 ue = self.use_extinct

695 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)

696 all_cand.sort(key=get_raw_score, reverse=True)

697

698 if len(all_cand) > 0:

699 fitf = self.__get_fitness__(all_cand)

700 all_sorted = list(zip(fitf, all_cand))

701 all_sorted.sort(key=itemgetter(0), reverse=True)

702 sort_cand = []

703 for _, t2 in all_sorted:

704 sort_cand.append(t2)

705 all_sorted = sort_cand

706

707 # Fill up the population with the self.pop_size most stable

708 # unique candidates.

709 i = 0

710 while i < len(all_sorted) and len(self.pop) < self.pop_size:

711 c = all_sorted[i]

712 c_vf = self.vf(c)

713 i += 1

714 eq = False

715 for a in self.pop:

716 a_vf = self.vf(a)

717 # Only run comparator if the variable_function (self.vf)

718 # returns the same. If it returns something different the

719 # candidates are inherently different.

720 # This is done to speed up.

721 if a_vf == c_vf:

722 if self.comparator.looks_like(a, c):

723 eq = True

724 break

725 if not eq:

726 self.pop.append(c)

727 self.all_cand = all_cand

728

729 def get_two_candidates(self):

730 """ Returns two candidates for pairing employing the

731 roulete wheel selection scheme described in

732 R.L. Johnston Dalton Transactions,

733 Vol. 22, No. 22. (2003), pp. 4193-4207

734 """

735

736 if len(self.pop) < 2:

737 self.update()

738

739 if len(self.pop) < 2:

740 return None

741

742 # Use saved fitness

743 fit = self.current_fitness

744 fmax = max(fit)

745 c1 = self.pop[0]

746 c2 = self.pop[0]

747 while c1.info['confid'] == c2.info['confid']:

748 nnf = True

749 while nnf:

750 t = self.rng.randint(len(self.pop))

751 if fit[t] > self.rng.random() * fmax:

752 c1 = self.pop[t]

753 nnf = False

754 nnf = True

755 while nnf:

756 t = self.rng.randint(len(self.pop))

757 if fit[t] > self.rng.random() * fmax:

758 c2 = self.pop[t]

759 nnf = False

760

761 return (c1.copy(), c2.copy())

762

763

764class MultiObjectivePopulation(RankFitnessPopulation):

765 """ Allows for assignment of fitness based on a set of two variables

766 such that fitness is ranked according to a Pareto-front of

767 non-dominated candidates.

768

769 Parameters

770 ----------

771 abs_data: list

772 Set of key_value_pairs in atoms object for which fitness should

773 be assigned based on absolute value.

774

775 rank_data: list

776 Set of key_value_pairs in atoms object for which data should

777 be ranked in order to ascribe fitness.

778

779 variable_function: function

780 A function that takes as input an Atoms object and returns

781 the variable that differentiates the ranks. Only use if

782 data is ranked.

783

784 exp_function: boolean

785 If True use an exponential function for ranking the fitness.

786 If False use the same as in Population. Default True.

787

788 exp_prefactor: float

789 The prefactor used in the exponential fitness scaling function.

790 Default 0.5

791

792 """

793

794 def __init__(self, data_connection, population_size,

795 variable_function=None, comparator=None, logfile=None,

796 use_extinct=False, abs_data=None, rank_data=None,

797 exp_function=True, exp_prefactor=0.5):

798 # The current fitness is set at each update of the population

799 self.current_fitness = None

800

801 if rank_data is None:

802 rank_data = []

803 self.rank_data = rank_data

804

805 if abs_data is None:

806 abs_data = []

807 self.abs_data = abs_data

808

809 RankFitnessPopulation.__init__(self, data_connection, population_size,

810 variable_function, comparator, logfile,

811 use_extinct, exp_function,

812 exp_prefactor)

813

814 def get_nonrank(self, nrcand, key=None):

815 """"Returns a list of fitness values."""

816 nrc_list = []

817 for nrc in nrcand:

818 nrc_list.append(nrc.info['key_value_pairs'][key])

819 return nrc_list

820

821 def __get_fitness__(self, candidates):

822 # There are no defaults set for the datasets to be

823 # used in this function, as such we test that the

824 # user has specified at least two here.

825 msg = "This is a multi-objective fitness function"

826 msg += " so there must be at least two datasets"

827 msg += " stated in the rank_data and abs_data variables"

828 assert len(self.rank_data) + len(self.abs_data) >= 2, msg

829

830 expf = self.exp_function

831

832 all_fitnesses = []

833 used = set()

834 for rd in self.rank_data:

835 used.add(rd)

836 # Build ranked fitness based on rd

837 all_fitnesses.append(self.get_rank(candidates, key=rd))

838

839 for d in self.abs_data:

840 if d not in used:

841 used.add(d)

842 # Build fitness based on d

843 all_fitnesses.append(self.get_nonrank(candidates, key=d))

844

845 # Set the initial order of the ranks, will need to

846 # be returned in this order at the end.

847 fordered = list(zip(range(len(all_fitnesses[0])), *all_fitnesses))

848 mvf_rank = -1 # Start multi variable rank at -1.

849 rec_vrc = [] # A record of already ranked candidates.

850 mvf_list = [] # A list for all candidate ranks.

851 # Sort by raw_score_1 in case this is different from

852 # the stored raw_score() variable that all_cands are

853 # sorted by.

854 fordered.sort(key=itemgetter(1), reverse=True)

855 # Niche candidates with equal or better raw_score to

856 # current candidate.

857 for a in fordered:

858 order, rest = a[0], a[1:]

859 if order not in rec_vrc:

860 pff = []

861 pff2 = []

862 rec_vrc.append(order)

863 pff.append((order, rest))

864 for b in fordered:

865 border, brest = b[0], b[1:]

866 if border not in rec_vrc:

867 if np.any(np.array(brest) >= np.array(rest)):

868 pff.append((border, brest))

869 # Remove any candidate from pff list that is dominated

870 # by another in the list.

871 for na in pff:

872 norder, nrest = na[0], na[1:]

873 dom = False

874 for nb in pff:

875 nborder, nbrest = nb[0], nb[1:]

876 if norder != nborder:

877 if np.all(np.array(nbrest) > np.array(nrest)):

878 dom = True

879 if not dom:

880 pff2.append((norder, nrest))

881 # Assign pareto rank from -1 to -N niches.

882 for ffa in pff2:

883 fforder, ffrest = ffa[0], ffa[1:]

884 rec_vrc.append(fforder)

885 mvf_list.append((fforder, mvf_rank, ffrest))

886 mvf_rank = mvf_rank - 1

887 # The original order is reformed

888 mvf_list.sort(key=itemgetter(0), reverse=False)

889 rfro = np.array(list(zip(*mvf_list))[1])

890

891 if not expf:

892 rmax = max(rfro)

893 rmin = min(rfro)

894 T = rmin - rmax

895 # If using obj_rank probability, must have non-zero T val.

896 # pop_size must be greater than number of permutations.

897 # We test for this here

898 msg = "Equal fitness for best and worst candidate in the "

899 msg += "population! Fitness scaling is impossible! "

900 msg += "Try with a larger population."

901 assert T != 0., msg

902 return 0.5 * (1. - np.tanh(2. * (rfro - rmax) / T - 1.))

903 else:

904 return self.exp_prefactor ** (-rfro - 1)

905

906 def __initialize_pop__(self):

907 # Get all relaxed candidates from the database

908 ue = self.use_extinct

909 all_cand = self.dc.get_all_relaxed_candidates(use_extinct=ue)

910 all_cand.sort(key=get_raw_score, reverse=True)

911

912 if len(all_cand) > 0:

913 fitf = self.__get_fitness__(all_cand)

914 all_sorted = list(zip(fitf, all_cand))

915 all_sorted.sort(key=itemgetter(0), reverse=True)

916 sort_cand = []

917 for _, t2 in all_sorted:

918 sort_cand.append(t2)

919 all_sorted = sort_cand

920

921 # Fill up the population with the self.pop_size most stable

922 # unique candidates.

923 i = 0

924 while i < len(all_sorted) and len(self.pop) < self.pop_size:

925 c = all_sorted[i]

926 # Use variable_function to decide whether to run comparator

927 # if the function has been defined by the user. This does not

928 # need to be dependent on using the rank_data function.

929 if self.vf is not None:

930 c_vf = self.vf(c)

931 i += 1

932 eq = False

933 for a in self.pop:

934 if self.vf is not None:

935 a_vf = self.vf(a)

936 # Only run comparator if the variable_function

937 # (self.vf) returns the same. If it returns something

938 # different the candidates are inherently different.

939 # This is done to speed up.

940 if a_vf == c_vf:

941 if self.comparator.looks_like(a, c):

942 eq = True

943 break

944 else:

945 if self.comparator.looks_like(a, c):

946 eq = True

947 break

948 if not eq:

949 self.pop.append(c)

950 self.all_cand = all_cand