Coverage for /builds/kinetik161/ase/ase/ga/cutandsplicepairing.py: 90.19%

214 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-12-10 11:04 +0000

1"""Implementation of the cut-and-splice paring operator.""" 

2import numpy as np 

3 

4from ase import Atoms 

5from ase.ga.offspring_creator import OffspringCreator 

6from ase.ga.utilities import (atoms_too_close, atoms_too_close_two_sets, 

7 gather_atoms_by_tag) 

8from ase.geometry import find_mic 

9 

10 

11class Positions: 

12 """Helper object to simplify the pairing process. 

13 

14 Parameters: 

15 

16 scaled_positions: (Nx3) array 

17 Positions in scaled coordinates 

18 cop: (1x3) array 

19 Center-of-positions (also in scaled coordinates) 

20 symbols: str 

21 String with the atomic symbols 

22 distance: float 

23 Signed distance to the cutting plane 

24 origin: int (0 or 1) 

25 Determines at which side of the plane the position should be. 

26 """ 

27 

28 def __init__(self, scaled_positions, cop, symbols, distance, origin): 

29 self.scaled_positions = scaled_positions 

30 self.cop = cop 

31 self.symbols = symbols 

32 self.distance = distance 

33 self.origin = origin 

34 

35 def to_use(self): 

36 """Tells whether this position is at the right side.""" 

37 if self.distance > 0. and self.origin == 0: 

38 return True 

39 elif self.distance < 0. and self.origin == 1: 

40 return True 

41 else: 

42 return False 

43 

44 

45class CutAndSplicePairing(OffspringCreator): 

46 """The Cut and Splice operator by Deaven and Ho. 

47 

48 Creates offspring from two parent structures using 

49 a randomly generated cutting plane. 

50 

51 The parents may have different unit cells, in which 

52 case the offspring unit cell will be a random combination 

53 of the parent cells. 

54 

55 The basic implementation (for fixed unit cells) is 

56 described in: 

57 

58 :doi:`L.B. Vilhelmsen and B. Hammer, PRL, 108, 126101 (2012) 

59 <10.1103/PhysRevLett.108.126101`> 

60 

61 The extension to variable unit cells is similar to: 

62 

63 * :doi:`Glass, Oganov, Hansen, Comp. Phys. Comm. 175 (2006) 713-720 

64 <10.1016/j.cpc.2006.07.020>` 

65 

66 * :doi:`Lonie, Zurek, Comp. Phys. Comm. 182 (2011) 372-387 

67 <10.1016/j.cpc.2010.07.048>` 

68 

69 The operator can furthermore preserve molecular identity 

70 if desired (see the *use_tags* kwarg). Atoms with the same 

71 tag will then be considered as belonging to the same molecule, 

72 and their internal geometry will not be changed by the operator. 

73 

74 If use_tags is enabled, the operator will also conserve the 

75 number of molecules of each kind (in addition to conserving 

76 the overall stoichiometry). Currently, molecules are considered 

77 to be of the same kind if their chemical symbol strings are 

78 identical. In rare cases where this may not be sufficient 

79 (i.e. when desiring to keep the same ratio of isomers), the 

80 different isomers can be differentiated by giving them different 

81 elemental orderings (e.g. 'XY2' and 'Y2X'). 

82 

83 Parameters: 

84 

85 slab: Atoms object 

86 Specifies the cell vectors and periodic boundary conditions 

87 to be applied to the randomly generated structures. 

88 Any included atoms (e.g. representing an underlying slab) 

89 are copied to these new structures. 

90 

91 n_top: int 

92 The number of atoms to optimize 

93 

94 blmin: dict 

95 Dictionary with minimal interatomic distances. 

96 Note: when preserving molecular identity (see use_tags), 

97 the blmin dict will (naturally) only be applied 

98 to intermolecular distances (not the intramolecular 

99 ones). 

100 

101 number_of_variable_cell_vectors: int (default 0) 

102 The number of variable cell vectors (0, 1, 2 or 3). 

103 To keep things simple, it is the 'first' vectors which 

104 will be treated as variable, i.e. the 'a' vector in the 

105 univariate case, the 'a' and 'b' vectors in the bivariate 

106 case, etc. 

107 

108 p1: float or int between 0 and 1 

109 Probability that a parent is shifted over a random 

110 distance along the normal of the cutting plane 

111 (only operative if number_of_variable_cell_vectors > 0). 

112 

113 p2: float or int between 0 and 1 

114 Same as p1, but for shifting along the directions 

115 in the cutting plane (only operative if 

116 number_of_variable_cell_vectors > 0). 

117 

118 minfrac: float between 0 and 1, or None (default) 

119 Minimal fraction of atoms a parent must contribute 

120 to the child. If None, each parent must contribute 

121 at least one atom. 

122 

123 cellbounds: ase.ga.utilities.CellBounds instance 

124 Describing limits on the cell shape, see 

125 :class:`~ase.ga.utilities.CellBounds`. 

126 Note that it only make sense to impose conditions 

127 regarding cell vectors which have been marked as 

128 variable (see number_of_variable_cell_vectors). 

129 

130 use_tags: bool 

131 Whether to use the atomic tags to preserve 

132 molecular identity. 

133 

134 test_dist_to_slab: bool (default True) 

135 Whether to make sure that the distances between 

136 the atoms and the slab satisfy the blmin. 

137 

138 rng: Random number generator 

139 By default numpy.random. 

140 """ 

141 

142 def __init__(self, slab, n_top, blmin, number_of_variable_cell_vectors=0, 

143 p1=1, p2=0.05, minfrac=None, cellbounds=None, 

144 test_dist_to_slab=True, use_tags=False, rng=np.random, 

145 verbose=False): 

146 

147 OffspringCreator.__init__(self, verbose, rng=rng) 

148 self.slab = slab 

149 self.n_top = n_top 

150 self.blmin = blmin 

151 assert number_of_variable_cell_vectors in range(4) 

152 self.number_of_variable_cell_vectors = number_of_variable_cell_vectors 

153 self.p1 = p1 

154 self.p2 = p2 

155 self.minfrac = minfrac 

156 self.cellbounds = cellbounds 

157 self.test_dist_to_slab = test_dist_to_slab 

158 self.use_tags = use_tags 

159 

160 self.scaling_volume = None 

161 self.descriptor = 'CutAndSplicePairing' 

162 self.min_inputs = 2 

163 

164 def update_scaling_volume(self, population, w_adapt=0.5, n_adapt=0): 

165 """Updates the scaling volume that is used in the pairing 

166 

167 w_adapt: weight of the new vs the old scaling volume 

168 n_adapt: number of best candidates in the population that 

169 are used to calculate the new scaling volume 

170 """ 

171 if not n_adapt: 

172 # take best 20% of the population 

173 n_adapt = int(np.ceil(0.2 * len(population))) 

174 v_new = np.mean([a.get_volume() for a in population[:n_adapt]]) 

175 

176 if not self.scaling_volume: 

177 self.scaling_volume = v_new 

178 else: 

179 volumes = [self.scaling_volume, v_new] 

180 weights = [1 - w_adapt, w_adapt] 

181 self.scaling_volume = np.average(volumes, weights=weights) 

182 

183 def get_new_individual(self, parents): 

184 """The method called by the user that 

185 returns the paired structure.""" 

186 f, m = parents 

187 

188 indi = self.cross(f, m) 

189 desc = 'pairing: {} {}'.format(f.info['confid'], 

190 m.info['confid']) 

191 # It is ok for an operator to return None 

192 # It means that it could not make a legal offspring 

193 # within a reasonable amount of time 

194 if indi is None: 

195 return indi, desc 

196 indi = self.initialize_individual(f, indi) 

197 indi.info['data']['parents'] = [f.info['confid'], 

198 m.info['confid']] 

199 

200 return self.finalize_individual(indi), desc 

201 

202 def cross(self, a1, a2): 

203 """Crosses the two atoms objects and returns one""" 

204 

205 if len(a1) != len(self.slab) + self.n_top: 

206 raise ValueError('Wrong size of structure to optimize') 

207 if len(a1) != len(a2): 

208 raise ValueError('The two structures do not have the same length') 

209 

210 N = self.n_top 

211 

212 # Only consider the atoms to optimize 

213 a1 = a1[len(a1) - N: len(a1)] 

214 a2 = a2[len(a2) - N: len(a2)] 

215 

216 if not np.array_equal(a1.numbers, a2.numbers): 

217 err = 'Trying to pair two structures with different stoichiometry' 

218 raise ValueError(err) 

219 

220 if self.use_tags and not np.array_equal(a1.get_tags(), a2.get_tags()): 

221 err = 'Trying to pair two structures with different tags' 

222 raise ValueError(err) 

223 

224 cell1 = a1.get_cell() 

225 cell2 = a2.get_cell() 

226 for i in range(self.number_of_variable_cell_vectors, 3): 

227 err = 'Unit cells are supposed to be identical in direction %d' 

228 assert np.allclose(cell1[i], cell2[i]), (err % i, cell1, cell2) 

229 

230 invalid = True 

231 counter = 0 

232 maxcount = 1000 

233 a1_copy = a1.copy() 

234 a2_copy = a2.copy() 

235 

236 # Run until a valid pairing is made or maxcount pairings are tested. 

237 while invalid and counter < maxcount: 

238 counter += 1 

239 

240 newcell = self.generate_unit_cell(cell1, cell2) 

241 if newcell is None: 

242 # No valid unit cell could be generated. 

243 # This strongly suggests that it is near-impossible 

244 # to generate one from these parent cells and it is 

245 # better to abort now. 

246 break 

247 

248 # Choose direction of cutting plane normal 

249 if self.number_of_variable_cell_vectors == 0: 

250 # Will be generated entirely at random 

251 theta = np.pi * self.rng.random() 

252 phi = 2. * np.pi * self.rng.random() 

253 cut_n = np.array([np.cos(phi) * np.sin(theta), 

254 np.sin(phi) * np.sin(theta), np.cos(theta)]) 

255 else: 

256 # Pick one of the 'variable' cell vectors 

257 cut_n = self.rng.choice(self.number_of_variable_cell_vectors) 

258 

259 # Randomly translate parent structures 

260 for a_copy, a in zip([a1_copy, a2_copy], [a1, a2]): 

261 a_copy.set_positions(a.get_positions()) 

262 

263 cell = a_copy.get_cell() 

264 for i in range(self.number_of_variable_cell_vectors): 

265 r = self.rng.random() 

266 cond1 = i == cut_n and r < self.p1 

267 cond2 = i != cut_n and r < self.p2 

268 if cond1 or cond2: 

269 a_copy.positions += self.rng.random() * cell[i] 

270 

271 if self.use_tags: 

272 # For correct determination of the center- 

273 # of-position of the multi-atom blocks, 

274 # we need to group their constituent atoms 

275 # together 

276 gather_atoms_by_tag(a_copy) 

277 else: 

278 a_copy.wrap() 

279 

280 # Generate the cutting point in scaled coordinates 

281 cosp1 = np.average(a1_copy.get_scaled_positions(), axis=0) 

282 cosp2 = np.average(a2_copy.get_scaled_positions(), axis=0) 

283 cut_p = np.zeros((1, 3)) 

284 for i in range(3): 

285 if i < self.number_of_variable_cell_vectors: 

286 cut_p[0, i] = self.rng.random() 

287 else: 

288 cut_p[0, i] = 0.5 * (cosp1[i] + cosp2[i]) 

289 

290 # Perform the pairing: 

291 child = self._get_pairing(a1_copy, a2_copy, cut_p, cut_n, newcell) 

292 if child is None: 

293 continue 

294 

295 # Verify whether the atoms are too close or not: 

296 if atoms_too_close(child, self.blmin, use_tags=self.use_tags): 

297 continue 

298 

299 if self.test_dist_to_slab and len(self.slab) > 0: 

300 if atoms_too_close_two_sets(self.slab, child, self.blmin): 

301 continue 

302 

303 # Passed all the tests 

304 child = self.slab + child 

305 child.set_cell(newcell, scale_atoms=False) 

306 child.wrap() 

307 return child 

308 

309 return None 

310 

311 def generate_unit_cell(self, cell1, cell2, maxcount=10000): 

312 """Generates a new unit cell by a random linear combination 

313 of the parent cells. The new cell must satisfy the 

314 self.cellbounds constraints. Returns None if no such cell 

315 was generated within a given number of attempts. 

316 

317 Parameters: 

318 

319 maxcount: int 

320 The maximal number of attempts. 

321 """ 

322 # First calculate the scaling volume 

323 if not self.scaling_volume: 

324 v1 = np.abs(np.linalg.det(cell1)) 

325 v2 = np.abs(np.linalg.det(cell2)) 

326 r = self.rng.random() 

327 v_ref = r * v1 + (1 - r) * v2 

328 else: 

329 v_ref = self.scaling_volume 

330 

331 # Now the cell vectors 

332 if self.number_of_variable_cell_vectors == 0: 

333 assert np.allclose(cell1, cell2), 'Parent cells are not the same' 

334 newcell = np.copy(cell1) 

335 else: 

336 count = 0 

337 while count < maxcount: 

338 r = self.rng.random() 

339 newcell = r * cell1 + (1 - r) * cell2 

340 

341 vol = abs(np.linalg.det(newcell)) 

342 scaling = v_ref / vol 

343 scaling **= 1. / self.number_of_variable_cell_vectors 

344 newcell[:self.number_of_variable_cell_vectors] *= scaling 

345 

346 found = True 

347 if self.cellbounds is not None: 

348 found = self.cellbounds.is_within_bounds(newcell) 

349 if found: 

350 break 

351 

352 count += 1 

353 else: 

354 # Did not find acceptable cell 

355 newcell = None 

356 

357 return newcell 

358 

359 def _get_pairing(self, a1, a2, cutting_point, cutting_normal, cell): 

360 """Creates a child from two parents using the given cut. 

361 

362 Returns None if the generated structure does not contain 

363 a large enough fraction of each parent (see self.minfrac). 

364 

365 Does not check whether atoms are too close. 

366 

367 Assumes the 'slab' parts have been removed from the parent 

368 structures and that these have been checked for equal 

369 lengths, stoichiometries, and tags (if self.use_tags). 

370 

371 Parameters: 

372 

373 cutting_normal: int or (1x3) array 

374 

375 cutting_point: (1x3) array 

376 In fractional coordinates 

377 

378 cell: (3x3) array 

379 The unit cell for the child structure 

380 """ 

381 symbols = a1.get_chemical_symbols() 

382 tags = a1.get_tags() if self.use_tags else np.arange(len(a1)) 

383 

384 # Generate list of all atoms / atom groups: 

385 p1, p2, sym = [], [], [] 

386 for i in np.unique(tags): 

387 indices = np.where(tags == i)[0] 

388 s = ''.join([symbols[j] for j in indices]) 

389 sym.append(s) 

390 

391 for i, (a, p) in enumerate(zip([a1, a2], [p1, p2])): 

392 c = a.get_cell() 

393 cop = np.mean(a.positions[indices], axis=0) 

394 cut_p = np.dot(cutting_point, c) 

395 if isinstance(cutting_normal, int): 

396 vecs = [c[j] for j in range(3) if j != cutting_normal] 

397 cut_n = np.cross(vecs[0], vecs[1]) 

398 else: 

399 cut_n = np.dot(cutting_normal, c) 

400 d = np.dot(cop - cut_p, cut_n) 

401 spos = a.get_scaled_positions()[indices] 

402 scop = np.mean(spos, axis=0) 

403 p.append(Positions(spos, scop, s, d, i)) 

404 

405 all_points = p1 + p2 

406 unique_sym = np.unique(sym) 

407 types = {s: sym.count(s) for s in unique_sym} 

408 

409 # Sort these by chemical symbols: 

410 all_points.sort(key=lambda x: x.symbols, reverse=True) 

411 

412 # For each atom type make the pairing 

413 unique_sym.sort() 

414 use_total = {} 

415 for s in unique_sym: 

416 used = [] 

417 not_used = [] 

418 # The list is looked trough in 

419 # reverse order so atoms can be removed 

420 # from the list along the way. 

421 for i in reversed(range(len(all_points))): 

422 # If there are no more atoms of this type 

423 if all_points[i].symbols != s: 

424 break 

425 # Check if the atom should be included 

426 if all_points[i].to_use(): 

427 used.append(all_points.pop(i)) 

428 else: 

429 not_used.append(all_points.pop(i)) 

430 

431 assert len(used) + len(not_used) == types[s] * 2 

432 

433 # While we have too few of the given atom type 

434 while len(used) < types[s]: 

435 index = self.rng.randint(len(not_used)) 

436 used.append(not_used.pop(index)) 

437 

438 # While we have too many of the given atom type 

439 while len(used) > types[s]: 

440 # remove randomly: 

441 index = self.rng.randint(len(used)) 

442 not_used.append(used.pop(index)) 

443 

444 use_total[s] = used 

445 

446 n_tot = sum([len(ll) for ll in use_total.values()]) 

447 assert n_tot == len(sym) 

448 

449 # check if the generated structure contains 

450 # atoms from both parents: 

451 count1, count2, N = 0, 0, len(a1) 

452 for x in use_total.values(): 

453 count1 += sum([y.origin == 0 for y in x]) 

454 count2 += sum([y.origin == 1 for y in x]) 

455 

456 nmin = 1 if self.minfrac is None else int(round(self.minfrac * N)) 

457 if count1 < nmin or count2 < nmin: 

458 return None 

459 

460 # Construct the cartesian positions and reorder the atoms 

461 # to follow the original order 

462 newpos = [] 

463 pbc = a1.get_pbc() 

464 for s in sym: 

465 p = use_total[s].pop() 

466 c = a1.get_cell() if p.origin == 0 else a2.get_cell() 

467 pos = np.dot(p.scaled_positions, c) 

468 cop = np.dot(p.cop, c) 

469 vectors, lengths = find_mic(pos - cop, c, pbc) 

470 newcop = np.dot(p.cop, cell) 

471 pos = newcop + vectors 

472 for row in pos: 

473 newpos.append(row) 

474 

475 newpos = np.reshape(newpos, (N, 3)) 

476 num = a1.get_atomic_numbers() 

477 child = Atoms(numbers=num, positions=newpos, pbc=pbc, cell=cell, 

478 tags=tags) 

479 child.wrap() 

480 return child