Coverage for /builds/kinetik161/ase/ase/ga/startgenerator.py: 95.17%

207 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-12-10 11:04 +0000

1"""Tools for generating new random starting candidates.""" 

2import numpy as np 

3 

4from ase import Atoms 

5from ase.build import molecule 

6from ase.data import atomic_numbers 

7from ase.ga.utilities import (atoms_too_close, atoms_too_close_two_sets, 

8 closest_distances_generator) 

9 

10 

11class StartGenerator: 

12 """Class for generating random starting candidates. 

13 

14 Its basic task consists of randomly placing atoms or 

15 molecules within a predescribed box, while respecting 

16 certain minimal interatomic distances. 

17 

18 Depending on the problem at hand, certain box vectors 

19 may not be known or chosen beforehand, and hence also 

20 need to be generated at random. Common cases include 

21 bulk crystals, films and chains, with respectively 

22 3, 2 and 1 unknown cell vectors. 

23 

24 Parameters: 

25 

26 slab: Atoms object 

27 Specifies the cell vectors and periodic boundary conditions 

28 to be applied to the randomly generated structures. 

29 Any included atoms (e.g. representing an underlying slab) 

30 are copied to these new structures. 

31 Variable cell vectors (see number_of_variable_cell_vectors) 

32 will be ignored because these will be generated at random. 

33 

34 blocks: list 

35 List of building units for the structure. Each item can be: 

36 

37 * an integer: representing a single atom by its atomic number, 

38 * a string: for a single atom (a chemical symbol) or a 

39 molecule (name recognized by ase.build.molecule), 

40 * an Atoms object, 

41 * an (A, B) tuple or list where A is any of the above 

42 and B is the number of A units to include. 

43 

44 A few examples: 

45 

46 >>> blocks = ['Ti'] * 4 + ['O'] * 8 

47 >>> blocks = [('Ti', 4), ('O', 8)] 

48 >>> blocks = [('CO2', 3)] # 3 CO2 molecules 

49 >>> co = Atoms('CO', positions=[[0, 0, 0], [1.4, 0, 0]]) 

50 >>> blocks = [(co, 3)] 

51 

52 Each individual block (single atom or molecule) in the 

53 randomly generated candidates is given a unique integer 

54 tag. These can be used to preserve the molecular identity 

55 of these subunits. 

56 

57 blmin: dict or float 

58 Dictionary with minimal interatomic distances. 

59 If a number is provided instead, the dictionary will 

60 be generated with this ratio of covalent bond radii. 

61 Note: when preserving molecular identity (see use_tags), 

62 the blmin dict will (naturally) only be applied 

63 to intermolecular distances (not the intramolecular 

64 ones). 

65 

66 number_of_variable_cell_vectors: int (default 0) 

67 The number of variable cell vectors (0, 1, 2 or 3). 

68 To keep things simple, it is the 'first' vectors which 

69 will be treated as variable, i.e. the 'a' vector in the 

70 univariate case, the 'a' and 'b' vectors in the bivariate 

71 case, etc. 

72 

73 box_to_place_in: [list, list of lists] (default None) 

74 The box in which the atoms can be placed. 

75 The default (None) means the box is equal to the 

76 entire unit cell of the 'slab' object. 

77 In many cases, however, smaller boxes are desired 

78 (e.g. for adsorbates on a slab surface or for isolated 

79 clusters). Then, box_to_place_in can be set as 

80 [p0, [v1, v2, v3]] with positions being generated as 

81 p0 + r1 * v1 + r2 * v2 + r3 + v3. 

82 In case of one or more variable cell vectors, 

83 the corresponding items in p0/v1/v2/v3 will be ignored. 

84 

85 box_volume: int or float or None (default) 

86 Initial guess for the box volume in cubic Angstrom 

87 (used in generating the variable cell vectors). 

88 Typical values in the solid state are 8-12 A^3 per atom. 

89 If there are no variable cell vectors, the default None 

90 is required (box volume equal to the box_to_place_in 

91 volume). 

92 

93 splits: dict or None 

94 Splitting scheme for increasing the translational symmetry 

95 in the random candidates, based on: 

96 

97 * `Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-32`__ 

98 

99 __ http://dx.doi.org/10.1016/j.cpc.2010.06.007 

100 

101 This should be a dict specifying the relative probabilities 

102 for each split, written as tuples. For example, 

103 

104 >>> splits = {(2,): 3, (1,): 1} 

105 

106 This means that, for each structure, either a splitting 

107 factor of 2 is applied to one randomly chosen axis, 

108 or a splitting factor of 1 is applied (i.e., no splitting). 

109 The probability ratio of the two scenararios will be 3:1, 

110 i.e. 75% chance for the former and 25% chance for the latter 

111 splitting scheme. Only the directions in which the 'slab' 

112 object is periodic are eligible for splitting. 

113 

114 To e.g. always apply splitting factors of 2 and 3 along two 

115 randomly chosen axes: 

116 

117 >>> splits = {(2, 3): 1} 

118 

119 By default, no splitting is applied (splits = None = {(1,): 1}). 

120 

121 cellbounds: ase.ga.utilities.CellBounds instance 

122 Describing limits on the cell shape, see 

123 :class:`~ase.ga.utilities.CellBounds`. 

124 Note that it only make sense to impose conditions 

125 regarding cell vectors which have been marked as 

126 variable (see number_of_variable_cell_vectors). 

127 

128 test_dist_to_slab: bool (default True) 

129 Whether to make sure that the distances between 

130 the atoms and the slab satisfy the blmin. 

131 

132 test_too_far: bool (default True) 

133 Whether to also make sure that there are no isolated 

134 atoms or molecules with nearest-neighbour bond lengths 

135 larger than 2x the value in the blmin dict. 

136 

137 rng: Random number generator 

138 By default numpy.random. 

139 """ 

140 

141 def __init__(self, slab, blocks, blmin, number_of_variable_cell_vectors=0, 

142 box_to_place_in=None, box_volume=None, splits=None, 

143 cellbounds=None, test_dist_to_slab=True, test_too_far=True, 

144 rng=np.random): 

145 

146 self.slab = slab 

147 

148 self.blocks = [] 

149 for item in blocks: 

150 if isinstance(item, (tuple, list)): 

151 assert len(item) == 2, 'Item length %d != 2' % len(item) 

152 block, count = item 

153 else: 

154 block, count = item, 1 

155 

156 # Convert block into Atoms object 

157 if isinstance(block, Atoms): 

158 pass 

159 elif block in atomic_numbers: 

160 block = Atoms(block) 

161 elif isinstance(block, str): 

162 block = molecule(block) 

163 elif block in atomic_numbers.values(): 

164 block = Atoms(numbers=[block]) 

165 else: 

166 raise ValueError('Cannot parse this block:', block) 

167 

168 # Add to self.blocks, taking into account that 

169 # we want to group the same blocks together. 

170 # This is important for the cell splitting. 

171 for i, (b, c) in enumerate(self.blocks): 

172 if block == b: 

173 self.blocks[i][1] += count 

174 break 

175 else: 

176 self.blocks.append([block, count]) 

177 

178 if isinstance(blmin, dict): 

179 self.blmin = blmin 

180 else: 

181 numbers = np.unique([b.get_atomic_numbers() for b in self.blocks]) 

182 self.blmin = closest_distances_generator( 

183 numbers, 

184 ratio_of_covalent_radii=blmin) 

185 

186 self.number_of_variable_cell_vectors = number_of_variable_cell_vectors 

187 assert self.number_of_variable_cell_vectors in range(4) 

188 if len(self.slab) > 0: 

189 msg = 'Including atoms in the slab only makes sense' 

190 msg += ' if there are no variable unit cell vectors' 

191 assert self.number_of_variable_cell_vectors == 0, msg 

192 for i in range(self.number_of_variable_cell_vectors): 

193 msg = f'Unit cell {("abc"[i])}-vector is marked as variable ' 

194 msg += 'and slab must then also be periodic in this direction' 

195 assert self.slab.pbc[i], msg 

196 

197 if box_to_place_in is None: 

198 p0 = np.array([0., 0., 0.]) 

199 cell = self.slab.get_cell() 

200 self.box_to_place_in = [p0, [cell[0, :], cell[1, :], cell[2, :]]] 

201 else: 

202 self.box_to_place_in = box_to_place_in 

203 

204 if box_volume is None: 

205 assert self.number_of_variable_cell_vectors == 0 

206 box_volume = abs(np.linalg.det(self.box_to_place_in[1])) 

207 else: 

208 assert self.number_of_variable_cell_vectors > 0 

209 self.box_volume = box_volume 

210 assert self.box_volume > 0 

211 

212 if splits is None: 

213 splits = {(1,): 1} 

214 tot = sum([v for v in splits.values()]) # normalization 

215 self.splits = {k: v * 1. / tot for k, v in splits.items()} 

216 

217 self.cellbounds = cellbounds 

218 self.test_too_far = test_too_far 

219 self.test_dist_to_slab = test_dist_to_slab 

220 self.rng = rng 

221 

222 def get_new_candidate(self, maxiter=None): 

223 """Returns a new candidate. 

224 

225 maxiter: upper bound on the total number of times 

226 the random position generator is called 

227 when generating the new candidate. 

228 

229 By default (maxiter=None) no such bound 

230 is imposed. If the generator takes too 

231 long time to create a new candidate, it 

232 may be suitable to specify a finite value. 

233 When the bound is exceeded, None is returned. 

234 """ 

235 pbc = self.slab.get_pbc() 

236 

237 # Choose cell splitting 

238 r = self.rng.random() 

239 cumprob = 0 

240 for split, prob in self.splits.items(): 

241 cumprob += prob 

242 if cumprob > r: 

243 break 

244 

245 # Choose direction(s) along which to split 

246 # and by how much 

247 directions = [i for i in range(3) if pbc[i]] 

248 repeat = [1, 1, 1] 

249 if len(directions) > 0: 

250 for number in split: 

251 d = self.rng.choice(directions) 

252 repeat[d] = number 

253 repeat = tuple(repeat) 

254 

255 # Generate the 'full' unit cell 

256 # for the eventual candidates 

257 cell = self.generate_unit_cell(repeat) 

258 if self.number_of_variable_cell_vectors == 0: 

259 assert np.allclose(cell, self.slab.get_cell()) 

260 

261 # Make the smaller 'box' in which we are 

262 # allowed to place the atoms and which will 

263 # then be repeated to fill the 'full' unit cell 

264 box = np.copy(cell) 

265 for i in range(self.number_of_variable_cell_vectors, 3): 

266 box[i] = np.array(self.box_to_place_in[1][i]) 

267 box /= np.array([repeat]).T 

268 

269 # Here we gather the (reduced) number of blocks 

270 # to put in the smaller box, and the 'surplus' 

271 # occurring when the block count is not divisible 

272 # by the number of repetitions. 

273 # E.g. if we have a ('Ti', 4) block and do a 

274 # [2, 3, 1] repetition, we employ a ('Ti', 1) 

275 # block in the smaller box and delete 2 out 6 

276 # Ti atoms afterwards 

277 nrep = int(np.prod(repeat)) 

278 blocks, ids, surplus = [], [], [] 

279 for i, (block, count) in enumerate(self.blocks): 

280 count_part = int(np.ceil(count * 1. / nrep)) 

281 blocks.extend([block] * count_part) 

282 surplus.append(nrep * count_part - count) 

283 ids.extend([i] * count_part) 

284 

285 N_blocks = len(blocks) 

286 

287 # Shuffle the ordering so different blocks 

288 # are added in random order 

289 order = np.arange(N_blocks) 

290 self.rng.shuffle(order) 

291 blocks = [blocks[i] for i in order] 

292 ids = np.array(ids)[order] 

293 

294 # Add blocks one by one until we have found 

295 # a valid candidate 

296 blmin = self.blmin 

297 blmin_too_far = {key: 2 * val for key, val in blmin.items()} 

298 

299 niter = 0 

300 while maxiter is None or niter < maxiter: 

301 cand = Atoms('', cell=box, pbc=pbc) 

302 

303 for i in range(N_blocks): 

304 atoms = blocks[i].copy() 

305 atoms.set_tags(i) 

306 atoms.set_pbc(pbc) 

307 atoms.set_cell(box, scale_atoms=False) 

308 

309 while maxiter is None or niter < maxiter: 

310 niter += 1 

311 cop = atoms.get_positions().mean(axis=0) 

312 pos = np.dot(self.rng.random((1, 3)), box) 

313 atoms.translate(pos - cop) 

314 

315 if len(atoms) > 1: 

316 # Apply a random rotation to multi-atom blocks 

317 phi, theta, psi = 360 * self.rng.random(3) 

318 atoms.euler_rotate(phi=phi, theta=0.5 * theta, psi=psi, 

319 center=pos) 

320 

321 if not atoms_too_close_two_sets(cand, atoms, blmin): 

322 cand += atoms 

323 break 

324 else: 

325 # Reached maximum iteration number 

326 # Break out of the for loop above 

327 cand = None 

328 break 

329 

330 if cand is None: 

331 # Exit the main while loop 

332 break 

333 

334 # Rebuild the candidate after repeating, 

335 # randomly deleting surplus blocks and 

336 # sorting back to the original order 

337 cand_full = cand.repeat(repeat) 

338 

339 tags_full = cand_full.get_tags() 

340 for i in range(nrep): 

341 tags_full[len(cand) * i:len(cand) * (i + 1)] += i * N_blocks 

342 cand_full.set_tags(tags_full) 

343 

344 cand = Atoms('', cell=cell, pbc=pbc) 

345 ids_full = np.tile(ids, nrep) 

346 

347 tag_counter = 0 

348 if len(self.slab) > 0: 

349 tag_counter = int(max(self.slab.get_tags())) + 1 

350 

351 for i, (block, count) in enumerate(self.blocks): 

352 tags = np.where(ids_full == i)[0] 

353 bad = self.rng.choice(tags, size=surplus[i], replace=False) 

354 for tag in tags: 

355 if tag not in bad: 

356 select = [a.index for a in cand_full if a.tag == tag] 

357 atoms = cand_full[select] # is indeed a copy! 

358 atoms.set_tags(tag_counter) 

359 assert len(atoms) == len(block) 

360 cand += atoms 

361 tag_counter += 1 

362 

363 for i in range(self.number_of_variable_cell_vectors, 3): 

364 cand.positions[:, i] += self.box_to_place_in[0][i] 

365 

366 # By construction, the minimal interatomic distances 

367 # within the structure should already be respected 

368 assert not atoms_too_close(cand, blmin, use_tags=True), \ 

369 'This is not supposed to happen; please report this bug' 

370 

371 if self.test_dist_to_slab and len(self.slab) > 0: 

372 if atoms_too_close_two_sets(self.slab, cand, blmin): 

373 continue 

374 

375 if self.test_too_far: 

376 tags = cand.get_tags() 

377 

378 for tag in np.unique(tags): 

379 too_far = True 

380 indices_i = np.where(tags == tag)[0] 

381 indices_j = np.where(tags != tag)[0] 

382 too_far = not atoms_too_close_two_sets(cand[indices_i], 

383 cand[indices_j], 

384 blmin_too_far) 

385 

386 if too_far and len(self.slab) > 0: 

387 # the block is too far from the rest 

388 # but might still be sufficiently 

389 # close to the slab 

390 too_far = not atoms_too_close_two_sets(cand[indices_i], 

391 self.slab, 

392 blmin_too_far) 

393 if too_far: 

394 break 

395 else: 

396 too_far = False 

397 

398 if too_far: 

399 continue 

400 

401 # Passed all the tests 

402 cand = self.slab + cand 

403 cand.set_cell(cell, scale_atoms=False) 

404 break 

405 else: 

406 # Reached max iteration count in the while loop 

407 return None 

408 

409 return cand 

410 

411 def generate_unit_cell(self, repeat): 

412 """Generates a random unit cell. 

413 

414 For this, we use the vectors in self.slab.cell 

415 in the fixed directions and randomly generate 

416 the variable ones. For such a cell to be valid, 

417 it has to satisfy the self.cellbounds constraints. 

418 

419 The cell will also be such that the volume of the 

420 box in which the atoms can be placed (box limits 

421 described by self.box_to_place_in) is equal to 

422 self.box_volume. 

423 

424 Parameters: 

425 

426 repeat: tuple of 3 integers 

427 Indicates by how much each cell vector 

428 will later be reduced by cell splitting. 

429 

430 This is used to ensure that the original 

431 cell is large enough so that the cell lengths 

432 of the smaller cell exceed the largest 

433 (X,X)-minimal-interatomic-distance in self.blmin. 

434 """ 

435 # Find the minimal cell length 'Lmin' 

436 # that we need in order to ensure that 

437 # an added atom or molecule will never 

438 # be 'too close to itself' 

439 Lmin = 0. 

440 for atoms, count in self.blocks: 

441 dist = atoms.get_all_distances(mic=False, vector=False) 

442 num = atoms.get_atomic_numbers() 

443 

444 for i in range(len(atoms)): 

445 dist[i, i] += self.blmin[(num[i], num[i])] 

446 for j in range(i): 

447 bl = self.blmin[(num[i], num[j])] 

448 dist[i, j] += bl 

449 dist[j, i] += bl 

450 

451 L = np.max(dist) 

452 if L > Lmin: 

453 Lmin = L 

454 

455 # Generate a suitable unit cell 

456 valid = False 

457 while not valid: 

458 cell = np.zeros((3, 3)) 

459 

460 for i in range(self.number_of_variable_cell_vectors): 

461 # on-diagonal values 

462 cell[i, i] = self.rng.random() * np.cbrt(self.box_volume) 

463 cell[i, i] *= repeat[i] 

464 for j in range(i): 

465 # off-diagonal values 

466 cell[i, j] = (self.rng.random() - 0.5) * cell[i - 1, i - 1] 

467 

468 # volume scaling 

469 for i in range(self.number_of_variable_cell_vectors, 3): 

470 cell[i] = self.box_to_place_in[1][i] 

471 

472 if self.number_of_variable_cell_vectors > 0: 

473 volume = abs(np.linalg.det(cell)) 

474 scaling = self.box_volume / volume 

475 scaling **= 1. / self.number_of_variable_cell_vectors 

476 cell[:self.number_of_variable_cell_vectors] *= scaling 

477 

478 for i in range(self.number_of_variable_cell_vectors, 3): 

479 cell[i] = self.slab.get_cell()[i] 

480 

481 # bounds checking 

482 valid = True 

483 

484 if self.cellbounds is not None: 

485 if not self.cellbounds.is_within_bounds(cell): 

486 valid = False 

487 

488 if valid: 

489 for i in range(3): 

490 if np.linalg.norm(cell[i]) < repeat[i] * Lmin: 

491 assert self.number_of_variable_cell_vectors > 0 

492 valid = False 

493 

494 return cell