Coverage for /builds/kinetik161/ase/ase/ga/startgenerator.py: 95.17%
207 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-12-10 11:04 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-12-10 11:04 +0000
1"""Tools for generating new random starting candidates."""
2import numpy as np
4from ase import Atoms
5from ase.build import molecule
6from ase.data import atomic_numbers
7from ase.ga.utilities import (atoms_too_close, atoms_too_close_two_sets,
8 closest_distances_generator)
11class StartGenerator:
12 """Class for generating random starting candidates.
14 Its basic task consists of randomly placing atoms or
15 molecules within a predescribed box, while respecting
16 certain minimal interatomic distances.
18 Depending on the problem at hand, certain box vectors
19 may not be known or chosen beforehand, and hence also
20 need to be generated at random. Common cases include
21 bulk crystals, films and chains, with respectively
22 3, 2 and 1 unknown cell vectors.
24 Parameters:
26 slab: Atoms object
27 Specifies the cell vectors and periodic boundary conditions
28 to be applied to the randomly generated structures.
29 Any included atoms (e.g. representing an underlying slab)
30 are copied to these new structures.
31 Variable cell vectors (see number_of_variable_cell_vectors)
32 will be ignored because these will be generated at random.
34 blocks: list
35 List of building units for the structure. Each item can be:
37 * an integer: representing a single atom by its atomic number,
38 * a string: for a single atom (a chemical symbol) or a
39 molecule (name recognized by ase.build.molecule),
40 * an Atoms object,
41 * an (A, B) tuple or list where A is any of the above
42 and B is the number of A units to include.
44 A few examples:
46 >>> blocks = ['Ti'] * 4 + ['O'] * 8
47 >>> blocks = [('Ti', 4), ('O', 8)]
48 >>> blocks = [('CO2', 3)] # 3 CO2 molecules
49 >>> co = Atoms('CO', positions=[[0, 0, 0], [1.4, 0, 0]])
50 >>> blocks = [(co, 3)]
52 Each individual block (single atom or molecule) in the
53 randomly generated candidates is given a unique integer
54 tag. These can be used to preserve the molecular identity
55 of these subunits.
57 blmin: dict or float
58 Dictionary with minimal interatomic distances.
59 If a number is provided instead, the dictionary will
60 be generated with this ratio of covalent bond radii.
61 Note: when preserving molecular identity (see use_tags),
62 the blmin dict will (naturally) only be applied
63 to intermolecular distances (not the intramolecular
64 ones).
66 number_of_variable_cell_vectors: int (default 0)
67 The number of variable cell vectors (0, 1, 2 or 3).
68 To keep things simple, it is the 'first' vectors which
69 will be treated as variable, i.e. the 'a' vector in the
70 univariate case, the 'a' and 'b' vectors in the bivariate
71 case, etc.
73 box_to_place_in: [list, list of lists] (default None)
74 The box in which the atoms can be placed.
75 The default (None) means the box is equal to the
76 entire unit cell of the 'slab' object.
77 In many cases, however, smaller boxes are desired
78 (e.g. for adsorbates on a slab surface or for isolated
79 clusters). Then, box_to_place_in can be set as
80 [p0, [v1, v2, v3]] with positions being generated as
81 p0 + r1 * v1 + r2 * v2 + r3 + v3.
82 In case of one or more variable cell vectors,
83 the corresponding items in p0/v1/v2/v3 will be ignored.
85 box_volume: int or float or None (default)
86 Initial guess for the box volume in cubic Angstrom
87 (used in generating the variable cell vectors).
88 Typical values in the solid state are 8-12 A^3 per atom.
89 If there are no variable cell vectors, the default None
90 is required (box volume equal to the box_to_place_in
91 volume).
93 splits: dict or None
94 Splitting scheme for increasing the translational symmetry
95 in the random candidates, based on:
97 * `Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-32`__
99 __ http://dx.doi.org/10.1016/j.cpc.2010.06.007
101 This should be a dict specifying the relative probabilities
102 for each split, written as tuples. For example,
104 >>> splits = {(2,): 3, (1,): 1}
106 This means that, for each structure, either a splitting
107 factor of 2 is applied to one randomly chosen axis,
108 or a splitting factor of 1 is applied (i.e., no splitting).
109 The probability ratio of the two scenararios will be 3:1,
110 i.e. 75% chance for the former and 25% chance for the latter
111 splitting scheme. Only the directions in which the 'slab'
112 object is periodic are eligible for splitting.
114 To e.g. always apply splitting factors of 2 and 3 along two
115 randomly chosen axes:
117 >>> splits = {(2, 3): 1}
119 By default, no splitting is applied (splits = None = {(1,): 1}).
121 cellbounds: ase.ga.utilities.CellBounds instance
122 Describing limits on the cell shape, see
123 :class:`~ase.ga.utilities.CellBounds`.
124 Note that it only make sense to impose conditions
125 regarding cell vectors which have been marked as
126 variable (see number_of_variable_cell_vectors).
128 test_dist_to_slab: bool (default True)
129 Whether to make sure that the distances between
130 the atoms and the slab satisfy the blmin.
132 test_too_far: bool (default True)
133 Whether to also make sure that there are no isolated
134 atoms or molecules with nearest-neighbour bond lengths
135 larger than 2x the value in the blmin dict.
137 rng: Random number generator
138 By default numpy.random.
139 """
141 def __init__(self, slab, blocks, blmin, number_of_variable_cell_vectors=0,
142 box_to_place_in=None, box_volume=None, splits=None,
143 cellbounds=None, test_dist_to_slab=True, test_too_far=True,
144 rng=np.random):
146 self.slab = slab
148 self.blocks = []
149 for item in blocks:
150 if isinstance(item, (tuple, list)):
151 assert len(item) == 2, 'Item length %d != 2' % len(item)
152 block, count = item
153 else:
154 block, count = item, 1
156 # Convert block into Atoms object
157 if isinstance(block, Atoms):
158 pass
159 elif block in atomic_numbers:
160 block = Atoms(block)
161 elif isinstance(block, str):
162 block = molecule(block)
163 elif block in atomic_numbers.values():
164 block = Atoms(numbers=[block])
165 else:
166 raise ValueError('Cannot parse this block:', block)
168 # Add to self.blocks, taking into account that
169 # we want to group the same blocks together.
170 # This is important for the cell splitting.
171 for i, (b, c) in enumerate(self.blocks):
172 if block == b:
173 self.blocks[i][1] += count
174 break
175 else:
176 self.blocks.append([block, count])
178 if isinstance(blmin, dict):
179 self.blmin = blmin
180 else:
181 numbers = np.unique([b.get_atomic_numbers() for b in self.blocks])
182 self.blmin = closest_distances_generator(
183 numbers,
184 ratio_of_covalent_radii=blmin)
186 self.number_of_variable_cell_vectors = number_of_variable_cell_vectors
187 assert self.number_of_variable_cell_vectors in range(4)
188 if len(self.slab) > 0:
189 msg = 'Including atoms in the slab only makes sense'
190 msg += ' if there are no variable unit cell vectors'
191 assert self.number_of_variable_cell_vectors == 0, msg
192 for i in range(self.number_of_variable_cell_vectors):
193 msg = f'Unit cell {("abc"[i])}-vector is marked as variable '
194 msg += 'and slab must then also be periodic in this direction'
195 assert self.slab.pbc[i], msg
197 if box_to_place_in is None:
198 p0 = np.array([0., 0., 0.])
199 cell = self.slab.get_cell()
200 self.box_to_place_in = [p0, [cell[0, :], cell[1, :], cell[2, :]]]
201 else:
202 self.box_to_place_in = box_to_place_in
204 if box_volume is None:
205 assert self.number_of_variable_cell_vectors == 0
206 box_volume = abs(np.linalg.det(self.box_to_place_in[1]))
207 else:
208 assert self.number_of_variable_cell_vectors > 0
209 self.box_volume = box_volume
210 assert self.box_volume > 0
212 if splits is None:
213 splits = {(1,): 1}
214 tot = sum([v for v in splits.values()]) # normalization
215 self.splits = {k: v * 1. / tot for k, v in splits.items()}
217 self.cellbounds = cellbounds
218 self.test_too_far = test_too_far
219 self.test_dist_to_slab = test_dist_to_slab
220 self.rng = rng
222 def get_new_candidate(self, maxiter=None):
223 """Returns a new candidate.
225 maxiter: upper bound on the total number of times
226 the random position generator is called
227 when generating the new candidate.
229 By default (maxiter=None) no such bound
230 is imposed. If the generator takes too
231 long time to create a new candidate, it
232 may be suitable to specify a finite value.
233 When the bound is exceeded, None is returned.
234 """
235 pbc = self.slab.get_pbc()
237 # Choose cell splitting
238 r = self.rng.random()
239 cumprob = 0
240 for split, prob in self.splits.items():
241 cumprob += prob
242 if cumprob > r:
243 break
245 # Choose direction(s) along which to split
246 # and by how much
247 directions = [i for i in range(3) if pbc[i]]
248 repeat = [1, 1, 1]
249 if len(directions) > 0:
250 for number in split:
251 d = self.rng.choice(directions)
252 repeat[d] = number
253 repeat = tuple(repeat)
255 # Generate the 'full' unit cell
256 # for the eventual candidates
257 cell = self.generate_unit_cell(repeat)
258 if self.number_of_variable_cell_vectors == 0:
259 assert np.allclose(cell, self.slab.get_cell())
261 # Make the smaller 'box' in which we are
262 # allowed to place the atoms and which will
263 # then be repeated to fill the 'full' unit cell
264 box = np.copy(cell)
265 for i in range(self.number_of_variable_cell_vectors, 3):
266 box[i] = np.array(self.box_to_place_in[1][i])
267 box /= np.array([repeat]).T
269 # Here we gather the (reduced) number of blocks
270 # to put in the smaller box, and the 'surplus'
271 # occurring when the block count is not divisible
272 # by the number of repetitions.
273 # E.g. if we have a ('Ti', 4) block and do a
274 # [2, 3, 1] repetition, we employ a ('Ti', 1)
275 # block in the smaller box and delete 2 out 6
276 # Ti atoms afterwards
277 nrep = int(np.prod(repeat))
278 blocks, ids, surplus = [], [], []
279 for i, (block, count) in enumerate(self.blocks):
280 count_part = int(np.ceil(count * 1. / nrep))
281 blocks.extend([block] * count_part)
282 surplus.append(nrep * count_part - count)
283 ids.extend([i] * count_part)
285 N_blocks = len(blocks)
287 # Shuffle the ordering so different blocks
288 # are added in random order
289 order = np.arange(N_blocks)
290 self.rng.shuffle(order)
291 blocks = [blocks[i] for i in order]
292 ids = np.array(ids)[order]
294 # Add blocks one by one until we have found
295 # a valid candidate
296 blmin = self.blmin
297 blmin_too_far = {key: 2 * val for key, val in blmin.items()}
299 niter = 0
300 while maxiter is None or niter < maxiter:
301 cand = Atoms('', cell=box, pbc=pbc)
303 for i in range(N_blocks):
304 atoms = blocks[i].copy()
305 atoms.set_tags(i)
306 atoms.set_pbc(pbc)
307 atoms.set_cell(box, scale_atoms=False)
309 while maxiter is None or niter < maxiter:
310 niter += 1
311 cop = atoms.get_positions().mean(axis=0)
312 pos = np.dot(self.rng.random((1, 3)), box)
313 atoms.translate(pos - cop)
315 if len(atoms) > 1:
316 # Apply a random rotation to multi-atom blocks
317 phi, theta, psi = 360 * self.rng.random(3)
318 atoms.euler_rotate(phi=phi, theta=0.5 * theta, psi=psi,
319 center=pos)
321 if not atoms_too_close_two_sets(cand, atoms, blmin):
322 cand += atoms
323 break
324 else:
325 # Reached maximum iteration number
326 # Break out of the for loop above
327 cand = None
328 break
330 if cand is None:
331 # Exit the main while loop
332 break
334 # Rebuild the candidate after repeating,
335 # randomly deleting surplus blocks and
336 # sorting back to the original order
337 cand_full = cand.repeat(repeat)
339 tags_full = cand_full.get_tags()
340 for i in range(nrep):
341 tags_full[len(cand) * i:len(cand) * (i + 1)] += i * N_blocks
342 cand_full.set_tags(tags_full)
344 cand = Atoms('', cell=cell, pbc=pbc)
345 ids_full = np.tile(ids, nrep)
347 tag_counter = 0
348 if len(self.slab) > 0:
349 tag_counter = int(max(self.slab.get_tags())) + 1
351 for i, (block, count) in enumerate(self.blocks):
352 tags = np.where(ids_full == i)[0]
353 bad = self.rng.choice(tags, size=surplus[i], replace=False)
354 for tag in tags:
355 if tag not in bad:
356 select = [a.index for a in cand_full if a.tag == tag]
357 atoms = cand_full[select] # is indeed a copy!
358 atoms.set_tags(tag_counter)
359 assert len(atoms) == len(block)
360 cand += atoms
361 tag_counter += 1
363 for i in range(self.number_of_variable_cell_vectors, 3):
364 cand.positions[:, i] += self.box_to_place_in[0][i]
366 # By construction, the minimal interatomic distances
367 # within the structure should already be respected
368 assert not atoms_too_close(cand, blmin, use_tags=True), \
369 'This is not supposed to happen; please report this bug'
371 if self.test_dist_to_slab and len(self.slab) > 0:
372 if atoms_too_close_two_sets(self.slab, cand, blmin):
373 continue
375 if self.test_too_far:
376 tags = cand.get_tags()
378 for tag in np.unique(tags):
379 too_far = True
380 indices_i = np.where(tags == tag)[0]
381 indices_j = np.where(tags != tag)[0]
382 too_far = not atoms_too_close_two_sets(cand[indices_i],
383 cand[indices_j],
384 blmin_too_far)
386 if too_far and len(self.slab) > 0:
387 # the block is too far from the rest
388 # but might still be sufficiently
389 # close to the slab
390 too_far = not atoms_too_close_two_sets(cand[indices_i],
391 self.slab,
392 blmin_too_far)
393 if too_far:
394 break
395 else:
396 too_far = False
398 if too_far:
399 continue
401 # Passed all the tests
402 cand = self.slab + cand
403 cand.set_cell(cell, scale_atoms=False)
404 break
405 else:
406 # Reached max iteration count in the while loop
407 return None
409 return cand
411 def generate_unit_cell(self, repeat):
412 """Generates a random unit cell.
414 For this, we use the vectors in self.slab.cell
415 in the fixed directions and randomly generate
416 the variable ones. For such a cell to be valid,
417 it has to satisfy the self.cellbounds constraints.
419 The cell will also be such that the volume of the
420 box in which the atoms can be placed (box limits
421 described by self.box_to_place_in) is equal to
422 self.box_volume.
424 Parameters:
426 repeat: tuple of 3 integers
427 Indicates by how much each cell vector
428 will later be reduced by cell splitting.
430 This is used to ensure that the original
431 cell is large enough so that the cell lengths
432 of the smaller cell exceed the largest
433 (X,X)-minimal-interatomic-distance in self.blmin.
434 """
435 # Find the minimal cell length 'Lmin'
436 # that we need in order to ensure that
437 # an added atom or molecule will never
438 # be 'too close to itself'
439 Lmin = 0.
440 for atoms, count in self.blocks:
441 dist = atoms.get_all_distances(mic=False, vector=False)
442 num = atoms.get_atomic_numbers()
444 for i in range(len(atoms)):
445 dist[i, i] += self.blmin[(num[i], num[i])]
446 for j in range(i):
447 bl = self.blmin[(num[i], num[j])]
448 dist[i, j] += bl
449 dist[j, i] += bl
451 L = np.max(dist)
452 if L > Lmin:
453 Lmin = L
455 # Generate a suitable unit cell
456 valid = False
457 while not valid:
458 cell = np.zeros((3, 3))
460 for i in range(self.number_of_variable_cell_vectors):
461 # on-diagonal values
462 cell[i, i] = self.rng.random() * np.cbrt(self.box_volume)
463 cell[i, i] *= repeat[i]
464 for j in range(i):
465 # off-diagonal values
466 cell[i, j] = (self.rng.random() - 0.5) * cell[i - 1, i - 1]
468 # volume scaling
469 for i in range(self.number_of_variable_cell_vectors, 3):
470 cell[i] = self.box_to_place_in[1][i]
472 if self.number_of_variable_cell_vectors > 0:
473 volume = abs(np.linalg.det(cell))
474 scaling = self.box_volume / volume
475 scaling **= 1. / self.number_of_variable_cell_vectors
476 cell[:self.number_of_variable_cell_vectors] *= scaling
478 for i in range(self.number_of_variable_cell_vectors, 3):
479 cell[i] = self.slab.get_cell()[i]
481 # bounds checking
482 valid = True
484 if self.cellbounds is not None:
485 if not self.cellbounds.is_within_bounds(cell):
486 valid = False
488 if valid:
489 for i in range(3):
490 if np.linalg.norm(cell[i]) < repeat[i] * Lmin:
491 assert self.number_of_variable_cell_vectors > 0
492 valid = False
494 return cell