Coverage for /builds/kinetik161/ase/ase/ga/startgenerator.py: 95.17%

1"""Tools for generating new random starting candidates."""

2import numpy as np

4from ase import Atoms

5from ase.build import molecule

6from ase.data import atomic_numbers

7from ase.ga.utilities import (atoms_too_close, atoms_too_close_two_sets,

8 closest_distances_generator)

11class StartGenerator:

12 """Class for generating random starting candidates.

14 Its basic task consists of randomly placing atoms or

15 molecules within a predescribed box, while respecting

16 certain minimal interatomic distances.

18 Depending on the problem at hand, certain box vectors

19 may not be known or chosen beforehand, and hence also

20 need to be generated at random. Common cases include

21 bulk crystals, films and chains, with respectively

22 3, 2 and 1 unknown cell vectors.

24 Parameters:

26 slab: Atoms object

27 Specifies the cell vectors and periodic boundary conditions

28 to be applied to the randomly generated structures.

29 Any included atoms (e.g. representing an underlying slab)

30 are copied to these new structures.

31 Variable cell vectors (see number_of_variable_cell_vectors)

32 will be ignored because these will be generated at random.

34 blocks: list

35 List of building units for the structure. Each item can be:

37 * an integer: representing a single atom by its atomic number,

38 * a string: for a single atom (a chemical symbol) or a

39 molecule (name recognized by ase.build.molecule),

40 * an Atoms object,

41 * an (A, B) tuple or list where A is any of the above

42 and B is the number of A units to include.

44 A few examples:

46 >>> blocks = ['Ti'] * 4 + ['O'] * 8

47 >>> blocks = [('Ti', 4), ('O', 8)]

48 >>> blocks = [('CO2', 3)] # 3 CO2 molecules

49 >>> co = Atoms('CO', positions=[[0, 0, 0], [1.4, 0, 0]])

50 >>> blocks = [(co, 3)]

52 Each individual block (single atom or molecule) in the

53 randomly generated candidates is given a unique integer

54 tag. These can be used to preserve the molecular identity

55 of these subunits.

57 blmin: dict or float

58 Dictionary with minimal interatomic distances.

59 If a number is provided instead, the dictionary will

60 be generated with this ratio of covalent bond radii.

61 Note: when preserving molecular identity (see use_tags),

62 the blmin dict will (naturally) only be applied

63 to intermolecular distances (not the intramolecular

64 ones).

66 number_of_variable_cell_vectors: int (default 0)

67 The number of variable cell vectors (0, 1, 2 or 3).

68 To keep things simple, it is the 'first' vectors which

69 will be treated as variable, i.e. the 'a' vector in the

70 univariate case, the 'a' and 'b' vectors in the bivariate

71 case, etc.

73 box_to_place_in: [list, list of lists] (default None)

74 The box in which the atoms can be placed.

75 The default (None) means the box is equal to the

76 entire unit cell of the 'slab' object.

77 In many cases, however, smaller boxes are desired

78 (e.g. for adsorbates on a slab surface or for isolated

79 clusters). Then, box_to_place_in can be set as

80 [p0, [v1, v2, v3]] with positions being generated as

81 p0 + r1 * v1 + r2 * v2 + r3 + v3.

82 In case of one or more variable cell vectors,

83 the corresponding items in p0/v1/v2/v3 will be ignored.

85 box_volume: int or float or None (default)

86 Initial guess for the box volume in cubic Angstrom

87 (used in generating the variable cell vectors).

88 Typical values in the solid state are 8-12 A^3 per atom.

89 If there are no variable cell vectors, the default None

90 is required (box volume equal to the box_to_place_in

91 volume).

93 splits: dict or None

94 Splitting scheme for increasing the translational symmetry

95 in the random candidates, based on:

97 * `Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-32`__

99 __ http://dx.doi.org/10.1016/j.cpc.2010.06.007

100

101 This should be a dict specifying the relative probabilities

102 for each split, written as tuples. For example,

103

104 >>> splits = {(2,): 3, (1,): 1}

105

106 This means that, for each structure, either a splitting

107 factor of 2 is applied to one randomly chosen axis,

108 or a splitting factor of 1 is applied (i.e., no splitting).

109 The probability ratio of the two scenararios will be 3:1,

110 i.e. 75% chance for the former and 25% chance for the latter

111 splitting scheme. Only the directions in which the 'slab'

112 object is periodic are eligible for splitting.

113

114 To e.g. always apply splitting factors of 2 and 3 along two

115 randomly chosen axes:

116

117 >>> splits = {(2, 3): 1}

118

119 By default, no splitting is applied (splits = None = {(1,): 1}).

120

121 cellbounds: ase.ga.utilities.CellBounds instance

122 Describing limits on the cell shape, see

123 :class:`~ase.ga.utilities.CellBounds`.

124 Note that it only make sense to impose conditions

125 regarding cell vectors which have been marked as

126 variable (see number_of_variable_cell_vectors).

127

128 test_dist_to_slab: bool (default True)

129 Whether to make sure that the distances between

130 the atoms and the slab satisfy the blmin.

131

132 test_too_far: bool (default True)

133 Whether to also make sure that there are no isolated

134 atoms or molecules with nearest-neighbour bond lengths

135 larger than 2x the value in the blmin dict.

136

137 rng: Random number generator

138 By default numpy.random.

139 """

140

141 def __init__(self, slab, blocks, blmin, number_of_variable_cell_vectors=0,

142 box_to_place_in=None, box_volume=None, splits=None,

143 cellbounds=None, test_dist_to_slab=True, test_too_far=True,

144 rng=np.random):

145

146 self.slab = slab

147

148 self.blocks = []

149 for item in blocks:

150 if isinstance(item, (tuple, list)):

151 assert len(item) == 2, 'Item length %d != 2' % len(item)

152 block, count = item

153 else:

154 block, count = item, 1

155

156 # Convert block into Atoms object

157 if isinstance(block, Atoms):

158 pass

159 elif block in atomic_numbers:

160 block = Atoms(block)

161 elif isinstance(block, str):

162 block = molecule(block)

163 elif block in atomic_numbers.values():

164 block = Atoms(numbers=[block])

165 else:

166 raise ValueError('Cannot parse this block:', block)

167

168 # Add to self.blocks, taking into account that

169 # we want to group the same blocks together.

170 # This is important for the cell splitting.

171 for i, (b, c) in enumerate(self.blocks):

172 if block == b:

173 self.blocks[i][1] += count

174 break

175 else:

176 self.blocks.append([block, count])

177

178 if isinstance(blmin, dict):

179 self.blmin = blmin

180 else:

181 numbers = np.unique([b.get_atomic_numbers() for b in self.blocks])

182 self.blmin = closest_distances_generator(

183 numbers,

184 ratio_of_covalent_radii=blmin)

185

186 self.number_of_variable_cell_vectors = number_of_variable_cell_vectors

187 assert self.number_of_variable_cell_vectors in range(4)

188 if len(self.slab) > 0:

189 msg = 'Including atoms in the slab only makes sense'

190 msg += ' if there are no variable unit cell vectors'

191 assert self.number_of_variable_cell_vectors == 0, msg

192 for i in range(self.number_of_variable_cell_vectors):

193 msg = f'Unit cell {("abc"[i])}-vector is marked as variable '

194 msg += 'and slab must then also be periodic in this direction'

195 assert self.slab.pbc[i], msg

196

197 if box_to_place_in is None:

198 p0 = np.array([0., 0., 0.])

199 cell = self.slab.get_cell()

200 self.box_to_place_in = [p0, [cell[0, :], cell[1, :], cell[2, :]]]

201 else:

202 self.box_to_place_in = box_to_place_in

203

204 if box_volume is None:

205 assert self.number_of_variable_cell_vectors == 0

206 box_volume = abs(np.linalg.det(self.box_to_place_in[1]))

207 else:

208 assert self.number_of_variable_cell_vectors > 0

209 self.box_volume = box_volume

210 assert self.box_volume > 0

211

212 if splits is None:

213 splits = {(1,): 1}

214 tot = sum([v for v in splits.values()]) # normalization

215 self.splits = {k: v * 1. / tot for k, v in splits.items()}

216

217 self.cellbounds = cellbounds

218 self.test_too_far = test_too_far

219 self.test_dist_to_slab = test_dist_to_slab

220 self.rng = rng

221

222 def get_new_candidate(self, maxiter=None):

223 """Returns a new candidate.

224

225 maxiter: upper bound on the total number of times

226 the random position generator is called

227 when generating the new candidate.

228

229 By default (maxiter=None) no such bound

230 is imposed. If the generator takes too

231 long time to create a new candidate, it

232 may be suitable to specify a finite value.

233 When the bound is exceeded, None is returned.

234 """

235 pbc = self.slab.get_pbc()

236

237 # Choose cell splitting

238 r = self.rng.random()

239 cumprob = 0

240 for split, prob in self.splits.items():

241 cumprob += prob

242 if cumprob > r:

243 break

244

245 # Choose direction(s) along which to split

246 # and by how much

247 directions = [i for i in range(3) if pbc[i]]

248 repeat = [1, 1, 1]

249 if len(directions) > 0:

250 for number in split:

251 d = self.rng.choice(directions)

252 repeat[d] = number

253 repeat = tuple(repeat)

254

255 # Generate the 'full' unit cell

256 # for the eventual candidates

257 cell = self.generate_unit_cell(repeat)

258 if self.number_of_variable_cell_vectors == 0:

259 assert np.allclose(cell, self.slab.get_cell())

260

261 # Make the smaller 'box' in which we are

262 # allowed to place the atoms and which will

263 # then be repeated to fill the 'full' unit cell

264 box = np.copy(cell)

265 for i in range(self.number_of_variable_cell_vectors, 3):

266 box[i] = np.array(self.box_to_place_in[1][i])

267 box /= np.array([repeat]).T

268

269 # Here we gather the (reduced) number of blocks

270 # to put in the smaller box, and the 'surplus'

271 # occurring when the block count is not divisible

272 # by the number of repetitions.

273 # E.g. if we have a ('Ti', 4) block and do a

274 # [2, 3, 1] repetition, we employ a ('Ti', 1)

275 # block in the smaller box and delete 2 out 6

276 # Ti atoms afterwards

277 nrep = int(np.prod(repeat))

278 blocks, ids, surplus = [], [], []

279 for i, (block, count) in enumerate(self.blocks):

280 count_part = int(np.ceil(count * 1. / nrep))

281 blocks.extend([block] * count_part)

282 surplus.append(nrep * count_part - count)

283 ids.extend([i] * count_part)

284

285 N_blocks = len(blocks)

286

287 # Shuffle the ordering so different blocks

288 # are added in random order

289 order = np.arange(N_blocks)

290 self.rng.shuffle(order)

291 blocks = [blocks[i] for i in order]

292 ids = np.array(ids)[order]

293

294 # Add blocks one by one until we have found

295 # a valid candidate

296 blmin = self.blmin

297 blmin_too_far = {key: 2 * val for key, val in blmin.items()}

298

299 niter = 0

300 while maxiter is None or niter < maxiter:

301 cand = Atoms('', cell=box, pbc=pbc)

302

303 for i in range(N_blocks):

304 atoms = blocks[i].copy()

305 atoms.set_tags(i)

306 atoms.set_pbc(pbc)

307 atoms.set_cell(box, scale_atoms=False)

308

309 while maxiter is None or niter < maxiter:

310 niter += 1

311 cop = atoms.get_positions().mean(axis=0)

312 pos = np.dot(self.rng.random((1, 3)), box)

313 atoms.translate(pos - cop)

314

315 if len(atoms) > 1:

316 # Apply a random rotation to multi-atom blocks

317 phi, theta, psi = 360 * self.rng.random(3)

318 atoms.euler_rotate(phi=phi, theta=0.5 * theta, psi=psi,

319 center=pos)

320

321 if not atoms_too_close_two_sets(cand, atoms, blmin):

322 cand += atoms

323 break

324 else:

325 # Reached maximum iteration number

326 # Break out of the for loop above

327 cand = None

328 break

329

330 if cand is None:

331 # Exit the main while loop

332 break

333

334 # Rebuild the candidate after repeating,

335 # randomly deleting surplus blocks and

336 # sorting back to the original order

337 cand_full = cand.repeat(repeat)

338

339 tags_full = cand_full.get_tags()

340 for i in range(nrep):

341 tags_full[len(cand) * i:len(cand) * (i + 1)] += i * N_blocks

342 cand_full.set_tags(tags_full)

343

344 cand = Atoms('', cell=cell, pbc=pbc)

345 ids_full = np.tile(ids, nrep)

346

347 tag_counter = 0

348 if len(self.slab) > 0:

349 tag_counter = int(max(self.slab.get_tags())) + 1

350

351 for i, (block, count) in enumerate(self.blocks):

352 tags = np.where(ids_full == i)[0]

353 bad = self.rng.choice(tags, size=surplus[i], replace=False)

354 for tag in tags:

355 if tag not in bad:

356 select = [a.index for a in cand_full if a.tag == tag]

357 atoms = cand_full[select] # is indeed a copy!

358 atoms.set_tags(tag_counter)

359 assert len(atoms) == len(block)

360 cand += atoms

361 tag_counter += 1

362

363 for i in range(self.number_of_variable_cell_vectors, 3):

364 cand.positions[:, i] += self.box_to_place_in[0][i]

365

366 # By construction, the minimal interatomic distances

367 # within the structure should already be respected

368 assert not atoms_too_close(cand, blmin, use_tags=True), \

369 'This is not supposed to happen; please report this bug'

370

371 if self.test_dist_to_slab and len(self.slab) > 0:

372 if atoms_too_close_two_sets(self.slab, cand, blmin):

373 continue

374

375 if self.test_too_far:

376 tags = cand.get_tags()

377

378 for tag in np.unique(tags):

379 too_far = True

380 indices_i = np.where(tags == tag)[0]

381 indices_j = np.where(tags != tag)[0]

382 too_far = not atoms_too_close_two_sets(cand[indices_i],

383 cand[indices_j],

384 blmin_too_far)

385

386 if too_far and len(self.slab) > 0:

387 # the block is too far from the rest

388 # but might still be sufficiently

389 # close to the slab

390 too_far = not atoms_too_close_two_sets(cand[indices_i],

391 self.slab,

392 blmin_too_far)

393 if too_far:

394 break

395 else:

396 too_far = False

397

398 if too_far:

399 continue

400

401 # Passed all the tests

402 cand = self.slab + cand

403 cand.set_cell(cell, scale_atoms=False)

404 break

405 else:

406 # Reached max iteration count in the while loop

407 return None

408

409 return cand

410

411 def generate_unit_cell(self, repeat):

412 """Generates a random unit cell.

413

414 For this, we use the vectors in self.slab.cell

415 in the fixed directions and randomly generate

416 the variable ones. For such a cell to be valid,

417 it has to satisfy the self.cellbounds constraints.

418

419 The cell will also be such that the volume of the

420 box in which the atoms can be placed (box limits

421 described by self.box_to_place_in) is equal to

422 self.box_volume.

423

424 Parameters:

425

426 repeat: tuple of 3 integers

427 Indicates by how much each cell vector

428 will later be reduced by cell splitting.

429

430 This is used to ensure that the original

431 cell is large enough so that the cell lengths

432 of the smaller cell exceed the largest

433 (X,X)-minimal-interatomic-distance in self.blmin.

434 """

435 # Find the minimal cell length 'Lmin'

436 # that we need in order to ensure that

437 # an added atom or molecule will never

438 # be 'too close to itself'

439 Lmin = 0.

440 for atoms, count in self.blocks:

441 dist = atoms.get_all_distances(mic=False, vector=False)

442 num = atoms.get_atomic_numbers()

443

444 for i in range(len(atoms)):

445 dist[i, i] += self.blmin[(num[i], num[i])]

446 for j in range(i):

447 bl = self.blmin[(num[i], num[j])]

448 dist[i, j] += bl

449 dist[j, i] += bl

450

451 L = np.max(dist)

452 if L > Lmin:

453 Lmin = L

454

455 # Generate a suitable unit cell

456 valid = False

457 while not valid:

458 cell = np.zeros((3, 3))

459

460 for i in range(self.number_of_variable_cell_vectors):

461 # on-diagonal values

462 cell[i, i] = self.rng.random() * np.cbrt(self.box_volume)

463 cell[i, i] *= repeat[i]

464 for j in range(i):

465 # off-diagonal values

466 cell[i, j] = (self.rng.random() - 0.5) * cell[i - 1, i - 1]

467

468 # volume scaling

469 for i in range(self.number_of_variable_cell_vectors, 3):

470 cell[i] = self.box_to_place_in[1][i]

471

472 if self.number_of_variable_cell_vectors > 0:

473 volume = abs(np.linalg.det(cell))

474 scaling = self.box_volume / volume

475 scaling **= 1. / self.number_of_variable_cell_vectors

476 cell[:self.number_of_variable_cell_vectors] *= scaling

477

478 for i in range(self.number_of_variable_cell_vectors, 3):

479 cell[i] = self.slab.get_cell()[i]

480

481 # bounds checking

482 valid = True

483

484 if self.cellbounds is not None:

485 if not self.cellbounds.is_within_bounds(cell):

486 valid = False

487

488 if valid:

489 for i in range(3):

490 if np.linalg.norm(cell[i]) < repeat[i] * Lmin:

491 assert self.number_of_variable_cell_vectors > 0

492 valid = False

493

494 return cell