Coverage for /builds/kinetik161/ase/ase/ga/ofp_comparator.py: 49.68%

310 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-12-10 11:04 +0000

1from itertools import combinations_with_replacement 

2from math import erf 

3 

4import matplotlib.pyplot as plt 

5import numpy as np 

6from scipy.spatial.distance import cdist 

7 

8from ase.neighborlist import NeighborList 

9from ase.utils import pbc2pbc 

10 

11 

12class OFPComparator: 

13 """Implementation of comparison using Oganov's fingerprint (OFP) 

14 functions, based on: 

15 

16 * :doi:`Oganov, Valle, J. Chem. Phys. 130, 104504 (2009) 

17 <10.1063/1.3079326>` 

18 

19 * :doi:`Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-1632 

20 <10.1016/j.cpc.2010.06.007>` 

21 

22 Parameters: 

23 

24 n_top: int or None 

25 The number of atoms to optimize (None = include all). 

26 

27 dE: float 

28 Energy difference above which two structures are 

29 automatically considered to be different. (Default 1 eV) 

30 

31 cos_dist_max: float 

32 Maximal cosine distance between two structures in 

33 order to be still considered the same structure. Default 5e-3 

34 

35 rcut: float 

36 Cutoff radius in Angstrom for the fingerprints. 

37 (Default 20 Angstrom) 

38 

39 binwidth: float 

40 Width in Angstrom of the bins over which the fingerprints 

41 are discretized. (Default 0.05 Angstrom) 

42 

43 pbc: list of three booleans or None 

44 Specifies whether to apply periodic boundary conditions 

45 along each of the three unit cell vectors when calculating 

46 the fingerprint. The default (None) is to apply PBCs in all 

47 3 directions. 

48 

49 Note: for isolated systems (pbc = [False, False, False]), 

50 the pair correlation function itself is always short-ranged 

51 (decays to zero beyond a certain radius), so unity is not 

52 subtracted for calculating the fingerprint. Also the 

53 volume normalization disappears. 

54 

55 maxdims: list of three floats or None 

56 If PBCs in only 1 or 2 dimensions are specified, the 

57 maximal thicknesses along the non-periodic directions can 

58 be specified here (the values given for the periodic 

59 directions will not be used). If set to None (the 

60 default), the length of the cell vector along the 

61 non-periodic direction is used. 

62 

63 Note: in this implementation, the cell vectors are 

64 assumed to be orthogonal. 

65 

66 sigma: float 

67 Standard deviation of the gaussian smearing to be applied 

68 in the calculation of the fingerprints (in 

69 Angstrom). Default 0.02 Angstrom. 

70 

71 nsigma: int 

72 Distance (as the number of standard deviations sigma) at 

73 which the gaussian smearing is cut off (i.e. no smearing 

74 beyond that distance). (Default 4) 

75 

76 recalculate: boolean 

77 If True, ignores the fingerprints stored in 

78 atoms.info and recalculates them. (Default False) 

79 

80 """ 

81 

82 def __init__(self, n_top=None, dE=1.0, cos_dist_max=5e-3, rcut=20., 

83 binwidth=0.05, sigma=0.02, nsigma=4, pbc=True, 

84 maxdims=None, recalculate=False): 

85 self.n_top = n_top or 0 

86 self.dE = dE 

87 self.cos_dist_max = cos_dist_max 

88 self.rcut = rcut 

89 self.binwidth = binwidth 

90 self.pbc = pbc2pbc(pbc) 

91 

92 if maxdims is None: 

93 self.maxdims = [None] * 3 

94 else: 

95 self.maxdims = maxdims 

96 

97 self.sigma = sigma 

98 self.nsigma = nsigma 

99 self.recalculate = recalculate 

100 self.dimensions = self.pbc.sum() 

101 

102 if self.dimensions == 1 or self.dimensions == 2: 

103 for direction in range(3): 

104 if not self.pbc[direction]: 

105 if self.maxdims[direction] is not None: 

106 if self.maxdims[direction] <= 0: 

107 e = '''If a max thickness is specificed in maxdims 

108 for a non-periodic direction, it has to be 

109 strictly positive.''' 

110 raise ValueError(e) 

111 

112 def looks_like(self, a1, a2): 

113 """ Return if structure a1 or a2 are similar or not. """ 

114 if len(a1) != len(a2): 

115 raise Exception('The two configurations are not the same size.') 

116 

117 # first we check the energy criteria 

118 if a1.calc is not None and a2.calc is not None: 

119 dE = abs(a1.get_potential_energy() - a2.get_potential_energy()) 

120 if dE >= self.dE: 

121 return False 

122 

123 # then we check the structure 

124 cos_dist = self._compare_structure(a1, a2) 

125 verdict = cos_dist < self.cos_dist_max 

126 return verdict 

127 

128 def _json_encode(self, fingerprints, typedic): 

129 """ json does not accept tuples nor integers as dict keys, 

130 so in order to write the fingerprints to atoms.info, we need 

131 to convert them to strings """ 

132 fingerprints_encoded = {} 

133 for key, val in fingerprints.items(): 

134 try: 

135 newkey = "_".join(map(str, list(key))) 

136 except TypeError: 

137 newkey = str(key) 

138 if isinstance(val, dict): 

139 fingerprints_encoded[newkey] = {} 

140 for key2, val2 in val.items(): 

141 fingerprints_encoded[newkey][str(key2)] = val2 

142 else: 

143 fingerprints_encoded[newkey] = val 

144 typedic_encoded = {} 

145 for key, val in typedic.items(): 

146 newkey = str(key) 

147 typedic_encoded[newkey] = val 

148 return [fingerprints_encoded, typedic_encoded] 

149 

150 def _json_decode(self, fingerprints, typedic): 

151 """ This is the reverse operation of _json_encode """ 

152 fingerprints_decoded = {} 

153 for key, val in fingerprints.items(): 

154 newkey = list(map(int, key.split("_"))) 

155 if len(newkey) > 1: 

156 newkey = tuple(newkey) 

157 else: 

158 newkey = newkey[0] 

159 

160 if isinstance(val, dict): 

161 fingerprints_decoded[newkey] = {} 

162 for key2, val2 in val.items(): 

163 fingerprints_decoded[newkey][int(key2)] = np.array(val2) 

164 else: 

165 fingerprints_decoded[newkey] = np.array(val) 

166 typedic_decoded = {} 

167 for key, val in typedic.items(): 

168 newkey = int(key) 

169 typedic_decoded[newkey] = val 

170 return [fingerprints_decoded, typedic_decoded] 

171 

172 def _compare_structure(self, a1, a2): 

173 """ Returns the cosine distance between the two structures, 

174 using their fingerprints. """ 

175 

176 if len(a1) != len(a2): 

177 raise Exception('The two configurations are not the same size.') 

178 

179 a1top = a1[-self.n_top:] 

180 a2top = a2[-self.n_top:] 

181 

182 if 'fingerprints' in a1.info and not self.recalculate: 

183 fp1, typedic1 = a1.info['fingerprints'] 

184 fp1, typedic1 = self._json_decode(fp1, typedic1) 

185 else: 

186 fp1, typedic1 = self._take_fingerprints(a1top) 

187 a1.info['fingerprints'] = self._json_encode(fp1, typedic1) 

188 

189 if 'fingerprints' in a2.info and not self.recalculate: 

190 fp2, typedic2 = a2.info['fingerprints'] 

191 fp2, typedic2 = self._json_decode(fp2, typedic2) 

192 else: 

193 fp2, typedic2 = self._take_fingerprints(a2top) 

194 a2.info['fingerprints'] = self._json_encode(fp2, typedic2) 

195 

196 if sorted(fp1) != sorted(fp2): 

197 raise AssertionError('The two structures have fingerprints ' 

198 'with different compounds.') 

199 for key in typedic1: 

200 if not np.array_equal(typedic1[key], typedic2[key]): 

201 raise AssertionError('The two structures have a different ' 

202 'stoichiometry or ordering!') 

203 

204 cos_dist = self._cosine_distance(fp1, fp2, typedic1) 

205 return cos_dist 

206 

207 def _get_volume(self, a): 

208 ''' Calculates the normalizing value, and other parameters 

209 (pmin,pmax,qmin,qmax) that are used for surface area calculation 

210 in the case of 1 or 2-D periodicity.''' 

211 

212 cell = a.get_cell() 

213 scalpos = a.get_scaled_positions() 

214 

215 # defaults: 

216 volume = 1. 

217 pmin, pmax, qmin, qmax = [0.] * 4 

218 

219 if self.dimensions == 1 or self.dimensions == 2: 

220 for direction in range(3): 

221 if not self.pbc[direction]: 

222 if self.maxdims[direction] is None: 

223 maxdim = np.linalg.norm(cell[direction, :]) 

224 self.maxdims[direction] = maxdim 

225 

226 pbc_dirs = [i for i in range(3) if self.pbc[i]] 

227 non_pbc_dirs = [i for i in range(3) if not self.pbc[i]] 

228 

229 if self.dimensions == 3: 

230 volume = abs(np.dot(np.cross(cell[0, :], cell[1, :]), cell[2, :])) 

231 

232 elif self.dimensions == 2: 

233 non_pbc_dir = non_pbc_dirs[0] 

234 

235 a = np.cross(cell[pbc_dirs[0], :], cell[pbc_dirs[1], :]) 

236 b = self.maxdims[non_pbc_dir] 

237 b /= np.linalg.norm(cell[non_pbc_dir, :]) 

238 

239 volume = np.abs(np.dot(a, b * cell[non_pbc_dir, :])) 

240 

241 maxpos = np.max(scalpos[:, non_pbc_dir]) 

242 minpos = np.min(scalpos[:, non_pbc_dir]) 

243 pwidth = maxpos - minpos 

244 pmargin = 0.5 * (b - pwidth) 

245 # note: here is a place where we assume that the 

246 # non-periodic direction is orthogonal to the periodic ones: 

247 pmin = np.min(scalpos[:, non_pbc_dir]) - pmargin 

248 pmin *= np.linalg.norm(cell[non_pbc_dir, :]) 

249 pmax = np.max(scalpos[:, non_pbc_dir]) + pmargin 

250 pmax *= np.linalg.norm(cell[non_pbc_dir, :]) 

251 

252 elif self.dimensions == 1: 

253 pbc_dir = pbc_dirs[0] 

254 

255 v0 = cell[non_pbc_dirs[0], :] 

256 b0 = self.maxdims[non_pbc_dirs[0]] 

257 b0 /= np.linalg.norm(cell[non_pbc_dirs[0], :]) 

258 v1 = cell[non_pbc_dirs[1], :] 

259 b1 = self.maxdims[non_pbc_dirs[1]] 

260 b1 /= np.linalg.norm(cell[non_pbc_dirs[1], :]) 

261 

262 volume = np.abs(np.dot(np.cross(b0 * v0, b1 * v1), 

263 cell[pbc_dir, :])) 

264 

265 # note: here is a place where we assume that the 

266 # non-periodic direction is orthogonal to the periodic ones: 

267 maxpos = np.max(scalpos[:, non_pbc_dirs[0]]) 

268 minpos = np.min(scalpos[:, non_pbc_dirs[0]]) 

269 pwidth = maxpos - minpos 

270 pmargin = 0.5 * (b0 - pwidth) 

271 

272 pmin = np.min(scalpos[:, non_pbc_dirs[0]]) - pmargin 

273 pmin *= np.linalg.norm(cell[non_pbc_dirs[0], :]) 

274 pmax = np.max(scalpos[:, non_pbc_dirs[0]]) + pmargin 

275 pmax *= np.linalg.norm(cell[non_pbc_dirs[0], :]) 

276 

277 maxpos = np.max(scalpos[:, non_pbc_dirs[1]]) 

278 minpos = np.min(scalpos[:, non_pbc_dirs[1]]) 

279 qwidth = maxpos - minpos 

280 qmargin = 0.5 * (b1 - qwidth) 

281 

282 qmin = np.min(scalpos[:, non_pbc_dirs[1]]) - qmargin 

283 qmin *= np.linalg.norm(cell[non_pbc_dirs[1], :]) 

284 qmax = np.max(scalpos[:, non_pbc_dirs[1]]) + qmargin 

285 qmax *= np.linalg.norm(cell[non_pbc_dirs[1], :]) 

286 

287 elif self.dimensions == 0: 

288 volume = 1. 

289 

290 return [volume, pmin, pmax, qmin, qmax] 

291 

292 def _take_fingerprints(self, atoms, individual=False): 

293 """ Returns a [fingerprints,typedic] list, where fingerprints 

294 is a dictionary with the fingerprints, and typedic is a 

295 dictionary with the list of atom indices for each element 

296 (or "type") in the atoms object. 

297 The keys in the fingerprints dictionary are the (A,B) tuples, 

298 which are the different element-element combinations in the 

299 atoms object (A and B are the atomic numbers). 

300 When A != B, the (A,B) tuple is sorted (A < B). 

301 

302 If individual=True, a dict is returned, where each atom index 

303 has an {atomic_number:fingerprint} dict as value. 

304 If individual=False, the fingerprints from atoms of the same 

305 atomic number are added together.""" 

306 

307 pos = atoms.get_positions() 

308 num = atoms.get_atomic_numbers() 

309 cell = atoms.get_cell() 

310 

311 unique_types = np.unique(num) 

312 posdic = {} 

313 typedic = {} 

314 for t in unique_types: 

315 tlist = [i for i, atom in enumerate(atoms) if atom.number == t] 

316 typedic[t] = tlist 

317 posdic[t] = pos[tlist] 

318 

319 # determining the volume normalization and other parameters 

320 volume, pmin, pmax, qmin, qmax = self._get_volume(atoms) 

321 

322 # functions for calculating the surface area 

323 non_pbc_dirs = [i for i in range(3) if not self.pbc[i]] 

324 

325 def surface_area_0d(r): 

326 return 4 * np.pi * (r**2) 

327 

328 def surface_area_1d(r, pos): 

329 q0 = pos[non_pbc_dirs[1]] 

330 phi1 = np.lib.scimath.arccos((qmax - q0) / r).real 

331 phi2 = np.pi - np.lib.scimath.arccos((qmin - q0) / r).real 

332 factor = 1 - (phi1 + phi2) / np.pi 

333 return surface_area_2d(r, pos) * factor 

334 

335 def surface_area_2d(r, pos): 

336 p0 = pos[non_pbc_dirs[0]] 

337 area = np.minimum(pmax - p0, r) + np.minimum(p0 - pmin, r) 

338 area *= 2 * np.pi * r 

339 return area 

340 

341 def surface_area_3d(r): 

342 return 4 * np.pi * (r**2) 

343 

344 # build neighborlist 

345 # this is computationally the most intensive part 

346 a = atoms.copy() 

347 a.set_pbc(self.pbc) 

348 nl = NeighborList([self.rcut / 2.] * len(a), skin=0., 

349 self_interaction=False, bothways=True) 

350 nl.update(a) 

351 

352 # parameters for the binning: 

353 m = int(np.ceil(self.nsigma * self.sigma / self.binwidth)) 

354 x = 0.25 * np.sqrt(2) * self.binwidth * (2 * m + 1) * 1. / self.sigma 

355 smearing_norm = erf(x) 

356 nbins = int(np.ceil(self.rcut * 1. / self.binwidth)) 

357 bindist = self.binwidth * np.arange(1, nbins + 1) 

358 

359 def take_individual_rdf(index, unique_type): 

360 # Computes the radial distribution function of atoms 

361 # of type unique_type around the atom with index "index". 

362 rdf = np.zeros(nbins) 

363 

364 if self.dimensions == 3: 

365 weights = 1. / surface_area_3d(bindist) 

366 elif self.dimensions == 2: 

367 weights = 1. / surface_area_2d(bindist, pos[index]) 

368 elif self.dimensions == 1: 

369 weights = 1. / surface_area_1d(bindist, pos[index]) 

370 elif self.dimensions == 0: 

371 weights = 1. / surface_area_0d(bindist) 

372 weights /= self.binwidth 

373 

374 indices, offsets = nl.get_neighbors(index) 

375 valid = np.where(num[indices] == unique_type) 

376 p = pos[indices[valid]] + np.dot(offsets[valid], cell) 

377 r = cdist(p, [pos[index]]) 

378 bins = np.floor(r / self.binwidth) 

379 

380 for i in range(-m, m + 1): 

381 newbins = bins + i 

382 valid = np.where((newbins >= 0) & (newbins < nbins)) 

383 valid_bins = newbins[valid].astype(int) 

384 values = weights[valid_bins] 

385 

386 c = 0.25 * np.sqrt(2) * self.binwidth * 1. / self.sigma 

387 values *= 0.5 * erf(c * (2 * i + 1)) - \ 

388 0.5 * erf(c * (2 * i - 1)) 

389 values /= smearing_norm 

390 

391 for j, valid_bin in enumerate(valid_bins): 

392 rdf[valid_bin] += values[j] 

393 

394 rdf /= len(typedic[unique_type]) * 1. / volume 

395 return rdf 

396 

397 fingerprints = {} 

398 if individual: 

399 for i in range(len(atoms)): 

400 fingerprints[i] = {} 

401 for unique_type in unique_types: 

402 fingerprint = take_individual_rdf(i, unique_type) 

403 if self.dimensions > 0: 

404 fingerprint -= 1 

405 fingerprints[i][unique_type] = fingerprint 

406 else: 

407 for t1, t2 in combinations_with_replacement(unique_types, r=2): 

408 key = (t1, t2) 

409 fingerprint = np.zeros(nbins) 

410 for i in typedic[t1]: 

411 fingerprint += take_individual_rdf(i, t2) 

412 fingerprint /= len(typedic[t1]) 

413 if self.dimensions > 0: 

414 fingerprint -= 1 

415 fingerprints[key] = fingerprint 

416 

417 return [fingerprints, typedic] 

418 

419 def _calculate_local_orders(self, individual_fingerprints, typedic, 

420 volume): 

421 """ Returns a list with the local order for every atom, 

422 using the definition of local order from 

423 Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-1632 

424 :doi:`10.1016/j.cpc.2010.06.007`""" 

425 

426 # total number of atoms: 

427 n_tot = sum([len(typedic[key]) for key in typedic]) 

428 inv_n_tot = 1. / n_tot 

429 

430 local_orders = [] 

431 for fingerprints in individual_fingerprints.values(): 

432 local_order = 0 

433 for unique_type, fingerprint in fingerprints.items(): 

434 term = np.linalg.norm(fingerprint)**2 

435 term *= self.binwidth 

436 term *= (volume * inv_n_tot)**(-1 / 3) 

437 term *= len(typedic[unique_type]) * inv_n_tot 

438 local_order += term 

439 local_orders.append(np.sqrt(local_order)) 

440 

441 return local_orders 

442 

443 def get_local_orders(self, a): 

444 """ Returns the local orders of all the atoms.""" 

445 

446 a_top = a[-self.n_top:] 

447 key = 'individual_fingerprints' 

448 

449 if key in a.info and not self.recalculate: 

450 fp, typedic = self._json_decode(*a.info[key]) 

451 else: 

452 fp, typedic = self._take_fingerprints(a_top, individual=True) 

453 a.info[key] = self._json_encode(fp, typedic) 

454 

455 volume, pmin, pmax, qmin, qmax = self._get_volume(a_top) 

456 return self._calculate_local_orders(fp, typedic, volume) 

457 

458 def _cosine_distance(self, fp1, fp2, typedic): 

459 """ Returns the cosine distance from two fingerprints. 

460 It also needs information about the number of atoms from 

461 each element, which is included in "typedic".""" 

462 

463 keys = sorted(fp1) 

464 

465 # calculating the weights: 

466 w = {} 

467 wtot = 0 

468 for key in keys: 

469 weight = len(typedic[key[0]]) * len(typedic[key[1]]) 

470 wtot += weight 

471 w[key] = weight 

472 for key in keys: 

473 w[key] *= 1. / wtot 

474 

475 # calculating the fingerprint norms: 

476 norm1 = 0 

477 norm2 = 0 

478 for key in keys: 

479 norm1 += (np.linalg.norm(fp1[key])**2) * w[key] 

480 norm2 += (np.linalg.norm(fp2[key])**2) * w[key] 

481 norm1 = np.sqrt(norm1) 

482 norm2 = np.sqrt(norm2) 

483 

484 # calculating the distance: 

485 distance = 0 

486 for key in keys: 

487 distance += np.sum(fp1[key] * fp2[key]) * w[key] / (norm1 * norm2) 

488 

489 distance = 0.5 * (1 - distance) 

490 return distance 

491 

492 def plot_fingerprints(self, a, prefix=''): 

493 """ Function for quickly plotting all the fingerprints. 

494 Prefix = a prefix you want to give to the resulting PNG file.""" 

495 

496 if 'fingerprints' in a.info and not self.recalculate: 

497 fp, typedic = a.info['fingerprints'] 

498 fp, typedic = self._json_decode(fp, typedic) 

499 else: 

500 a_top = a[-self.n_top:] 

501 fp, typedic = self._take_fingerprints(a_top) 

502 a.info['fingerprints'] = self._json_encode(fp, typedic) 

503 

504 npts = int(np.ceil(self.rcut * 1. / self.binwidth)) 

505 x = np.linspace(0, self.rcut, npts, endpoint=False) 

506 

507 for key, val in fp.items(): 

508 plt.plot(x, val) 

509 suffix = f"_fp_{key[0]}_{key[1]}.png" 

510 plt.savefig(prefix + suffix) 

511 plt.clf() 

512 

513 def plot_individual_fingerprints(self, a, prefix=''): 

514 """ Function for plotting all the individual fingerprints. 

515 Prefix = a prefix for the resulting PNG file.""" 

516 if 'individual_fingerprints' in a.info and not self.recalculate: 

517 fp, typedic = a.info['individual_fingerprints'] 

518 else: 

519 a_top = a[-self.n_top:] 

520 fp, typedic = self._take_fingerprints(a_top, individual=True) 

521 a.info['individual_fingerprints'] = [fp, typedic] 

522 

523 npts = int(np.ceil(self.rcut * 1. / self.binwidth)) 

524 x = np.linspace(0, self.rcut, npts, endpoint=False) 

525 

526 for key, val in fp.items(): 

527 for key2, val2 in val.items(): 

528 plt.plot(x, val2) 

529 plt.ylim([-1, 10]) 

530 suffix = f"_individual_fp_{key}_{key2}.png" 

531 plt.savefig(prefix + suffix) 

532 plt.clf()