Coverage for /builds/kinetik161/ase/ase/utils/linesearcharmijo.py: 61.39%

158 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-12-10 11:04 +0000

1# flake8: noqa 

2import logging 

3import math 

4 

5import numpy as np 

6 

7# CO <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 

8try: 

9 import scipy 

10 import scipy.linalg 

11 have_scipy = True 

12except ImportError: 

13 have_scipy = False 

14# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 

15 

16from ase.utils import longsum 

17 

18logger = logging.getLogger(__name__) 

19 

20# CO <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 

21 

22 

23class LinearPath: 

24 """Describes a linear search path of the form t -> t g 

25 """ 

26 

27 def __init__(self, dirn): 

28 """Initialise LinearPath object 

29 

30 Args: 

31 dirn : search direction 

32 """ 

33 self.dirn = dirn 

34 

35 def step(self, alpha): 

36 return alpha * self.dirn 

37 

38 

39def nullspace(A, myeps=1e-10): 

40 """The RumPath class needs the ability to compute the null-space of 

41 a small matrix. This is provided here. But we now also need scipy! 

42 

43 This routine was copy-pasted from 

44 http://stackoverflow.com/questions/5889142/python-numpy-scipy-finding-the-null-space-of-a-matrix 

45 How the h*** does numpy/scipy not have a null-space implemented? 

46 """ 

47 u, s, vh = scipy.linalg.svd(A) 

48 padding = max(0, np.shape(A)[1] - np.shape(s)[0]) 

49 null_mask = np.concatenate(((s <= myeps), 

50 np.ones((padding,), dtype=bool)), 

51 axis=0) 

52 null_space = scipy.compress(null_mask, vh, axis=0) 

53 return scipy.transpose(null_space) 

54 

55 

56class RumPath: 

57 """Describes a curved search path, taking into account information 

58 about (near-) rigid unit motions (RUMs). 

59 

60 One can tag sub-molecules of the system, which are collections of 

61 particles that form a (near-)rigid unit. Let x1, ... xn be the positions 

62 of one such molecule, then we construct a path of the form 

63 xi(t) = xi(0) + (exp(K t) - I) yi + t wi + t c 

64 where yi = xi - <x>, c = <g> is a rigid translation, K is anti-symmetric 

65 so that exp(tK) yi denotes a rotation about the centre of mass, and wi 

66 is the remainind stretch of the molecule. 

67 

68 The following variables are stored: 

69 * rotation_factors : array of acceleration factors 

70 * rigid_units : array of molecule indices 

71 * stretch : w 

72 * K : list of K matrices 

73 * y : list of y-vectors 

74 """ 

75 

76 def __init__(self, x_start, dirn, rigid_units, rotation_factors): 

77 """Initialise a `RumPath` 

78 

79 Args: 

80 x_start : vector containing the positions in d x nAt shape 

81 dirn : search direction, same shape as x_start vector 

82 rigid_units : array of arrays of molecule indices 

83 rotation_factors : factor by which the rotation of each molecular 

84 is accelerated; array of scalars, same length as 

85 rigid_units 

86 """ 

87 

88 if not have_scipy: 

89 raise RuntimeError( 

90 "RumPath depends on scipy, which could not be imported") 

91 

92 # keep some stuff stored 

93 self.rotation_factors = rotation_factors 

94 self.rigid_units = rigid_units 

95 # create storage for more stuff 

96 self.K = [] 

97 self.y = [] 

98 # We need to reshape x_start and dirn since we want to apply 

99 # rotations to individual position vectors! 

100 # we will eventually store the stretch in w, X is just a reference 

101 # to x_start with different shape 

102 w = dirn.copy().reshape([3, len(dirn) / 3]) 

103 X = x_start.reshape([3, len(dirn) / 3]) 

104 

105 for I in rigid_units: # I is a list of indices for one molecule 

106 # get the positions of the i-th molecule, subtract mean 

107 x = X[:, I] 

108 y = x - x.mean(0).T # PBC? 

109 # same for forces >>> translation component 

110 g = w[:, I] 

111 f = g - g.mean(0).T 

112 # compute the system to solve for K (see accompanying note!) 

113 # A = \sum_j Yj Yj' 

114 # b = \sum_j Yj' fj 

115 A = np.zeros((3, 3)) 

116 b = np.zeros(3) 

117 for j in range(len(I)): 

118 Yj = np.array([[y[1, j], 0.0, -y[2, j]], 

119 [-y[0, j], y[2, j], 0.0], 

120 [0.0, -y[1, j], y[0, j]]]) 

121 A += np.dot(Yj.T, Yj) 

122 b += np.dot(Yj.T, f[:, j]) 

123 # If the directions y[:,j] span all of R^3 (canonically this is true 

124 # when there are at least three atoms in the molecule) but if 

125 # not, then A is singular so we cannot solve A k = b. In this case 

126 # we solve Ak = b in the space orthogonal to the null-space of A. 

127 # TODO: 

128 # this can get unstable if A is "near-singular"! We may 

129 # need to revisit this idea at some point to get something 

130 # more robust 

131 N = nullspace(A) 

132 b -= np.dot(np.dot(N, N.T), b) 

133 A += np.dot(N, N.T) 

134 k = scipy.linalg.solve(A, b, sym_pos=True) 

135 K = np.array([[0.0, k[0], -k[2]], 

136 [-k[0], 0.0, k[1]], 

137 [k[2], -k[1], 0.0]]) 

138 # now remove the rotational component from the search direction 

139 # ( we actually keep the translational component as part of w, 

140 # but this could be changed as well! ) 

141 w[:, I] -= np.dot(K, y) 

142 # store K and y 

143 self.K.append(K) 

144 self.y.append(y) 

145 

146 # store the stretch (no need to copy here, since w is already a copy) 

147 self.stretch = w 

148 

149 def step(self, alpha): 

150 """perform a step in the line-search, given a step-length alpha 

151 

152 Args: 

153 alpha : step-length 

154 

155 Returns: 

156 s : update for positions 

157 """ 

158 # translation and stretch 

159 s = alpha * self.stretch 

160 # loop through rigid_units 

161 for (I, K, y, rf) in zip(self.rigid_units, self.K, self.y, 

162 self.rotation_factors): 

163 # with matrix exponentials: 

164 # s[:, I] += expm(K * alpha * rf) * p.y - p.y 

165 # third-order taylor approximation: 

166 # I + t K + 1/2 t^2 K^2 + 1/6 t^3 K^3 - I 

167 # = t K (I + 1/2 t K (I + 1/3 t K)) 

168 aK = alpha * rf * K 

169 s[:, I] += np.dot(aK, y + 0.5 * np.dot(aK, 

170 y + 1 / 3. * np.dot(aK, y))) 

171 

172 return s.ravel() 

173# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 

174 

175 

176class LineSearchArmijo: 

177 

178 def __init__(self, func, c1=0.1, tol=1e-14): 

179 """Initialise the linesearch with set parameters and functions. 

180 

181 Args: 

182 func: the function we are trying to minimise (energy), which should 

183 take an array of positions for its argument 

184 c1: parameter for the sufficient decrease condition in (0.0 0.5) 

185 tol: tolerance for evaluating equality 

186 

187 """ 

188 

189 self.tol = tol 

190 self.func = func 

191 

192 if not (0 < c1 < 0.5): 

193 logger.error("c1 outside of allowed interval (0, 0.5). Replacing with " 

194 "default value.") 

195 print("Warning: C1 outside of allowed interval. Replacing with " 

196 "default value.") 

197 c1 = 0.1 

198 

199 self.c1 = c1 

200 

201 # CO : added rigid_units and rotation_factors 

202 

203 def run(self, x_start, dirn, a_max=None, a_min=None, a1=None, 

204 func_start=None, func_old=None, func_prime_start=None, 

205 rigid_units=None, rotation_factors=None, maxstep=None): 

206 """Perform a backtracking / quadratic-interpolation linesearch 

207 to find an appropriate step length with Armijo condition. 

208 NOTE THIS LINESEARCH DOES NOT IMPOSE WOLFE CONDITIONS! 

209 

210 The idea is to do backtracking via quadratic interpolation, stabilised 

211 by putting a lower bound on the decrease at each linesearch step. 

212 To ensure BFGS-behaviour, whenever "reasonable" we take 1.0 as the 

213 starting step. 

214 

215 Since Armijo does not guarantee convergence of BFGS, the outer 

216 BFGS algorithm must restart when the current search direction 

217 ceases to be a descent direction. 

218 

219 Args: 

220 x_start: vector containing the position to begin the linesearch 

221 from (ie the current location of the optimisation) 

222 dirn: vector pointing in the direction to search in (pk in [NW]). 

223 Note that this does not have to be a unit vector, but the 

224 function will return a value scaled with respect to dirn. 

225 a_max: an upper bound on the maximum step length allowed. Default is 2.0. 

226 a_min: a lower bound on the minimum step length allowed. Default is 1e-10. 

227 A RuntimeError is raised if this bound is violated 

228 during the line search. 

229 a1: the initial guess for an acceptable step length. If no value is 

230 given, this will be set automatically, using quadratic 

231 interpolation using func_old, or "rounded" to 1.0 if the 

232 initial guess lies near 1.0. (specifically for LBFGS) 

233 func_start: the value of func at the start of the linesearch, ie 

234 phi(0). Passing this information avoids potentially expensive 

235 re-calculations 

236 func_prime_start: the value of func_prime at the start of the 

237 linesearch (this will be dotted with dirn to find phi_prime(0)) 

238 func_old: the value of func_start at the previous step taken in 

239 the optimisation (this will be used to calculate the initial 

240 guess for the step length if it is not provided) 

241 rigid_units, rotationfactors : see documentation of RumPath, if it is 

242 unclear what these parameters are, then leave them at None 

243 maxstep: maximum allowed displacement in Angstrom. Default is 0.2. 

244 

245 Returns: 

246 A tuple: (step, func_val, no_update) 

247 

248 step: the final chosen step length, representing the number of 

249 multiples of the direction vector to move 

250 func_val: the value of func after taking this step, ie phi(step) 

251 no_update: true if the linesearch has not performed any updates of 

252 phi or alpha, due to errors or immediate convergence 

253 

254 Raises: 

255 ValueError for problems with arguments 

256 RuntimeError for problems encountered during iteration 

257 """ 

258 

259 a1 = self.handle_args(x_start, dirn, a_max, a_min, a1, func_start, 

260 func_old, func_prime_start, maxstep) 

261 

262 # DEBUG 

263 logger.debug("a1(auto) = %e", a1) 

264 

265 if abs(a1 - 1.0) <= 0.5: 

266 a1 = 1.0 

267 

268 logger.debug("-----------NEW LINESEARCH STARTED---------") 

269 

270 a_final = None 

271 phi_a_final = None 

272 num_iter = 0 

273 

274 # CO <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 

275 # create a search-path 

276 if rigid_units is None: 

277 # standard linear search-path 

278 logger.debug("-----using LinearPath-----") 

279 path = LinearPath(dirn) 

280 else: 

281 logger.debug("-----using RumPath------") 

282 # if rigid_units != None, but rotation_factors == None, then 

283 # raise an error. 

284 if rotation_factors == None: 

285 raise RuntimeError( 

286 'RumPath cannot be created since rotation_factors == None') 

287 path = RumPath(x_start, dirn, rigid_units, rotation_factors) 

288 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 

289 

290 while (True): 

291 

292 logger.debug("-----------NEW ITERATION OF LINESEARCH----------") 

293 logger.debug("Number of linesearch iterations: %d", num_iter) 

294 logger.debug("a1 = %e", a1) 

295 

296 # CO replaced: func_a1 = self.func(x_start + a1 * self.dirn) 

297 func_a1 = self.func(x_start + path.step(a1)) 

298 phi_a1 = func_a1 

299 # compute sufficient decrease (Armijo) condition 

300 suff_dec = (phi_a1 <= self.func_start + 

301 self.c1 * a1 * self.phi_prime_start) 

302 

303 # DEBUG 

304 # print("c1*a1*phi_prime_start = ", self.c1*a1*self.phi_prime_start, 

305 # " | phi_a1 - phi_0 = ", phi_a1 - self.func_start) 

306 logger.info("a1 = %.3f, suff_dec = %r", a1, suff_dec) 

307 if a1 < self.a_min: 

308 raise RuntimeError('a1 < a_min, giving up') 

309 if self.phi_prime_start > 0.0: 

310 raise RuntimeError("self.phi_prime_start > 0.0") 

311 

312 # check sufficient decrease (Armijo condition) 

313 if suff_dec: 

314 a_final = a1 

315 phi_a_final = phi_a1 

316 logger.debug("Linesearch returned a = %e, phi_a = %e", 

317 a_final, phi_a_final) 

318 logger.debug("-----------LINESEARCH COMPLETE-----------") 

319 return a_final, phi_a_final, num_iter == 0 

320 

321 # we don't have sufficient decrease, so we need to compute a 

322 # new trial step-length 

323 at = - ((self.phi_prime_start * a1) / 

324 (2 * ((phi_a1 - self.func_start) / a1 - self.phi_prime_start))) 

325 logger.debug("quadratic_min: initial at = %e", at) 

326 

327 # because a1 does not satisfy Armijo it follows that at must 

328 # lie between 0 and a1. In fact, more strongly, 

329 # at \leq (2 (1-c1))^{-1} a1, which is a back-tracking condition 

330 # therefore, we should now only check that at has not become too small, 

331 # in which case it is likely that nonlinearity has played a big role 

332 # here, so we take an ultra-conservative backtracking step 

333 a1 = max(at, a1 / 10.0) 

334 if a1 > at: 

335 logger.debug( 

336 "at (%e) < a1/10: revert to backtracking a1/10", at) 

337 

338 # (end of while(True) line-search loop) 

339 # (end of run()) 

340 

341 def handle_args(self, x_start, dirn, a_max, a_min, a1, func_start, func_old, 

342 func_prime_start, maxstep): 

343 """Verify passed parameters and set appropriate attributes accordingly. 

344 

345 A suitable value for the initial step-length guess will be either 

346 verified or calculated, stored in the attribute self.a_start, and 

347 returned. 

348 

349 Args: 

350 The args should be identical to those of self.run(). 

351 

352 Returns: 

353 The suitable initial step-length guess a_start 

354 

355 Raises: 

356 ValueError for problems with arguments 

357 

358 """ 

359 

360 self.a_max = a_max 

361 self.a_min = a_min 

362 self.x_start = x_start 

363 self.dirn = dirn 

364 self.func_old = func_old 

365 self.func_start = func_start 

366 self.func_prime_start = func_prime_start 

367 

368 if a_max is None: 

369 a_max = 2.0 

370 

371 if a_max < self.tol: 

372 logger.warning("a_max too small relative to tol. Reverting to " 

373 "default value a_max = 2.0 (twice the <ideal> step).") 

374 a_max = 2.0 # THIS ASSUMES NEWTON/BFGS TYPE BEHAVIOUR! 

375 

376 if self.a_min is None: 

377 self.a_min = 1e-10 

378 

379 if func_start is None: 

380 logger.debug("Setting func_start") 

381 self.func_start = self.func(x_start) 

382 

383 self.phi_prime_start = longsum(self.func_prime_start * self.dirn) 

384 if self.phi_prime_start >= 0: 

385 logger.error( 

386 "Passed direction which is not downhill. Aborting...: %e", 

387 self.phi_prime_start 

388 ) 

389 raise ValueError("Direction is not downhill.") 

390 elif math.isinf(self.phi_prime_start): 

391 logger.error("Passed func_prime_start and dirn which are too big. " 

392 "Aborting...") 

393 raise ValueError("func_prime_start and dirn are too big.") 

394 

395 if a1 is None: 

396 if func_old is not None: 

397 # Interpolating a quadratic to func and func_old - see NW 

398 # equation 3.60 

399 a1 = 2 * (self.func_start - self.func_old) / \ 

400 self.phi_prime_start 

401 logger.debug("Interpolated quadratic, obtained a1 = %e", a1) 

402 if a1 is None or a1 > a_max: 

403 logger.debug("a1 greater than a_max. Reverting to default value " 

404 "a1 = 1.0") 

405 a1 = 1.0 

406 if a1 is None or a1 < self.tol: 

407 logger.debug("a1 is None or a1 < self.tol. Reverting to default value " 

408 "a1 = 1.0") 

409 a1 = 1.0 

410 if a1 is None or a1 < self.a_min: 

411 logger.debug("a1 is None or a1 < a_min. Reverting to default value " 

412 "a1 = 1.0") 

413 a1 = 1.0 

414 

415 if maxstep is None: 

416 maxstep = 0.2 

417 logger.debug("maxstep = %e", maxstep) 

418 

419 r = np.reshape(dirn, (-1, 3)) 

420 steplengths = ((a1 * r)**2).sum(1)**0.5 

421 maxsteplength = np.max(steplengths) 

422 if maxsteplength >= maxstep: 

423 a1 *= maxstep / maxsteplength 

424 logger.debug("Rescaled a1 to fulfill maxstep criterion") 

425 

426 self.a_start = a1 

427 

428 logger.debug("phi_start = %e, phi_prime_start = %e", self.func_start, 

429 self.phi_prime_start) 

430 logger.debug("func_start = %s, self.func_old = %s", self.func_start, 

431 self.func_old) 

432 logger.debug("a1 = %e, a_max = %e, a_min = %e", a1, a_max, self.a_min) 

433 

434 return a1