Coverage for /builds/kinetik161/ase/ase/io/onetep.py: 81.16%

552 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-12-10 11:04 +0000

1import re 

2import warnings 

3from copy import deepcopy 

4from os.path import basename, dirname, isfile 

5from pathlib import Path 

6 

7import numpy as np 

8 

9from ase.atoms import Atoms 

10from ase.calculators.singlepoint import SinglePointDFTCalculator 

11from ase.cell import Cell 

12from ase.units import Bohr 

13 

14no_positions_error = ( 

15 'no positions can be read from this onetep output ' 

16 'if you wish to use ASE to read onetep outputs ' 

17 'please use uppercase block positions in your calculations') 

18 

19unable_to_read = ( 

20 'unable to read this onetep output file, ending' 

21) 

22 

23# taken from onetep source code, 

24# does not seem to be from any known NIST data 

25units = { 

26 'Hartree': 27.2116529, 

27 'Bohr': 1 / 1.889726134583548707935 

28} 

29 

30# Want to add a functionality? add a global constant below 

31ONETEP_START = "Linear-Scaling Ab Initio Total Energy Program" 

32ONETEP_STOP = "Job completed:" 

33ONETEP_TOTAL_ENERGY = "*** NGWF optimisation converged ***" 

34ONETEP_FORCE = "* Forces *" 

35ONETEP_MULLIKEN = "Mulliken Atomic Populations" 

36ONETEP_IBFGS_ITER = "starting iteration" 

37ONETEP_POSITION = "Cell Contents" 

38ONETEP_FIRST_POSITION = "%BLOCK POSITIONS_" 

39ONETEP_WRONG_FIRST_POSITION = '%block positions_' 

40ONETEP_RESUMING_GEOM = 'Resuming previous ONETEP Geometry Optimisation' 

41# ONETEP_CELL = "NOT IMPLEMENTED YET" 

42# ONETEP_STRESS = "NOT IMPLEMENTED YET" 

43ONETEP_ATOM_COUNT = "Totals:" 

44ONETEP_IBFGS_IMPROVE = "improving iteration" 

45ONETEP_START_GEOM = "Starting ONETEP Geometry Optimisation" 

46ONETEP_SPECIES = "%BLOCK SPECIES " 

47ONETEP_SPECIESL = "%block species " 

48ONETEP_FIRST_CELLL = "%block lattice_cart" 

49ONETEP_FIRST_CELL = "%BLOCK LATTICE_CART" 

50ONETEP_STRESS_CELL = "stress_calculation: cell geometry" 

51 

52 

53# def _alphanum_sorted(arr): 

54""" 

55Sort an array of strings alphanumerically. 

56""" 

57# def alphanum_key(key): 

58# digits = re.split('([0-9]+)', key) 

59# return [int(char) if char.isdigit() else char for char in digits] 

60# return sorted(arr, key=alphanum_key) 

61 

62 

63def get_onetep_keywords(path): 

64 

65 if isinstance(path, str): 

66 with open(path) as fd: 

67 results = read_onetep_in(fd, only_keywords=True) 

68 else: 

69 results = read_onetep_in(path, only_keywords=True) 

70 

71 # If there is an include file, the entire 

72 # file keyword's will be included in the dict 

73 # and the include_file keyword will be deleted 

74 if 'include_file' in results['keywords']: 

75 warnings.warn('include_file will be deleted from the dict') 

76 del results['keywords']['include_file'] 

77 return results['keywords'] 

78 

79 

80def read_onetep_in(fd, **kwargs): 

81 """ 

82 Read a single ONETEP input. 

83 

84 This function can be used to visually check ONETEP inputs, 

85 using the ase gui. It can also be used to get access to 

86 the input parameters attached to the ONETEP calculator 

87 returned 

88 

89 The function should work on inputs which contain 

90 'include_file' command(s), (possibly recursively 

91 but untested) 

92 

93 The function should work on input which contains 

94 exotic element(s) name(s) if the specie block is 

95 present to map them back to real element(s) 

96 

97 Parameters 

98 ---------- 

99 fd : io-object 

100 File to read. 

101 

102 Return 

103 ------ 

104 structure: Atoms 

105 Atoms object with cell and a Onetep calculator 

106 attached which contains the keywords dictionary 

107 """ 

108 fdi_lines = fd.readlines() 

109 

110 try: 

111 fd_path = Path(fd.name).resolve() 

112 fd_parent = fd_path.parent 

113 include_files = [fd_path] 

114 except AttributeError: 

115 # We are in a StringIO or something similar 

116 fd_path = Path().cwd() 

117 fd_parent = fd_path 

118 include_files = [Path().cwd()] 

119 

120 def clean_lines(lines): 

121 """ 

122 Remove indesirable line from the input 

123 """ 

124 new_lines = [] 

125 for line in lines: 

126 sep = re.split(r'[!#]', line.strip())[0] 

127 if sep: 

128 new_lines.append(sep) 

129 return new_lines 

130 

131 # Skip comments and empty lines 

132 fdi_lines = clean_lines(fdi_lines) 

133 

134 # Are we in a block? 

135 block_start = 0 

136 

137 keywords = {} 

138 atoms = Atoms() 

139 cell = np.zeros((3, 3)) 

140 fractional = False 

141 positions = False 

142 symbols = False 

143 

144 # Main loop reading the input 

145 for n, line in enumerate(fdi_lines): 

146 line_lower = line.lower() 

147 if '%block' in line_lower: 

148 block_start = n + 1 

149 if 'lattice_cart' in line_lower: 

150 if 'ang' in fdi_lines[block_start]: 

151 cell = np.loadtxt(fdi_lines[n + 2:n + 5]) 

152 else: 

153 cell = np.loadtxt(fdi_lines[n + 1:n + 4]) 

154 cell *= Bohr 

155 

156 if not block_start: 

157 if 'devel_code' in line_lower: 

158 warnings.warn('devel_code is not supported') 

159 continue 

160 # Splits line on any valid onetep separator 

161 sep = re.split(r'[:=\s]+', line) 

162 keywords[sep[0]] = ' '.join(sep[1:]) 

163 # If include_file is used, we open the included file 

164 # and insert it in the current fdi_lines... 

165 # ONETEP does not work with cascade 

166 # and this SHOULD NOT work with cascade 

167 if 'include_file' == sep[0]: 

168 name = sep[1].replace('\'', '') 

169 name = name.replace('\"', '') 

170 new_path = fd_parent / name 

171 for path in include_files: 

172 if new_path.samefile(path): 

173 raise ValueError('invalid/recursive include_file') 

174 new_fd = open(new_path) 

175 new_lines = new_fd.readlines() 

176 new_lines = clean_lines(new_lines) 

177 for include_line in new_lines: 

178 sep = re.split(r'[:=\s]+', include_line) 

179 if 'include_file' == sep[0]: 

180 raise ValueError('nested include_file') 

181 fdi_lines[:] = fdi_lines[:n + 1] + \ 

182 new_lines + \ 

183 fdi_lines[n + 1:] 

184 include_files.append(new_path) 

185 continue 

186 

187 if '%endblock' in line_lower: 

188 if 'positions_' in line_lower: 

189 if 'ang' in fdi_lines[block_start]: 

190 to_read = fdi_lines[block_start + 1:n] 

191 positions = np.loadtxt(to_read, usecols=(1, 2, 3)) 

192 else: 

193 to_read = fdi_lines[block_start:n] 

194 positions = np.loadtxt(to_read, usecols=(1, 2, 3)) 

195 positions *= units['Bohr'] 

196 symbols = np.loadtxt(to_read, usecols=(0), dtype='str') 

197 if 'frac' in line_lower: 

198 fractional = True 

199 elif '%endblock species' == line_lower.strip(): 

200 els = fdi_lines[block_start:n] 

201 species = {} 

202 for el in els: 

203 sep = el.split() 

204 species[sep[0]] = sep[1] 

205 to_read = [i.strip() for i in fdi_lines[block_start:n]] 

206 keywords['species'] = to_read 

207 elif 'lattice_cart' in line_lower: 

208 pass 

209 else: 

210 to_read = [i.strip() for i in fdi_lines[block_start:n]] 

211 block_title = line_lower.replace('%endblock', '').strip() 

212 keywords[block_title] = to_read 

213 block_start = 0 

214 

215 # We don't need a fully valid onetep 

216 # input to read the keywords, just 

217 # the keywords 

218 if kwargs.get('only_keywords', False): 

219 return {'keywords': keywords} 

220 # Necessary if we have only one atom 

221 # Check if the cell is valid (3D) 

222 if not cell.any(axis=1).all(): 

223 raise ValueError('invalid cell specified') 

224 

225 if positions is False: 

226 raise ValueError('invalid position specified') 

227 

228 if symbols is False: 

229 raise ValueError('no symbols found') 

230 

231 positions = positions.reshape(-1, 3) 

232 symbols = symbols.reshape(-1) 

233 tags = [] 

234 info = {'onetep_species': []} 

235 for symbol in symbols: 

236 label = symbol.replace(species[symbol], '') 

237 if label.isdigit(): 

238 tags.append(int(label)) 

239 else: 

240 tags.append(0) 

241 info['onetep_species'].append(symbol) 

242 atoms = Atoms([species[i] for i in symbols], 

243 cell=cell, 

244 pbc=True, 

245 tags=tags, 

246 info=info) 

247 if fractional: 

248 atoms.set_scaled_positions(positions / units['Bohr']) 

249 else: 

250 atoms.set_positions(positions) 

251 results = {'atoms': atoms, 'keywords': keywords} 

252 return results 

253 

254 

255def write_onetep_in( 

256 fd, 

257 atoms, 

258 edft=False, 

259 xc='PBE', 

260 ngwf_count=-1, 

261 ngwf_radius=9.0, 

262 keywords={}, 

263 pseudopotentials={}, 

264 pseudo_path=".", 

265 pseudo_suffix=None, 

266 **kwargs): 

267 """ 

268 Write a single ONETEP input. 

269 

270 This function will be used by ASE to perform 

271 various workflows (Opt, NEB...) or can be used 

272 manually to quickly create ONETEP input file(s). 

273 

274 The function will first write keywords in 

275 alphabetic order in lowercase. Secondly, blocks 

276 will be written in alphabetic order in uppercase. 

277 

278 Two ways to work with the function: 

279 

280 - By providing only (simple) keywords present in 

281 the parameters. ngwf_count and ngwf_radius 

282 accept multiple types as described in the Parameters 

283 section. 

284 

285 - If the keywords parameters is provided as a dictionary 

286 these keywords will be used to write the input file and 

287 will take priority. 

288 

289 If no pseudopotentials are provided in the parameters and 

290 the function will try to look for suitable pseudopotential 

291 in the pseudo_path. 

292 

293 Parameters 

294 ---------- 

295 fd : file 

296 File to write. 

297 atoms: Atoms 

298 Atoms including Cell object to write. 

299 edft: Bool 

300 Activate EDFT. 

301 xc: str 

302 DFT xc to use e.g (PBE, RPBE, ...) 

303 ngwf_count: int|list|dict 

304 Behaviour depends on the type: 

305 int: every species will have this amount 

306 of ngwfs. 

307 list: list of int, will be attributed 

308 alphabetically to species: 

309 dict: keys are species name(s), 

310 value are their number: 

311 ngwf_radius: int|list|dict 

312 Behaviour depends on the type: 

313 float: every species will have this radius. 

314 list: list of float, will be attributed 

315 alphabetically to species: 

316 [10.0, 9.0] 

317 dict: keys are species name(s), 

318 value are their radius: 

319 {'Na': 9.0, 'Cl': 10.0} 

320 keywords: dict 

321 Dictionary with ONETEP keywords to write, 

322 keywords with lists as values will be 

323 treated like blocks, with each element 

324 of list being a different line. 

325 pseudopotentials: dict 

326 Behaviour depends on the type: 

327 keys are species name(s) their 

328 value are the pseudopotential file to use: 

329 {'Na': 'Na.usp', 'Cl': 'Cl.usp'} 

330 pseudo_path: str 

331 Where to look for pseudopotential, correspond 

332 to the pseudo_path keyword of ONETEP. 

333 pseudo_suffix: str 

334 Suffix for the pseudopotential filename 

335 to look for, useful if you have multiple sets of 

336 pseudopotentials in pseudo_path. 

337 """ 

338 

339 label = kwargs.get('label', 'onetep') 

340 try: 

341 directory = kwargs.get('directory', Path(dirname(fd.name))) 

342 except AttributeError: 

343 directory = '.' 

344 autorestart = kwargs.get('autorestart', False) 

345 elements = np.array(atoms.symbols) 

346 tags = np.array(atoms.get_tags()) 

347 species_maybe = atoms.info.get('onetep_species', False) 

348 #  We look if the atom.info contains onetep species information 

349 # If it does, we use it, as it might contains character 

350 #  which are not allowed in ase tags, if not we fall back 

351 # to tags and use them instead. 

352 if species_maybe: 

353 if set(species_maybe) != set(elements): 

354 species = np.array(species_maybe) 

355 else: 

356 formatted_tags = np.array(['' if i == 0 else str(i) for i in tags]) 

357 species = np.char.add(elements, formatted_tags) 

358 numbers = np.array(atoms.numbers) 

359 tmp = np.argsort(species) 

360 # We sort both Z and name the same 

361 numbers = np.take_along_axis(numbers, tmp, axis=0) 

362 # u_elements = np.take_along_axis(elements, tmp, axis=0) 

363 u_species = np.take_along_axis(species, tmp, axis=0) 

364 elements = np.take_along_axis(elements, tmp, axis=0) 

365 # We want to keep unique but without sort: small trick with index 

366 idx = np.unique(u_species, return_index=True)[1] 

367 elements = elements[idx] 

368 # Unique species 

369 u_species = u_species[idx] 

370 numbers = numbers[idx] 

371 n_sp = len(u_species) 

372 

373 if isinstance(ngwf_count, int): 

374 ngwf_count = dict(zip(u_species, [ngwf_count] * n_sp)) 

375 elif isinstance(ngwf_count, list): 

376 ngwf_count = dict(zip(u_species, ngwf_count)) 

377 elif isinstance(ngwf_count, dict): 

378 pass 

379 else: 

380 raise TypeError('ngwf_count can only be int|list|dict') 

381 

382 if isinstance(ngwf_radius, float): 

383 ngwf_radius = dict(zip(u_species, [ngwf_radius] * n_sp)) 

384 elif isinstance(ngwf_radius, list): 

385 ngwf_radius = dict(zip(u_species, ngwf_radius)) 

386 elif isinstance(ngwf_radius, dict): 

387 pass 

388 else: 

389 raise TypeError('ngwf_radius can only be float|list|dict') 

390 

391 pp_files = keywords.get('pseudo_path', pseudo_path) 

392 pp_files = pp_files.replace('\'', '') 

393 pp_files = pp_files.replace('\"', '') 

394 pp_files = Path(pp_files).glob('*') 

395 pp_files = [i for i in sorted(pp_files) if i.is_file()] 

396 pp_is_manual = keywords.get('species_pot', False) 

397 common_suffix = ['.usp', '.recpot', '.upf', '.paw', '.psp', '.pspnc'] 

398 if pseudo_suffix: 

399 common_suffix = [pseudo_suffix] 

400 # Transform to list 

401 if pp_is_manual: 

402 pp_list = keywords['species_pot'] 

403 elif isinstance(pseudopotentials, dict): 

404 pp_list = [] 

405 for idx, el in enumerate(u_species): 

406 try: 

407 pp_list.append(el + ' ' + pseudopotentials[el]) 

408 except KeyError: 

409 for i in pp_files: 

410 if elements[idx] in basename(i)[:2]: 

411 for j in common_suffix: 

412 if basename(i).endswith(j): 

413 pp_list.append(el + ' ' + basename(i)) 

414 # pp_maybe = attempt_to_find_pp(elements[idx]) 

415 # if pp_maybe: 

416 # pp_list.append(el + ' ' + pp_maybe) 

417 # else: 

418 # warnings.warn('No pseudopotential found for element {}' 

419 # .format(el)) 

420 else: 

421 raise TypeError('pseudopotentials object can only be dict') 

422 

423 default_species = [] 

424 for idx, el in enumerate(u_species): 

425 tmp = "" 

426 tmp += u_species[idx] + " " + elements[idx] + " " 

427 tmp += str(numbers[idx]) + " " 

428 try: 

429 tmp += str(ngwf_count[el]) + " " 

430 except KeyError: 

431 tmp += str(ngwf_count[elements[idx]]) + " " 

432 try: 

433 tmp += str(ngwf_radius[el]) 

434 except KeyError: 

435 tmp += str(ngwf_radius[elements[idx]]) 

436 default_species.append(tmp) 

437 

438 positions_abs = ['ang'] 

439 for s, p in zip(species, atoms.get_positions()): 

440 line = '{s:>5} {0:>12.6f} {1:>12.6f} {2:>12.6f}'.format(s=s, *p) 

441 positions_abs.append(line) 

442 

443 lattice_cart = ['ang'] 

444 for axis in atoms.get_cell(): 

445 line = '{:>16.8f} {:>16.8f} {:>16.8f}'.format(*axis) 

446 lattice_cart.append(line) 

447 

448 # Default keywords if not provided by the user, 

449 # most of them are ONETEP default, except write_forces 

450 # which is always turned on. 

451 default_keywords = { 

452 "xc_functional": "pbe", 

453 "edft": edft, 

454 "cutoff_energy": 20, 

455 "paw": False, 

456 "task": "singlepoint", 

457 "output_detail": "normal", 

458 "species": default_species, 

459 "pseudo_path": pseudo_path, 

460 "species_pot": pp_list, 

461 "positions_abs": positions_abs, 

462 "lattice_cart": lattice_cart, 

463 "write_forces": True, 

464 "forces_output_detail": 'verbose' 

465 } 

466 

467 # Main loop, fill the keyword dictionary 

468 keywords = {key.lower(): value for key, value in keywords.items()} 

469 for value in default_keywords: 

470 if not keywords.get(value, None): 

471 keywords[value] = default_keywords[value] 

472 

473 # No pseudopotential provided, we look for them in pseudo_path 

474 # If autorestart is True, we look for restart files, 

475 # and turn on relevant keywords... 

476 if autorestart: 

477 keywords['read_denskern'] = \ 

478 isfile(directory / (label + '.dkn')) 

479 keywords['read_tightbox_ngwfs'] = \ 

480 isfile(directory / (label + '.tightbox_ngwfs')) 

481 keywords['read_hamiltonian'] = \ 

482 isfile(directory / (label + '.ham')) 

483 

484 # If not EDFT, hamiltonian is irrelevant. 

485 # print(keywords.get('edft', False)) 

486 # keywords['read_hamiltonian'] = \ 

487 # keywords.get('read_hamiltonian', False) & keywords.get('edft', False) 

488 

489 keywords = dict(sorted(keywords.items())) 

490 

491 lines = [] 

492 block_lines = [] 

493 

494 for key, value in keywords.items(): 

495 if isinstance(value, (list, np.ndarray)): 

496 if not all(isinstance(_, str) for _ in value): 

497 raise TypeError('list values for blocks must be strings only') 

498 block_lines.append(('\n%block ' + key).upper()) 

499 block_lines.extend(value) 

500 block_lines.append(('%endblock ' + key).upper()) 

501 elif isinstance(value, bool): 

502 lines.append(str(key) + " : " + str(value)[0]) 

503 elif isinstance(value, (str, int, float)): 

504 lines.append(str(key) + " : " + str(value)) 

505 else: 

506 raise TypeError('keyword values must be list|str|bool') 

507 input_header = '!' + '-' * 78 + '!\n' + \ 

508 '!' + '-' * 33 + ' INPUT FILE ' + '-' * 33 + '!\n' + \ 

509 '!' + '-' * 78 + '!\n\n' 

510 

511 input_footer = '\n!' + '-' * 78 + '!\n' + \ 

512 '!' + '-' * 32 + ' END OF INPUT ' + '-' * 32 + '!\n' + \ 

513 '!' + '-' * 78 + '!' 

514 

515 fd.write(input_header) 

516 fd.writelines(line + '\n' for line in lines) 

517 fd.writelines(b_line + '\n' for b_line in block_lines) 

518 

519 if 'devel_code' in kwargs: 

520 warnings.warn('writing devel code as it is, at the end of the file') 

521 fd.writelines('\n' + line for line in kwargs['devel_code']) 

522 

523 fd.write(input_footer) 

524 

525 

526def read_onetep_out(fd, index=-1, improving=False, **kwargs): 

527 """ 

528 Read ONETEP output(s). 

529 

530 !!! 

531 This function will be used by ASE when performing 

532 various workflows (Opt, NEB...) 

533 !!! 

534 

535 Parameters 

536 ---------- 

537 fd : file 

538 File to read. 

539 index: slice 

540 Which atomic configuration to read 

541 improving: Bool 

542 If the output is a geometry optimisation, 

543 improving = True will keep line search 

544 configuration from BFGS 

545 

546 Yields 

547 ------ 

548 structure: Atoms|list of Atoms 

549 """ 

550 # Put everything in memory 

551 fdo_lines = fd.readlines() 

552 n_lines = len(fdo_lines) 

553 

554 # Used to store index of important elements 

555 output = { 

556 ONETEP_START: [], 

557 ONETEP_STOP: [], 

558 ONETEP_TOTAL_ENERGY: [], 

559 ONETEP_FORCE: [], 

560 ONETEP_MULLIKEN: [], 

561 ONETEP_POSITION: [], 

562 ONETEP_FIRST_POSITION: [], 

563 ONETEP_WRONG_FIRST_POSITION: [], 

564 ONETEP_ATOM_COUNT: [], 

565 ONETEP_IBFGS_IMPROVE: [], 

566 ONETEP_IBFGS_ITER: [], 

567 ONETEP_START_GEOM: [], 

568 ONETEP_RESUMING_GEOM: [], 

569 ONETEP_SPECIES: [], 

570 ONETEP_SPECIESL: [], 

571 ONETEP_FIRST_CELL: [], 

572 ONETEP_FIRST_CELLL: [], 

573 ONETEP_STRESS_CELL: [] 

574 } 

575 

576 # Index will be treated to get rid of duplicate or improving iterations 

577 output_corr = deepcopy(output) 

578 

579 # Core properties that will be used in Yield 

580 properties = [ONETEP_TOTAL_ENERGY, ONETEP_FORCE, 

581 ONETEP_MULLIKEN, ONETEP_FIRST_CELL, 

582 ONETEP_FIRST_CELLL] 

583 

584 # Find all matches append them to the dictionary 

585 for idx, line in enumerate(fdo_lines): 

586 match = False 

587 for key in output: 

588 # The second condition is for species block where 

589 # we have to make sure there is nothing after the word 

590 # 'species' but sometimes no trailing space will 

591 # be present. 

592 if key in line or \ 

593 key.strip() == line.strip(): 

594 match = key 

595 break 

596 if match: 

597 output[match].append(idx) 

598 # output[match].append(idx) 

599 # If a calculation died in the middle of nowhere... 

600 # Might be needed, keeping it here 

601 # if len(output[ONETEP_START]) - len(output[ONETEP_STOP]) > 1: 

602 # output[ONETEP_STOP].append(i - 1) 

603 

604 # Everything is numpy 

605 output = {key: np.array(value) for key, value in output.items()} 

606 # Conveniance notation (pointers: no overhead, no additional memory) 

607 ibfgs_iter = output[ONETEP_IBFGS_ITER] 

608 ibfgs_start = output[ONETEP_START_GEOM] 

609 ibfgs_improve = output[ONETEP_IBFGS_IMPROVE] 

610 ibfgs_resume = output[ONETEP_RESUMING_GEOM] 

611 i_first_positions = output[ONETEP_FIRST_POSITION] 

612 is_frac_positions = [i for i in i_first_positions if 'FRAC' in fdo_lines[i]] 

613 

614 # In onetep species can have arbritary names, 

615 # We want to map them to real element names 

616 # Via the species block 

617 species = np.concatenate((output[ONETEP_SPECIES], 

618 output[ONETEP_SPECIESL])).astype(np.int32) 

619 

620 icells = np.hstack( 

621 (output[ONETEP_FIRST_CELL], 

622 output[ONETEP_FIRST_CELLL], 

623 output[ONETEP_STRESS_CELL]) 

624 ) 

625 icells = icells.astype(np.int32) 

626 # Using the fact that 0 == False and > 0 == True 

627 has_bfgs = len(ibfgs_iter) \ 

628 + len(output[ONETEP_START_GEOM]) \ 

629 + len(output[ONETEP_RESUMING_GEOM]) 

630 

631 has_bfgs_improve = len(ibfgs_improve) 

632 has_bfgs_resume = len(ibfgs_resume) 

633 # When the input block position is written in lowercase 

634 # ONETEP does not print the initial position but a hash 

635 # of it, might be needed 

636 has_hash = len(output[ONETEP_WRONG_FIRST_POSITION]) 

637 

638 def is_in_bfgs(idx): 

639 """ 

640 Check if a given index is in a BFGS block 

641 """ 

642 for past, future in zip(output[ONETEP_START], np.hstack( 

643 (output[ONETEP_START][1:], [n_lines]))): 

644 if past < idx < future: 

645 if np.any((past < ibfgs_start) & (ibfgs_start < future)) or \ 

646 np.any((past < ibfgs_resume) & (ibfgs_resume < future)): 

647 return True 

648 return False 

649 

650 # If onetep has bfgs, the first atomic positions 

651 # Will be printed multiple times, we don't add them 

652 if has_bfgs: 

653 to_del = [] 

654 for idx, tmp in enumerate(i_first_positions): 

655 if is_in_bfgs(tmp): 

656 to_del.append(idx) 

657 i_first_positions = np.delete(i_first_positions, to_del) 

658 

659 ipositions = np.hstack((output[ONETEP_POSITION], 

660 i_first_positions)).astype(np.int32) 

661 ipositions = np.sort(ipositions) 

662 

663 n_pos = len(ipositions) 

664 

665 # Some ONETEP files will not have any positions 

666 # due to how the software is coded. As a last 

667 # resort we look for a geom file with the same label. 

668 if n_pos == 0: 

669 name = fd.name 

670 label_maybe = basename(name).split('.')[0] 

671 geom_maybe = label_maybe + '.geom' 

672 if isfile(geom_maybe): 

673 from ase.io import read 

674 positions = read(geom_maybe, index="::", 

675 format='castep-geom', 

676 units={ 

677 'Eh': units['Hartree'], 

678 'a0': units['Bohr'] 

679 } 

680 ) 

681 forces = [i.get_forces() for i in positions] 

682 has_bfgs = False 

683 has_bfgs_improve = False 

684 # way to make everything work 

685 ipositions = np.hstack(([0], output[ONETEP_IBFGS_ITER])) 

686 else: 

687 if has_hash: 

688 raise RuntimeError(no_positions_error) 

689 raise RuntimeError(unable_to_read) 

690 

691 to_del = [] 

692 

693 # Important loop which: 

694 # - Get rid of improving BFGS iteration if improving == False 

695 # - Append None to properties to make sure each properties will 

696 # have the same length and each index correspond to the right 

697 # atomic configuration (hopefully). 

698 # Past is the index of the current atomic conf, future is the 

699 # index of the next one. 

700 for idx, (past, future) in enumerate( 

701 zip(ipositions, np.hstack((ipositions[1:], [n_lines])))): 

702 if has_bfgs: 

703 # BFGS resume prints the configuration at the beggining, 

704 # we don't want it 

705 if has_bfgs_resume: 

706 closest_resume = np.min(np.abs(past - ibfgs_resume)) 

707 closest_starting = np.min(np.abs(past - ibfgs_iter)) 

708 if closest_resume < closest_starting: 

709 to_del.append(idx) 

710 continue 

711 if has_bfgs_improve and not improving: 

712 # Find closest improve iteration index 

713 closest_improve = np.min(np.abs(past - ibfgs_improve)) 

714 # Find closest normal iteration index 

715 closest_iter = np.min(np.abs(past - ibfgs_iter)) 

716 if len(ibfgs_start): 

717 closest_starting = np.min(np.abs(past - ibfgs_start)) 

718 closest = np.min([closest_iter, closest_starting]) 

719 else: 

720 closest = closest_iter 

721 # If improve is closer we delete 

722 if closest_improve < closest: 

723 to_del.append(idx) 

724 continue 

725 

726 # We append None if no properties in contained for 

727 # one specific atomic configurations. 

728 for prop in properties: 

729 tmp, = np.where((past < output[prop]) & (output[prop] <= future)) 

730 if len(tmp) == 0: 

731 output_corr[prop].append(None) 

732 else: 

733 output_corr[prop].extend(output[prop][tmp[:1]]) 

734 

735 # We effectively delete unwanted atomic configurations 

736 if to_del: 

737 new_indices = np.setdiff1d(np.arange(n_pos), to_del) 

738 ipositions = ipositions[new_indices] 

739 

740 # Bunch of methods to grep properties from output. 

741 def parse_cell(idx): 

742 a, b, c = np.loadtxt([fdo_lines[idx + 2]]) * units['Bohr'] 

743 al, be, ga = np.loadtxt([fdo_lines[idx + 4]]) 

744 cell = Cell.fromcellpar([a, b, c, al, be, ga]) 

745 return np.array(cell) 

746 

747 def parse_charge(idx): 

748 n = 0 

749 offset = 4 

750 while idx + n < len(fdo_lines): 

751 if not fdo_lines[idx + n].strip(): 

752 tmp_charges = np.loadtxt( 

753 fdo_lines[idx + offset:idx + n - 1], 

754 usecols=3) 

755 return tmp_charges 

756 n += 1 

757 return None 

758 #  In ONETEP there is no way to differentiate electronic entropy 

759 #  and entropy due to solvent, therefore there is no way to 

760 # extrapolate the energy at 0 K. We return the last energy 

761 #  instead. 

762 

763 def parse_energy(idx): 

764 n = 0 

765 energies = [] 

766 while idx + n < len(fdo_lines): 

767 if '| Total' in fdo_lines[idx + n]: 

768 energies.append(float(fdo_lines[idx + n].split()[-2])) 

769 if 'LOCAL ENERGY COMPONENTS FROM MATRIX TRACES' \ 

770 in fdo_lines[idx + n]: 

771 return energies[-1] * units['Hartree'] 

772 # Something is wrong with this ONETEP output 

773 if len(energies) > 2: 

774 raise RuntimeError('something is wrong with this ONETEP output') 

775 n += 1 

776 return None 

777 

778 def parse_first_cell(idx): 

779 n = 0 

780 offset = 1 

781 while idx + n < len(fdo_lines): 

782 if 'ang' in fdo_lines[idx + n].lower(): 

783 offset += 1 

784 if '%endblock lattice_cart' in fdo_lines[idx + n].lower(): 

785 cell = np.loadtxt( 

786 fdo_lines[idx + offset:idx + n] 

787 ) 

788 return cell if offset == 2 else cell * units['Bohr'] 

789 n += 1 

790 return None 

791 

792 def parse_first_positions(idx): 

793 n = 0 

794 offset = 1 

795 while idx + n < len(fdo_lines): 

796 if 'ang' in fdo_lines[idx + n]: 

797 offset += 1 

798 if '%ENDBLOCK POSITIONS_' in fdo_lines[idx + n]: 

799 if 'FRAC' in fdo_lines[idx + n]: 

800 conv_factor = 1 

801 else: 

802 conv_factor = units['Bohr'] 

803 tmp = np.loadtxt(fdo_lines[idx + offset:idx + n], 

804 dtype='str').reshape(-1, 4) 

805 els = np.char.array(tmp[:, 0]) 

806 if offset == 2: 

807 pos = tmp[:, 1:].astype(np.float32) 

808 else: 

809 pos = tmp[:, 1:].astype(np.float32) * conv_factor 

810 try: 

811 atoms = Atoms(els, pos) 

812 # ASE doesn't recognize names used in ONETEP 

813 # as chemical symbol: dig deeper 

814 except KeyError: 

815 tags, real_elements = find_correct_species( 

816 els, 

817 idx, 

818 first=True 

819 ) 

820 atoms = Atoms(real_elements, pos) 

821 atoms.set_tags(tags) 

822 atoms.info['onetep_species'] = list(els) 

823 return atoms 

824 n += 1 

825 return None 

826 

827 def parse_force(idx): 

828 n = 0 

829 while idx + n < len(fdo_lines): 

830 if '* TOTAL:' in fdo_lines[idx + n]: 

831 tmp = np.loadtxt(fdo_lines[idx + 6:idx + n - 2], 

832 dtype=np.float64, usecols=(3, 4, 5)) 

833 return tmp * units['Hartree'] / units['Bohr'] 

834 n += 1 

835 return None 

836 

837 def parse_positions(idx): 

838 n = 0 

839 offset = 7 

840 stop = 0 

841 while idx + n < len(fdo_lines): 

842 if 'xxxxxxxxxxxxxxxxxxxxxxxxx' in fdo_lines[idx + n].strip(): 

843 stop += 1 

844 if stop == 2: 

845 tmp = np.loadtxt(fdo_lines[idx + offset:idx + n], 

846 dtype='str', usecols=(1, 3, 4, 5)) 

847 els = np.char.array(tmp[:, 0]) 

848 pos = tmp[:, 1:].astype(np.float32) * units['Bohr'] 

849 try: 

850 atoms = Atoms(els, pos) 

851 # ASE doesn't recognize names used in ONETEP 

852 # as chemical symbol: dig deeper 

853 except KeyError: 

854 tags, real_elements = find_correct_species(els, idx) 

855 atoms = Atoms(real_elements, pos) 

856 atoms.set_tags(tags) 

857 atoms.info['onetep_species'] = list(els) 

858 return atoms 

859 n += 1 

860 return None 

861 

862 def parse_species(idx): 

863 n = 1 

864 element_map = {} 

865 while idx + n < len(fdo_lines): 

866 sep = fdo_lines[idx + n].split() 

867 lsline = fdo_lines[idx + n].lower().strip() 

868 if '%endblock species' == lsline: 

869 return element_map 

870 element_map[sep[0]] = sep[1] 

871 n += 1 

872 return None 

873 

874 def parse_spin(idx): 

875 n = 0 

876 offset = 4 

877 while idx + n < len(fdo_lines): 

878 if not fdo_lines[idx + n].strip(): 

879 # If no spin is present we return None 

880 try: 

881 tmp_spins = np.loadtxt( 

882 fdo_lines[idx + offset:idx + n - 1], 

883 usecols=4) 

884 except ValueError: 

885 tmp_spins = None 

886 return tmp_spins 

887 n += 1 

888 return None 

889 

890 # This is needed if ASE doesn't recognize the element 

891 def find_correct_species(els, idx, first=False): 

892 real_elements = [] 

893 tags = [] 

894 # Find nearest species block in case of 

895 # multi-output file with different species blocks. 

896 if first: 

897 closest_species = np.argmin(abs(idx - species)) 

898 else: 

899 tmp = idx - species 

900 tmp[tmp < 0] = 9999999999 

901 closest_species = np.argmin(tmp) 

902 elements_map = real_species[closest_species] 

903 for el in els: 

904 real_elements.append(elements_map[el]) 

905 tag_maybe = el.replace(elements_map[el], '') 

906 if tag_maybe.isdigit(): 

907 tags.append(int(tag_maybe)) 

908 else: 

909 tags.append(False) 

910 return tags, real_elements 

911 

912 cells = [] 

913 for idx in icells: 

914 if idx in output[ONETEP_STRESS_CELL]: 

915 cell = parse_cell(idx) if idx else None 

916 else: 

917 cell = parse_first_cell(idx) if idx else None 

918 cells.append(cell) 

919 

920 charges = [] 

921 for idx in output_corr[ONETEP_MULLIKEN]: 

922 charge = parse_charge(idx) if idx else None 

923 charges.append(charge) 

924 

925 energies = [] 

926 for idx in output_corr[ONETEP_TOTAL_ENERGY]: 

927 energy = parse_energy(idx) if idx else None 

928 energies.append(energy) 

929 

930 magmoms = [] 

931 for idx in output_corr[ONETEP_MULLIKEN]: 

932 magmom = parse_spin(idx) if idx else None 

933 magmoms.append(magmom) 

934 

935 real_species = [] 

936 for idx in species: 

937 real_specie = parse_species(idx) 

938 real_species.append(real_specie) 

939 

940 # If you are here and n_pos == 0 then it 

941 # means you read a CASTEP geom file (see line ~ 522) 

942 if n_pos > 0: 

943 positions, forces = [], [] 

944 for idx in ipositions: 

945 if idx in i_first_positions: 

946 position = parse_first_positions(idx) 

947 else: 

948 position = parse_positions(idx) 

949 if position: 

950 positions.append(position) 

951 else: 

952 n_pos -= 1 

953 break 

954 for idx in output_corr[ONETEP_FORCE]: 

955 force = parse_force(idx) if idx else None 

956 forces.append(force) 

957 

958 n_pos = len(positions) 

959 # Numpy trick to get rid of configuration that are essentially the same 

960 # in a regular geometry optimisation with internal BFGS, the first 

961 # configuration is printed three time, we get rid of it 

962 properties = [energies, forces, charges, magmoms] 

963 

964 if has_bfgs: 

965 tmp = [i.positions for i in positions] 

966 to_del = [] 

967 for i in range(len(tmp[:-1])): 

968 if is_in_bfgs(ipositions[i]): 

969 if np.array_equal(tmp[i], tmp[i + 1]): 

970 # If the deleted configuration has a property 

971 # we want to keep it 

972 for prop in properties: 

973 if prop[i + 1] is not None and prop[i] is None: 

974 prop[i] = prop[i + 1] 

975 to_del.append(i + 1) 

976 c = np.full((len(tmp)), True) 

977 c[to_del] = False 

978 energies = [energies[i] for i in range(n_pos) if c[i]] 

979 forces = [forces[i] for i in range(n_pos) if c[i]] 

980 charges = [charges[i] for i in range(n_pos) if c[i]] 

981 positions = [positions[i] for i in range(n_pos) if c[i]] 

982 ipositions = [ipositions[i] for i in range(n_pos) if c[i]] 

983 magmoms = [magmoms[i] for i in range(n_pos) if c[i]] 

984 

985 n_pos = len(positions) 

986 

987 # Prepare atom objects with all the properties 

988 if isinstance(index, int): 

989 indices = [range(n_pos)[index]] 

990 else: 

991 indices = range(n_pos)[index] 

992 

993 for idx in indices: 

994 if cells: 

995 tmp = ipositions[idx] - icells 

996 p, = np.where(tmp >= 0) 

997 tmp = tmp[p] 

998 closest_cell = np.argmin(tmp) 

999 cell = cells[p[closest_cell]] 

1000 positions[idx].set_cell(cell) 

1001 if ipositions[idx] in is_frac_positions: 

1002 positions[idx].set_scaled_positions( 

1003 positions[idx].get_positions() 

1004 ) 

1005 else: 

1006 raise RuntimeError( 

1007 'No cell found, are you sure this is a onetep output?') 

1008 positions[idx].set_initial_charges(charges[idx]) 

1009 calc = SinglePointDFTCalculator( 

1010 positions[idx], 

1011 energy=energies[idx] if energies else None, 

1012 free_energy=energies[idx] if energies else None, 

1013 forces=forces[idx] if forces else None, 

1014 charges=charges[idx] if charges else None, 

1015 magmoms=magmoms[idx] if magmoms else None, 

1016 ) 

1017 positions[idx].calc = calc 

1018 yield positions[idx]