Coverage for /builds/kinetik161/ase/ase/io/onetep.py: 81.16%

1import re

2import warnings

3from copy import deepcopy

4from os.path import basename, dirname, isfile

5from pathlib import Path

7import numpy as np

9from ase.atoms import Atoms

10from ase.calculators.singlepoint import SinglePointDFTCalculator

11from ase.cell import Cell

12from ase.units import Bohr

14no_positions_error = (

15 'no positions can be read from this onetep output '

16 'if you wish to use ASE to read onetep outputs '

17 'please use uppercase block positions in your calculations')

19unable_to_read = (

20 'unable to read this onetep output file, ending'

21)

23# taken from onetep source code,

24# does not seem to be from any known NIST data

25units = {

26 'Hartree': 27.2116529,

27 'Bohr': 1 / 1.889726134583548707935

28}

30# Want to add a functionality? add a global constant below

31ONETEP_START = "Linear-Scaling Ab Initio Total Energy Program"

32ONETEP_STOP = "Job completed:"

33ONETEP_TOTAL_ENERGY = "*** NGWF optimisation converged ***"

34ONETEP_FORCE = "* Forces *"

35ONETEP_MULLIKEN = "Mulliken Atomic Populations"

36ONETEP_IBFGS_ITER = "starting iteration"

37ONETEP_POSITION = "Cell Contents"

38ONETEP_FIRST_POSITION = "%BLOCK POSITIONS_"

39ONETEP_WRONG_FIRST_POSITION = '%block positions_'

40ONETEP_RESUMING_GEOM = 'Resuming previous ONETEP Geometry Optimisation'

41# ONETEP_CELL = "NOT IMPLEMENTED YET"

42# ONETEP_STRESS = "NOT IMPLEMENTED YET"

43ONETEP_ATOM_COUNT = "Totals:"

44ONETEP_IBFGS_IMPROVE = "improving iteration"

45ONETEP_START_GEOM = "Starting ONETEP Geometry Optimisation"

46ONETEP_SPECIES = "%BLOCK SPECIES "

47ONETEP_SPECIESL = "%block species "

48ONETEP_FIRST_CELLL = "%block lattice_cart"

49ONETEP_FIRST_CELL = "%BLOCK LATTICE_CART"

50ONETEP_STRESS_CELL = "stress_calculation: cell geometry"

53# def _alphanum_sorted(arr):

54"""

55Sort an array of strings alphanumerically.

56"""

57# def alphanum_key(key):

58# digits = re.split('([0-9]+)', key)

59# return [int(char) if char.isdigit() else char for char in digits]

60# return sorted(arr, key=alphanum_key)

63def get_onetep_keywords(path):

65 if isinstance(path, str):

66 with open(path) as fd:

67 results = read_onetep_in(fd, only_keywords=True)

68 else:

69 results = read_onetep_in(path, only_keywords=True)

71 # If there is an include file, the entire

72 # file keyword's will be included in the dict

73 # and the include_file keyword will be deleted

74 if 'include_file' in results['keywords']:

75 warnings.warn('include_file will be deleted from the dict')

76 del results['keywords']['include_file']

77 return results['keywords']

80def read_onetep_in(fd, **kwargs):

81 """

82 Read a single ONETEP input.

84 This function can be used to visually check ONETEP inputs,

85 using the ase gui. It can also be used to get access to

86 the input parameters attached to the ONETEP calculator

87 returned

89 The function should work on inputs which contain

90 'include_file' command(s), (possibly recursively

91 but untested)

93 The function should work on input which contains

94 exotic element(s) name(s) if the specie block is

95 present to map them back to real element(s)

97 Parameters

98 ----------

99 fd : io-object

100 File to read.

101

102 Return

103 ------

104 structure: Atoms

105 Atoms object with cell and a Onetep calculator

106 attached which contains the keywords dictionary

107 """

108 fdi_lines = fd.readlines()

109

110 try:

111 fd_path = Path(fd.name).resolve()

112 fd_parent = fd_path.parent

113 include_files = [fd_path]

114 except AttributeError:

115 # We are in a StringIO or something similar

116 fd_path = Path().cwd()

117 fd_parent = fd_path

118 include_files = [Path().cwd()]

119

120 def clean_lines(lines):

121 """

122 Remove indesirable line from the input

123 """

124 new_lines = []

125 for line in lines:

126 sep = re.split(r'[!#]', line.strip())[0]

127 if sep:

128 new_lines.append(sep)

129 return new_lines

130

131 # Skip comments and empty lines

132 fdi_lines = clean_lines(fdi_lines)

133

134 # Are we in a block?

135 block_start = 0

136

137 keywords = {}

138 atoms = Atoms()

139 cell = np.zeros((3, 3))

140 fractional = False

141 positions = False

142 symbols = False

143

144 # Main loop reading the input

145 for n, line in enumerate(fdi_lines):

146 line_lower = line.lower()

147 if '%block' in line_lower:

148 block_start = n + 1

149 if 'lattice_cart' in line_lower:

150 if 'ang' in fdi_lines[block_start]:

151 cell = np.loadtxt(fdi_lines[n + 2:n + 5])

152 else:

153 cell = np.loadtxt(fdi_lines[n + 1:n + 4])

154 cell *= Bohr

155

156 if not block_start:

157 if 'devel_code' in line_lower:

158 warnings.warn('devel_code is not supported')

159 continue

160 # Splits line on any valid onetep separator

161 sep = re.split(r'[:=\s]+', line)

162 keywords[sep[0]] = ' '.join(sep[1:])

163 # If include_file is used, we open the included file

164 # and insert it in the current fdi_lines...

165 # ONETEP does not work with cascade

166 # and this SHOULD NOT work with cascade

167 if 'include_file' == sep[0]:

168 name = sep[1].replace('\'', '')

169 name = name.replace('\"', '')

170 new_path = fd_parent / name

171 for path in include_files:

172 if new_path.samefile(path):

173 raise ValueError('invalid/recursive include_file')

174 new_fd = open(new_path)

175 new_lines = new_fd.readlines()

176 new_lines = clean_lines(new_lines)

177 for include_line in new_lines:

178 sep = re.split(r'[:=\s]+', include_line)

179 if 'include_file' == sep[0]:

180 raise ValueError('nested include_file')

181 fdi_lines[:] = fdi_lines[:n + 1] + \

182 new_lines + \

183 fdi_lines[n + 1:]

184 include_files.append(new_path)

185 continue

186

187 if '%endblock' in line_lower:

188 if 'positions_' in line_lower:

189 if 'ang' in fdi_lines[block_start]:

190 to_read = fdi_lines[block_start + 1:n]

191 positions = np.loadtxt(to_read, usecols=(1, 2, 3))

192 else:

193 to_read = fdi_lines[block_start:n]

194 positions = np.loadtxt(to_read, usecols=(1, 2, 3))

195 positions *= units['Bohr']

196 symbols = np.loadtxt(to_read, usecols=(0), dtype='str')

197 if 'frac' in line_lower:

198 fractional = True

199 elif '%endblock species' == line_lower.strip():

200 els = fdi_lines[block_start:n]

201 species = {}

202 for el in els:

203 sep = el.split()

204 species[sep[0]] = sep[1]

205 to_read = [i.strip() for i in fdi_lines[block_start:n]]

206 keywords['species'] = to_read

207 elif 'lattice_cart' in line_lower:

208 pass

209 else:

210 to_read = [i.strip() for i in fdi_lines[block_start:n]]

211 block_title = line_lower.replace('%endblock', '').strip()

212 keywords[block_title] = to_read

213 block_start = 0

214

215 # We don't need a fully valid onetep

216 # input to read the keywords, just

217 # the keywords

218 if kwargs.get('only_keywords', False):

219 return {'keywords': keywords}

220 # Necessary if we have only one atom

221 # Check if the cell is valid (3D)

222 if not cell.any(axis=1).all():

223 raise ValueError('invalid cell specified')

224

225 if positions is False:

226 raise ValueError('invalid position specified')

227

228 if symbols is False:

229 raise ValueError('no symbols found')

230

231 positions = positions.reshape(-1, 3)

232 symbols = symbols.reshape(-1)

233 tags = []

234 info = {'onetep_species': []}

235 for symbol in symbols:

236 label = symbol.replace(species[symbol], '')

237 if label.isdigit():

238 tags.append(int(label))

239 else:

240 tags.append(0)

241 info['onetep_species'].append(symbol)

242 atoms = Atoms([species[i] for i in symbols],

243 cell=cell,

244 pbc=True,

245 tags=tags,

246 info=info)

247 if fractional:

248 atoms.set_scaled_positions(positions / units['Bohr'])

249 else:

250 atoms.set_positions(positions)

251 results = {'atoms': atoms, 'keywords': keywords}

252 return results

253

254

255def write_onetep_in(

256 fd,

257 atoms,

258 edft=False,

259 xc='PBE',

260 ngwf_count=-1,

261 ngwf_radius=9.0,

262 keywords={},

263 pseudopotentials={},

264 pseudo_path=".",

265 pseudo_suffix=None,

266 **kwargs):

267 """

268 Write a single ONETEP input.

269

270 This function will be used by ASE to perform

271 various workflows (Opt, NEB...) or can be used

272 manually to quickly create ONETEP input file(s).

273

274 The function will first write keywords in

275 alphabetic order in lowercase. Secondly, blocks

276 will be written in alphabetic order in uppercase.

277

278 Two ways to work with the function:

279

280 - By providing only (simple) keywords present in

281 the parameters. ngwf_count and ngwf_radius

282 accept multiple types as described in the Parameters

283 section.

284

285 - If the keywords parameters is provided as a dictionary

286 these keywords will be used to write the input file and

287 will take priority.

288

289 If no pseudopotentials are provided in the parameters and

290 the function will try to look for suitable pseudopotential

291 in the pseudo_path.

292

293 Parameters

294 ----------

295 fd : file

296 File to write.

297 atoms: Atoms

298 Atoms including Cell object to write.

299 edft: Bool

300 Activate EDFT.

301 xc: str

302 DFT xc to use e.g (PBE, RPBE, ...)

303 ngwf_count: int|list|dict

304 Behaviour depends on the type:

305 int: every species will have this amount

306 of ngwfs.

307 list: list of int, will be attributed

308 alphabetically to species:

309 dict: keys are species name(s),

310 value are their number:

311 ngwf_radius: int|list|dict

312 Behaviour depends on the type:

313 float: every species will have this radius.

314 list: list of float, will be attributed

315 alphabetically to species:

316 [10.0, 9.0]

317 dict: keys are species name(s),

318 value are their radius:

319 {'Na': 9.0, 'Cl': 10.0}

320 keywords: dict

321 Dictionary with ONETEP keywords to write,

322 keywords with lists as values will be

323 treated like blocks, with each element

324 of list being a different line.

325 pseudopotentials: dict

326 Behaviour depends on the type:

327 keys are species name(s) their

328 value are the pseudopotential file to use:

329 {'Na': 'Na.usp', 'Cl': 'Cl.usp'}

330 pseudo_path: str

331 Where to look for pseudopotential, correspond

332 to the pseudo_path keyword of ONETEP.

333 pseudo_suffix: str

334 Suffix for the pseudopotential filename

335 to look for, useful if you have multiple sets of

336 pseudopotentials in pseudo_path.

337 """

338

339 label = kwargs.get('label', 'onetep')

340 try:

341 directory = kwargs.get('directory', Path(dirname(fd.name)))

342 except AttributeError:

343 directory = '.'

344 autorestart = kwargs.get('autorestart', False)

345 elements = np.array(atoms.symbols)

346 tags = np.array(atoms.get_tags())

347 species_maybe = atoms.info.get('onetep_species', False)

348 # We look if the atom.info contains onetep species information

349 # If it does, we use it, as it might contains character

350 # which are not allowed in ase tags, if not we fall back

351 # to tags and use them instead.

352 if species_maybe:

353 if set(species_maybe) != set(elements):

354 species = np.array(species_maybe)

355 else:

356 formatted_tags = np.array(['' if i == 0 else str(i) for i in tags])

357 species = np.char.add(elements, formatted_tags)

358 numbers = np.array(atoms.numbers)

359 tmp = np.argsort(species)

360 # We sort both Z and name the same

361 numbers = np.take_along_axis(numbers, tmp, axis=0)

362 # u_elements = np.take_along_axis(elements, tmp, axis=0)

363 u_species = np.take_along_axis(species, tmp, axis=0)

364 elements = np.take_along_axis(elements, tmp, axis=0)

365 # We want to keep unique but without sort: small trick with index

366 idx = np.unique(u_species, return_index=True)[1]

367 elements = elements[idx]

368 # Unique species

369 u_species = u_species[idx]

370 numbers = numbers[idx]

371 n_sp = len(u_species)

372

373 if isinstance(ngwf_count, int):

374 ngwf_count = dict(zip(u_species, [ngwf_count] * n_sp))

375 elif isinstance(ngwf_count, list):

376 ngwf_count = dict(zip(u_species, ngwf_count))

377 elif isinstance(ngwf_count, dict):

378 pass

379 else:

380 raise TypeError('ngwf_count can only be int|list|dict')

381

382 if isinstance(ngwf_radius, float):

383 ngwf_radius = dict(zip(u_species, [ngwf_radius] * n_sp))

384 elif isinstance(ngwf_radius, list):

385 ngwf_radius = dict(zip(u_species, ngwf_radius))

386 elif isinstance(ngwf_radius, dict):

387 pass

388 else:

389 raise TypeError('ngwf_radius can only be float|list|dict')

390

391 pp_files = keywords.get('pseudo_path', pseudo_path)

392 pp_files = pp_files.replace('\'', '')

393 pp_files = pp_files.replace('\"', '')

394 pp_files = Path(pp_files).glob('*')

395 pp_files = [i for i in sorted(pp_files) if i.is_file()]

396 pp_is_manual = keywords.get('species_pot', False)

397 common_suffix = ['.usp', '.recpot', '.upf', '.paw', '.psp', '.pspnc']

398 if pseudo_suffix:

399 common_suffix = [pseudo_suffix]

400 # Transform to list

401 if pp_is_manual:

402 pp_list = keywords['species_pot']

403 elif isinstance(pseudopotentials, dict):

404 pp_list = []

405 for idx, el in enumerate(u_species):

406 try:

407 pp_list.append(el + ' ' + pseudopotentials[el])

408 except KeyError:

409 for i in pp_files:

410 if elements[idx] in basename(i)[:2]:

411 for j in common_suffix:

412 if basename(i).endswith(j):

413 pp_list.append(el + ' ' + basename(i))

414 # pp_maybe = attempt_to_find_pp(elements[idx])

415 # if pp_maybe:

416 # pp_list.append(el + ' ' + pp_maybe)

417 # else:

418 # warnings.warn('No pseudopotential found for element {}'

419 # .format(el))

420 else:

421 raise TypeError('pseudopotentials object can only be dict')

422

423 default_species = []

424 for idx, el in enumerate(u_species):

425 tmp = ""

426 tmp += u_species[idx] + " " + elements[idx] + " "

427 tmp += str(numbers[idx]) + " "

428 try:

429 tmp += str(ngwf_count[el]) + " "

430 except KeyError:

431 tmp += str(ngwf_count[elements[idx]]) + " "

432 try:

433 tmp += str(ngwf_radius[el])

434 except KeyError:

435 tmp += str(ngwf_radius[elements[idx]])

436 default_species.append(tmp)

437

438 positions_abs = ['ang']

439 for s, p in zip(species, atoms.get_positions()):

440 line = '{s:>5} {0:>12.6f} {1:>12.6f} {2:>12.6f}'.format(s=s, *p)

441 positions_abs.append(line)

442

443 lattice_cart = ['ang']

444 for axis in atoms.get_cell():

445 line = '{:>16.8f} {:>16.8f} {:>16.8f}'.format(*axis)

446 lattice_cart.append(line)

447

448 # Default keywords if not provided by the user,

449 # most of them are ONETEP default, except write_forces

450 # which is always turned on.

451 default_keywords = {

452 "xc_functional": "pbe",

453 "edft": edft,

454 "cutoff_energy": 20,

455 "paw": False,

456 "task": "singlepoint",

457 "output_detail": "normal",

458 "species": default_species,

459 "pseudo_path": pseudo_path,

460 "species_pot": pp_list,

461 "positions_abs": positions_abs,

462 "lattice_cart": lattice_cart,

463 "write_forces": True,

464 "forces_output_detail": 'verbose'

465 }

466

467 # Main loop, fill the keyword dictionary

468 keywords = {key.lower(): value for key, value in keywords.items()}

469 for value in default_keywords:

470 if not keywords.get(value, None):

471 keywords[value] = default_keywords[value]

472

473 # No pseudopotential provided, we look for them in pseudo_path

474 # If autorestart is True, we look for restart files,

475 # and turn on relevant keywords...

476 if autorestart:

477 keywords['read_denskern'] = \

478 isfile(directory / (label + '.dkn'))

479 keywords['read_tightbox_ngwfs'] = \

480 isfile(directory / (label + '.tightbox_ngwfs'))

481 keywords['read_hamiltonian'] = \

482 isfile(directory / (label + '.ham'))

483

484 # If not EDFT, hamiltonian is irrelevant.

485 # print(keywords.get('edft', False))

486 # keywords['read_hamiltonian'] = \

487 # keywords.get('read_hamiltonian', False) & keywords.get('edft', False)

488

489 keywords = dict(sorted(keywords.items()))

490

491 lines = []

492 block_lines = []

493

494 for key, value in keywords.items():

495 if isinstance(value, (list, np.ndarray)):

496 if not all(isinstance(_, str) for _ in value):

497 raise TypeError('list values for blocks must be strings only')

498 block_lines.append(('\n%block ' + key).upper())

499 block_lines.extend(value)

500 block_lines.append(('%endblock ' + key).upper())

501 elif isinstance(value, bool):

502 lines.append(str(key) + " : " + str(value)[0])

503 elif isinstance(value, (str, int, float)):

504 lines.append(str(key) + " : " + str(value))

505 else:

506 raise TypeError('keyword values must be list|str|bool')

507 input_header = '!' + '-' * 78 + '!\n' + \

508 '!' + '-' * 33 + ' INPUT FILE ' + '-' * 33 + '!\n' + \

509 '!' + '-' * 78 + '!\n\n'

510

511 input_footer = '\n!' + '-' * 78 + '!\n' + \

512 '!' + '-' * 32 + ' END OF INPUT ' + '-' * 32 + '!\n' + \

513 '!' + '-' * 78 + '!'

514

515 fd.write(input_header)

516 fd.writelines(line + '\n' for line in lines)

517 fd.writelines(b_line + '\n' for b_line in block_lines)

518

519 if 'devel_code' in kwargs:

520 warnings.warn('writing devel code as it is, at the end of the file')

521 fd.writelines('\n' + line for line in kwargs['devel_code'])

522

523 fd.write(input_footer)

524

525

526def read_onetep_out(fd, index=-1, improving=False, **kwargs):

527 """

528 Read ONETEP output(s).

529

530 !!!

531 This function will be used by ASE when performing

532 various workflows (Opt, NEB...)

533 !!!

534

535 Parameters

536 ----------

537 fd : file

538 File to read.

539 index: slice

540 Which atomic configuration to read

541 improving: Bool

542 If the output is a geometry optimisation,

543 improving = True will keep line search

544 configuration from BFGS

545

546 Yields

547 ------

548 structure: Atoms|list of Atoms

549 """

550 # Put everything in memory

551 fdo_lines = fd.readlines()

552 n_lines = len(fdo_lines)

553

554 # Used to store index of important elements

555 output = {

556 ONETEP_START: [],

557 ONETEP_STOP: [],

558 ONETEP_TOTAL_ENERGY: [],

559 ONETEP_FORCE: [],

560 ONETEP_MULLIKEN: [],

561 ONETEP_POSITION: [],

562 ONETEP_FIRST_POSITION: [],

563 ONETEP_WRONG_FIRST_POSITION: [],

564 ONETEP_ATOM_COUNT: [],

565 ONETEP_IBFGS_IMPROVE: [],

566 ONETEP_IBFGS_ITER: [],

567 ONETEP_START_GEOM: [],

568 ONETEP_RESUMING_GEOM: [],

569 ONETEP_SPECIES: [],

570 ONETEP_SPECIESL: [],

571 ONETEP_FIRST_CELL: [],

572 ONETEP_FIRST_CELLL: [],

573 ONETEP_STRESS_CELL: []

574 }

575

576 # Index will be treated to get rid of duplicate or improving iterations

577 output_corr = deepcopy(output)

578

579 # Core properties that will be used in Yield

580 properties = [ONETEP_TOTAL_ENERGY, ONETEP_FORCE,

581 ONETEP_MULLIKEN, ONETEP_FIRST_CELL,

582 ONETEP_FIRST_CELLL]

583

584 # Find all matches append them to the dictionary

585 for idx, line in enumerate(fdo_lines):

586 match = False

587 for key in output:

588 # The second condition is for species block where

589 # we have to make sure there is nothing after the word

590 # 'species' but sometimes no trailing space will

591 # be present.

592 if key in line or \

593 key.strip() == line.strip():

594 match = key

595 break

596 if match:

597 output[match].append(idx)

598 # output[match].append(idx)

599 # If a calculation died in the middle of nowhere...

600 # Might be needed, keeping it here

601 # if len(output[ONETEP_START]) - len(output[ONETEP_STOP]) > 1:

602 # output[ONETEP_STOP].append(i - 1)

603

604 # Everything is numpy

605 output = {key: np.array(value) for key, value in output.items()}

606 # Conveniance notation (pointers: no overhead, no additional memory)

607 ibfgs_iter = output[ONETEP_IBFGS_ITER]

608 ibfgs_start = output[ONETEP_START_GEOM]

609 ibfgs_improve = output[ONETEP_IBFGS_IMPROVE]

610 ibfgs_resume = output[ONETEP_RESUMING_GEOM]

611 i_first_positions = output[ONETEP_FIRST_POSITION]

612 is_frac_positions = [i for i in i_first_positions if 'FRAC' in fdo_lines[i]]

613

614 # In onetep species can have arbritary names,

615 # We want to map them to real element names

616 # Via the species block

617 species = np.concatenate((output[ONETEP_SPECIES],

618 output[ONETEP_SPECIESL])).astype(np.int32)

619

620 icells = np.hstack(

621 (output[ONETEP_FIRST_CELL],

622 output[ONETEP_FIRST_CELLL],

623 output[ONETEP_STRESS_CELL])

624 )

625 icells = icells.astype(np.int32)

626 # Using the fact that 0 == False and > 0 == True

627 has_bfgs = len(ibfgs_iter) \

628 + len(output[ONETEP_START_GEOM]) \

629 + len(output[ONETEP_RESUMING_GEOM])

630

631 has_bfgs_improve = len(ibfgs_improve)

632 has_bfgs_resume = len(ibfgs_resume)

633 # When the input block position is written in lowercase

634 # ONETEP does not print the initial position but a hash

635 # of it, might be needed

636 has_hash = len(output[ONETEP_WRONG_FIRST_POSITION])

637

638 def is_in_bfgs(idx):

639 """

640 Check if a given index is in a BFGS block

641 """

642 for past, future in zip(output[ONETEP_START], np.hstack(

643 (output[ONETEP_START][1:], [n_lines]))):

644 if past < idx < future:

645 if np.any((past < ibfgs_start) & (ibfgs_start < future)) or \

646 np.any((past < ibfgs_resume) & (ibfgs_resume < future)):

647 return True

648 return False

649

650 # If onetep has bfgs, the first atomic positions

651 # Will be printed multiple times, we don't add them

652 if has_bfgs:

653 to_del = []

654 for idx, tmp in enumerate(i_first_positions):

655 if is_in_bfgs(tmp):

656 to_del.append(idx)

657 i_first_positions = np.delete(i_first_positions, to_del)

658

659 ipositions = np.hstack((output[ONETEP_POSITION],

660 i_first_positions)).astype(np.int32)

661 ipositions = np.sort(ipositions)

662

663 n_pos = len(ipositions)

664

665 # Some ONETEP files will not have any positions

666 # due to how the software is coded. As a last

667 # resort we look for a geom file with the same label.

668 if n_pos == 0:

669 name = fd.name

670 label_maybe = basename(name).split('.')[0]

671 geom_maybe = label_maybe + '.geom'

672 if isfile(geom_maybe):

673 from ase.io import read

674 positions = read(geom_maybe, index="::",

675 format='castep-geom',

676 units={

677 'Eh': units['Hartree'],

678 'a0': units['Bohr']

679 }

680 )

681 forces = [i.get_forces() for i in positions]

682 has_bfgs = False

683 has_bfgs_improve = False

684 # way to make everything work

685 ipositions = np.hstack(([0], output[ONETEP_IBFGS_ITER]))

686 else:

687 if has_hash:

688 raise RuntimeError(no_positions_error)

689 raise RuntimeError(unable_to_read)

690

691 to_del = []

692

693 # Important loop which:

694 # - Get rid of improving BFGS iteration if improving == False

695 # - Append None to properties to make sure each properties will

696 # have the same length and each index correspond to the right

697 # atomic configuration (hopefully).

698 # Past is the index of the current atomic conf, future is the

699 # index of the next one.

700 for idx, (past, future) in enumerate(

701 zip(ipositions, np.hstack((ipositions[1:], [n_lines])))):

702 if has_bfgs:

703 # BFGS resume prints the configuration at the beggining,

704 # we don't want it

705 if has_bfgs_resume:

706 closest_resume = np.min(np.abs(past - ibfgs_resume))

707 closest_starting = np.min(np.abs(past - ibfgs_iter))

708 if closest_resume < closest_starting:

709 to_del.append(idx)

710 continue

711 if has_bfgs_improve and not improving:

712 # Find closest improve iteration index

713 closest_improve = np.min(np.abs(past - ibfgs_improve))

714 # Find closest normal iteration index

715 closest_iter = np.min(np.abs(past - ibfgs_iter))

716 if len(ibfgs_start):

717 closest_starting = np.min(np.abs(past - ibfgs_start))

718 closest = np.min([closest_iter, closest_starting])

719 else:

720 closest = closest_iter

721 # If improve is closer we delete

722 if closest_improve < closest:

723 to_del.append(idx)

724 continue

725

726 # We append None if no properties in contained for

727 # one specific atomic configurations.

728 for prop in properties:

729 tmp, = np.where((past < output[prop]) & (output[prop] <= future))

730 if len(tmp) == 0:

731 output_corr[prop].append(None)

732 else:

733 output_corr[prop].extend(output[prop][tmp[:1]])

734

735 # We effectively delete unwanted atomic configurations

736 if to_del:

737 new_indices = np.setdiff1d(np.arange(n_pos), to_del)

738 ipositions = ipositions[new_indices]

739

740 # Bunch of methods to grep properties from output.

741 def parse_cell(idx):

742 a, b, c = np.loadtxt([fdo_lines[idx + 2]]) * units['Bohr']

743 al, be, ga = np.loadtxt([fdo_lines[idx + 4]])

744 cell = Cell.fromcellpar([a, b, c, al, be, ga])

745 return np.array(cell)

746

747 def parse_charge(idx):

748 n = 0

749 offset = 4

750 while idx + n < len(fdo_lines):

751 if not fdo_lines[idx + n].strip():

752 tmp_charges = np.loadtxt(

753 fdo_lines[idx + offset:idx + n - 1],

754 usecols=3)

755 return tmp_charges

756 n += 1

757 return None

758 # In ONETEP there is no way to differentiate electronic entropy

759 # and entropy due to solvent, therefore there is no way to

760 # extrapolate the energy at 0 K. We return the last energy

761 # instead.

762

763 def parse_energy(idx):

764 n = 0

765 energies = []

766 while idx + n < len(fdo_lines):

767 if '| Total' in fdo_lines[idx + n]:

768 energies.append(float(fdo_lines[idx + n].split()[-2]))

769 if 'LOCAL ENERGY COMPONENTS FROM MATRIX TRACES' \

770 in fdo_lines[idx + n]:

771 return energies[-1] * units['Hartree']

772 # Something is wrong with this ONETEP output

773 if len(energies) > 2:

774 raise RuntimeError('something is wrong with this ONETEP output')

775 n += 1

776 return None

777

778 def parse_first_cell(idx):

779 n = 0

780 offset = 1

781 while idx + n < len(fdo_lines):

782 if 'ang' in fdo_lines[idx + n].lower():

783 offset += 1

784 if '%endblock lattice_cart' in fdo_lines[idx + n].lower():

785 cell = np.loadtxt(

786 fdo_lines[idx + offset:idx + n]

787 )

788 return cell if offset == 2 else cell * units['Bohr']

789 n += 1

790 return None

791

792 def parse_first_positions(idx):

793 n = 0

794 offset = 1

795 while idx + n < len(fdo_lines):

796 if 'ang' in fdo_lines[idx + n]:

797 offset += 1

798 if '%ENDBLOCK POSITIONS_' in fdo_lines[idx + n]:

799 if 'FRAC' in fdo_lines[idx + n]:

800 conv_factor = 1

801 else:

802 conv_factor = units['Bohr']

803 tmp = np.loadtxt(fdo_lines[idx + offset:idx + n],

804 dtype='str').reshape(-1, 4)

805 els = np.char.array(tmp[:, 0])

806 if offset == 2:

807 pos = tmp[:, 1:].astype(np.float32)

808 else:

809 pos = tmp[:, 1:].astype(np.float32) * conv_factor

810 try:

811 atoms = Atoms(els, pos)

812 # ASE doesn't recognize names used in ONETEP

813 # as chemical symbol: dig deeper

814 except KeyError:

815 tags, real_elements = find_correct_species(

816 els,

817 idx,

818 first=True

819 )

820 atoms = Atoms(real_elements, pos)

821 atoms.set_tags(tags)

822 atoms.info['onetep_species'] = list(els)

823 return atoms

824 n += 1

825 return None

826

827 def parse_force(idx):

828 n = 0

829 while idx + n < len(fdo_lines):

830 if '* TOTAL:' in fdo_lines[idx + n]:

831 tmp = np.loadtxt(fdo_lines[idx + 6:idx + n - 2],

832 dtype=np.float64, usecols=(3, 4, 5))

833 return tmp * units['Hartree'] / units['Bohr']

834 n += 1

835 return None

836

837 def parse_positions(idx):

838 n = 0

839 offset = 7

840 stop = 0

841 while idx + n < len(fdo_lines):

842 if 'xxxxxxxxxxxxxxxxxxxxxxxxx' in fdo_lines[idx + n].strip():

843 stop += 1

844 if stop == 2:

845 tmp = np.loadtxt(fdo_lines[idx + offset:idx + n],

846 dtype='str', usecols=(1, 3, 4, 5))

847 els = np.char.array(tmp[:, 0])

848 pos = tmp[:, 1:].astype(np.float32) * units['Bohr']

849 try:

850 atoms = Atoms(els, pos)

851 # ASE doesn't recognize names used in ONETEP

852 # as chemical symbol: dig deeper

853 except KeyError:

854 tags, real_elements = find_correct_species(els, idx)

855 atoms = Atoms(real_elements, pos)

856 atoms.set_tags(tags)

857 atoms.info['onetep_species'] = list(els)

858 return atoms

859 n += 1

860 return None

861

862 def parse_species(idx):

863 n = 1

864 element_map = {}

865 while idx + n < len(fdo_lines):

866 sep = fdo_lines[idx + n].split()

867 lsline = fdo_lines[idx + n].lower().strip()

868 if '%endblock species' == lsline:

869 return element_map

870 element_map[sep[0]] = sep[1]

871 n += 1

872 return None

873

874 def parse_spin(idx):

875 n = 0

876 offset = 4

877 while idx + n < len(fdo_lines):

878 if not fdo_lines[idx + n].strip():

879 # If no spin is present we return None

880 try:

881 tmp_spins = np.loadtxt(

882 fdo_lines[idx + offset:idx + n - 1],

883 usecols=4)

884 except ValueError:

885 tmp_spins = None

886 return tmp_spins

887 n += 1

888 return None

889

890 # This is needed if ASE doesn't recognize the element

891 def find_correct_species(els, idx, first=False):

892 real_elements = []

893 tags = []

894 # Find nearest species block in case of

895 # multi-output file with different species blocks.

896 if first:

897 closest_species = np.argmin(abs(idx - species))

898 else:

899 tmp = idx - species

900 tmp[tmp < 0] = 9999999999

901 closest_species = np.argmin(tmp)

902 elements_map = real_species[closest_species]

903 for el in els:

904 real_elements.append(elements_map[el])

905 tag_maybe = el.replace(elements_map[el], '')

906 if tag_maybe.isdigit():

907 tags.append(int(tag_maybe))

908 else:

909 tags.append(False)

910 return tags, real_elements

911

912 cells = []

913 for idx in icells:

914 if idx in output[ONETEP_STRESS_CELL]:

915 cell = parse_cell(idx) if idx else None

916 else:

917 cell = parse_first_cell(idx) if idx else None

918 cells.append(cell)

919

920 charges = []

921 for idx in output_corr[ONETEP_MULLIKEN]:

922 charge = parse_charge(idx) if idx else None

923 charges.append(charge)

924

925 energies = []

926 for idx in output_corr[ONETEP_TOTAL_ENERGY]:

927 energy = parse_energy(idx) if idx else None

928 energies.append(energy)

929

930 magmoms = []

931 for idx in output_corr[ONETEP_MULLIKEN]:

932 magmom = parse_spin(idx) if idx else None

933 magmoms.append(magmom)

934

935 real_species = []

936 for idx in species:

937 real_specie = parse_species(idx)

938 real_species.append(real_specie)

939

940 # If you are here and n_pos == 0 then it

941 # means you read a CASTEP geom file (see line ~ 522)

942 if n_pos > 0:

943 positions, forces = [], []

944 for idx in ipositions:

945 if idx in i_first_positions:

946 position = parse_first_positions(idx)

947 else:

948 position = parse_positions(idx)

949 if position:

950 positions.append(position)

951 else:

952 n_pos -= 1

953 break

954 for idx in output_corr[ONETEP_FORCE]:

955 force = parse_force(idx) if idx else None

956 forces.append(force)

957

958 n_pos = len(positions)

959 # Numpy trick to get rid of configuration that are essentially the same

960 # in a regular geometry optimisation with internal BFGS, the first

961 # configuration is printed three time, we get rid of it

962 properties = [energies, forces, charges, magmoms]

963

964 if has_bfgs:

965 tmp = [i.positions for i in positions]

966 to_del = []

967 for i in range(len(tmp[:-1])):

968 if is_in_bfgs(ipositions[i]):

969 if np.array_equal(tmp[i], tmp[i + 1]):

970 # If the deleted configuration has a property

971 # we want to keep it

972 for prop in properties:

973 if prop[i + 1] is not None and prop[i] is None:

974 prop[i] = prop[i + 1]

975 to_del.append(i + 1)

976 c = np.full((len(tmp)), True)

977 c[to_del] = False

978 energies = [energies[i] for i in range(n_pos) if c[i]]

979 forces = [forces[i] for i in range(n_pos) if c[i]]

980 charges = [charges[i] for i in range(n_pos) if c[i]]

981 positions = [positions[i] for i in range(n_pos) if c[i]]

982 ipositions = [ipositions[i] for i in range(n_pos) if c[i]]

983 magmoms = [magmoms[i] for i in range(n_pos) if c[i]]

984

985 n_pos = len(positions)

986

987 # Prepare atom objects with all the properties

988 if isinstance(index, int):

989 indices = [range(n_pos)[index]]

990 else:

991 indices = range(n_pos)[index]

992

993 for idx in indices:

994 if cells:

995 tmp = ipositions[idx] - icells

996 p, = np.where(tmp >= 0)

997 tmp = tmp[p]

998 closest_cell = np.argmin(tmp)

999 cell = cells[p[closest_cell]]

1000 positions[idx].set_cell(cell)

1001 if ipositions[idx] in is_frac_positions:

1002 positions[idx].set_scaled_positions(

1003 positions[idx].get_positions()

1004 )

1005 else:

1006 raise RuntimeError(

1007 'No cell found, are you sure this is a onetep output?')

1008 positions[idx].set_initial_charges(charges[idx])

1009 calc = SinglePointDFTCalculator(

1010 positions[idx],

1011 energy=energies[idx] if energies else None,

1012 free_energy=energies[idx] if energies else None,

1013 forces=forces[idx] if forces else None,

1014 charges=charges[idx] if charges else None,

1015 magmoms=magmoms[idx] if magmoms else None,

1016 )

1017 positions[idx].calc = calc

1018 yield positions[idx]