Coverage for /builds/kinetik161/ase/ase/io/onetep.py: 81.16%
552 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-12-10 11:04 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-12-10 11:04 +0000
1import re
2import warnings
3from copy import deepcopy
4from os.path import basename, dirname, isfile
5from pathlib import Path
7import numpy as np
9from ase.atoms import Atoms
10from ase.calculators.singlepoint import SinglePointDFTCalculator
11from ase.cell import Cell
12from ase.units import Bohr
14no_positions_error = (
15 'no positions can be read from this onetep output '
16 'if you wish to use ASE to read onetep outputs '
17 'please use uppercase block positions in your calculations')
19unable_to_read = (
20 'unable to read this onetep output file, ending'
21)
23# taken from onetep source code,
24# does not seem to be from any known NIST data
25units = {
26 'Hartree': 27.2116529,
27 'Bohr': 1 / 1.889726134583548707935
28}
30# Want to add a functionality? add a global constant below
31ONETEP_START = "Linear-Scaling Ab Initio Total Energy Program"
32ONETEP_STOP = "Job completed:"
33ONETEP_TOTAL_ENERGY = "*** NGWF optimisation converged ***"
34ONETEP_FORCE = "* Forces *"
35ONETEP_MULLIKEN = "Mulliken Atomic Populations"
36ONETEP_IBFGS_ITER = "starting iteration"
37ONETEP_POSITION = "Cell Contents"
38ONETEP_FIRST_POSITION = "%BLOCK POSITIONS_"
39ONETEP_WRONG_FIRST_POSITION = '%block positions_'
40ONETEP_RESUMING_GEOM = 'Resuming previous ONETEP Geometry Optimisation'
41# ONETEP_CELL = "NOT IMPLEMENTED YET"
42# ONETEP_STRESS = "NOT IMPLEMENTED YET"
43ONETEP_ATOM_COUNT = "Totals:"
44ONETEP_IBFGS_IMPROVE = "improving iteration"
45ONETEP_START_GEOM = "Starting ONETEP Geometry Optimisation"
46ONETEP_SPECIES = "%BLOCK SPECIES "
47ONETEP_SPECIESL = "%block species "
48ONETEP_FIRST_CELLL = "%block lattice_cart"
49ONETEP_FIRST_CELL = "%BLOCK LATTICE_CART"
50ONETEP_STRESS_CELL = "stress_calculation: cell geometry"
53# def _alphanum_sorted(arr):
54"""
55Sort an array of strings alphanumerically.
56"""
57# def alphanum_key(key):
58# digits = re.split('([0-9]+)', key)
59# return [int(char) if char.isdigit() else char for char in digits]
60# return sorted(arr, key=alphanum_key)
63def get_onetep_keywords(path):
65 if isinstance(path, str):
66 with open(path) as fd:
67 results = read_onetep_in(fd, only_keywords=True)
68 else:
69 results = read_onetep_in(path, only_keywords=True)
71 # If there is an include file, the entire
72 # file keyword's will be included in the dict
73 # and the include_file keyword will be deleted
74 if 'include_file' in results['keywords']:
75 warnings.warn('include_file will be deleted from the dict')
76 del results['keywords']['include_file']
77 return results['keywords']
80def read_onetep_in(fd, **kwargs):
81 """
82 Read a single ONETEP input.
84 This function can be used to visually check ONETEP inputs,
85 using the ase gui. It can also be used to get access to
86 the input parameters attached to the ONETEP calculator
87 returned
89 The function should work on inputs which contain
90 'include_file' command(s), (possibly recursively
91 but untested)
93 The function should work on input which contains
94 exotic element(s) name(s) if the specie block is
95 present to map them back to real element(s)
97 Parameters
98 ----------
99 fd : io-object
100 File to read.
102 Return
103 ------
104 structure: Atoms
105 Atoms object with cell and a Onetep calculator
106 attached which contains the keywords dictionary
107 """
108 fdi_lines = fd.readlines()
110 try:
111 fd_path = Path(fd.name).resolve()
112 fd_parent = fd_path.parent
113 include_files = [fd_path]
114 except AttributeError:
115 # We are in a StringIO or something similar
116 fd_path = Path().cwd()
117 fd_parent = fd_path
118 include_files = [Path().cwd()]
120 def clean_lines(lines):
121 """
122 Remove indesirable line from the input
123 """
124 new_lines = []
125 for line in lines:
126 sep = re.split(r'[!#]', line.strip())[0]
127 if sep:
128 new_lines.append(sep)
129 return new_lines
131 # Skip comments and empty lines
132 fdi_lines = clean_lines(fdi_lines)
134 # Are we in a block?
135 block_start = 0
137 keywords = {}
138 atoms = Atoms()
139 cell = np.zeros((3, 3))
140 fractional = False
141 positions = False
142 symbols = False
144 # Main loop reading the input
145 for n, line in enumerate(fdi_lines):
146 line_lower = line.lower()
147 if '%block' in line_lower:
148 block_start = n + 1
149 if 'lattice_cart' in line_lower:
150 if 'ang' in fdi_lines[block_start]:
151 cell = np.loadtxt(fdi_lines[n + 2:n + 5])
152 else:
153 cell = np.loadtxt(fdi_lines[n + 1:n + 4])
154 cell *= Bohr
156 if not block_start:
157 if 'devel_code' in line_lower:
158 warnings.warn('devel_code is not supported')
159 continue
160 # Splits line on any valid onetep separator
161 sep = re.split(r'[:=\s]+', line)
162 keywords[sep[0]] = ' '.join(sep[1:])
163 # If include_file is used, we open the included file
164 # and insert it in the current fdi_lines...
165 # ONETEP does not work with cascade
166 # and this SHOULD NOT work with cascade
167 if 'include_file' == sep[0]:
168 name = sep[1].replace('\'', '')
169 name = name.replace('\"', '')
170 new_path = fd_parent / name
171 for path in include_files:
172 if new_path.samefile(path):
173 raise ValueError('invalid/recursive include_file')
174 new_fd = open(new_path)
175 new_lines = new_fd.readlines()
176 new_lines = clean_lines(new_lines)
177 for include_line in new_lines:
178 sep = re.split(r'[:=\s]+', include_line)
179 if 'include_file' == sep[0]:
180 raise ValueError('nested include_file')
181 fdi_lines[:] = fdi_lines[:n + 1] + \
182 new_lines + \
183 fdi_lines[n + 1:]
184 include_files.append(new_path)
185 continue
187 if '%endblock' in line_lower:
188 if 'positions_' in line_lower:
189 if 'ang' in fdi_lines[block_start]:
190 to_read = fdi_lines[block_start + 1:n]
191 positions = np.loadtxt(to_read, usecols=(1, 2, 3))
192 else:
193 to_read = fdi_lines[block_start:n]
194 positions = np.loadtxt(to_read, usecols=(1, 2, 3))
195 positions *= units['Bohr']
196 symbols = np.loadtxt(to_read, usecols=(0), dtype='str')
197 if 'frac' in line_lower:
198 fractional = True
199 elif '%endblock species' == line_lower.strip():
200 els = fdi_lines[block_start:n]
201 species = {}
202 for el in els:
203 sep = el.split()
204 species[sep[0]] = sep[1]
205 to_read = [i.strip() for i in fdi_lines[block_start:n]]
206 keywords['species'] = to_read
207 elif 'lattice_cart' in line_lower:
208 pass
209 else:
210 to_read = [i.strip() for i in fdi_lines[block_start:n]]
211 block_title = line_lower.replace('%endblock', '').strip()
212 keywords[block_title] = to_read
213 block_start = 0
215 # We don't need a fully valid onetep
216 # input to read the keywords, just
217 # the keywords
218 if kwargs.get('only_keywords', False):
219 return {'keywords': keywords}
220 # Necessary if we have only one atom
221 # Check if the cell is valid (3D)
222 if not cell.any(axis=1).all():
223 raise ValueError('invalid cell specified')
225 if positions is False:
226 raise ValueError('invalid position specified')
228 if symbols is False:
229 raise ValueError('no symbols found')
231 positions = positions.reshape(-1, 3)
232 symbols = symbols.reshape(-1)
233 tags = []
234 info = {'onetep_species': []}
235 for symbol in symbols:
236 label = symbol.replace(species[symbol], '')
237 if label.isdigit():
238 tags.append(int(label))
239 else:
240 tags.append(0)
241 info['onetep_species'].append(symbol)
242 atoms = Atoms([species[i] for i in symbols],
243 cell=cell,
244 pbc=True,
245 tags=tags,
246 info=info)
247 if fractional:
248 atoms.set_scaled_positions(positions / units['Bohr'])
249 else:
250 atoms.set_positions(positions)
251 results = {'atoms': atoms, 'keywords': keywords}
252 return results
255def write_onetep_in(
256 fd,
257 atoms,
258 edft=False,
259 xc='PBE',
260 ngwf_count=-1,
261 ngwf_radius=9.0,
262 keywords={},
263 pseudopotentials={},
264 pseudo_path=".",
265 pseudo_suffix=None,
266 **kwargs):
267 """
268 Write a single ONETEP input.
270 This function will be used by ASE to perform
271 various workflows (Opt, NEB...) or can be used
272 manually to quickly create ONETEP input file(s).
274 The function will first write keywords in
275 alphabetic order in lowercase. Secondly, blocks
276 will be written in alphabetic order in uppercase.
278 Two ways to work with the function:
280 - By providing only (simple) keywords present in
281 the parameters. ngwf_count and ngwf_radius
282 accept multiple types as described in the Parameters
283 section.
285 - If the keywords parameters is provided as a dictionary
286 these keywords will be used to write the input file and
287 will take priority.
289 If no pseudopotentials are provided in the parameters and
290 the function will try to look for suitable pseudopotential
291 in the pseudo_path.
293 Parameters
294 ----------
295 fd : file
296 File to write.
297 atoms: Atoms
298 Atoms including Cell object to write.
299 edft: Bool
300 Activate EDFT.
301 xc: str
302 DFT xc to use e.g (PBE, RPBE, ...)
303 ngwf_count: int|list|dict
304 Behaviour depends on the type:
305 int: every species will have this amount
306 of ngwfs.
307 list: list of int, will be attributed
308 alphabetically to species:
309 dict: keys are species name(s),
310 value are their number:
311 ngwf_radius: int|list|dict
312 Behaviour depends on the type:
313 float: every species will have this radius.
314 list: list of float, will be attributed
315 alphabetically to species:
316 [10.0, 9.0]
317 dict: keys are species name(s),
318 value are their radius:
319 {'Na': 9.0, 'Cl': 10.0}
320 keywords: dict
321 Dictionary with ONETEP keywords to write,
322 keywords with lists as values will be
323 treated like blocks, with each element
324 of list being a different line.
325 pseudopotentials: dict
326 Behaviour depends on the type:
327 keys are species name(s) their
328 value are the pseudopotential file to use:
329 {'Na': 'Na.usp', 'Cl': 'Cl.usp'}
330 pseudo_path: str
331 Where to look for pseudopotential, correspond
332 to the pseudo_path keyword of ONETEP.
333 pseudo_suffix: str
334 Suffix for the pseudopotential filename
335 to look for, useful if you have multiple sets of
336 pseudopotentials in pseudo_path.
337 """
339 label = kwargs.get('label', 'onetep')
340 try:
341 directory = kwargs.get('directory', Path(dirname(fd.name)))
342 except AttributeError:
343 directory = '.'
344 autorestart = kwargs.get('autorestart', False)
345 elements = np.array(atoms.symbols)
346 tags = np.array(atoms.get_tags())
347 species_maybe = atoms.info.get('onetep_species', False)
348 # We look if the atom.info contains onetep species information
349 # If it does, we use it, as it might contains character
350 # which are not allowed in ase tags, if not we fall back
351 # to tags and use them instead.
352 if species_maybe:
353 if set(species_maybe) != set(elements):
354 species = np.array(species_maybe)
355 else:
356 formatted_tags = np.array(['' if i == 0 else str(i) for i in tags])
357 species = np.char.add(elements, formatted_tags)
358 numbers = np.array(atoms.numbers)
359 tmp = np.argsort(species)
360 # We sort both Z and name the same
361 numbers = np.take_along_axis(numbers, tmp, axis=0)
362 # u_elements = np.take_along_axis(elements, tmp, axis=0)
363 u_species = np.take_along_axis(species, tmp, axis=0)
364 elements = np.take_along_axis(elements, tmp, axis=0)
365 # We want to keep unique but without sort: small trick with index
366 idx = np.unique(u_species, return_index=True)[1]
367 elements = elements[idx]
368 # Unique species
369 u_species = u_species[idx]
370 numbers = numbers[idx]
371 n_sp = len(u_species)
373 if isinstance(ngwf_count, int):
374 ngwf_count = dict(zip(u_species, [ngwf_count] * n_sp))
375 elif isinstance(ngwf_count, list):
376 ngwf_count = dict(zip(u_species, ngwf_count))
377 elif isinstance(ngwf_count, dict):
378 pass
379 else:
380 raise TypeError('ngwf_count can only be int|list|dict')
382 if isinstance(ngwf_radius, float):
383 ngwf_radius = dict(zip(u_species, [ngwf_radius] * n_sp))
384 elif isinstance(ngwf_radius, list):
385 ngwf_radius = dict(zip(u_species, ngwf_radius))
386 elif isinstance(ngwf_radius, dict):
387 pass
388 else:
389 raise TypeError('ngwf_radius can only be float|list|dict')
391 pp_files = keywords.get('pseudo_path', pseudo_path)
392 pp_files = pp_files.replace('\'', '')
393 pp_files = pp_files.replace('\"', '')
394 pp_files = Path(pp_files).glob('*')
395 pp_files = [i for i in sorted(pp_files) if i.is_file()]
396 pp_is_manual = keywords.get('species_pot', False)
397 common_suffix = ['.usp', '.recpot', '.upf', '.paw', '.psp', '.pspnc']
398 if pseudo_suffix:
399 common_suffix = [pseudo_suffix]
400 # Transform to list
401 if pp_is_manual:
402 pp_list = keywords['species_pot']
403 elif isinstance(pseudopotentials, dict):
404 pp_list = []
405 for idx, el in enumerate(u_species):
406 try:
407 pp_list.append(el + ' ' + pseudopotentials[el])
408 except KeyError:
409 for i in pp_files:
410 if elements[idx] in basename(i)[:2]:
411 for j in common_suffix:
412 if basename(i).endswith(j):
413 pp_list.append(el + ' ' + basename(i))
414 # pp_maybe = attempt_to_find_pp(elements[idx])
415 # if pp_maybe:
416 # pp_list.append(el + ' ' + pp_maybe)
417 # else:
418 # warnings.warn('No pseudopotential found for element {}'
419 # .format(el))
420 else:
421 raise TypeError('pseudopotentials object can only be dict')
423 default_species = []
424 for idx, el in enumerate(u_species):
425 tmp = ""
426 tmp += u_species[idx] + " " + elements[idx] + " "
427 tmp += str(numbers[idx]) + " "
428 try:
429 tmp += str(ngwf_count[el]) + " "
430 except KeyError:
431 tmp += str(ngwf_count[elements[idx]]) + " "
432 try:
433 tmp += str(ngwf_radius[el])
434 except KeyError:
435 tmp += str(ngwf_radius[elements[idx]])
436 default_species.append(tmp)
438 positions_abs = ['ang']
439 for s, p in zip(species, atoms.get_positions()):
440 line = '{s:>5} {0:>12.6f} {1:>12.6f} {2:>12.6f}'.format(s=s, *p)
441 positions_abs.append(line)
443 lattice_cart = ['ang']
444 for axis in atoms.get_cell():
445 line = '{:>16.8f} {:>16.8f} {:>16.8f}'.format(*axis)
446 lattice_cart.append(line)
448 # Default keywords if not provided by the user,
449 # most of them are ONETEP default, except write_forces
450 # which is always turned on.
451 default_keywords = {
452 "xc_functional": "pbe",
453 "edft": edft,
454 "cutoff_energy": 20,
455 "paw": False,
456 "task": "singlepoint",
457 "output_detail": "normal",
458 "species": default_species,
459 "pseudo_path": pseudo_path,
460 "species_pot": pp_list,
461 "positions_abs": positions_abs,
462 "lattice_cart": lattice_cart,
463 "write_forces": True,
464 "forces_output_detail": 'verbose'
465 }
467 # Main loop, fill the keyword dictionary
468 keywords = {key.lower(): value for key, value in keywords.items()}
469 for value in default_keywords:
470 if not keywords.get(value, None):
471 keywords[value] = default_keywords[value]
473 # No pseudopotential provided, we look for them in pseudo_path
474 # If autorestart is True, we look for restart files,
475 # and turn on relevant keywords...
476 if autorestart:
477 keywords['read_denskern'] = \
478 isfile(directory / (label + '.dkn'))
479 keywords['read_tightbox_ngwfs'] = \
480 isfile(directory / (label + '.tightbox_ngwfs'))
481 keywords['read_hamiltonian'] = \
482 isfile(directory / (label + '.ham'))
484 # If not EDFT, hamiltonian is irrelevant.
485 # print(keywords.get('edft', False))
486 # keywords['read_hamiltonian'] = \
487 # keywords.get('read_hamiltonian', False) & keywords.get('edft', False)
489 keywords = dict(sorted(keywords.items()))
491 lines = []
492 block_lines = []
494 for key, value in keywords.items():
495 if isinstance(value, (list, np.ndarray)):
496 if not all(isinstance(_, str) for _ in value):
497 raise TypeError('list values for blocks must be strings only')
498 block_lines.append(('\n%block ' + key).upper())
499 block_lines.extend(value)
500 block_lines.append(('%endblock ' + key).upper())
501 elif isinstance(value, bool):
502 lines.append(str(key) + " : " + str(value)[0])
503 elif isinstance(value, (str, int, float)):
504 lines.append(str(key) + " : " + str(value))
505 else:
506 raise TypeError('keyword values must be list|str|bool')
507 input_header = '!' + '-' * 78 + '!\n' + \
508 '!' + '-' * 33 + ' INPUT FILE ' + '-' * 33 + '!\n' + \
509 '!' + '-' * 78 + '!\n\n'
511 input_footer = '\n!' + '-' * 78 + '!\n' + \
512 '!' + '-' * 32 + ' END OF INPUT ' + '-' * 32 + '!\n' + \
513 '!' + '-' * 78 + '!'
515 fd.write(input_header)
516 fd.writelines(line + '\n' for line in lines)
517 fd.writelines(b_line + '\n' for b_line in block_lines)
519 if 'devel_code' in kwargs:
520 warnings.warn('writing devel code as it is, at the end of the file')
521 fd.writelines('\n' + line for line in kwargs['devel_code'])
523 fd.write(input_footer)
526def read_onetep_out(fd, index=-1, improving=False, **kwargs):
527 """
528 Read ONETEP output(s).
530 !!!
531 This function will be used by ASE when performing
532 various workflows (Opt, NEB...)
533 !!!
535 Parameters
536 ----------
537 fd : file
538 File to read.
539 index: slice
540 Which atomic configuration to read
541 improving: Bool
542 If the output is a geometry optimisation,
543 improving = True will keep line search
544 configuration from BFGS
546 Yields
547 ------
548 structure: Atoms|list of Atoms
549 """
550 # Put everything in memory
551 fdo_lines = fd.readlines()
552 n_lines = len(fdo_lines)
554 # Used to store index of important elements
555 output = {
556 ONETEP_START: [],
557 ONETEP_STOP: [],
558 ONETEP_TOTAL_ENERGY: [],
559 ONETEP_FORCE: [],
560 ONETEP_MULLIKEN: [],
561 ONETEP_POSITION: [],
562 ONETEP_FIRST_POSITION: [],
563 ONETEP_WRONG_FIRST_POSITION: [],
564 ONETEP_ATOM_COUNT: [],
565 ONETEP_IBFGS_IMPROVE: [],
566 ONETEP_IBFGS_ITER: [],
567 ONETEP_START_GEOM: [],
568 ONETEP_RESUMING_GEOM: [],
569 ONETEP_SPECIES: [],
570 ONETEP_SPECIESL: [],
571 ONETEP_FIRST_CELL: [],
572 ONETEP_FIRST_CELLL: [],
573 ONETEP_STRESS_CELL: []
574 }
576 # Index will be treated to get rid of duplicate or improving iterations
577 output_corr = deepcopy(output)
579 # Core properties that will be used in Yield
580 properties = [ONETEP_TOTAL_ENERGY, ONETEP_FORCE,
581 ONETEP_MULLIKEN, ONETEP_FIRST_CELL,
582 ONETEP_FIRST_CELLL]
584 # Find all matches append them to the dictionary
585 for idx, line in enumerate(fdo_lines):
586 match = False
587 for key in output:
588 # The second condition is for species block where
589 # we have to make sure there is nothing after the word
590 # 'species' but sometimes no trailing space will
591 # be present.
592 if key in line or \
593 key.strip() == line.strip():
594 match = key
595 break
596 if match:
597 output[match].append(idx)
598 # output[match].append(idx)
599 # If a calculation died in the middle of nowhere...
600 # Might be needed, keeping it here
601 # if len(output[ONETEP_START]) - len(output[ONETEP_STOP]) > 1:
602 # output[ONETEP_STOP].append(i - 1)
604 # Everything is numpy
605 output = {key: np.array(value) for key, value in output.items()}
606 # Conveniance notation (pointers: no overhead, no additional memory)
607 ibfgs_iter = output[ONETEP_IBFGS_ITER]
608 ibfgs_start = output[ONETEP_START_GEOM]
609 ibfgs_improve = output[ONETEP_IBFGS_IMPROVE]
610 ibfgs_resume = output[ONETEP_RESUMING_GEOM]
611 i_first_positions = output[ONETEP_FIRST_POSITION]
612 is_frac_positions = [i for i in i_first_positions if 'FRAC' in fdo_lines[i]]
614 # In onetep species can have arbritary names,
615 # We want to map them to real element names
616 # Via the species block
617 species = np.concatenate((output[ONETEP_SPECIES],
618 output[ONETEP_SPECIESL])).astype(np.int32)
620 icells = np.hstack(
621 (output[ONETEP_FIRST_CELL],
622 output[ONETEP_FIRST_CELLL],
623 output[ONETEP_STRESS_CELL])
624 )
625 icells = icells.astype(np.int32)
626 # Using the fact that 0 == False and > 0 == True
627 has_bfgs = len(ibfgs_iter) \
628 + len(output[ONETEP_START_GEOM]) \
629 + len(output[ONETEP_RESUMING_GEOM])
631 has_bfgs_improve = len(ibfgs_improve)
632 has_bfgs_resume = len(ibfgs_resume)
633 # When the input block position is written in lowercase
634 # ONETEP does not print the initial position but a hash
635 # of it, might be needed
636 has_hash = len(output[ONETEP_WRONG_FIRST_POSITION])
638 def is_in_bfgs(idx):
639 """
640 Check if a given index is in a BFGS block
641 """
642 for past, future in zip(output[ONETEP_START], np.hstack(
643 (output[ONETEP_START][1:], [n_lines]))):
644 if past < idx < future:
645 if np.any((past < ibfgs_start) & (ibfgs_start < future)) or \
646 np.any((past < ibfgs_resume) & (ibfgs_resume < future)):
647 return True
648 return False
650 # If onetep has bfgs, the first atomic positions
651 # Will be printed multiple times, we don't add them
652 if has_bfgs:
653 to_del = []
654 for idx, tmp in enumerate(i_first_positions):
655 if is_in_bfgs(tmp):
656 to_del.append(idx)
657 i_first_positions = np.delete(i_first_positions, to_del)
659 ipositions = np.hstack((output[ONETEP_POSITION],
660 i_first_positions)).astype(np.int32)
661 ipositions = np.sort(ipositions)
663 n_pos = len(ipositions)
665 # Some ONETEP files will not have any positions
666 # due to how the software is coded. As a last
667 # resort we look for a geom file with the same label.
668 if n_pos == 0:
669 name = fd.name
670 label_maybe = basename(name).split('.')[0]
671 geom_maybe = label_maybe + '.geom'
672 if isfile(geom_maybe):
673 from ase.io import read
674 positions = read(geom_maybe, index="::",
675 format='castep-geom',
676 units={
677 'Eh': units['Hartree'],
678 'a0': units['Bohr']
679 }
680 )
681 forces = [i.get_forces() for i in positions]
682 has_bfgs = False
683 has_bfgs_improve = False
684 # way to make everything work
685 ipositions = np.hstack(([0], output[ONETEP_IBFGS_ITER]))
686 else:
687 if has_hash:
688 raise RuntimeError(no_positions_error)
689 raise RuntimeError(unable_to_read)
691 to_del = []
693 # Important loop which:
694 # - Get rid of improving BFGS iteration if improving == False
695 # - Append None to properties to make sure each properties will
696 # have the same length and each index correspond to the right
697 # atomic configuration (hopefully).
698 # Past is the index of the current atomic conf, future is the
699 # index of the next one.
700 for idx, (past, future) in enumerate(
701 zip(ipositions, np.hstack((ipositions[1:], [n_lines])))):
702 if has_bfgs:
703 # BFGS resume prints the configuration at the beggining,
704 # we don't want it
705 if has_bfgs_resume:
706 closest_resume = np.min(np.abs(past - ibfgs_resume))
707 closest_starting = np.min(np.abs(past - ibfgs_iter))
708 if closest_resume < closest_starting:
709 to_del.append(idx)
710 continue
711 if has_bfgs_improve and not improving:
712 # Find closest improve iteration index
713 closest_improve = np.min(np.abs(past - ibfgs_improve))
714 # Find closest normal iteration index
715 closest_iter = np.min(np.abs(past - ibfgs_iter))
716 if len(ibfgs_start):
717 closest_starting = np.min(np.abs(past - ibfgs_start))
718 closest = np.min([closest_iter, closest_starting])
719 else:
720 closest = closest_iter
721 # If improve is closer we delete
722 if closest_improve < closest:
723 to_del.append(idx)
724 continue
726 # We append None if no properties in contained for
727 # one specific atomic configurations.
728 for prop in properties:
729 tmp, = np.where((past < output[prop]) & (output[prop] <= future))
730 if len(tmp) == 0:
731 output_corr[prop].append(None)
732 else:
733 output_corr[prop].extend(output[prop][tmp[:1]])
735 # We effectively delete unwanted atomic configurations
736 if to_del:
737 new_indices = np.setdiff1d(np.arange(n_pos), to_del)
738 ipositions = ipositions[new_indices]
740 # Bunch of methods to grep properties from output.
741 def parse_cell(idx):
742 a, b, c = np.loadtxt([fdo_lines[idx + 2]]) * units['Bohr']
743 al, be, ga = np.loadtxt([fdo_lines[idx + 4]])
744 cell = Cell.fromcellpar([a, b, c, al, be, ga])
745 return np.array(cell)
747 def parse_charge(idx):
748 n = 0
749 offset = 4
750 while idx + n < len(fdo_lines):
751 if not fdo_lines[idx + n].strip():
752 tmp_charges = np.loadtxt(
753 fdo_lines[idx + offset:idx + n - 1],
754 usecols=3)
755 return tmp_charges
756 n += 1
757 return None
758 # In ONETEP there is no way to differentiate electronic entropy
759 # and entropy due to solvent, therefore there is no way to
760 # extrapolate the energy at 0 K. We return the last energy
761 # instead.
763 def parse_energy(idx):
764 n = 0
765 energies = []
766 while idx + n < len(fdo_lines):
767 if '| Total' in fdo_lines[idx + n]:
768 energies.append(float(fdo_lines[idx + n].split()[-2]))
769 if 'LOCAL ENERGY COMPONENTS FROM MATRIX TRACES' \
770 in fdo_lines[idx + n]:
771 return energies[-1] * units['Hartree']
772 # Something is wrong with this ONETEP output
773 if len(energies) > 2:
774 raise RuntimeError('something is wrong with this ONETEP output')
775 n += 1
776 return None
778 def parse_first_cell(idx):
779 n = 0
780 offset = 1
781 while idx + n < len(fdo_lines):
782 if 'ang' in fdo_lines[idx + n].lower():
783 offset += 1
784 if '%endblock lattice_cart' in fdo_lines[idx + n].lower():
785 cell = np.loadtxt(
786 fdo_lines[idx + offset:idx + n]
787 )
788 return cell if offset == 2 else cell * units['Bohr']
789 n += 1
790 return None
792 def parse_first_positions(idx):
793 n = 0
794 offset = 1
795 while idx + n < len(fdo_lines):
796 if 'ang' in fdo_lines[idx + n]:
797 offset += 1
798 if '%ENDBLOCK POSITIONS_' in fdo_lines[idx + n]:
799 if 'FRAC' in fdo_lines[idx + n]:
800 conv_factor = 1
801 else:
802 conv_factor = units['Bohr']
803 tmp = np.loadtxt(fdo_lines[idx + offset:idx + n],
804 dtype='str').reshape(-1, 4)
805 els = np.char.array(tmp[:, 0])
806 if offset == 2:
807 pos = tmp[:, 1:].astype(np.float32)
808 else:
809 pos = tmp[:, 1:].astype(np.float32) * conv_factor
810 try:
811 atoms = Atoms(els, pos)
812 # ASE doesn't recognize names used in ONETEP
813 # as chemical symbol: dig deeper
814 except KeyError:
815 tags, real_elements = find_correct_species(
816 els,
817 idx,
818 first=True
819 )
820 atoms = Atoms(real_elements, pos)
821 atoms.set_tags(tags)
822 atoms.info['onetep_species'] = list(els)
823 return atoms
824 n += 1
825 return None
827 def parse_force(idx):
828 n = 0
829 while idx + n < len(fdo_lines):
830 if '* TOTAL:' in fdo_lines[idx + n]:
831 tmp = np.loadtxt(fdo_lines[idx + 6:idx + n - 2],
832 dtype=np.float64, usecols=(3, 4, 5))
833 return tmp * units['Hartree'] / units['Bohr']
834 n += 1
835 return None
837 def parse_positions(idx):
838 n = 0
839 offset = 7
840 stop = 0
841 while idx + n < len(fdo_lines):
842 if 'xxxxxxxxxxxxxxxxxxxxxxxxx' in fdo_lines[idx + n].strip():
843 stop += 1
844 if stop == 2:
845 tmp = np.loadtxt(fdo_lines[idx + offset:idx + n],
846 dtype='str', usecols=(1, 3, 4, 5))
847 els = np.char.array(tmp[:, 0])
848 pos = tmp[:, 1:].astype(np.float32) * units['Bohr']
849 try:
850 atoms = Atoms(els, pos)
851 # ASE doesn't recognize names used in ONETEP
852 # as chemical symbol: dig deeper
853 except KeyError:
854 tags, real_elements = find_correct_species(els, idx)
855 atoms = Atoms(real_elements, pos)
856 atoms.set_tags(tags)
857 atoms.info['onetep_species'] = list(els)
858 return atoms
859 n += 1
860 return None
862 def parse_species(idx):
863 n = 1
864 element_map = {}
865 while idx + n < len(fdo_lines):
866 sep = fdo_lines[idx + n].split()
867 lsline = fdo_lines[idx + n].lower().strip()
868 if '%endblock species' == lsline:
869 return element_map
870 element_map[sep[0]] = sep[1]
871 n += 1
872 return None
874 def parse_spin(idx):
875 n = 0
876 offset = 4
877 while idx + n < len(fdo_lines):
878 if not fdo_lines[idx + n].strip():
879 # If no spin is present we return None
880 try:
881 tmp_spins = np.loadtxt(
882 fdo_lines[idx + offset:idx + n - 1],
883 usecols=4)
884 except ValueError:
885 tmp_spins = None
886 return tmp_spins
887 n += 1
888 return None
890 # This is needed if ASE doesn't recognize the element
891 def find_correct_species(els, idx, first=False):
892 real_elements = []
893 tags = []
894 # Find nearest species block in case of
895 # multi-output file with different species blocks.
896 if first:
897 closest_species = np.argmin(abs(idx - species))
898 else:
899 tmp = idx - species
900 tmp[tmp < 0] = 9999999999
901 closest_species = np.argmin(tmp)
902 elements_map = real_species[closest_species]
903 for el in els:
904 real_elements.append(elements_map[el])
905 tag_maybe = el.replace(elements_map[el], '')
906 if tag_maybe.isdigit():
907 tags.append(int(tag_maybe))
908 else:
909 tags.append(False)
910 return tags, real_elements
912 cells = []
913 for idx in icells:
914 if idx in output[ONETEP_STRESS_CELL]:
915 cell = parse_cell(idx) if idx else None
916 else:
917 cell = parse_first_cell(idx) if idx else None
918 cells.append(cell)
920 charges = []
921 for idx in output_corr[ONETEP_MULLIKEN]:
922 charge = parse_charge(idx) if idx else None
923 charges.append(charge)
925 energies = []
926 for idx in output_corr[ONETEP_TOTAL_ENERGY]:
927 energy = parse_energy(idx) if idx else None
928 energies.append(energy)
930 magmoms = []
931 for idx in output_corr[ONETEP_MULLIKEN]:
932 magmom = parse_spin(idx) if idx else None
933 magmoms.append(magmom)
935 real_species = []
936 for idx in species:
937 real_specie = parse_species(idx)
938 real_species.append(real_specie)
940 # If you are here and n_pos == 0 then it
941 # means you read a CASTEP geom file (see line ~ 522)
942 if n_pos > 0:
943 positions, forces = [], []
944 for idx in ipositions:
945 if idx in i_first_positions:
946 position = parse_first_positions(idx)
947 else:
948 position = parse_positions(idx)
949 if position:
950 positions.append(position)
951 else:
952 n_pos -= 1
953 break
954 for idx in output_corr[ONETEP_FORCE]:
955 force = parse_force(idx) if idx else None
956 forces.append(force)
958 n_pos = len(positions)
959 # Numpy trick to get rid of configuration that are essentially the same
960 # in a regular geometry optimisation with internal BFGS, the first
961 # configuration is printed three time, we get rid of it
962 properties = [energies, forces, charges, magmoms]
964 if has_bfgs:
965 tmp = [i.positions for i in positions]
966 to_del = []
967 for i in range(len(tmp[:-1])):
968 if is_in_bfgs(ipositions[i]):
969 if np.array_equal(tmp[i], tmp[i + 1]):
970 # If the deleted configuration has a property
971 # we want to keep it
972 for prop in properties:
973 if prop[i + 1] is not None and prop[i] is None:
974 prop[i] = prop[i + 1]
975 to_del.append(i + 1)
976 c = np.full((len(tmp)), True)
977 c[to_del] = False
978 energies = [energies[i] for i in range(n_pos) if c[i]]
979 forces = [forces[i] for i in range(n_pos) if c[i]]
980 charges = [charges[i] for i in range(n_pos) if c[i]]
981 positions = [positions[i] for i in range(n_pos) if c[i]]
982 ipositions = [ipositions[i] for i in range(n_pos) if c[i]]
983 magmoms = [magmoms[i] for i in range(n_pos) if c[i]]
985 n_pos = len(positions)
987 # Prepare atom objects with all the properties
988 if isinstance(index, int):
989 indices = [range(n_pos)[index]]
990 else:
991 indices = range(n_pos)[index]
993 for idx in indices:
994 if cells:
995 tmp = ipositions[idx] - icells
996 p, = np.where(tmp >= 0)
997 tmp = tmp[p]
998 closest_cell = np.argmin(tmp)
999 cell = cells[p[closest_cell]]
1000 positions[idx].set_cell(cell)
1001 if ipositions[idx] in is_frac_positions:
1002 positions[idx].set_scaled_positions(
1003 positions[idx].get_positions()
1004 )
1005 else:
1006 raise RuntimeError(
1007 'No cell found, are you sure this is a onetep output?')
1008 positions[idx].set_initial_charges(charges[idx])
1009 calc = SinglePointDFTCalculator(
1010 positions[idx],
1011 energy=energies[idx] if energies else None,
1012 free_energy=energies[idx] if energies else None,
1013 forces=forces[idx] if forces else None,
1014 charges=charges[idx] if charges else None,
1015 magmoms=magmoms[idx] if magmoms else None,
1016 )
1017 positions[idx].calc = calc
1018 yield positions[idx]