Coverage for /builds/kinetik161/ase/ase/io/extxyz.py: 80.78%
541 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-12-10 11:04 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-12-10 11:04 +0000
1"""
2Extended XYZ support
4Read/write files in "extended" XYZ format, storing additional
5per-configuration information as key-value pairs on the XYZ
6comment line, and additional per-atom properties as extra columns.
8Contributed by James Kermode <james.kermode@gmail.com>
9"""
10import json
11import numbers
12import re
13import warnings
14from io import StringIO, UnsupportedOperation
16import numpy as np
18from ase.atoms import Atoms
19from ase.calculators.calculator import BaseCalculator, all_properties
20from ase.calculators.singlepoint import SinglePointCalculator
21from ase.constraints import FixAtoms, FixCartesian
22from ase.io.formats import index2range
23from ase.io.utils import ImageIterator
24from ase.parallel import paropen
25from ase.spacegroup.spacegroup import Spacegroup
26from ase.utils import reader
28__all__ = ['read_xyz', 'write_xyz', 'iread_xyz']
30PROPERTY_NAME_MAP = {'positions': 'pos',
31 'numbers': 'Z',
32 'charges': 'charge',
33 'symbols': 'species'}
35REV_PROPERTY_NAME_MAP = dict(zip(PROPERTY_NAME_MAP.values(),
36 PROPERTY_NAME_MAP.keys()))
38KEY_QUOTED_VALUE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_-]*)'
39 + r'\s*=\s*["\{\}]([^"\{\}]+)["\{\}]\s*')
40KEY_VALUE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_]*)\s*='
41 + r'\s*([^\s]+)\s*')
42KEY_RE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_-]*)\s*')
44UNPROCESSED_KEYS = ['uid']
46SPECIAL_3_3_KEYS = ['Lattice', 'virial', 'stress']
48# partition ase.calculators.calculator.all_properties into two lists:
49# 'per-atom' and 'per-config'
50per_atom_properties = ['forces', 'stresses', 'charges', 'magmoms', 'energies']
51per_config_properties = ['energy', 'stress', 'dipole', 'magmom', 'free_energy']
54def key_val_str_to_dict(string, sep=None):
55 """
56 Parse an xyz properties string in a key=value and return a dict with
57 various values parsed to native types.
59 Accepts brackets or quotes to delimit values. Parses integers, floats
60 booleans and arrays thereof. Arrays with 9 values whose name is listed
61 in SPECIAL_3_3_KEYS are converted to 3x3 arrays with Fortran ordering.
63 If sep is None, string will split on whitespace, otherwise will split
64 key value pairs with the given separator.
66 """
67 # store the closing delimiters to match opening ones
68 delimiters = {
69 "'": "'",
70 '"': '"',
71 '{': '}',
72 '[': ']'
73 }
75 # Make pairs and process afterwards
76 kv_pairs = [
77 [[]]] # List of characters for each entry, add a new list for new value
78 cur_delimiter = None # push and pop closing delimiters
79 escaped = False # add escaped sequences verbatim
81 # parse character-by-character unless someone can do nested brackets
82 # and escape sequences in a regex
83 for char in string.strip():
84 if escaped: # bypass everything if escaped
85 kv_pairs[-1][-1].append(char)
86 escaped = False
87 elif char == '\\': # escape the next thing
88 escaped = True
89 elif cur_delimiter: # inside brackets
90 if char == cur_delimiter: # found matching delimiter
91 cur_delimiter = None
92 else:
93 kv_pairs[-1][-1].append(char) # inside quotes, add verbatim
94 elif char in delimiters:
95 cur_delimiter = delimiters[char] # brackets or quotes
96 elif (sep is None and char.isspace()) or char == sep:
97 if kv_pairs == [[[]]]: # empty, beginning of string
98 continue
99 elif kv_pairs[-1][-1] == []:
100 continue
101 else:
102 kv_pairs.append([[]])
103 elif char == '=':
104 if kv_pairs[-1] == [[]]:
105 del kv_pairs[-1]
106 kv_pairs[-1].append([]) # value
107 else:
108 kv_pairs[-1][-1].append(char)
110 kv_dict = {}
112 for kv_pair in kv_pairs:
113 if len(kv_pair) == 0: # empty line
114 continue
115 elif len(kv_pair) == 1: # default to True
116 key, value = ''.join(kv_pair[0]), 'T'
117 else: # Smush anything else with kv-splitter '=' between them
118 key, value = ''.join(kv_pair[0]), '='.join(
119 ''.join(x) for x in kv_pair[1:])
121 if key.lower() not in UNPROCESSED_KEYS:
122 # Try to convert to (arrays of) floats, ints
123 split_value = re.findall(r'[^\s,]+', value)
124 try:
125 try:
126 numvalue = np.array(split_value, dtype=int)
127 except (ValueError, OverflowError):
128 # don't catch errors here so it falls through to bool
129 numvalue = np.array(split_value, dtype=float)
130 if len(numvalue) == 1:
131 numvalue = numvalue[0] # Only one number
132 value = numvalue
133 except (ValueError, OverflowError):
134 pass # value is unchanged
136 # convert special 3x3 matrices
137 if key in SPECIAL_3_3_KEYS:
138 if not isinstance(value, np.ndarray) or value.shape != (9,):
139 raise ValueError("Got info item {}, expecting special 3x3 "
140 "matrix, but value is not in the form of "
141 "a 9-long numerical vector".format(key))
142 value = np.array(value).reshape((3, 3), order='F')
144 # parse special strings as boolean or JSON
145 if isinstance(value, str):
146 # Parse boolean values:
147 # T or [tT]rue or TRUE -> True
148 # F or [fF]alse or FALSE -> False
149 # For list: 'T T F' -> [True, True, False]
150 # Cannot use `.lower()` to reduce `str_to_bool` mapping because
151 # 't'/'f' not accepted
152 str_to_bool = {
153 'T': True, 'F': False, 'true': True, 'false': False,
154 'True': True, 'False': False, 'TRUE': True, 'FALSE': False
155 }
156 try:
157 boolvalue = [str_to_bool[vpart] for vpart in
158 re.findall(r'[^\s,]+', value)]
160 if len(boolvalue) == 1:
161 value = boolvalue[0]
162 else:
163 value = boolvalue
164 except KeyError:
165 # Try to parse JSON
166 if value.startswith("_JSON "):
167 d = json.loads(value.replace("_JSON ", "", 1))
168 value = np.array(d)
169 if value.dtype.kind not in ['i', 'f', 'b']:
170 value = d
172 kv_dict[key] = value
174 return kv_dict
177def key_val_str_to_dict_regex(s):
178 """
179 Parse strings in the form 'key1=value1 key2="quoted value"'
180 """
181 d = {}
182 s = s.strip()
183 while True:
184 # Match quoted string first, then fall through to plain key=value
185 m = KEY_QUOTED_VALUE.match(s)
186 if m is None:
187 m = KEY_VALUE.match(s)
188 if m is not None:
189 s = KEY_VALUE.sub('', s, 1)
190 else:
191 # Just a key with no value
192 m = KEY_RE.match(s)
193 if m is not None:
194 s = KEY_RE.sub('', s, 1)
195 else:
196 s = KEY_QUOTED_VALUE.sub('', s, 1)
198 if m is None:
199 break # No more matches
201 key = m.group(1)
202 try:
203 value = m.group(2)
204 except IndexError:
205 # default value is 'T' (True)
206 value = 'T'
208 if key.lower() not in UNPROCESSED_KEYS:
209 # Try to convert to (arrays of) floats, ints
210 try:
211 numvalue = []
212 for x in value.split():
213 if x.find('.') == -1:
214 numvalue.append(int(float(x)))
215 else:
216 numvalue.append(float(x))
217 if len(numvalue) == 1:
218 numvalue = numvalue[0] # Only one number
219 elif len(numvalue) == 9:
220 # special case: 3x3 matrix, fortran ordering
221 numvalue = np.array(numvalue).reshape((3, 3), order='F')
222 else:
223 numvalue = np.array(numvalue) # vector
224 value = numvalue
225 except (ValueError, OverflowError):
226 pass
228 # Parse boolean values: 'T' -> True, 'F' -> False,
229 # 'T T F' -> [True, True, False]
230 if isinstance(value, str):
231 str_to_bool = {'T': True, 'F': False}
233 if len(value.split()) > 1:
234 if all(x in str_to_bool for x in value.split()):
235 value = [str_to_bool[x] for x in value.split()]
236 elif value in str_to_bool:
237 value = str_to_bool[value]
239 d[key] = value
241 return d
244def escape(string):
245 if (' ' in string or
246 '"' in string or "'" in string or
247 '{' in string or '}' in string or
248 '[' in string or ']' in string):
249 string = string.replace('"', '\\"')
250 string = f'"{string}"'
251 return string
254def key_val_dict_to_str(dct, sep=' '):
255 """
256 Convert atoms.info dictionary to extended XYZ string representation
257 """
259 def array_to_string(key, val):
260 # some ndarrays are special (special 3x3 keys, and scalars/vectors of
261 # numbers or bools), handle them here
262 if key in SPECIAL_3_3_KEYS:
263 # special 3x3 matrix, flatten in Fortran order
264 val = val.reshape(val.size, order='F')
265 if val.dtype.kind in ['i', 'f', 'b']:
266 # numerical or bool scalars/vectors are special, for backwards
267 # compat.
268 if len(val.shape) == 0:
269 # scalar
270 val = str(known_types_to_str(val))
271 elif len(val.shape) == 1:
272 # vector
273 val = ' '.join(str(known_types_to_str(v)) for v in val)
274 return val
276 def known_types_to_str(val):
277 if isinstance(val, (bool, np.bool_)):
278 return 'T' if val else 'F'
279 elif isinstance(val, numbers.Real):
280 return f'{val}'
281 elif isinstance(val, Spacegroup):
282 return val.symbol
283 else:
284 return val
286 if len(dct) == 0:
287 return ''
289 string = ''
290 for key in dct:
291 val = dct[key]
293 if isinstance(val, np.ndarray):
294 val = array_to_string(key, val)
295 else:
296 # convert any known types to string
297 val = known_types_to_str(val)
299 if val is not None and not isinstance(val, str):
300 # what's left is an object, try using JSON
301 if isinstance(val, np.ndarray):
302 val = val.tolist()
303 try:
304 val = '_JSON ' + json.dumps(val)
305 # if this fails, let give up
306 except TypeError:
307 warnings.warn('Skipping unhashable information '
308 '{}'.format(key))
309 continue
311 key = escape(key) # escape and quote key
312 eq = "="
313 # Should this really be setting empty value that's going to be
314 # interpreted as bool True?
315 if val is None:
316 val = ""
317 eq = ""
318 val = escape(val) # escape and quote val
320 string += f'{key}{eq}{val}{sep}'
322 return string.strip()
325def parse_properties(prop_str):
326 """
327 Parse extended XYZ properties format string
329 Format is "[NAME:TYPE:NCOLS]...]", e.g. "species:S:1:pos:R:3".
330 NAME is the name of the property.
331 TYPE is one of R, I, S, L for real, integer, string and logical.
332 NCOLS is number of columns for that property.
333 """
335 properties = {}
336 properties_list = []
337 dtypes = []
338 converters = []
340 fields = prop_str.split(':')
342 def parse_bool(x):
343 """
344 Parse bool to string
345 """
346 return {'T': True, 'F': False,
347 'True': True, 'False': False}.get(x)
349 fmt_map = {'R': ('d', float),
350 'I': ('i', int),
351 'S': (object, str),
352 'L': ('bool', parse_bool)}
354 for name, ptype, cols in zip(fields[::3],
355 fields[1::3],
356 [int(x) for x in fields[2::3]]):
357 if ptype not in ('R', 'I', 'S', 'L'):
358 raise ValueError('Unknown property type: ' + ptype)
359 ase_name = REV_PROPERTY_NAME_MAP.get(name, name)
361 dtype, converter = fmt_map[ptype]
362 if cols == 1:
363 dtypes.append((name, dtype))
364 converters.append(converter)
365 else:
366 for c in range(cols):
367 dtypes.append((name + str(c), dtype))
368 converters.append(converter)
370 properties[name] = (ase_name, cols)
371 properties_list.append(name)
373 dtype = np.dtype(dtypes)
374 return properties, properties_list, dtype, converters
377def _read_xyz_frame(lines, natoms, properties_parser=key_val_str_to_dict,
378 nvec=0):
379 # comment line
380 line = next(lines).strip()
381 if nvec > 0:
382 info = {'comment': line}
383 else:
384 info = properties_parser(line) if line else {}
386 pbc = None
387 if 'pbc' in info:
388 pbc = info.pop('pbc')
389 elif 'Lattice' in info:
390 # default pbc for extxyz file containing Lattice
391 # is True in all directions
392 pbc = [True, True, True]
393 elif nvec > 0:
394 # cell information given as pseudo-Atoms
395 pbc = [False, False, False]
397 cell = None
398 if 'Lattice' in info:
399 # NB: ASE cell is transpose of extended XYZ lattice
400 cell = info['Lattice'].T
401 del info['Lattice']
402 elif nvec > 0:
403 # cell information given as pseudo-Atoms
404 cell = np.zeros((3, 3))
406 if 'Properties' not in info:
407 # Default set of properties is atomic symbols and positions only
408 info['Properties'] = 'species:S:1:pos:R:3'
409 properties, names, dtype, convs = parse_properties(info['Properties'])
410 del info['Properties']
412 data = []
413 for ln in range(natoms):
414 try:
415 line = next(lines)
416 except StopIteration:
417 raise XYZError('ase.io.extxyz: Frame has {} atoms, expected {}'
418 .format(len(data), natoms))
419 vals = line.split()
420 row = tuple([conv(val) for conv, val in zip(convs, vals)])
421 data.append(row)
423 try:
424 data = np.array(data, dtype)
425 except TypeError:
426 raise XYZError('Badly formatted data '
427 'or end of file reached before end of frame')
429 # Read VEC entries if present
430 if nvec > 0:
431 for ln in range(nvec):
432 try:
433 line = next(lines)
434 except StopIteration:
435 raise XYZError('ase.io.adfxyz: Frame has {} cell vectors, '
436 'expected {}'.format(len(cell), nvec))
437 entry = line.split()
439 if not entry[0].startswith('VEC'):
440 raise XYZError(f'Expected cell vector, got {entry[0]}')
442 try:
443 n = int(entry[0][3:])
444 except ValueError as e:
445 raise XYZError('Expected VEC{}, got VEC{}'
446 .format(ln + 1, entry[0][3:])) from e
447 if n != ln + 1:
448 raise XYZError('Expected VEC{}, got VEC{}'
449 .format(ln + 1, n))
451 cell[ln] = np.array([float(x) for x in entry[1:]])
452 pbc[ln] = True
453 if nvec != pbc.count(True):
454 raise XYZError('Problem with number of cell vectors')
455 pbc = tuple(pbc)
457 arrays = {}
458 for name in names:
459 ase_name, cols = properties[name]
460 if cols == 1:
461 value = data[name]
462 else:
463 value = np.vstack([data[name + str(c)]
464 for c in range(cols)]).T
465 arrays[ase_name] = value
467 symbols = None
468 if 'symbols' in arrays:
469 symbols = [s.capitalize() for s in arrays['symbols']]
470 del arrays['symbols']
472 numbers = None
473 duplicate_numbers = None
474 if 'numbers' in arrays:
475 if symbols is None:
476 numbers = arrays['numbers']
477 else:
478 duplicate_numbers = arrays['numbers']
479 del arrays['numbers']
481 initial_charges = None
482 if 'initial_charges' in arrays:
483 initial_charges = arrays['initial_charges']
484 del arrays['initial_charges']
486 positions = None
487 if 'positions' in arrays:
488 positions = arrays['positions']
489 del arrays['positions']
491 atoms = Atoms(symbols=symbols,
492 positions=positions,
493 numbers=numbers,
494 charges=initial_charges,
495 cell=cell,
496 pbc=pbc,
497 info=info)
499 # Read and set constraints
500 if 'move_mask' in arrays:
501 move_mask = arrays['move_mask'].astype(bool)
502 if properties['move_mask'][1] == 3:
503 cons = []
504 for a in range(natoms):
505 cons.append(FixCartesian(a, mask=~move_mask[a, :]))
506 atoms.set_constraint(cons)
507 elif properties['move_mask'][1] == 1:
508 atoms.set_constraint(FixAtoms(mask=~move_mask))
509 else:
510 raise XYZError('Not implemented constraint')
511 del arrays['move_mask']
513 for name, array in arrays.items():
514 atoms.new_array(name, array)
516 if duplicate_numbers is not None:
517 atoms.set_atomic_numbers(duplicate_numbers)
519 # Load results of previous calculations into SinglePointCalculator
520 results = {}
521 for key in list(atoms.info.keys()):
522 if key in per_config_properties:
523 results[key] = atoms.info[key]
524 # special case for stress- convert to Voigt 6-element form
525 if key == 'stress' and results[key].shape == (3, 3):
526 stress = results[key]
527 stress = np.array([stress[0, 0],
528 stress[1, 1],
529 stress[2, 2],
530 stress[1, 2],
531 stress[0, 2],
532 stress[0, 1]])
533 results[key] = stress
534 for key in list(atoms.arrays.keys()):
535 if (key in per_atom_properties and len(value.shape) >= 1
536 and value.shape[0] == len(atoms)):
537 results[key] = atoms.arrays[key]
538 if results != {}:
539 calculator = SinglePointCalculator(atoms, **results)
540 atoms.calc = calculator
541 return atoms
544class XYZError(IOError):
545 pass
548class XYZChunk:
549 def __init__(self, lines, natoms):
550 self.lines = lines
551 self.natoms = natoms
553 def build(self):
554 """Convert unprocessed chunk into Atoms."""
555 return _read_xyz_frame(iter(self.lines), self.natoms)
558def ixyzchunks(fd):
559 """Yield unprocessed chunks (header, lines) for each xyz image."""
560 while True:
561 line = next(fd).strip() # Raises StopIteration on empty file
562 try:
563 natoms = int(line)
564 except ValueError:
565 raise XYZError(f'Expected integer, found "{line}"')
566 try:
567 lines = [next(fd) for _ in range(1 + natoms)]
568 except StopIteration:
569 raise XYZError('Incomplete XYZ chunk')
570 yield XYZChunk(lines, natoms)
573iread_xyz = ImageIterator(ixyzchunks)
576@reader
577def read_xyz(fileobj, index=-1, properties_parser=key_val_str_to_dict):
578 r"""
579 Read from a file in Extended XYZ format
581 index is the frame to read, default is last frame (index=-1).
582 properties_parser is the parse to use when converting the properties line
583 to a dictionary, ``extxyz.key_val_str_to_dict`` is the default and can
584 deal with most use cases, ``extxyz.key_val_str_to_dict_regex`` is slightly
585 faster but has fewer features.
587 Extended XYZ format is an enhanced version of the `basic XYZ format
588 <http://en.wikipedia.org/wiki/XYZ_file_format>`_ that allows extra
589 columns to be present in the file for additonal per-atom properties as
590 well as standardising the format of the comment line to include the
591 cell lattice and other per-frame parameters.
593 It's easiest to describe the format with an example. Here is a
594 standard XYZ file containing a bulk cubic 8 atom silicon cell ::
596 8
597 Cubic bulk silicon cell
598 Si 0.00000000 0.00000000 0.00000000
599 Si 1.36000000 1.36000000 1.36000000
600 Si 2.72000000 2.72000000 0.00000000
601 Si 4.08000000 4.08000000 1.36000000
602 Si 2.72000000 0.00000000 2.72000000
603 Si 4.08000000 1.36000000 4.08000000
604 Si 0.00000000 2.72000000 2.72000000
605 Si 1.36000000 4.08000000 4.08000000
607 The first line is the number of atoms, followed by a comment and
608 then one line per atom, giving the element symbol and cartesian
609 x y, and z coordinates in Angstroms.
611 Here's the same configuration in extended XYZ format ::
613 8
614 Lattice="5.44 0.0 0.0 0.0 5.44 0.0 0.0 0.0 5.44" Properties=species:S:1:pos:R:3 Time=0.0
615 Si 0.00000000 0.00000000 0.00000000
616 Si 1.36000000 1.36000000 1.36000000
617 Si 2.72000000 2.72000000 0.00000000
618 Si 4.08000000 4.08000000 1.36000000
619 Si 2.72000000 0.00000000 2.72000000
620 Si 4.08000000 1.36000000 4.08000000
621 Si 0.00000000 2.72000000 2.72000000
622 Si 1.36000000 4.08000000 4.08000000
624 In extended XYZ format, the comment line is replaced by a series of
625 key/value pairs. The keys should be strings and values can be
626 integers, reals, logicals (denoted by `T` and `F` for true and false)
627 or strings. Quotes are required if a value contains any spaces (like
628 `Lattice` above). There are two mandatory parameters that any
629 extended XYZ: `Lattice` and `Properties`. Other parameters --
630 e.g. `Time` in the example above --- can be added to the parameter line
631 as needed.
633 `Lattice` is a Cartesian 3x3 matrix representation of the cell
634 vectors, with each vector stored as a column and the 9 values listed in
635 Fortran column-major order, i.e. in the form ::
637 Lattice="R1x R1y R1z R2x R2y R2z R3x R3y R3z"
639 where `R1x R1y R1z` are the Cartesian x-, y- and z-components of the
640 first lattice vector (:math:`\mathbf{a}`), `R2x R2y R2z` those of the second
641 lattice vector (:math:`\mathbf{b}`) and `R3x R3y R3z` those of the
642 third lattice vector (:math:`\mathbf{c}`).
644 The list of properties in the file is described by the `Properties`
645 parameter, which should take the form of a series of colon separated
646 triplets giving the name, format (`R` for real, `I` for integer) and
647 number of columns of each property. For example::
649 Properties="species:S:1:pos:R:3:vel:R:3:select:I:1"
651 indicates the first column represents atomic species, the next three
652 columns represent atomic positions, the next three velcoities, and the
653 last is an single integer called `select`. With this property
654 definition, the line ::
656 Si 4.08000000 4.08000000 1.36000000 0.00000000 0.00000000 0.00000000 1
658 would describe a silicon atom at position (4.08,4.08,1.36) with zero
659 velocity and the `select` property set to 1.
661 The property names `pos`, `Z`, `mass`, and `charge` map to ASE
662 :attr:`ase.atoms.Atoms.arrays` entries named
663 `positions`, `numbers`, `masses` and `charges` respectively.
665 Additional key-value pairs in the comment line are parsed into the
666 :attr:`ase.Atoms.atoms.info` dictionary, with the following conventions
668 - Values can be quoted with `""`, `''`, `[]` or `{}` (the latter are
669 included to ease command-line usage as the `{}` are not treated
670 specially by the shell)
671 - Quotes within keys or values can be escaped with `\"`.
672 - Keys with special names `stress` or `virial` are treated as 3x3 matrices
673 in Fortran order, as for `Lattice` above.
674 - Otherwise, values with multiple elements are treated as 1D arrays, first
675 assuming integer format and falling back to float if conversion is
676 unsuccessful.
677 - A missing value defaults to `True`, e.g. the comment line
678 `"cutoff=3.4 have_energy"` leads to
679 `{'cutoff': 3.4, 'have_energy': True}` in `atoms.info`.
680 - Value strings starting with `"_JSON"` are interpreted as JSON content;
681 similarly, when writing, anything which does not match the criteria above
682 is serialised as JSON.
684 The extended XYZ format is also supported by the
685 the `Ovito <http://www.ovito.org>`_ visualisation tool
686 (from `v2.4 beta
687 <http://www.ovito.org/index.php/component/content/article?id=25>`_
688 onwards).
689 """ # noqa: E501
691 if not isinstance(index, int) and not isinstance(index, slice):
692 raise TypeError('Index argument is neither slice nor integer!')
694 # If possible, build a partial index up to the last frame required
695 last_frame = None
696 if isinstance(index, int) and index >= 0:
697 last_frame = index
698 elif isinstance(index, slice):
699 if index.stop is not None and index.stop >= 0:
700 last_frame = index.stop
702 # scan through file to find where the frames start
703 try:
704 fileobj.seek(0)
705 except UnsupportedOperation:
706 fileobj = StringIO(fileobj.read())
707 fileobj.seek(0)
708 frames = []
709 while True:
710 frame_pos = fileobj.tell()
711 line = fileobj.readline()
712 if line.strip() == '':
713 break
714 try:
715 natoms = int(line)
716 except ValueError as err:
717 raise XYZError('ase.io.extxyz: Expected xyz header but got: {}'
718 .format(err))
719 fileobj.readline() # read comment line
720 for i in range(natoms):
721 fileobj.readline()
722 # check for VEC
723 nvec = 0
724 while True:
725 lastPos = fileobj.tell()
726 line = fileobj.readline()
727 if line.lstrip().startswith('VEC'):
728 nvec += 1
729 if nvec > 3:
730 raise XYZError('ase.io.extxyz: More than 3 VECX entries')
731 else:
732 fileobj.seek(lastPos)
733 break
734 frames.append((frame_pos, natoms, nvec))
735 if last_frame is not None and len(frames) > last_frame:
736 break
738 trbl = index2range(index, len(frames))
740 for index in trbl:
741 frame_pos, natoms, nvec = frames[index]
742 fileobj.seek(frame_pos)
743 # check for consistency with frame index table
744 assert int(fileobj.readline()) == natoms
745 yield _read_xyz_frame(fileobj, natoms, properties_parser, nvec)
748def output_column_format(atoms, columns, arrays,
749 write_info=True, results=None):
750 """
751 Helper function to build extended XYZ comment line
752 """
753 fmt_map = {'d': ('R', '%16.8f'),
754 'f': ('R', '%16.8f'),
755 'i': ('I', '%8d'),
756 'O': ('S', '%s'),
757 'S': ('S', '%s'),
758 'U': ('S', '%-2s'),
759 'b': ('L', ' %.1s')}
761 # NB: Lattice is stored as tranpose of ASE cell,
762 # with Fortran array ordering
763 lattice_str = ('Lattice="'
764 + ' '.join([str(x) for x in np.reshape(atoms.cell.T,
765 9, order='F')]) +
766 '"')
768 property_names = []
769 property_types = []
770 property_ncols = []
771 dtypes = []
772 formats = []
774 for column in columns:
775 array = arrays[column]
776 dtype = array.dtype
778 property_name = PROPERTY_NAME_MAP.get(column, column)
779 property_type, fmt = fmt_map[dtype.kind]
780 property_names.append(property_name)
781 property_types.append(property_type)
783 if (len(array.shape) == 1
784 or (len(array.shape) == 2 and array.shape[1] == 1)):
785 ncol = 1
786 dtypes.append((column, dtype))
787 else:
788 ncol = array.shape[1]
789 for c in range(ncol):
790 dtypes.append((column + str(c), dtype))
792 formats.extend([fmt] * ncol)
793 property_ncols.append(ncol)
795 props_str = ':'.join([':'.join(x) for x in
796 zip(property_names,
797 property_types,
798 [str(nc) for nc in property_ncols])])
800 comment_str = ''
801 if atoms.cell.any():
802 comment_str += lattice_str + ' '
803 comment_str += f'Properties={props_str}'
805 info = {}
806 if write_info:
807 info.update(atoms.info)
808 if results is not None:
809 info.update(results)
810 info['pbc'] = atoms.get_pbc() # always save periodic boundary conditions
811 comment_str += ' ' + key_val_dict_to_str(info)
813 dtype = np.dtype(dtypes)
814 fmt = ' '.join(formats) + '\n'
816 return comment_str, property_ncols, dtype, fmt
819def write_xyz(fileobj, images, comment='', columns=None,
820 write_info=True,
821 write_results=True, plain=False, vec_cell=False,
822 append=False):
823 """
824 Write output in extended XYZ format
826 Optionally, specify which columns (arrays) to include in output,
827 whether to write the contents of the `atoms.info` dict to the
828 XYZ comment line (default is True), the results of any
829 calculator attached to this Atoms. The `plain` argument
830 can be used to write a simple XYZ file with no additional information.
831 `vec_cell` can be used to write the cell vectors as additional
832 pseudo-atoms. If `append` is set to True, the file is for append (mode `a`),
833 otherwise it is overwritten (mode `w`).
835 See documentation for :func:`read_xyz()` for further details of the extended
836 XYZ file format.
837 """
838 if isinstance(fileobj, str):
839 mode = 'w'
840 if append:
841 mode = 'a'
842 fileobj = paropen(fileobj, mode)
844 if hasattr(images, 'get_positions'):
845 images = [images]
847 for atoms in images:
848 natoms = len(atoms)
850 if columns is None:
851 fr_cols = None
852 else:
853 fr_cols = columns[:]
855 if fr_cols is None:
856 fr_cols = (['symbols', 'positions']
857 + [key for key in atoms.arrays.keys() if
858 key not in ['symbols', 'positions', 'numbers',
859 'species', 'pos']])
861 if vec_cell:
862 plain = True
864 if plain:
865 fr_cols = ['symbols', 'positions']
866 write_info = False
867 write_results = False
869 per_frame_results = {}
870 per_atom_results = {}
871 if write_results:
872 calculator = atoms.calc
873 if (calculator is not None
874 and isinstance(calculator, BaseCalculator)):
875 for key in all_properties:
876 value = calculator.results.get(key, None)
877 if value is None:
878 # skip missing calculator results
879 continue
880 if (key in per_atom_properties and len(value.shape) >= 1
881 and value.shape[0] == len(atoms)):
882 # per-atom quantities (forces, energies, stresses)
883 per_atom_results[key] = value
884 elif key in per_config_properties:
885 # per-frame quantities (energy, stress)
886 # special case for stress, which should be converted
887 # to 3x3 matrices before writing
888 if key == 'stress':
889 xx, yy, zz, yz, xz, xy = value
890 value = np.array(
891 [(xx, xy, xz), (xy, yy, yz), (xz, yz, zz)])
892 per_frame_results[key] = value
894 # Move symbols and positions to first two properties
895 if 'symbols' in fr_cols:
896 i = fr_cols.index('symbols')
897 fr_cols[0], fr_cols[i] = fr_cols[i], fr_cols[0]
899 if 'positions' in fr_cols:
900 i = fr_cols.index('positions')
901 fr_cols[1], fr_cols[i] = fr_cols[i], fr_cols[1]
903 # Check first column "looks like" atomic symbols
904 if fr_cols[0] in atoms.arrays:
905 symbols = atoms.arrays[fr_cols[0]]
906 else:
907 symbols = atoms.get_chemical_symbols()
909 if natoms > 0 and not isinstance(symbols[0], str):
910 raise ValueError('First column must be symbols-like')
912 # Check second column "looks like" atomic positions
913 pos = atoms.arrays[fr_cols[1]]
914 if pos.shape != (natoms, 3) or pos.dtype.kind != 'f':
915 raise ValueError('Second column must be position-like')
917 # if vec_cell add cell information as pseudo-atoms
918 if vec_cell:
919 pbc = list(atoms.get_pbc())
920 cell = atoms.get_cell()
922 if True in pbc:
923 nPBC = 0
924 for i, b in enumerate(pbc):
925 if b:
926 nPBC += 1
927 symbols.append('VEC' + str(nPBC))
928 pos = np.vstack((pos, cell[i]))
929 # add to natoms
930 natoms += nPBC
931 if pos.shape != (natoms, 3) or pos.dtype.kind != 'f':
932 raise ValueError(
933 'Pseudo Atoms containing cell have bad coords')
935 # Move mask
936 if 'move_mask' in fr_cols:
937 cnstr = images[0]._get_constraints()
938 if len(cnstr) > 0:
939 c0 = cnstr[0]
940 if isinstance(c0, FixAtoms):
941 cnstr = np.ones((natoms,), dtype=bool)
942 for idx in c0.index:
943 cnstr[idx] = False
944 elif isinstance(c0, FixCartesian):
945 masks = np.ones((natoms, 3), dtype=bool)
946 for i in range(len(cnstr)):
947 idx = cnstr[i].index
948 masks[idx] = cnstr[i].mask
949 cnstr = masks
950 else:
951 fr_cols.remove('move_mask')
953 # Collect data to be written out
954 arrays = {}
955 for column in fr_cols:
956 if column == 'positions':
957 arrays[column] = pos
958 elif column in atoms.arrays:
959 arrays[column] = atoms.arrays[column]
960 elif column == 'symbols':
961 arrays[column] = np.array(symbols)
962 elif column == 'move_mask':
963 arrays[column] = cnstr
964 else:
965 raise ValueError(f'Missing array "{column}"')
967 if write_results:
968 for key in per_atom_results:
969 if key not in fr_cols:
970 fr_cols += [key]
971 else:
972 warnings.warn('write_xyz() overwriting array "{}" present '
973 'in atoms.arrays with stored results '
974 'from calculator'.format(key))
975 arrays.update(per_atom_results)
977 comm, ncols, dtype, fmt = output_column_format(atoms,
978 fr_cols,
979 arrays,
980 write_info,
981 per_frame_results)
983 if plain or comment != '':
984 # override key/value pairs with user-speficied comment string
985 comm = comment.rstrip()
986 if '\n' in comm:
987 raise ValueError('Comment line should not have line breaks.')
989 # Pack fr_cols into record array
990 data = np.zeros(natoms, dtype)
991 for column, ncol in zip(fr_cols, ncols):
992 value = arrays[column]
993 if ncol == 1:
994 data[column] = np.squeeze(value)
995 else:
996 for c in range(ncol):
997 data[column + str(c)] = value[:, c]
999 nat = natoms
1000 if vec_cell:
1001 nat -= nPBC
1002 # Write the output
1003 fileobj.write('%d\n' % nat)
1004 fileobj.write(f'{comm}\n')
1005 for i in range(natoms):
1006 fileobj.write(fmt % tuple(data[i]))
1009# create aliases for read/write functions
1010read_extxyz = read_xyz
1011write_extxyz = write_xyz