Coverage for /builds/kinetik161/ase/ase/io/extxyz.py: 80.78%

1"""

2Extended XYZ support

4Read/write files in "extended" XYZ format, storing additional

5per-configuration information as key-value pairs on the XYZ

6comment line, and additional per-atom properties as extra columns.

8Contributed by James Kermode <james.kermode@gmail.com>

9"""

10import json

11import numbers

12import re

13import warnings

14from io import StringIO, UnsupportedOperation

16import numpy as np

18from ase.atoms import Atoms

19from ase.calculators.calculator import BaseCalculator, all_properties

20from ase.calculators.singlepoint import SinglePointCalculator

21from ase.constraints import FixAtoms, FixCartesian

22from ase.io.formats import index2range

23from ase.io.utils import ImageIterator

24from ase.parallel import paropen

25from ase.spacegroup.spacegroup import Spacegroup

26from ase.utils import reader

28__all__ = ['read_xyz', 'write_xyz', 'iread_xyz']

30PROPERTY_NAME_MAP = {'positions': 'pos',

31 'numbers': 'Z',

32 'charges': 'charge',

33 'symbols': 'species'}

35REV_PROPERTY_NAME_MAP = dict(zip(PROPERTY_NAME_MAP.values(),

36 PROPERTY_NAME_MAP.keys()))

38KEY_QUOTED_VALUE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_-]*)'

39 + r'\s*=\s*["\{\}]([^"\{\}]+)["\{\}]\s*')

40KEY_VALUE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_]*)\s*='

41 + r'\s*([^\s]+)\s*')

42KEY_RE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_-]*)\s*')

44UNPROCESSED_KEYS = ['uid']

46SPECIAL_3_3_KEYS = ['Lattice', 'virial', 'stress']

48# partition ase.calculators.calculator.all_properties into two lists:

49# 'per-atom' and 'per-config'

50per_atom_properties = ['forces', 'stresses', 'charges', 'magmoms', 'energies']

51per_config_properties = ['energy', 'stress', 'dipole', 'magmom', 'free_energy']

54def key_val_str_to_dict(string, sep=None):

55 """

56 Parse an xyz properties string in a key=value and return a dict with

57 various values parsed to native types.

59 Accepts brackets or quotes to delimit values. Parses integers, floats

60 booleans and arrays thereof. Arrays with 9 values whose name is listed

61 in SPECIAL_3_3_KEYS are converted to 3x3 arrays with Fortran ordering.

63 If sep is None, string will split on whitespace, otherwise will split

64 key value pairs with the given separator.

66 """

67 # store the closing delimiters to match opening ones

68 delimiters = {

69 "'": "'",

70 '"': '"',

71 '{': '}',

72 '[': ']'

73 }

75 # Make pairs and process afterwards

76 kv_pairs = [

77 [[]]] # List of characters for each entry, add a new list for new value

78 cur_delimiter = None # push and pop closing delimiters

79 escaped = False # add escaped sequences verbatim

81 # parse character-by-character unless someone can do nested brackets

82 # and escape sequences in a regex

83 for char in string.strip():

84 if escaped: # bypass everything if escaped

85 kv_pairs[-1][-1].append(char)

86 escaped = False

87 elif char == '\\': # escape the next thing

88 escaped = True

89 elif cur_delimiter: # inside brackets

90 if char == cur_delimiter: # found matching delimiter

91 cur_delimiter = None

92 else:

93 kv_pairs[-1][-1].append(char) # inside quotes, add verbatim

94 elif char in delimiters:

95 cur_delimiter = delimiters[char] # brackets or quotes

96 elif (sep is None and char.isspace()) or char == sep:

97 if kv_pairs == [[[]]]: # empty, beginning of string

98 continue

99 elif kv_pairs[-1][-1] == []:

100 continue

101 else:

102 kv_pairs.append([[]])

103 elif char == '=':

104 if kv_pairs[-1] == [[]]:

105 del kv_pairs[-1]

106 kv_pairs[-1].append([]) # value

107 else:

108 kv_pairs[-1][-1].append(char)

109

110 kv_dict = {}

111

112 for kv_pair in kv_pairs:

113 if len(kv_pair) == 0: # empty line

114 continue

115 elif len(kv_pair) == 1: # default to True

116 key, value = ''.join(kv_pair[0]), 'T'

117 else: # Smush anything else with kv-splitter '=' between them

118 key, value = ''.join(kv_pair[0]), '='.join(

119 ''.join(x) for x in kv_pair[1:])

120

121 if key.lower() not in UNPROCESSED_KEYS:

122 # Try to convert to (arrays of) floats, ints

123 split_value = re.findall(r'[^\s,]+', value)

124 try:

125 try:

126 numvalue = np.array(split_value, dtype=int)

127 except (ValueError, OverflowError):

128 # don't catch errors here so it falls through to bool

129 numvalue = np.array(split_value, dtype=float)

130 if len(numvalue) == 1:

131 numvalue = numvalue[0] # Only one number

132 value = numvalue

133 except (ValueError, OverflowError):

134 pass # value is unchanged

135

136 # convert special 3x3 matrices

137 if key in SPECIAL_3_3_KEYS:

138 if not isinstance(value, np.ndarray) or value.shape != (9,):

139 raise ValueError("Got info item {}, expecting special 3x3 "

140 "matrix, but value is not in the form of "

141 "a 9-long numerical vector".format(key))

142 value = np.array(value).reshape((3, 3), order='F')

143

144 # parse special strings as boolean or JSON

145 if isinstance(value, str):

146 # Parse boolean values:

147 # T or [tT]rue or TRUE -> True

148 # F or [fF]alse or FALSE -> False

149 # For list: 'T T F' -> [True, True, False]

150 # Cannot use `.lower()` to reduce `str_to_bool` mapping because

151 # 't'/'f' not accepted

152 str_to_bool = {

153 'T': True, 'F': False, 'true': True, 'false': False,

154 'True': True, 'False': False, 'TRUE': True, 'FALSE': False

155 }

156 try:

157 boolvalue = [str_to_bool[vpart] for vpart in

158 re.findall(r'[^\s,]+', value)]

159

160 if len(boolvalue) == 1:

161 value = boolvalue[0]

162 else:

163 value = boolvalue

164 except KeyError:

165 # Try to parse JSON

166 if value.startswith("_JSON "):

167 d = json.loads(value.replace("_JSON ", "", 1))

168 value = np.array(d)

169 if value.dtype.kind not in ['i', 'f', 'b']:

170 value = d

171

172 kv_dict[key] = value

173

174 return kv_dict

175

176

177def key_val_str_to_dict_regex(s):

178 """

179 Parse strings in the form 'key1=value1 key2="quoted value"'

180 """

181 d = {}

182 s = s.strip()

183 while True:

184 # Match quoted string first, then fall through to plain key=value

185 m = KEY_QUOTED_VALUE.match(s)

186 if m is None:

187 m = KEY_VALUE.match(s)

188 if m is not None:

189 s = KEY_VALUE.sub('', s, 1)

190 else:

191 # Just a key with no value

192 m = KEY_RE.match(s)

193 if m is not None:

194 s = KEY_RE.sub('', s, 1)

195 else:

196 s = KEY_QUOTED_VALUE.sub('', s, 1)

197

198 if m is None:

199 break # No more matches

200

201 key = m.group(1)

202 try:

203 value = m.group(2)

204 except IndexError:

205 # default value is 'T' (True)

206 value = 'T'

207

208 if key.lower() not in UNPROCESSED_KEYS:

209 # Try to convert to (arrays of) floats, ints

210 try:

211 numvalue = []

212 for x in value.split():

213 if x.find('.') == -1:

214 numvalue.append(int(float(x)))

215 else:

216 numvalue.append(float(x))

217 if len(numvalue) == 1:

218 numvalue = numvalue[0] # Only one number

219 elif len(numvalue) == 9:

220 # special case: 3x3 matrix, fortran ordering

221 numvalue = np.array(numvalue).reshape((3, 3), order='F')

222 else:

223 numvalue = np.array(numvalue) # vector

224 value = numvalue

225 except (ValueError, OverflowError):

226 pass

227

228 # Parse boolean values: 'T' -> True, 'F' -> False,

229 # 'T T F' -> [True, True, False]

230 if isinstance(value, str):

231 str_to_bool = {'T': True, 'F': False}

232

233 if len(value.split()) > 1:

234 if all(x in str_to_bool for x in value.split()):

235 value = [str_to_bool[x] for x in value.split()]

236 elif value in str_to_bool:

237 value = str_to_bool[value]

239 d[key] = value

241 return d

244def escape(string):

245 if (' ' in string or

246 '"' in string or "'" in string or

247 '{' in string or '}' in string or

248 '[' in string or ']' in string):

249 string = string.replace('"', '\\"')

250 string = f'"{string}"'

251 return string

252

253

254def key_val_dict_to_str(dct, sep=' '):

255 """

256 Convert atoms.info dictionary to extended XYZ string representation

257 """

258

259 def array_to_string(key, val):

260 # some ndarrays are special (special 3x3 keys, and scalars/vectors of

261 # numbers or bools), handle them here

262 if key in SPECIAL_3_3_KEYS:

263 # special 3x3 matrix, flatten in Fortran order

264 val = val.reshape(val.size, order='F')

265 if val.dtype.kind in ['i', 'f', 'b']:

266 # numerical or bool scalars/vectors are special, for backwards

267 # compat.

268 if len(val.shape) == 0:

269 # scalar

270 val = str(known_types_to_str(val))

271 elif len(val.shape) == 1:

272 # vector

273 val = ' '.join(str(known_types_to_str(v)) for v in val)

274 return val

275

276 def known_types_to_str(val):

277 if isinstance(val, (bool, np.bool_)):

278 return 'T' if val else 'F'

279 elif isinstance(val, numbers.Real):

280 return f'{val}'

281 elif isinstance(val, Spacegroup):

282 return val.symbol

283 else:

284 return val

285

286 if len(dct) == 0:

287 return ''

288

289 string = ''

290 for key in dct:

291 val = dct[key]

292

293 if isinstance(val, np.ndarray):

294 val = array_to_string(key, val)

295 else:

296 # convert any known types to string

297 val = known_types_to_str(val)

298

299 if val is not None and not isinstance(val, str):

300 # what's left is an object, try using JSON

301 if isinstance(val, np.ndarray):

302 val = val.tolist()

303 try:

304 val = '_JSON ' + json.dumps(val)

305 # if this fails, let give up

306 except TypeError:

307 warnings.warn('Skipping unhashable information '

308 '{}'.format(key))

309 continue

310

311 key = escape(key) # escape and quote key

312 eq = "="

313 # Should this really be setting empty value that's going to be

314 # interpreted as bool True?

315 if val is None:

316 val = ""

317 eq = ""

318 val = escape(val) # escape and quote val

319

320 string += f'{key}{eq}{val}{sep}'

321

322 return string.strip()

323

324

325def parse_properties(prop_str):

326 """

327 Parse extended XYZ properties format string

328

329 Format is "[NAME:TYPE:NCOLS]...]", e.g. "species:S:1:pos:R:3".

330 NAME is the name of the property.

331 TYPE is one of R, I, S, L for real, integer, string and logical.

332 NCOLS is number of columns for that property.

333 """

334

335 properties = {}

336 properties_list = []

337 dtypes = []

338 converters = []

339

340 fields = prop_str.split(':')

341

342 def parse_bool(x):

343 """

344 Parse bool to string

345 """

346 return {'T': True, 'F': False,

347 'True': True, 'False': False}.get(x)

348

349 fmt_map = {'R': ('d', float),

350 'I': ('i', int),

351 'S': (object, str),

352 'L': ('bool', parse_bool)}

353

354 for name, ptype, cols in zip(fields[::3],

355 fields[1::3],

356 [int(x) for x in fields[2::3]]):

357 if ptype not in ('R', 'I', 'S', 'L'):

358 raise ValueError('Unknown property type: ' + ptype)

359 ase_name = REV_PROPERTY_NAME_MAP.get(name, name)

360

361 dtype, converter = fmt_map[ptype]

362 if cols == 1:

363 dtypes.append((name, dtype))

364 converters.append(converter)

365 else:

366 for c in range(cols):

367 dtypes.append((name + str(c), dtype))

368 converters.append(converter)

369

370 properties[name] = (ase_name, cols)

371 properties_list.append(name)

372

373 dtype = np.dtype(dtypes)

374 return properties, properties_list, dtype, converters

375

376

377def _read_xyz_frame(lines, natoms, properties_parser=key_val_str_to_dict,

378 nvec=0):

379 # comment line

380 line = next(lines).strip()

381 if nvec > 0:

382 info = {'comment': line}

383 else:

384 info = properties_parser(line) if line else {}

385

386 pbc = None

387 if 'pbc' in info:

388 pbc = info.pop('pbc')

389 elif 'Lattice' in info:

390 # default pbc for extxyz file containing Lattice

391 # is True in all directions

392 pbc = [True, True, True]

393 elif nvec > 0:

394 # cell information given as pseudo-Atoms

395 pbc = [False, False, False]

396

397 cell = None

398 if 'Lattice' in info:

399 # NB: ASE cell is transpose of extended XYZ lattice

400 cell = info['Lattice'].T

401 del info['Lattice']

402 elif nvec > 0:

403 # cell information given as pseudo-Atoms

404 cell = np.zeros((3, 3))

405

406 if 'Properties' not in info:

407 # Default set of properties is atomic symbols and positions only

408 info['Properties'] = 'species:S:1:pos:R:3'

409 properties, names, dtype, convs = parse_properties(info['Properties'])

410 del info['Properties']

411

412 data = []

413 for ln in range(natoms):

414 try:

415 line = next(lines)

416 except StopIteration:

417 raise XYZError('ase.io.extxyz: Frame has {} atoms, expected {}'

418 .format(len(data), natoms))

419 vals = line.split()

420 row = tuple([conv(val) for conv, val in zip(convs, vals)])

421 data.append(row)

422

423 try:

424 data = np.array(data, dtype)

425 except TypeError:

426 raise XYZError('Badly formatted data '

427 'or end of file reached before end of frame')

428

429 # Read VEC entries if present

430 if nvec > 0:

431 for ln in range(nvec):

432 try:

433 line = next(lines)

434 except StopIteration:

435 raise XYZError('ase.io.adfxyz: Frame has {} cell vectors, '

436 'expected {}'.format(len(cell), nvec))

437 entry = line.split()

438

439 if not entry[0].startswith('VEC'):

440 raise XYZError(f'Expected cell vector, got {entry[0]}')

441

442 try:

443 n = int(entry[0][3:])

444 except ValueError as e:

445 raise XYZError('Expected VEC{}, got VEC{}'

446 .format(ln + 1, entry[0][3:])) from e

447 if n != ln + 1:

448 raise XYZError('Expected VEC{}, got VEC{}'

449 .format(ln + 1, n))

450

451 cell[ln] = np.array([float(x) for x in entry[1:]])

452 pbc[ln] = True

453 if nvec != pbc.count(True):

454 raise XYZError('Problem with number of cell vectors')

455 pbc = tuple(pbc)

456

457 arrays = {}

458 for name in names:

459 ase_name, cols = properties[name]

460 if cols == 1:

461 value = data[name]

462 else:

463 value = np.vstack([data[name + str(c)]

464 for c in range(cols)]).T

465 arrays[ase_name] = value

466

467 symbols = None

468 if 'symbols' in arrays:

469 symbols = [s.capitalize() for s in arrays['symbols']]

470 del arrays['symbols']

471

472 numbers = None

473 duplicate_numbers = None

474 if 'numbers' in arrays:

475 if symbols is None:

476 numbers = arrays['numbers']

477 else:

478 duplicate_numbers = arrays['numbers']

479 del arrays['numbers']

480

481 initial_charges = None

482 if 'initial_charges' in arrays:

483 initial_charges = arrays['initial_charges']

484 del arrays['initial_charges']

485

486 positions = None

487 if 'positions' in arrays:

488 positions = arrays['positions']

489 del arrays['positions']

490

491 atoms = Atoms(symbols=symbols,

492 positions=positions,

493 numbers=numbers,

494 charges=initial_charges,

495 cell=cell,

496 pbc=pbc,

497 info=info)

498

499 # Read and set constraints

500 if 'move_mask' in arrays:

501 move_mask = arrays['move_mask'].astype(bool)

502 if properties['move_mask'][1] == 3:

503 cons = []

504 for a in range(natoms):

505 cons.append(FixCartesian(a, mask=~move_mask[a, :]))

506 atoms.set_constraint(cons)

507 elif properties['move_mask'][1] == 1:

508 atoms.set_constraint(FixAtoms(mask=~move_mask))

509 else:

510 raise XYZError('Not implemented constraint')

511 del arrays['move_mask']

512

513 for name, array in arrays.items():

514 atoms.new_array(name, array)

515

516 if duplicate_numbers is not None:

517 atoms.set_atomic_numbers(duplicate_numbers)

518

519 # Load results of previous calculations into SinglePointCalculator

520 results = {}

521 for key in list(atoms.info.keys()):

522 if key in per_config_properties:

523 results[key] = atoms.info[key]

524 # special case for stress- convert to Voigt 6-element form

525 if key == 'stress' and results[key].shape == (3, 3):

526 stress = results[key]

527 stress = np.array([stress[0, 0],

528 stress[1, 1],

529 stress[2, 2],

530 stress[1, 2],

531 stress[0, 2],

532 stress[0, 1]])

533 results[key] = stress

534 for key in list(atoms.arrays.keys()):

535 if (key in per_atom_properties and len(value.shape) >= 1

536 and value.shape[0] == len(atoms)):

537 results[key] = atoms.arrays[key]

538 if results != {}:

539 calculator = SinglePointCalculator(atoms, **results)

540 atoms.calc = calculator

541 return atoms

542

543

544class XYZError(IOError):

545 pass

546

547

548class XYZChunk:

549 def __init__(self, lines, natoms):

550 self.lines = lines

551 self.natoms = natoms

552

553 def build(self):

554 """Convert unprocessed chunk into Atoms."""

555 return _read_xyz_frame(iter(self.lines), self.natoms)

556

557

558def ixyzchunks(fd):

559 """Yield unprocessed chunks (header, lines) for each xyz image."""

560 while True:

561 line = next(fd).strip() # Raises StopIteration on empty file

562 try:

563 natoms = int(line)

564 except ValueError:

565 raise XYZError(f'Expected integer, found "{line}"')

566 try:

567 lines = [next(fd) for _ in range(1 + natoms)]

568 except StopIteration:

569 raise XYZError('Incomplete XYZ chunk')

570 yield XYZChunk(lines, natoms)

571

572

573iread_xyz = ImageIterator(ixyzchunks)

574

575

576@reader

577def read_xyz(fileobj, index=-1, properties_parser=key_val_str_to_dict):

578 r"""

579 Read from a file in Extended XYZ format

580

581 index is the frame to read, default is last frame (index=-1).

582 properties_parser is the parse to use when converting the properties line

583 to a dictionary, ``extxyz.key_val_str_to_dict`` is the default and can

584 deal with most use cases, ``extxyz.key_val_str_to_dict_regex`` is slightly

585 faster but has fewer features.

586

587 Extended XYZ format is an enhanced version of the `basic XYZ format

588 <http://en.wikipedia.org/wiki/XYZ_file_format>`_ that allows extra

589 columns to be present in the file for additonal per-atom properties as

590 well as standardising the format of the comment line to include the

591 cell lattice and other per-frame parameters.

592

593 It's easiest to describe the format with an example. Here is a

594 standard XYZ file containing a bulk cubic 8 atom silicon cell ::

595

596 8

597 Cubic bulk silicon cell

598 Si 0.00000000 0.00000000 0.00000000

599 Si 1.36000000 1.36000000 1.36000000

600 Si 2.72000000 2.72000000 0.00000000

601 Si 4.08000000 4.08000000 1.36000000

602 Si 2.72000000 0.00000000 2.72000000

603 Si 4.08000000 1.36000000 4.08000000

604 Si 0.00000000 2.72000000 2.72000000

605 Si 1.36000000 4.08000000 4.08000000

606

607 The first line is the number of atoms, followed by a comment and

608 then one line per atom, giving the element symbol and cartesian

609 x y, and z coordinates in Angstroms.

610

611 Here's the same configuration in extended XYZ format ::

612

613 8

614 Lattice="5.44 0.0 0.0 0.0 5.44 0.0 0.0 0.0 5.44" Properties=species:S:1:pos:R:3 Time=0.0

615 Si 0.00000000 0.00000000 0.00000000

616 Si 1.36000000 1.36000000 1.36000000

617 Si 2.72000000 2.72000000 0.00000000

618 Si 4.08000000 4.08000000 1.36000000

619 Si 2.72000000 0.00000000 2.72000000

620 Si 4.08000000 1.36000000 4.08000000

621 Si 0.00000000 2.72000000 2.72000000

622 Si 1.36000000 4.08000000 4.08000000

623

624 In extended XYZ format, the comment line is replaced by a series of

625 key/value pairs. The keys should be strings and values can be

626 integers, reals, logicals (denoted by `T` and `F` for true and false)

627 or strings. Quotes are required if a value contains any spaces (like

628 `Lattice` above). There are two mandatory parameters that any

629 extended XYZ: `Lattice` and `Properties`. Other parameters --

630 e.g. `Time` in the example above --- can be added to the parameter line

631 as needed.

632

633 `Lattice` is a Cartesian 3x3 matrix representation of the cell

634 vectors, with each vector stored as a column and the 9 values listed in

635 Fortran column-major order, i.e. in the form ::

636

637 Lattice="R1x R1y R1z R2x R2y R2z R3x R3y R3z"

638

639 where `R1x R1y R1z` are the Cartesian x-, y- and z-components of the

640 first lattice vector (:math:`\mathbf{a}`), `R2x R2y R2z` those of the second

641 lattice vector (:math:`\mathbf{b}`) and `R3x R3y R3z` those of the

642 third lattice vector (:math:`\mathbf{c}`).

643

644 The list of properties in the file is described by the `Properties`

645 parameter, which should take the form of a series of colon separated

646 triplets giving the name, format (`R` for real, `I` for integer) and

647 number of columns of each property. For example::

648

649 Properties="species:S:1:pos:R:3:vel:R:3:select:I:1"

650

651 indicates the first column represents atomic species, the next three

652 columns represent atomic positions, the next three velcoities, and the

653 last is an single integer called `select`. With this property

654 definition, the line ::

655

656 Si 4.08000000 4.08000000 1.36000000 0.00000000 0.00000000 0.00000000 1

657

658 would describe a silicon atom at position (4.08,4.08,1.36) with zero

659 velocity and the `select` property set to 1.

660

661 The property names `pos`, `Z`, `mass`, and `charge` map to ASE

662 :attr:`ase.atoms.Atoms.arrays` entries named

663 `positions`, `numbers`, `masses` and `charges` respectively.

664

665 Additional key-value pairs in the comment line are parsed into the

666 :attr:`ase.Atoms.atoms.info` dictionary, with the following conventions

667

668 - Values can be quoted with `""`, `''`, `[]` or `{}` (the latter are

669 included to ease command-line usage as the `{}` are not treated

670 specially by the shell)

671 - Quotes within keys or values can be escaped with `\"`.

672 - Keys with special names `stress` or `virial` are treated as 3x3 matrices

673 in Fortran order, as for `Lattice` above.

674 - Otherwise, values with multiple elements are treated as 1D arrays, first

675 assuming integer format and falling back to float if conversion is

676 unsuccessful.

677 - A missing value defaults to `True`, e.g. the comment line

678 `"cutoff=3.4 have_energy"` leads to

679 `{'cutoff': 3.4, 'have_energy': True}` in `atoms.info`.

680 - Value strings starting with `"_JSON"` are interpreted as JSON content;

681 similarly, when writing, anything which does not match the criteria above

682 is serialised as JSON.

683

684 The extended XYZ format is also supported by the

685 the `Ovito <http://www.ovito.org>`_ visualisation tool

686 (from `v2.4 beta

687 <http://www.ovito.org/index.php/component/content/article?id=25>`_

688 onwards).

689 """ # noqa: E501

690

691 if not isinstance(index, int) and not isinstance(index, slice):

692 raise TypeError('Index argument is neither slice nor integer!')

693

694 # If possible, build a partial index up to the last frame required

695 last_frame = None

696 if isinstance(index, int) and index >= 0:

697 last_frame = index

698 elif isinstance(index, slice):

699 if index.stop is not None and index.stop >= 0:

700 last_frame = index.stop

701

702 # scan through file to find where the frames start

703 try:

704 fileobj.seek(0)

705 except UnsupportedOperation:

706 fileobj = StringIO(fileobj.read())

707 fileobj.seek(0)

708 frames = []

709 while True:

710 frame_pos = fileobj.tell()

711 line = fileobj.readline()

712 if line.strip() == '':

713 break

714 try:

715 natoms = int(line)

716 except ValueError as err:

717 raise XYZError('ase.io.extxyz: Expected xyz header but got: {}'

718 .format(err))

719 fileobj.readline() # read comment line

720 for i in range(natoms):

721 fileobj.readline()

722 # check for VEC

723 nvec = 0

724 while True:

725 lastPos = fileobj.tell()

726 line = fileobj.readline()

727 if line.lstrip().startswith('VEC'):

728 nvec += 1

729 if nvec > 3:

730 raise XYZError('ase.io.extxyz: More than 3 VECX entries')

731 else:

732 fileobj.seek(lastPos)

733 break

734 frames.append((frame_pos, natoms, nvec))

735 if last_frame is not None and len(frames) > last_frame:

736 break

737

738 trbl = index2range(index, len(frames))

739

740 for index in trbl:

741 frame_pos, natoms, nvec = frames[index]

742 fileobj.seek(frame_pos)

743 # check for consistency with frame index table

744 assert int(fileobj.readline()) == natoms

745 yield _read_xyz_frame(fileobj, natoms, properties_parser, nvec)

746

747

748def output_column_format(atoms, columns, arrays,

749 write_info=True, results=None):

750 """

751 Helper function to build extended XYZ comment line

752 """

753 fmt_map = {'d': ('R', '%16.8f'),

754 'f': ('R', '%16.8f'),

755 'i': ('I', '%8d'),

756 'O': ('S', '%s'),

757 'S': ('S', '%s'),

758 'U': ('S', '%-2s'),

759 'b': ('L', ' %.1s')}

760

761 # NB: Lattice is stored as tranpose of ASE cell,

762 # with Fortran array ordering

763 lattice_str = ('Lattice="'

764 + ' '.join([str(x) for x in np.reshape(atoms.cell.T,

765 9, order='F')]) +

766 '"')

767

768 property_names = []

769 property_types = []

770 property_ncols = []

771 dtypes = []

772 formats = []

773

774 for column in columns:

775 array = arrays[column]

776 dtype = array.dtype

777

778 property_name = PROPERTY_NAME_MAP.get(column, column)

779 property_type, fmt = fmt_map[dtype.kind]

780 property_names.append(property_name)

781 property_types.append(property_type)

782

783 if (len(array.shape) == 1

784 or (len(array.shape) == 2 and array.shape[1] == 1)):

785 ncol = 1

786 dtypes.append((column, dtype))

787 else:

788 ncol = array.shape[1]

789 for c in range(ncol):

790 dtypes.append((column + str(c), dtype))

791

792 formats.extend([fmt] * ncol)

793 property_ncols.append(ncol)

794

795 props_str = ':'.join([':'.join(x) for x in

796 zip(property_names,

797 property_types,

798 [str(nc) for nc in property_ncols])])

799

800 comment_str = ''

801 if atoms.cell.any():

802 comment_str += lattice_str + ' '

803 comment_str += f'Properties={props_str}'

804

805 info = {}

806 if write_info:

807 info.update(atoms.info)

808 if results is not None:

809 info.update(results)

810 info['pbc'] = atoms.get_pbc() # always save periodic boundary conditions

811 comment_str += ' ' + key_val_dict_to_str(info)

812

813 dtype = np.dtype(dtypes)

814 fmt = ' '.join(formats) + '\n'

815

816 return comment_str, property_ncols, dtype, fmt

817

818

819def write_xyz(fileobj, images, comment='', columns=None,

820 write_info=True,

821 write_results=True, plain=False, vec_cell=False,

822 append=False):

823 """

824 Write output in extended XYZ format

825

826 Optionally, specify which columns (arrays) to include in output,

827 whether to write the contents of the `atoms.info` dict to the

828 XYZ comment line (default is True), the results of any

829 calculator attached to this Atoms. The `plain` argument

830 can be used to write a simple XYZ file with no additional information.

831 `vec_cell` can be used to write the cell vectors as additional

832 pseudo-atoms. If `append` is set to True, the file is for append (mode `a`),

833 otherwise it is overwritten (mode `w`).

834

835 See documentation for :func:`read_xyz()` for further details of the extended

836 XYZ file format.

837 """

838 if isinstance(fileobj, str):

839 mode = 'w'

840 if append:

841 mode = 'a'

842 fileobj = paropen(fileobj, mode)

843

844 if hasattr(images, 'get_positions'):

845 images = [images]

846

847 for atoms in images:

848 natoms = len(atoms)

849

850 if columns is None:

851 fr_cols = None

852 else:

853 fr_cols = columns[:]

854

855 if fr_cols is None:

856 fr_cols = (['symbols', 'positions']

857 + [key for key in atoms.arrays.keys() if

858 key not in ['symbols', 'positions', 'numbers',

859 'species', 'pos']])

860

861 if vec_cell:

862 plain = True

863

864 if plain:

865 fr_cols = ['symbols', 'positions']

866 write_info = False

867 write_results = False

868

869 per_frame_results = {}

870 per_atom_results = {}

871 if write_results:

872 calculator = atoms.calc

873 if (calculator is not None

874 and isinstance(calculator, BaseCalculator)):

875 for key in all_properties:

876 value = calculator.results.get(key, None)

877 if value is None:

878 # skip missing calculator results

879 continue

880 if (key in per_atom_properties and len(value.shape) >= 1

881 and value.shape[0] == len(atoms)):

882 # per-atom quantities (forces, energies, stresses)

883 per_atom_results[key] = value

884 elif key in per_config_properties:

885 # per-frame quantities (energy, stress)

886 # special case for stress, which should be converted

887 # to 3x3 matrices before writing

888 if key == 'stress':

889 xx, yy, zz, yz, xz, xy = value

890 value = np.array(

891 [(xx, xy, xz), (xy, yy, yz), (xz, yz, zz)])

892 per_frame_results[key] = value

893

894 # Move symbols and positions to first two properties

895 if 'symbols' in fr_cols:

896 i = fr_cols.index('symbols')

897 fr_cols[0], fr_cols[i] = fr_cols[i], fr_cols[0]

898

899 if 'positions' in fr_cols:

900 i = fr_cols.index('positions')

901 fr_cols[1], fr_cols[i] = fr_cols[i], fr_cols[1]

902

903 # Check first column "looks like" atomic symbols

904 if fr_cols[0] in atoms.arrays:

905 symbols = atoms.arrays[fr_cols[0]]

906 else:

907 symbols = atoms.get_chemical_symbols()

908

909 if natoms > 0 and not isinstance(symbols[0], str):

910 raise ValueError('First column must be symbols-like')

911

912 # Check second column "looks like" atomic positions

913 pos = atoms.arrays[fr_cols[1]]

914 if pos.shape != (natoms, 3) or pos.dtype.kind != 'f':

915 raise ValueError('Second column must be position-like')

916

917 # if vec_cell add cell information as pseudo-atoms

918 if vec_cell:

919 pbc = list(atoms.get_pbc())

920 cell = atoms.get_cell()

921

922 if True in pbc:

923 nPBC = 0

924 for i, b in enumerate(pbc):

925 if b:

926 nPBC += 1

927 symbols.append('VEC' + str(nPBC))

928 pos = np.vstack((pos, cell[i]))

929 # add to natoms

930 natoms += nPBC

931 if pos.shape != (natoms, 3) or pos.dtype.kind != 'f':

932 raise ValueError(

933 'Pseudo Atoms containing cell have bad coords')

934

935 # Move mask

936 if 'move_mask' in fr_cols:

937 cnstr = images[0]._get_constraints()

938 if len(cnstr) > 0:

939 c0 = cnstr[0]

940 if isinstance(c0, FixAtoms):

941 cnstr = np.ones((natoms,), dtype=bool)

942 for idx in c0.index:

943 cnstr[idx] = False

944 elif isinstance(c0, FixCartesian):

945 masks = np.ones((natoms, 3), dtype=bool)

946 for i in range(len(cnstr)):

947 idx = cnstr[i].index

948 masks[idx] = cnstr[i].mask

949 cnstr = masks

950 else:

951 fr_cols.remove('move_mask')

952

953 # Collect data to be written out

954 arrays = {}

955 for column in fr_cols:

956 if column == 'positions':

957 arrays[column] = pos

958 elif column in atoms.arrays:

959 arrays[column] = atoms.arrays[column]

960 elif column == 'symbols':

961 arrays[column] = np.array(symbols)

962 elif column == 'move_mask':

963 arrays[column] = cnstr

964 else:

965 raise ValueError(f'Missing array "{column}"')

966

967 if write_results:

968 for key in per_atom_results:

969 if key not in fr_cols:

970 fr_cols += [key]

971 else:

972 warnings.warn('write_xyz() overwriting array "{}" present '

973 'in atoms.arrays with stored results '

974 'from calculator'.format(key))

975 arrays.update(per_atom_results)

976

977 comm, ncols, dtype, fmt = output_column_format(atoms,

978 fr_cols,

979 arrays,

980 write_info,

981 per_frame_results)

982

983 if plain or comment != '':

984 # override key/value pairs with user-speficied comment string

985 comm = comment.rstrip()

986 if '\n' in comm:

987 raise ValueError('Comment line should not have line breaks.')

988

989 # Pack fr_cols into record array

990 data = np.zeros(natoms, dtype)

991 for column, ncol in zip(fr_cols, ncols):

992 value = arrays[column]

993 if ncol == 1:

994 data[column] = np.squeeze(value)

995 else:

996 for c in range(ncol):

997 data[column + str(c)] = value[:, c]

998

999 nat = natoms

1000 if vec_cell:

1001 nat -= nPBC

1002 # Write the output

1003 fileobj.write('%d\n' % nat)

1004 fileobj.write(f'{comm}\n')

1005 for i in range(natoms):

1006 fileobj.write(fmt % tuple(data[i]))

1007

1008

1009# create aliases for read/write functions

1010read_extxyz = read_xyz

1011write_extxyz = write_xyz