Coverage for /builds/kinetik161/ase/ase/io/extxyz.py: 80.78%

541 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-12-10 11:04 +0000

1""" 

2Extended XYZ support 

3 

4Read/write files in "extended" XYZ format, storing additional 

5per-configuration information as key-value pairs on the XYZ 

6comment line, and additional per-atom properties as extra columns. 

7 

8Contributed by James Kermode <james.kermode@gmail.com> 

9""" 

10import json 

11import numbers 

12import re 

13import warnings 

14from io import StringIO, UnsupportedOperation 

15 

16import numpy as np 

17 

18from ase.atoms import Atoms 

19from ase.calculators.calculator import BaseCalculator, all_properties 

20from ase.calculators.singlepoint import SinglePointCalculator 

21from ase.constraints import FixAtoms, FixCartesian 

22from ase.io.formats import index2range 

23from ase.io.utils import ImageIterator 

24from ase.parallel import paropen 

25from ase.spacegroup.spacegroup import Spacegroup 

26from ase.utils import reader 

27 

28__all__ = ['read_xyz', 'write_xyz', 'iread_xyz'] 

29 

30PROPERTY_NAME_MAP = {'positions': 'pos', 

31 'numbers': 'Z', 

32 'charges': 'charge', 

33 'symbols': 'species'} 

34 

35REV_PROPERTY_NAME_MAP = dict(zip(PROPERTY_NAME_MAP.values(), 

36 PROPERTY_NAME_MAP.keys())) 

37 

38KEY_QUOTED_VALUE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_-]*)' 

39 + r'\s*=\s*["\{\}]([^"\{\}]+)["\{\}]\s*') 

40KEY_VALUE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_]*)\s*=' 

41 + r'\s*([^\s]+)\s*') 

42KEY_RE = re.compile(r'([A-Za-z_]+[A-Za-z0-9_-]*)\s*') 

43 

44UNPROCESSED_KEYS = ['uid'] 

45 

46SPECIAL_3_3_KEYS = ['Lattice', 'virial', 'stress'] 

47 

48# partition ase.calculators.calculator.all_properties into two lists: 

49# 'per-atom' and 'per-config' 

50per_atom_properties = ['forces', 'stresses', 'charges', 'magmoms', 'energies'] 

51per_config_properties = ['energy', 'stress', 'dipole', 'magmom', 'free_energy'] 

52 

53 

54def key_val_str_to_dict(string, sep=None): 

55 """ 

56 Parse an xyz properties string in a key=value and return a dict with 

57 various values parsed to native types. 

58 

59 Accepts brackets or quotes to delimit values. Parses integers, floats 

60 booleans and arrays thereof. Arrays with 9 values whose name is listed 

61 in SPECIAL_3_3_KEYS are converted to 3x3 arrays with Fortran ordering. 

62 

63 If sep is None, string will split on whitespace, otherwise will split 

64 key value pairs with the given separator. 

65 

66 """ 

67 # store the closing delimiters to match opening ones 

68 delimiters = { 

69 "'": "'", 

70 '"': '"', 

71 '{': '}', 

72 '[': ']' 

73 } 

74 

75 # Make pairs and process afterwards 

76 kv_pairs = [ 

77 [[]]] # List of characters for each entry, add a new list for new value 

78 cur_delimiter = None # push and pop closing delimiters 

79 escaped = False # add escaped sequences verbatim 

80 

81 # parse character-by-character unless someone can do nested brackets 

82 # and escape sequences in a regex 

83 for char in string.strip(): 

84 if escaped: # bypass everything if escaped 

85 kv_pairs[-1][-1].append(char) 

86 escaped = False 

87 elif char == '\\': # escape the next thing 

88 escaped = True 

89 elif cur_delimiter: # inside brackets 

90 if char == cur_delimiter: # found matching delimiter 

91 cur_delimiter = None 

92 else: 

93 kv_pairs[-1][-1].append(char) # inside quotes, add verbatim 

94 elif char in delimiters: 

95 cur_delimiter = delimiters[char] # brackets or quotes 

96 elif (sep is None and char.isspace()) or char == sep: 

97 if kv_pairs == [[[]]]: # empty, beginning of string 

98 continue 

99 elif kv_pairs[-1][-1] == []: 

100 continue 

101 else: 

102 kv_pairs.append([[]]) 

103 elif char == '=': 

104 if kv_pairs[-1] == [[]]: 

105 del kv_pairs[-1] 

106 kv_pairs[-1].append([]) # value 

107 else: 

108 kv_pairs[-1][-1].append(char) 

109 

110 kv_dict = {} 

111 

112 for kv_pair in kv_pairs: 

113 if len(kv_pair) == 0: # empty line 

114 continue 

115 elif len(kv_pair) == 1: # default to True 

116 key, value = ''.join(kv_pair[0]), 'T' 

117 else: # Smush anything else with kv-splitter '=' between them 

118 key, value = ''.join(kv_pair[0]), '='.join( 

119 ''.join(x) for x in kv_pair[1:]) 

120 

121 if key.lower() not in UNPROCESSED_KEYS: 

122 # Try to convert to (arrays of) floats, ints 

123 split_value = re.findall(r'[^\s,]+', value) 

124 try: 

125 try: 

126 numvalue = np.array(split_value, dtype=int) 

127 except (ValueError, OverflowError): 

128 # don't catch errors here so it falls through to bool 

129 numvalue = np.array(split_value, dtype=float) 

130 if len(numvalue) == 1: 

131 numvalue = numvalue[0] # Only one number 

132 value = numvalue 

133 except (ValueError, OverflowError): 

134 pass # value is unchanged 

135 

136 # convert special 3x3 matrices 

137 if key in SPECIAL_3_3_KEYS: 

138 if not isinstance(value, np.ndarray) or value.shape != (9,): 

139 raise ValueError("Got info item {}, expecting special 3x3 " 

140 "matrix, but value is not in the form of " 

141 "a 9-long numerical vector".format(key)) 

142 value = np.array(value).reshape((3, 3), order='F') 

143 

144 # parse special strings as boolean or JSON 

145 if isinstance(value, str): 

146 # Parse boolean values: 

147 # T or [tT]rue or TRUE -> True 

148 # F or [fF]alse or FALSE -> False 

149 # For list: 'T T F' -> [True, True, False] 

150 # Cannot use `.lower()` to reduce `str_to_bool` mapping because 

151 # 't'/'f' not accepted 

152 str_to_bool = { 

153 'T': True, 'F': False, 'true': True, 'false': False, 

154 'True': True, 'False': False, 'TRUE': True, 'FALSE': False 

155 } 

156 try: 

157 boolvalue = [str_to_bool[vpart] for vpart in 

158 re.findall(r'[^\s,]+', value)] 

159 

160 if len(boolvalue) == 1: 

161 value = boolvalue[0] 

162 else: 

163 value = boolvalue 

164 except KeyError: 

165 # Try to parse JSON 

166 if value.startswith("_JSON "): 

167 d = json.loads(value.replace("_JSON ", "", 1)) 

168 value = np.array(d) 

169 if value.dtype.kind not in ['i', 'f', 'b']: 

170 value = d 

171 

172 kv_dict[key] = value 

173 

174 return kv_dict 

175 

176 

177def key_val_str_to_dict_regex(s): 

178 """ 

179 Parse strings in the form 'key1=value1 key2="quoted value"' 

180 """ 

181 d = {} 

182 s = s.strip() 

183 while True: 

184 # Match quoted string first, then fall through to plain key=value 

185 m = KEY_QUOTED_VALUE.match(s) 

186 if m is None: 

187 m = KEY_VALUE.match(s) 

188 if m is not None: 

189 s = KEY_VALUE.sub('', s, 1) 

190 else: 

191 # Just a key with no value 

192 m = KEY_RE.match(s) 

193 if m is not None: 

194 s = KEY_RE.sub('', s, 1) 

195 else: 

196 s = KEY_QUOTED_VALUE.sub('', s, 1) 

197 

198 if m is None: 

199 break # No more matches 

200 

201 key = m.group(1) 

202 try: 

203 value = m.group(2) 

204 except IndexError: 

205 # default value is 'T' (True) 

206 value = 'T' 

207 

208 if key.lower() not in UNPROCESSED_KEYS: 

209 # Try to convert to (arrays of) floats, ints 

210 try: 

211 numvalue = [] 

212 for x in value.split(): 

213 if x.find('.') == -1: 

214 numvalue.append(int(float(x))) 

215 else: 

216 numvalue.append(float(x)) 

217 if len(numvalue) == 1: 

218 numvalue = numvalue[0] # Only one number 

219 elif len(numvalue) == 9: 

220 # special case: 3x3 matrix, fortran ordering 

221 numvalue = np.array(numvalue).reshape((3, 3), order='F') 

222 else: 

223 numvalue = np.array(numvalue) # vector 

224 value = numvalue 

225 except (ValueError, OverflowError): 

226 pass 

227 

228 # Parse boolean values: 'T' -> True, 'F' -> False, 

229 # 'T T F' -> [True, True, False] 

230 if isinstance(value, str): 

231 str_to_bool = {'T': True, 'F': False} 

232 

233 if len(value.split()) > 1: 

234 if all(x in str_to_bool for x in value.split()): 

235 value = [str_to_bool[x] for x in value.split()] 

236 elif value in str_to_bool: 

237 value = str_to_bool[value] 

238 

239 d[key] = value 

240 

241 return d 

242 

243 

244def escape(string): 

245 if (' ' in string or 

246 '"' in string or "'" in string or 

247 '{' in string or '}' in string or 

248 '[' in string or ']' in string): 

249 string = string.replace('"', '\\"') 

250 string = f'"{string}"' 

251 return string 

252 

253 

254def key_val_dict_to_str(dct, sep=' '): 

255 """ 

256 Convert atoms.info dictionary to extended XYZ string representation 

257 """ 

258 

259 def array_to_string(key, val): 

260 # some ndarrays are special (special 3x3 keys, and scalars/vectors of 

261 # numbers or bools), handle them here 

262 if key in SPECIAL_3_3_KEYS: 

263 # special 3x3 matrix, flatten in Fortran order 

264 val = val.reshape(val.size, order='F') 

265 if val.dtype.kind in ['i', 'f', 'b']: 

266 # numerical or bool scalars/vectors are special, for backwards 

267 # compat. 

268 if len(val.shape) == 0: 

269 # scalar 

270 val = str(known_types_to_str(val)) 

271 elif len(val.shape) == 1: 

272 # vector 

273 val = ' '.join(str(known_types_to_str(v)) for v in val) 

274 return val 

275 

276 def known_types_to_str(val): 

277 if isinstance(val, (bool, np.bool_)): 

278 return 'T' if val else 'F' 

279 elif isinstance(val, numbers.Real): 

280 return f'{val}' 

281 elif isinstance(val, Spacegroup): 

282 return val.symbol 

283 else: 

284 return val 

285 

286 if len(dct) == 0: 

287 return '' 

288 

289 string = '' 

290 for key in dct: 

291 val = dct[key] 

292 

293 if isinstance(val, np.ndarray): 

294 val = array_to_string(key, val) 

295 else: 

296 # convert any known types to string 

297 val = known_types_to_str(val) 

298 

299 if val is not None and not isinstance(val, str): 

300 # what's left is an object, try using JSON 

301 if isinstance(val, np.ndarray): 

302 val = val.tolist() 

303 try: 

304 val = '_JSON ' + json.dumps(val) 

305 # if this fails, let give up 

306 except TypeError: 

307 warnings.warn('Skipping unhashable information ' 

308 '{}'.format(key)) 

309 continue 

310 

311 key = escape(key) # escape and quote key 

312 eq = "=" 

313 # Should this really be setting empty value that's going to be 

314 # interpreted as bool True? 

315 if val is None: 

316 val = "" 

317 eq = "" 

318 val = escape(val) # escape and quote val 

319 

320 string += f'{key}{eq}{val}{sep}' 

321 

322 return string.strip() 

323 

324 

325def parse_properties(prop_str): 

326 """ 

327 Parse extended XYZ properties format string 

328 

329 Format is "[NAME:TYPE:NCOLS]...]", e.g. "species:S:1:pos:R:3". 

330 NAME is the name of the property. 

331 TYPE is one of R, I, S, L for real, integer, string and logical. 

332 NCOLS is number of columns for that property. 

333 """ 

334 

335 properties = {} 

336 properties_list = [] 

337 dtypes = [] 

338 converters = [] 

339 

340 fields = prop_str.split(':') 

341 

342 def parse_bool(x): 

343 """ 

344 Parse bool to string 

345 """ 

346 return {'T': True, 'F': False, 

347 'True': True, 'False': False}.get(x) 

348 

349 fmt_map = {'R': ('d', float), 

350 'I': ('i', int), 

351 'S': (object, str), 

352 'L': ('bool', parse_bool)} 

353 

354 for name, ptype, cols in zip(fields[::3], 

355 fields[1::3], 

356 [int(x) for x in fields[2::3]]): 

357 if ptype not in ('R', 'I', 'S', 'L'): 

358 raise ValueError('Unknown property type: ' + ptype) 

359 ase_name = REV_PROPERTY_NAME_MAP.get(name, name) 

360 

361 dtype, converter = fmt_map[ptype] 

362 if cols == 1: 

363 dtypes.append((name, dtype)) 

364 converters.append(converter) 

365 else: 

366 for c in range(cols): 

367 dtypes.append((name + str(c), dtype)) 

368 converters.append(converter) 

369 

370 properties[name] = (ase_name, cols) 

371 properties_list.append(name) 

372 

373 dtype = np.dtype(dtypes) 

374 return properties, properties_list, dtype, converters 

375 

376 

377def _read_xyz_frame(lines, natoms, properties_parser=key_val_str_to_dict, 

378 nvec=0): 

379 # comment line 

380 line = next(lines).strip() 

381 if nvec > 0: 

382 info = {'comment': line} 

383 else: 

384 info = properties_parser(line) if line else {} 

385 

386 pbc = None 

387 if 'pbc' in info: 

388 pbc = info.pop('pbc') 

389 elif 'Lattice' in info: 

390 # default pbc for extxyz file containing Lattice 

391 # is True in all directions 

392 pbc = [True, True, True] 

393 elif nvec > 0: 

394 # cell information given as pseudo-Atoms 

395 pbc = [False, False, False] 

396 

397 cell = None 

398 if 'Lattice' in info: 

399 # NB: ASE cell is transpose of extended XYZ lattice 

400 cell = info['Lattice'].T 

401 del info['Lattice'] 

402 elif nvec > 0: 

403 # cell information given as pseudo-Atoms 

404 cell = np.zeros((3, 3)) 

405 

406 if 'Properties' not in info: 

407 # Default set of properties is atomic symbols and positions only 

408 info['Properties'] = 'species:S:1:pos:R:3' 

409 properties, names, dtype, convs = parse_properties(info['Properties']) 

410 del info['Properties'] 

411 

412 data = [] 

413 for ln in range(natoms): 

414 try: 

415 line = next(lines) 

416 except StopIteration: 

417 raise XYZError('ase.io.extxyz: Frame has {} atoms, expected {}' 

418 .format(len(data), natoms)) 

419 vals = line.split() 

420 row = tuple([conv(val) for conv, val in zip(convs, vals)]) 

421 data.append(row) 

422 

423 try: 

424 data = np.array(data, dtype) 

425 except TypeError: 

426 raise XYZError('Badly formatted data ' 

427 'or end of file reached before end of frame') 

428 

429 # Read VEC entries if present 

430 if nvec > 0: 

431 for ln in range(nvec): 

432 try: 

433 line = next(lines) 

434 except StopIteration: 

435 raise XYZError('ase.io.adfxyz: Frame has {} cell vectors, ' 

436 'expected {}'.format(len(cell), nvec)) 

437 entry = line.split() 

438 

439 if not entry[0].startswith('VEC'): 

440 raise XYZError(f'Expected cell vector, got {entry[0]}') 

441 

442 try: 

443 n = int(entry[0][3:]) 

444 except ValueError as e: 

445 raise XYZError('Expected VEC{}, got VEC{}' 

446 .format(ln + 1, entry[0][3:])) from e 

447 if n != ln + 1: 

448 raise XYZError('Expected VEC{}, got VEC{}' 

449 .format(ln + 1, n)) 

450 

451 cell[ln] = np.array([float(x) for x in entry[1:]]) 

452 pbc[ln] = True 

453 if nvec != pbc.count(True): 

454 raise XYZError('Problem with number of cell vectors') 

455 pbc = tuple(pbc) 

456 

457 arrays = {} 

458 for name in names: 

459 ase_name, cols = properties[name] 

460 if cols == 1: 

461 value = data[name] 

462 else: 

463 value = np.vstack([data[name + str(c)] 

464 for c in range(cols)]).T 

465 arrays[ase_name] = value 

466 

467 symbols = None 

468 if 'symbols' in arrays: 

469 symbols = [s.capitalize() for s in arrays['symbols']] 

470 del arrays['symbols'] 

471 

472 numbers = None 

473 duplicate_numbers = None 

474 if 'numbers' in arrays: 

475 if symbols is None: 

476 numbers = arrays['numbers'] 

477 else: 

478 duplicate_numbers = arrays['numbers'] 

479 del arrays['numbers'] 

480 

481 initial_charges = None 

482 if 'initial_charges' in arrays: 

483 initial_charges = arrays['initial_charges'] 

484 del arrays['initial_charges'] 

485 

486 positions = None 

487 if 'positions' in arrays: 

488 positions = arrays['positions'] 

489 del arrays['positions'] 

490 

491 atoms = Atoms(symbols=symbols, 

492 positions=positions, 

493 numbers=numbers, 

494 charges=initial_charges, 

495 cell=cell, 

496 pbc=pbc, 

497 info=info) 

498 

499 # Read and set constraints 

500 if 'move_mask' in arrays: 

501 move_mask = arrays['move_mask'].astype(bool) 

502 if properties['move_mask'][1] == 3: 

503 cons = [] 

504 for a in range(natoms): 

505 cons.append(FixCartesian(a, mask=~move_mask[a, :])) 

506 atoms.set_constraint(cons) 

507 elif properties['move_mask'][1] == 1: 

508 atoms.set_constraint(FixAtoms(mask=~move_mask)) 

509 else: 

510 raise XYZError('Not implemented constraint') 

511 del arrays['move_mask'] 

512 

513 for name, array in arrays.items(): 

514 atoms.new_array(name, array) 

515 

516 if duplicate_numbers is not None: 

517 atoms.set_atomic_numbers(duplicate_numbers) 

518 

519 # Load results of previous calculations into SinglePointCalculator 

520 results = {} 

521 for key in list(atoms.info.keys()): 

522 if key in per_config_properties: 

523 results[key] = atoms.info[key] 

524 # special case for stress- convert to Voigt 6-element form 

525 if key == 'stress' and results[key].shape == (3, 3): 

526 stress = results[key] 

527 stress = np.array([stress[0, 0], 

528 stress[1, 1], 

529 stress[2, 2], 

530 stress[1, 2], 

531 stress[0, 2], 

532 stress[0, 1]]) 

533 results[key] = stress 

534 for key in list(atoms.arrays.keys()): 

535 if (key in per_atom_properties and len(value.shape) >= 1 

536 and value.shape[0] == len(atoms)): 

537 results[key] = atoms.arrays[key] 

538 if results != {}: 

539 calculator = SinglePointCalculator(atoms, **results) 

540 atoms.calc = calculator 

541 return atoms 

542 

543 

544class XYZError(IOError): 

545 pass 

546 

547 

548class XYZChunk: 

549 def __init__(self, lines, natoms): 

550 self.lines = lines 

551 self.natoms = natoms 

552 

553 def build(self): 

554 """Convert unprocessed chunk into Atoms.""" 

555 return _read_xyz_frame(iter(self.lines), self.natoms) 

556 

557 

558def ixyzchunks(fd): 

559 """Yield unprocessed chunks (header, lines) for each xyz image.""" 

560 while True: 

561 line = next(fd).strip() # Raises StopIteration on empty file 

562 try: 

563 natoms = int(line) 

564 except ValueError: 

565 raise XYZError(f'Expected integer, found "{line}"') 

566 try: 

567 lines = [next(fd) for _ in range(1 + natoms)] 

568 except StopIteration: 

569 raise XYZError('Incomplete XYZ chunk') 

570 yield XYZChunk(lines, natoms) 

571 

572 

573iread_xyz = ImageIterator(ixyzchunks) 

574 

575 

576@reader 

577def read_xyz(fileobj, index=-1, properties_parser=key_val_str_to_dict): 

578 r""" 

579 Read from a file in Extended XYZ format 

580 

581 index is the frame to read, default is last frame (index=-1). 

582 properties_parser is the parse to use when converting the properties line 

583 to a dictionary, ``extxyz.key_val_str_to_dict`` is the default and can 

584 deal with most use cases, ``extxyz.key_val_str_to_dict_regex`` is slightly 

585 faster but has fewer features. 

586 

587 Extended XYZ format is an enhanced version of the `basic XYZ format 

588 <http://en.wikipedia.org/wiki/XYZ_file_format>`_ that allows extra 

589 columns to be present in the file for additonal per-atom properties as 

590 well as standardising the format of the comment line to include the 

591 cell lattice and other per-frame parameters. 

592 

593 It's easiest to describe the format with an example. Here is a 

594 standard XYZ file containing a bulk cubic 8 atom silicon cell :: 

595 

596 8 

597 Cubic bulk silicon cell 

598 Si 0.00000000 0.00000000 0.00000000 

599 Si 1.36000000 1.36000000 1.36000000 

600 Si 2.72000000 2.72000000 0.00000000 

601 Si 4.08000000 4.08000000 1.36000000 

602 Si 2.72000000 0.00000000 2.72000000 

603 Si 4.08000000 1.36000000 4.08000000 

604 Si 0.00000000 2.72000000 2.72000000 

605 Si 1.36000000 4.08000000 4.08000000 

606 

607 The first line is the number of atoms, followed by a comment and 

608 then one line per atom, giving the element symbol and cartesian 

609 x y, and z coordinates in Angstroms. 

610 

611 Here's the same configuration in extended XYZ format :: 

612 

613 8 

614 Lattice="5.44 0.0 0.0 0.0 5.44 0.0 0.0 0.0 5.44" Properties=species:S:1:pos:R:3 Time=0.0 

615 Si 0.00000000 0.00000000 0.00000000 

616 Si 1.36000000 1.36000000 1.36000000 

617 Si 2.72000000 2.72000000 0.00000000 

618 Si 4.08000000 4.08000000 1.36000000 

619 Si 2.72000000 0.00000000 2.72000000 

620 Si 4.08000000 1.36000000 4.08000000 

621 Si 0.00000000 2.72000000 2.72000000 

622 Si 1.36000000 4.08000000 4.08000000 

623 

624 In extended XYZ format, the comment line is replaced by a series of 

625 key/value pairs. The keys should be strings and values can be 

626 integers, reals, logicals (denoted by `T` and `F` for true and false) 

627 or strings. Quotes are required if a value contains any spaces (like 

628 `Lattice` above). There are two mandatory parameters that any 

629 extended XYZ: `Lattice` and `Properties`. Other parameters -- 

630 e.g. `Time` in the example above --- can be added to the parameter line 

631 as needed. 

632 

633 `Lattice` is a Cartesian 3x3 matrix representation of the cell 

634 vectors, with each vector stored as a column and the 9 values listed in 

635 Fortran column-major order, i.e. in the form :: 

636 

637 Lattice="R1x R1y R1z R2x R2y R2z R3x R3y R3z" 

638 

639 where `R1x R1y R1z` are the Cartesian x-, y- and z-components of the 

640 first lattice vector (:math:`\mathbf{a}`), `R2x R2y R2z` those of the second 

641 lattice vector (:math:`\mathbf{b}`) and `R3x R3y R3z` those of the 

642 third lattice vector (:math:`\mathbf{c}`). 

643 

644 The list of properties in the file is described by the `Properties` 

645 parameter, which should take the form of a series of colon separated 

646 triplets giving the name, format (`R` for real, `I` for integer) and 

647 number of columns of each property. For example:: 

648 

649 Properties="species:S:1:pos:R:3:vel:R:3:select:I:1" 

650 

651 indicates the first column represents atomic species, the next three 

652 columns represent atomic positions, the next three velcoities, and the 

653 last is an single integer called `select`. With this property 

654 definition, the line :: 

655 

656 Si 4.08000000 4.08000000 1.36000000 0.00000000 0.00000000 0.00000000 1 

657 

658 would describe a silicon atom at position (4.08,4.08,1.36) with zero 

659 velocity and the `select` property set to 1. 

660 

661 The property names `pos`, `Z`, `mass`, and `charge` map to ASE 

662 :attr:`ase.atoms.Atoms.arrays` entries named 

663 `positions`, `numbers`, `masses` and `charges` respectively. 

664 

665 Additional key-value pairs in the comment line are parsed into the 

666 :attr:`ase.Atoms.atoms.info` dictionary, with the following conventions 

667 

668 - Values can be quoted with `""`, `''`, `[]` or `{}` (the latter are 

669 included to ease command-line usage as the `{}` are not treated 

670 specially by the shell) 

671 - Quotes within keys or values can be escaped with `\"`. 

672 - Keys with special names `stress` or `virial` are treated as 3x3 matrices 

673 in Fortran order, as for `Lattice` above. 

674 - Otherwise, values with multiple elements are treated as 1D arrays, first 

675 assuming integer format and falling back to float if conversion is 

676 unsuccessful. 

677 - A missing value defaults to `True`, e.g. the comment line 

678 `"cutoff=3.4 have_energy"` leads to 

679 `{'cutoff': 3.4, 'have_energy': True}` in `atoms.info`. 

680 - Value strings starting with `"_JSON"` are interpreted as JSON content; 

681 similarly, when writing, anything which does not match the criteria above 

682 is serialised as JSON. 

683 

684 The extended XYZ format is also supported by the 

685 the `Ovito <http://www.ovito.org>`_ visualisation tool 

686 (from `v2.4 beta 

687 <http://www.ovito.org/index.php/component/content/article?id=25>`_ 

688 onwards). 

689 """ # noqa: E501 

690 

691 if not isinstance(index, int) and not isinstance(index, slice): 

692 raise TypeError('Index argument is neither slice nor integer!') 

693 

694 # If possible, build a partial index up to the last frame required 

695 last_frame = None 

696 if isinstance(index, int) and index >= 0: 

697 last_frame = index 

698 elif isinstance(index, slice): 

699 if index.stop is not None and index.stop >= 0: 

700 last_frame = index.stop 

701 

702 # scan through file to find where the frames start 

703 try: 

704 fileobj.seek(0) 

705 except UnsupportedOperation: 

706 fileobj = StringIO(fileobj.read()) 

707 fileobj.seek(0) 

708 frames = [] 

709 while True: 

710 frame_pos = fileobj.tell() 

711 line = fileobj.readline() 

712 if line.strip() == '': 

713 break 

714 try: 

715 natoms = int(line) 

716 except ValueError as err: 

717 raise XYZError('ase.io.extxyz: Expected xyz header but got: {}' 

718 .format(err)) 

719 fileobj.readline() # read comment line 

720 for i in range(natoms): 

721 fileobj.readline() 

722 # check for VEC 

723 nvec = 0 

724 while True: 

725 lastPos = fileobj.tell() 

726 line = fileobj.readline() 

727 if line.lstrip().startswith('VEC'): 

728 nvec += 1 

729 if nvec > 3: 

730 raise XYZError('ase.io.extxyz: More than 3 VECX entries') 

731 else: 

732 fileobj.seek(lastPos) 

733 break 

734 frames.append((frame_pos, natoms, nvec)) 

735 if last_frame is not None and len(frames) > last_frame: 

736 break 

737 

738 trbl = index2range(index, len(frames)) 

739 

740 for index in trbl: 

741 frame_pos, natoms, nvec = frames[index] 

742 fileobj.seek(frame_pos) 

743 # check for consistency with frame index table 

744 assert int(fileobj.readline()) == natoms 

745 yield _read_xyz_frame(fileobj, natoms, properties_parser, nvec) 

746 

747 

748def output_column_format(atoms, columns, arrays, 

749 write_info=True, results=None): 

750 """ 

751 Helper function to build extended XYZ comment line 

752 """ 

753 fmt_map = {'d': ('R', '%16.8f'), 

754 'f': ('R', '%16.8f'), 

755 'i': ('I', '%8d'), 

756 'O': ('S', '%s'), 

757 'S': ('S', '%s'), 

758 'U': ('S', '%-2s'), 

759 'b': ('L', ' %.1s')} 

760 

761 # NB: Lattice is stored as tranpose of ASE cell, 

762 # with Fortran array ordering 

763 lattice_str = ('Lattice="' 

764 + ' '.join([str(x) for x in np.reshape(atoms.cell.T, 

765 9, order='F')]) + 

766 '"') 

767 

768 property_names = [] 

769 property_types = [] 

770 property_ncols = [] 

771 dtypes = [] 

772 formats = [] 

773 

774 for column in columns: 

775 array = arrays[column] 

776 dtype = array.dtype 

777 

778 property_name = PROPERTY_NAME_MAP.get(column, column) 

779 property_type, fmt = fmt_map[dtype.kind] 

780 property_names.append(property_name) 

781 property_types.append(property_type) 

782 

783 if (len(array.shape) == 1 

784 or (len(array.shape) == 2 and array.shape[1] == 1)): 

785 ncol = 1 

786 dtypes.append((column, dtype)) 

787 else: 

788 ncol = array.shape[1] 

789 for c in range(ncol): 

790 dtypes.append((column + str(c), dtype)) 

791 

792 formats.extend([fmt] * ncol) 

793 property_ncols.append(ncol) 

794 

795 props_str = ':'.join([':'.join(x) for x in 

796 zip(property_names, 

797 property_types, 

798 [str(nc) for nc in property_ncols])]) 

799 

800 comment_str = '' 

801 if atoms.cell.any(): 

802 comment_str += lattice_str + ' ' 

803 comment_str += f'Properties={props_str}' 

804 

805 info = {} 

806 if write_info: 

807 info.update(atoms.info) 

808 if results is not None: 

809 info.update(results) 

810 info['pbc'] = atoms.get_pbc() # always save periodic boundary conditions 

811 comment_str += ' ' + key_val_dict_to_str(info) 

812 

813 dtype = np.dtype(dtypes) 

814 fmt = ' '.join(formats) + '\n' 

815 

816 return comment_str, property_ncols, dtype, fmt 

817 

818 

819def write_xyz(fileobj, images, comment='', columns=None, 

820 write_info=True, 

821 write_results=True, plain=False, vec_cell=False, 

822 append=False): 

823 """ 

824 Write output in extended XYZ format 

825 

826 Optionally, specify which columns (arrays) to include in output, 

827 whether to write the contents of the `atoms.info` dict to the 

828 XYZ comment line (default is True), the results of any 

829 calculator attached to this Atoms. The `plain` argument 

830 can be used to write a simple XYZ file with no additional information. 

831 `vec_cell` can be used to write the cell vectors as additional 

832 pseudo-atoms. If `append` is set to True, the file is for append (mode `a`), 

833 otherwise it is overwritten (mode `w`). 

834 

835 See documentation for :func:`read_xyz()` for further details of the extended 

836 XYZ file format. 

837 """ 

838 if isinstance(fileobj, str): 

839 mode = 'w' 

840 if append: 

841 mode = 'a' 

842 fileobj = paropen(fileobj, mode) 

843 

844 if hasattr(images, 'get_positions'): 

845 images = [images] 

846 

847 for atoms in images: 

848 natoms = len(atoms) 

849 

850 if columns is None: 

851 fr_cols = None 

852 else: 

853 fr_cols = columns[:] 

854 

855 if fr_cols is None: 

856 fr_cols = (['symbols', 'positions'] 

857 + [key for key in atoms.arrays.keys() if 

858 key not in ['symbols', 'positions', 'numbers', 

859 'species', 'pos']]) 

860 

861 if vec_cell: 

862 plain = True 

863 

864 if plain: 

865 fr_cols = ['symbols', 'positions'] 

866 write_info = False 

867 write_results = False 

868 

869 per_frame_results = {} 

870 per_atom_results = {} 

871 if write_results: 

872 calculator = atoms.calc 

873 if (calculator is not None 

874 and isinstance(calculator, BaseCalculator)): 

875 for key in all_properties: 

876 value = calculator.results.get(key, None) 

877 if value is None: 

878 # skip missing calculator results 

879 continue 

880 if (key in per_atom_properties and len(value.shape) >= 1 

881 and value.shape[0] == len(atoms)): 

882 # per-atom quantities (forces, energies, stresses) 

883 per_atom_results[key] = value 

884 elif key in per_config_properties: 

885 # per-frame quantities (energy, stress) 

886 # special case for stress, which should be converted 

887 # to 3x3 matrices before writing 

888 if key == 'stress': 

889 xx, yy, zz, yz, xz, xy = value 

890 value = np.array( 

891 [(xx, xy, xz), (xy, yy, yz), (xz, yz, zz)]) 

892 per_frame_results[key] = value 

893 

894 # Move symbols and positions to first two properties 

895 if 'symbols' in fr_cols: 

896 i = fr_cols.index('symbols') 

897 fr_cols[0], fr_cols[i] = fr_cols[i], fr_cols[0] 

898 

899 if 'positions' in fr_cols: 

900 i = fr_cols.index('positions') 

901 fr_cols[1], fr_cols[i] = fr_cols[i], fr_cols[1] 

902 

903 # Check first column "looks like" atomic symbols 

904 if fr_cols[0] in atoms.arrays: 

905 symbols = atoms.arrays[fr_cols[0]] 

906 else: 

907 symbols = atoms.get_chemical_symbols() 

908 

909 if natoms > 0 and not isinstance(symbols[0], str): 

910 raise ValueError('First column must be symbols-like') 

911 

912 # Check second column "looks like" atomic positions 

913 pos = atoms.arrays[fr_cols[1]] 

914 if pos.shape != (natoms, 3) or pos.dtype.kind != 'f': 

915 raise ValueError('Second column must be position-like') 

916 

917 # if vec_cell add cell information as pseudo-atoms 

918 if vec_cell: 

919 pbc = list(atoms.get_pbc()) 

920 cell = atoms.get_cell() 

921 

922 if True in pbc: 

923 nPBC = 0 

924 for i, b in enumerate(pbc): 

925 if b: 

926 nPBC += 1 

927 symbols.append('VEC' + str(nPBC)) 

928 pos = np.vstack((pos, cell[i])) 

929 # add to natoms 

930 natoms += nPBC 

931 if pos.shape != (natoms, 3) or pos.dtype.kind != 'f': 

932 raise ValueError( 

933 'Pseudo Atoms containing cell have bad coords') 

934 

935 # Move mask 

936 if 'move_mask' in fr_cols: 

937 cnstr = images[0]._get_constraints() 

938 if len(cnstr) > 0: 

939 c0 = cnstr[0] 

940 if isinstance(c0, FixAtoms): 

941 cnstr = np.ones((natoms,), dtype=bool) 

942 for idx in c0.index: 

943 cnstr[idx] = False 

944 elif isinstance(c0, FixCartesian): 

945 masks = np.ones((natoms, 3), dtype=bool) 

946 for i in range(len(cnstr)): 

947 idx = cnstr[i].index 

948 masks[idx] = cnstr[i].mask 

949 cnstr = masks 

950 else: 

951 fr_cols.remove('move_mask') 

952 

953 # Collect data to be written out 

954 arrays = {} 

955 for column in fr_cols: 

956 if column == 'positions': 

957 arrays[column] = pos 

958 elif column in atoms.arrays: 

959 arrays[column] = atoms.arrays[column] 

960 elif column == 'symbols': 

961 arrays[column] = np.array(symbols) 

962 elif column == 'move_mask': 

963 arrays[column] = cnstr 

964 else: 

965 raise ValueError(f'Missing array "{column}"') 

966 

967 if write_results: 

968 for key in per_atom_results: 

969 if key not in fr_cols: 

970 fr_cols += [key] 

971 else: 

972 warnings.warn('write_xyz() overwriting array "{}" present ' 

973 'in atoms.arrays with stored results ' 

974 'from calculator'.format(key)) 

975 arrays.update(per_atom_results) 

976 

977 comm, ncols, dtype, fmt = output_column_format(atoms, 

978 fr_cols, 

979 arrays, 

980 write_info, 

981 per_frame_results) 

982 

983 if plain or comment != '': 

984 # override key/value pairs with user-speficied comment string 

985 comm = comment.rstrip() 

986 if '\n' in comm: 

987 raise ValueError('Comment line should not have line breaks.') 

988 

989 # Pack fr_cols into record array 

990 data = np.zeros(natoms, dtype) 

991 for column, ncol in zip(fr_cols, ncols): 

992 value = arrays[column] 

993 if ncol == 1: 

994 data[column] = np.squeeze(value) 

995 else: 

996 for c in range(ncol): 

997 data[column + str(c)] = value[:, c] 

998 

999 nat = natoms 

1000 if vec_cell: 

1001 nat -= nPBC 

1002 # Write the output 

1003 fileobj.write('%d\n' % nat) 

1004 fileobj.write(f'{comm}\n') 

1005 for i in range(natoms): 

1006 fileobj.write(fmt % tuple(data[i])) 

1007 

1008 

1009# create aliases for read/write functions 

1010read_extxyz = read_xyz 

1011write_extxyz = write_xyz