Coverage for /builds/kinetik161/ase/ase/io/bundletrajectory.py: 72.40% (576 statements)


1"""bundletrajectory - a module for I/O from large MD simulations. 

2 

3The BundleTrajectory class writes trajectory into a directory with the 

4following structure, except we now use JSON instead of pickle, 

5so this text needs updating:: 

6 

7 filename.bundle (dir) 

8 metadata.json Data about the file format, and about which 

9 data is present. 

10 frames The number of frames (ascii file) 

11 F0 (dir) Frame number 0 

12 smalldata.ulm Small data structures in a dictionary 

13 (pbc, cell, ...) 

14 numbers.ulm Atomic numbers 

15 positions.ulm Positions 

16 momenta.ulm Momenta 

17 ... 

18 F1 (dir) 

19 

20There is a folder for each frame, and the data is in the ASE Ulm format. 

21""" 

import os
import shutil
import sys
import time
from pathlib import Path

import numpy as np

from ase import Atoms
from ase.calculators.singlepoint import (PropertyNotImplementedError,
                                         SinglePointCalculator)
# The system json module causes memory leaks!  Use ase's own.
# import json
from ase.io import jsonio
from ase.io.ulm import open as ulmopen
from ase.parallel import barrier, paropen, world


class BundleTrajectory:
    """Reads and writes atoms into a .bundle directory.

    The BundleTrajectory is an alternative way of storing
    trajectories, intended for large-scale molecular dynamics
    simulations, where a single flat file becomes unwieldy.  Instead,
    the data is stored in a directory, a 'bundle' (the name is
    inspired by bundles in Mac OS, which are really just directories
    the user is supposed to think of as a single file-like unit).

    Parameters:

    filename:
        The name of the directory.  Preferably ending in .bundle.

    mode (optional):
        The file opening mode.  'r' means open for reading, 'w' for
        writing and 'a' for appending.  Default: 'r'.  If opening in
        write mode, and the filename already exists, the old file is
        renamed to .bak (any old .bak file is deleted), except if the
        existing file is empty.

    atoms (optional):
        The atoms that will be written.  Can only be specified in
        write or append mode.  If not specified, the atoms must be
        given as an argument to the .write() method instead.

    backup=True:
        Use backup=False to disable renaming of an existing file.

    backend='ulm':
        Request a backend.  Currently only 'ulm' is supported.
        Only honored when writing.

    singleprecision=False:
        Store floating point data in single precision (ulm backend only).
    """
    slavelog = True  # Log from all nodes

    def __init__(self, filename, mode='r', atoms=None, backup=True,
                 backend='ulm', singleprecision=False):
        self.state = 'constructing'
        self.filename = filename
        self.pre_observers = []  # Callbacks called before a write
        self.post_observers = []  # Callbacks called after a write
        self.master = world.rank == 0
        self.extra_data = []
        self.singleprecision = singleprecision
        self._set_defaults()
        if mode == 'r':
            if atoms is not None:
                raise ValueError('You cannot specify atoms in read mode.')
            self._open_read()
        elif mode == 'w':
            self._open_write(atoms, backup, backend)
        elif mode == 'a':
            self._open_append(atoms, backend)
        else:
            raise ValueError('Unknown mode: ' + str(mode))

    def _set_defaults(self):
        "Set default values for internal parameters."
        self.version = 1
        self.subtype = 'normal'
        self.datatypes = {'positions': True,
                          'numbers': 'once',
                          'tags': 'once',
                          'masses': 'once',
                          'momenta': True,
                          'forces': True,
                          'energy': True,
                          'energies': False,
                          'stress': False,
                          'magmoms': True}

    def _set_backend(self, backend):
        """Set the backend doing the actual I/O."""
        if backend is not None:
            self.backend_name = backend

        if self.backend_name == 'ulm':
            self.backend = UlmBundleBackend(self.master, self.singleprecision)
        else:
            raise NotImplementedError(
                'This version of ASE cannot use BundleTrajectory '
                'with backend "%s"' % self.backend_name)

    def write(self, atoms=None):
        """Write the atoms to the file.

        If the atoms argument is not given, the atoms object specified
        when creating the trajectory object is used.
        """
        # Check that we are in write mode
        if self.state == 'prewrite':
            self.state = 'write'
            assert self.nframes == 0
        elif self.state != 'write':
            raise RuntimeError('Cannot write in ' + self.state + ' mode.')

        if atoms is None:
            atoms = self.atoms

        for image in atoms.iterimages():
            self._write_atoms(image)

    def _write_atoms(self, atoms):
        # OK, it is a real atoms object.  Write it.
        self._call_observers(self.pre_observers)
        self.log('Beginning to write frame ' + str(self.nframes))
        framedir = self._make_framedir(self.nframes)

        # Check which data should be written the first time:
        # Modify datatypes so any element of type 'once' becomes true
        # for the first frame but false for subsequent frames.
        datatypes = {}
        for k, v in self.datatypes.items():
            if v == 'once':
                v = (self.nframes == 0)
            datatypes[k] = v

        # Write 'small' data structures.  They are written jointly.
        smalldata = {'pbc': atoms.get_pbc(),
                     'cell': atoms.get_cell(),
                     'natoms': atoms.get_global_number_of_atoms(),
                     'constraints': atoms.constraints}
        if datatypes.get('energy'):
            try:
                smalldata['energy'] = atoms.get_potential_energy()
            except (RuntimeError, PropertyNotImplementedError):
                self.datatypes['energy'] = False
        if datatypes.get('stress'):
            try:
                smalldata['stress'] = atoms.get_stress()
            except PropertyNotImplementedError:
                self.datatypes['stress'] = False
        self.backend.write_small(framedir, smalldata)

        # Write the large arrays.
        if datatypes.get('positions'):
            self.backend.write(framedir, 'positions', atoms.get_positions())
        if datatypes.get('numbers'):
            self.backend.write(framedir, 'numbers', atoms.get_atomic_numbers())
        if datatypes.get('tags'):
            if atoms.has('tags'):
                self.backend.write(framedir, 'tags', atoms.get_tags())
            else:
                self.datatypes['tags'] = False
        if datatypes.get('masses'):
            if atoms.has('masses'):
                self.backend.write(framedir, 'masses', atoms.get_masses())
            else:
                self.datatypes['masses'] = False
        if datatypes.get('momenta'):
            if atoms.has('momenta'):
                self.backend.write(framedir, 'momenta', atoms.get_momenta())
            else:
                self.datatypes['momenta'] = False
        if datatypes.get('magmoms'):
            if atoms.has('initial_magmoms'):
                self.backend.write(framedir, 'magmoms',
                                   atoms.get_initial_magnetic_moments())
            else:
                self.datatypes['magmoms'] = False
        if datatypes.get('forces'):
            try:
                x = atoms.get_forces()
            except (RuntimeError, PropertyNotImplementedError):
                self.datatypes['forces'] = False
            else:
                self.backend.write(framedir, 'forces', x)
                del x
        if datatypes.get('energies'):
            try:
                x = atoms.get_potential_energies()
            except (RuntimeError, PropertyNotImplementedError):
                self.datatypes['energies'] = False
            else:
                self.backend.write(framedir, 'energies', x)
                del x
        # Write any extra data
        for (label, source, once) in self.extra_data:
            if self.nframes == 0 or not once:
                if source is not None:
                    x = source()
                else:
                    x = atoms.get_array(label)
                self.backend.write(framedir, label, x)
                del x
                if once:
                    self.datatypes[label] = 'once'
                else:
                    self.datatypes[label] = True
        # Finally, write metadata if it is the first frame
        if self.nframes == 0:
            metadata = {'datatypes': self.datatypes}
            self._write_metadata(metadata)
        self._write_nframes(self.nframes + 1)
        self._call_observers(self.post_observers)
        self.log('Done writing frame ' + str(self.nframes))
        self.nframes += 1

    def select_data(self, data, value):
        """Selects if a given data type should be written.

        Data can be written in every frame (specify True), in the
        first frame only (specify 'once') or not at all (specify
        False).  Not all data types support the 'once' keyword; if not
        supported, it is interpreted as True.

        The following data types are supported, the letter in parenthesis
        indicates the default:

        positions (T), numbers (O), tags (O), masses (O), momenta (T),
        forces (T), energy (T), energies (F), stress (F), magmoms (T)

        If a given property is not present during the first write, it
        will not be saved at all.
        """
        if value not in (True, False, 'once'):
            raise ValueError('Unknown write mode')
        if data not in self.datatypes:
            raise ValueError('Unsupported data type: ' + data)
        self.datatypes[data] = value
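
    # A sketch of data selection (hypothetical values): enable per-atom
    # energies and disable momenta before the first frame is written.
    #
    #     traj = BundleTrajectory('md.bundle', mode='w')
    #     traj.select_data('energies', True)
    #     traj.select_data('momenta', False)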

265 

266 def set_extra_data(self, name, source=None, once=False): 

267 """Adds extra data to be written. 

268 

269 Parameters: 

270 name: The name of the data. 

271 

272 source (optional): If specified, a callable object returning 

273 the data to be written. If not specified it is instead 

274 assumed that the atoms contains the data as an array of the 

275 same name. 

276 

277 once (optional): If specified and True, the data will only be 

278 written to the first frame. 

279 """ 

280 self.extra_data.append((name, source, once)) 
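
    # A sketch of extra data (the callable `get_volume` is hypothetical):
    # a per-frame scalar from a callable, and a static per-atom array
    # stored only in frame F0.
    #
    #     traj.set_extra_data('volume', source=get_volume)
    #     traj.set_extra_data('site_labels', once=True)  # from atoms.get_array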

    def close(self):
        "Closes the trajectory."
        self.state = 'closed'
        lf = getattr(self, 'logfile', None)
        self.backend.close(log=lf)
        if lf is not None:
            lf.close()
            del self.logfile

    def log(self, text):
        """Write to the log file in the bundle.

        Logging is only possible in write/append mode.

        This function is mainly for internal use, but can also be called by
        the user.
        """
        if not (self.master or self.slavelog):
            return
        text = time.asctime() + ': ' + text
        if hasattr(self, 'logfile'):
            # Logging enabled
            if self.logfile is None:
                # Logfile not yet open
                try:
                    self.logdata.append(text)
                except AttributeError:
                    self.logdata = [text]
            else:
                self.logfile.write(text + '\n')
                self.logfile.flush()
        else:
            raise RuntimeError('Cannot write to log file in mode ' +
                               self.state)

    # __getitem__ is the main reading method.
    def __getitem__(self, n):
        return self._read(n)

    def _read(self, n):
        """Read an atoms object from the BundleTrajectory."""
        if self.state != 'read':
            raise OSError(f'Cannot read in {self.state} mode')
        if n < 0:
            n += self.nframes
        if n < 0 or n >= self.nframes:
            raise IndexError('Trajectory index %d out of range [0, %d['
                             % (n, self.nframes))

        framedir = os.path.join(self.filename, 'F' + str(n))
        framezero = os.path.join(self.filename, 'F0')
        smalldata = self.backend.read_small(framedir)
        data = {}
        data['pbc'] = smalldata['pbc']
        data['cell'] = smalldata['cell']
        data['constraint'] = smalldata['constraints']
        if self.subtype == 'split':
            self.backend.set_fragments(smalldata['fragments'])
            self.atom_id, dummy = self.backend.read_split(framedir, 'ID')
        else:
            self.atom_id = None
        atoms = Atoms(**data)
        natoms = smalldata['natoms']
        for name in ('positions', 'numbers', 'tags', 'masses',
                     'momenta'):
            if self.datatypes.get(name):
                atoms.arrays[name] = self._read_data(framezero, framedir,
                                                     name, self.atom_id)
                assert len(atoms.arrays[name]) == natoms

        # Attach a single-point calculator holding the stored results.
        if self.datatypes.get('energy'):
            if self.datatypes.get('forces'):
                forces = self.backend.read(framedir, 'forces')
            else:
                forces = None
            if self.datatypes.get('magmoms'):
                magmoms = self.backend.read(framedir, 'magmoms')
            else:
                magmoms = None
            calc = SinglePointCalculator(atoms,
                                         energy=smalldata.get('energy'),
                                         forces=forces,
                                         stress=smalldata.get('stress'),
                                         magmoms=magmoms)
            atoms.calc = calc
        return atoms
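
    # A sketch of random access: frames come back as Atoms objects, and if
    # energy/forces were stored they are available through the attached
    # SinglePointCalculator.
    #
    #     traj = BundleTrajectory('md.bundle')
    #     atoms = traj[-1]                        # last frame
    #     energy = atoms.get_potential_energy()   # stored value, no new calc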

    def read_extra_data(self, name, n=0):
        """Read extra data stored alongside the atoms.

        Currently only used to read data stored by an NPT dynamics object.
        The data is not associated with individual atoms.
        """
        if self.state != 'read':
            raise OSError(f'Cannot read extra data in {self.state} mode')
        # Handle negative n.
        if n < 0:
            n += self.nframes
        if n < 0 or n >= self.nframes:
            raise IndexError('Trajectory index %d out of range [0, %d['
                             % (n, self.nframes))
        framedir = os.path.join(self.filename, 'F' + str(n))
        framezero = os.path.join(self.filename, 'F0')
        return self._read_data(framezero, framedir, name, self.atom_id)

    def _read_data(self, f0, f, name, atom_id):
        "Read single data item."

        if self.subtype == 'normal':
            if self.datatypes[name] == 'once':
                d = self.backend.read(f0, name)
            else:
                d = self.backend.read(f, name)
        elif self.subtype == 'split':
            if self.datatypes[name] == 'once':
                d, issplit = self.backend.read_split(f0, name)
                atom_id, dummy = self.backend.read_split(f0, 'ID')
            else:
                d, issplit = self.backend.read_split(f, name)
            if issplit:
                assert atom_id is not None
                assert len(d) == len(atom_id)
                d[atom_id] = np.array(d)
        return d

    def __len__(self):
        return self.nframes

    def _open_log(self):
        if not (self.master or self.slavelog):
            return
        if self.master:
            lfn = os.path.join(self.filename, 'log.txt')
        else:
            lfn = os.path.join(self.filename, ('log-node%d.txt' %
                                               (world.rank,)))
        self.logfile = open(lfn, 'a', 1)  # Append to log if it exists.
        if hasattr(self, 'logdata'):
            for text in self.logdata:
                self.logfile.write(text + '\n')
            self.logfile.flush()
            del self.logdata

    def _open_write(self, atoms, backup, backend):
        "Open a bundle trajectory for writing."
        self._set_backend(backend)
        self.logfile = None  # enable delayed logging
        self.atoms = atoms
        if os.path.exists(self.filename):
            # The output directory already exists.
            barrier()  # all must have time to see it exists
            if not self.is_bundle(self.filename, allowempty=True):
                raise OSError(
                    'Filename "' + self.filename +
                    '" already exists, but is not a BundleTrajectory.' +
                    ' Cowardly refusing to remove it.')
            if self.is_empty_bundle(self.filename):
                barrier()
                self.log(f'Deleting old "{self.filename}" as it is empty')
                self.delete_bundle(self.filename)
            elif not backup:
                barrier()
                self.log(
                    f'Deleting old "{self.filename}" as backup is turned off.')
                self.delete_bundle(self.filename)
            else:
                barrier()
                # Make a backup file
                bakname = self.filename + '.bak'
                if os.path.exists(bakname):
                    barrier()  # All must see it exists
                    self.log(f'Deleting old backup file "{bakname}"')
                    self.delete_bundle(bakname)
                self.log(f'Renaming "{self.filename}" to "{bakname}"')
                self._rename_bundle(self.filename, bakname)
        # Ready to create a new bundle.
        barrier()
        self.log(f'Creating new "{self.filename}"')
        self._make_bundledir(self.filename)
        self.state = 'prewrite'
        self._write_metadata({})
        self._write_nframes(0)  # Mark new bundle as empty
        self._open_log()
        self.nframes = 0

    def _open_read(self):
        "Open a bundle trajectory for reading."
        if not os.path.exists(self.filename):
            raise OSError('File not found: ' + self.filename)
        if not self.is_bundle(self.filename):
            raise OSError('Not a BundleTrajectory: ' + self.filename)
        self.state = 'read'
        # Read the metadata
        metadata = self._read_metadata()
        self.metadata = metadata
        if metadata['version'] > self.version:
            raise NotImplementedError(
                'This version of ASE cannot read a BundleTrajectory version '
                + str(metadata['version']))
        if metadata['subtype'] not in ('normal', 'split'):
            raise NotImplementedError(
                'This version of ASE cannot read BundleTrajectory subtype ' +
                metadata['subtype'])
        self.subtype = metadata['subtype']
        if metadata['backend'] == 'ulm':
            self.singleprecision = metadata['ulm.singleprecision']
        self._set_backend(metadata['backend'])
        self.nframes = self._read_nframes()
        if self.nframes == 0:
            raise OSError('Empty BundleTrajectory')
        self.datatypes = metadata['datatypes']
        try:
            self.pythonmajor = metadata['python_ver'][0]
        except KeyError:
            self.pythonmajor = 2  # Assume written with Python 2.
        # We need to know if we are running Python 3.X and try to read
        # a bundle written with Python 2.X
        self.backend.readpy2 = (self.pythonmajor == 2)
        self.state = 'read'

    def _open_append(self, atoms, backend):
        if not os.path.exists(self.filename):
            # OK, no old bundle.  Open as for write instead.
            barrier()
            self._open_write(atoms, False, backend)
            return
        if not self.is_bundle(self.filename):
            raise OSError('Not a BundleTrajectory: ' + self.filename)
        self.state = 'read'
        metadata = self._read_metadata()
        self.metadata = metadata
        if metadata['version'] != self.version:
            raise NotImplementedError(
                'Cannot append to a BundleTrajectory version '
                '%s (supported version is %s)' % (str(metadata['version']),
                                                  str(self.version)))
        if metadata['subtype'] not in ('normal', 'split'):
            raise NotImplementedError(
                'This version of ASE cannot append to BundleTrajectory '
                'subtype ' + metadata['subtype'])
        self.subtype = metadata['subtype']
        if metadata['backend'] == 'ulm':
            self.singleprecision = metadata['ulm.singleprecision']
        self._set_backend(metadata['backend'])
        self.nframes = self._read_nframes()
        self._open_log()
        self.log('Opening "%s" in append mode (nframes=%i)' % (self.filename,
                                                               self.nframes))
        self.state = 'write'
        self.atoms = atoms

    @property
    def path(self):
        return Path(self.filename)

    @property
    def metadata_path(self):
        return self.path / 'metadata.json'

    def _write_nframes(self, n):
        "Write the number of frames in the bundle."
        assert self.state == 'write' or self.state == 'prewrite'
        with paropen(self.path / 'frames', 'w') as fd:
            fd.write(str(n) + '\n')

    def _read_nframes(self):
        "Read the number of frames."
        return int((self.path / 'frames').read_text())

    def _write_metadata(self, metadata):
        """Write the metadata file of the bundle.

        Modifies the metadata dictionary!
        """
        # Add standard fields that must always be present.
        assert self.state == 'write' or self.state == 'prewrite'
        metadata['format'] = 'BundleTrajectory'
        metadata['version'] = self.version
        metadata['subtype'] = self.subtype
        metadata['backend'] = self.backend_name
        if self.backend_name == 'ulm':
            metadata['ulm.singleprecision'] = self.singleprecision
        metadata['python_ver'] = tuple(sys.version_info)
        encode = jsonio.MyEncoder(indent=4).encode
        fido = encode(metadata)
        with paropen(self.metadata_path, 'w') as fd:
            fd.write(fido)

    def _read_metadata(self):
        """Read the metadata."""
        assert self.state == 'read'
        return jsonio.decode(self.metadata_path.read_text())

    @staticmethod
    def is_bundle(filename, allowempty=False):
        """Check if a filename exists and is a BundleTrajectory.

        If allowempty=True, an empty folder is regarded as an
        empty BundleTrajectory."""
        filename = Path(filename)
        if not filename.is_dir():
            return False
        if allowempty and not os.listdir(filename):
            return True  # An empty BundleTrajectory
        metaname = filename / 'metadata.json'
        if metaname.is_file():
            mdata = jsonio.decode(metaname.read_text())
        else:
            return False

        try:
            return mdata['format'] == 'BundleTrajectory'
        except KeyError:
            return False

    @staticmethod
    def is_empty_bundle(filename):
        """Check if a filename is an empty bundle.

        Assumes that it is a bundle."""
        if not os.listdir(filename):
            return True  # Empty folders are empty bundles.
        with open(os.path.join(filename, 'frames'), 'rb') as fd:
            nframes = int(fd.read())

        # File may be removed by the master immediately after this.
        barrier()
        return nframes == 0

    @classmethod
    def delete_bundle(cls, filename):
        "Deletes a bundle."
        if world.rank == 0:
            # Only the master deletes
            if not cls.is_bundle(filename, allowempty=True):
                raise OSError(
                    f'Cannot remove "{filename}" as it is not a bundle '
                    'trajectory.'
                )
            if os.path.islink(filename):
                # A symbolic link to a bundle.  Only remove the link.
                os.remove(filename)
            else:
                # A real bundle
                shutil.rmtree(filename)
        else:
            # All other tasks wait for the directory to go away.
            while os.path.exists(filename):
                time.sleep(1)
        # The master may not proceed before all tasks have seen the
        # directory go away, as it might otherwise create a new bundle
        # with the same name, fooling the wait loop in _make_bundledir.
        barrier()

    def _rename_bundle(self, oldname, newname):
        "Rename a bundle.  Used to create the .bak"
        if self.master:
            os.rename(oldname, newname)
        else:
            while os.path.exists(oldname):
                time.sleep(1)
        # The master may not proceed before all tasks have seen the
        # directory go away.
        barrier()

    def _make_bundledir(self, filename):
        """Make the main bundle directory.

        Since all MPI tasks might write to it, all tasks must wait for
        the directory to appear.
        """
        self.log('Making directory ' + filename)
        assert not os.path.isdir(filename)
        barrier()
        if self.master:
            os.mkdir(filename)
        else:
            i = 0
            while not os.path.isdir(filename):
                time.sleep(1)
                i += 1
                if i > 10:
                    self.log('Waiting %d seconds for %s to appear!'
                             % (i, filename))

    def _make_framedir(self, frame):
        """Make subdirectory for the frame.

        As only the master writes to it, no synchronization between
        MPI tasks is necessary.
        """
        framedir = os.path.join(self.filename, 'F' + str(frame))
        if self.master:
            self.log('Making directory ' + framedir)
            os.mkdir(framedir)
        return framedir

    def pre_write_attach(self, function, interval=1, *args, **kwargs):
        """Attach a function to be called before writing begins.

        function: The function or callable object to be called.

        interval: How often the function is called.  Default: every time (1).

        All other arguments are stored, and passed to the function.
        """
        if not callable(function):
            raise ValueError('Callback object must be callable.')
        self.pre_observers.append((function, interval, args, kwargs))

    def post_write_attach(self, function, interval=1, *args, **kwargs):
        """Attach a function to be called after writing ends.

        function: The function or callable object to be called.

        interval: How often the function is called.  Default: every time (1).

        All other arguments are stored, and passed to the function.
        """
        if not callable(function):
            raise ValueError('Callback object must be callable.')
        self.post_observers.append((function, interval, args, kwargs))

    def _call_observers(self, obs):
        "Call pre/post write observers."
        for function, interval, args, kwargs in obs:
            if (self.nframes + 1) % interval == 0:
                function(*args, **kwargs)
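
# A sketch of the observer hooks (the `checkpoint` function is
# hypothetical): observers let other code piggyback on the write cycle.
#
#     def checkpoint():
#         print('about to write a frame')
#
#     traj = BundleTrajectory('md.bundle', mode='w')
#     traj.pre_write_attach(checkpoint, interval=10)  # every 10th frame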


class UlmBundleBackend:
    """Backend for BundleTrajectories stored as ASE Ulm files."""

    def __init__(self, master, singleprecision):
        # Store if this backend will actually write anything
        self.writesmall = master
        self.writelarge = master
        self.singleprecision = singleprecision

        # Integer data may be downconverted to the following types
        self.integral_dtypes = ['uint8', 'int8', 'uint16', 'int16',
                                'uint32', 'int32', 'uint64', 'int64']
        self.int_dtype = {k: getattr(np, k)
                          for k in self.integral_dtypes}
        self.int_minval = {k: np.iinfo(self.int_dtype[k]).min
                           for k in self.integral_dtypes}
        self.int_maxval = {k: np.iinfo(self.int_dtype[k]).max
                           for k in self.integral_dtypes}
        self.int_itemsize = {k: np.dtype(self.int_dtype[k]).itemsize
                             for k in self.integral_dtypes}

    def write_small(self, framedir, smalldata):
        "Write small data to be written jointly."
        if self.writesmall:
            with ulmopen(os.path.join(framedir, 'smalldata.ulm'), 'w') as fd:
                fd.write(**smalldata)

    def write(self, framedir, name, data):
        "Write data to separate file."
        if self.writelarge:
            shape = data.shape
            dtype = str(data.dtype)
            stored_as = dtype
            all_identical = False
            # Check if it is a type that can be stored with less space
            if np.issubdtype(data.dtype, np.integer):
                # An integer type, we may want to convert
                minval = data.min()
                maxval = data.max()

                # ulm cannot write np.bool_:
                all_identical = bool(minval == maxval)
                if all_identical:
                    data = int(data.flat[0])  # Convert to standard integer
                else:
                    for typ in self.integral_dtypes:
                        if (minval >= self.int_minval[typ] and
                                maxval <= self.int_maxval[typ] and
                                data.itemsize > self.int_itemsize[typ]):

                            # Convert to smaller type
                            stored_as = typ
                            data = data.astype(self.int_dtype[typ])
            elif data.dtype == np.float32 or data.dtype == np.float64:
                all_identical = bool(data.min() == data.max())
                if all_identical:
                    data = float(data.flat[0])  # Convert to standard float
                elif data.dtype == np.float64 and self.singleprecision:
                    # Downconvert double to single precision
                    stored_as = 'float32'
                    data = data.astype(np.float32)
            fn = os.path.join(framedir, name + '.ulm')
            with ulmopen(fn, 'w') as fd:
                fd.write(shape=shape,
                         dtype=dtype,
                         stored_as=stored_as,
                         all_identical=all_identical,
                         data=data)
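
    # The loop above selects the smallest integer dtype whose range covers
    # [minval, maxval].  A standalone sketch of the same idea (illustrative
    # only, using an explicit break instead of the itemsize test):
    #
    #     import numpy as np
    #     data = np.array([0, 3, 200], dtype=np.int64)
    #     for typ in ('uint8', 'int8', 'uint16', 'int16',
    #                 'uint32', 'int32', 'uint64', 'int64'):
    #         info = np.iinfo(np.dtype(typ))
    #         if data.min() >= info.min and data.max() <= info.max:
    #             data = data.astype(typ)   # here: uint8
    #             break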

    def read_small(self, framedir):
        "Read small data."
        with ulmopen(os.path.join(framedir, 'smalldata.ulm'), 'r') as fd:
            return fd.asdict()

    def read(self, framedir, name):
        "Read data from separate file."
        fn = os.path.join(framedir, name + '.ulm')
        with ulmopen(fn, 'r') as fd:
            if fd.all_identical:
                # Only a single data value
                data = np.zeros(fd.shape,
                                dtype=getattr(np, fd.dtype)) + fd.data
            elif fd.dtype == fd.stored_as:
                # Easy, the array can be returned as-is.
                data = fd.data
            else:
                # Cast the data back
                data = fd.data.astype(getattr(np, fd.dtype))
            return data

    def read_info(self, framedir, name, split=None):
        """Read information about file contents without reading the data.

        Information is a dictionary containing at a minimum the shape and
        type.
        """
        fn = os.path.join(framedir, name + '.ulm')
        if split is None or os.path.exists(fn):
            with ulmopen(fn, 'r') as fd:
                info = {}
                info['shape'] = fd.shape
                info['type'] = fd.dtype
                info['stored_as'] = fd.stored_as
                info['identical'] = fd.all_identical
                return info
        else:
            info = {}
            for i in range(split):
                fn = os.path.join(framedir, name + '_' + str(i) + '.ulm')
                with ulmopen(fn, 'r') as fd:
                    if i == 0:
                        info['shape'] = list(fd.shape)
                        info['type'] = fd.dtype
                        info['stored_as'] = fd.stored_as
                        info['identical'] = fd.all_identical
                    else:
                        info['shape'][0] += fd.shape[0]
                        assert info['type'] == fd.dtype
                        info['identical'] = (info['identical']
                                             and fd.all_identical)
            info['shape'] = tuple(info['shape'])
            return info

    def set_fragments(self, nfrag):
        self.nfrag = nfrag

    def read_split(self, framedir, name):
        """Read data from multiple files.

        Falls back to reading from single file if that is how data is stored.

        Returns the data and an object indicating if the data was really
        read from split files.  The latter object is False if not
        read from split files, but is an array of the segment length if
        split files were used.
        """
        data = []
        if os.path.exists(os.path.join(framedir, name + '.ulm')):
            # Not stored in split form!
            return (self.read(framedir, name), False)
        for i in range(self.nfrag):
            suf = '_%d' % (i,)
            data.append(self.read(framedir, name + suf))
        seglengths = [len(d) for d in data]
        return (np.concatenate(data), seglengths)

    def close(self, log=None):
        """Close anything that needs to be closed by the backend.

        The default backend does nothing here.
        """


def read_bundletrajectory(filename, index=-1):
    """Reads one or more atoms objects from a BundleTrajectory.

    Arguments:

    filename: str
        The name of the bundle (really a directory!)
    index: int
        An integer specifying which frame to read, or an index object
        for reading multiple frames.  Default: -1 (reads the last
        frame).
    """
    traj = BundleTrajectory(filename, mode='r')
    for i in range(*index.indices(len(traj))):
        yield traj[i]
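
# In practice bundles are usually read through the generic ASE interface;
# a sketch (assuming the .bundle extension is registered with ase.io):
#
#     from ase.io import read
#     images = read('md.bundle', index=':')   # all frames, as a list
#     last = read('md.bundle')                # last frame only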


def write_bundletrajectory(filename, images, append=False):
    """Write image(s) to a BundleTrajectory.

    Also writes energy, forces, and stress if they are already
    calculated.
    """

    if append:
        mode = 'a'
    else:
        mode = 'w'
    traj = BundleTrajectory(filename, mode=mode)

    if hasattr(images, 'get_positions'):
        images = [images]

    for atoms in images:
        # Avoid potentially expensive calculations:
        calc = atoms.calc
        if hasattr(calc, 'calculation_required'):
            for quantity in ('energy', 'forces', 'stress', 'magmoms'):
                traj.select_data(quantity,
                                 not calc.calculation_required(atoms,
                                                               [quantity]))
        traj.write(atoms)
    traj.close()
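
# The matching write-side entry point, as a sketch (assuming the format
# name 'bundletrajectory' is registered with ase.io):
#
#     from ase.io import write
#     write('md.bundle', images, format='bundletrajectory')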


def print_bundletrajectory_info(filename):
    """Prints information about a BundleTrajectory.

    Mainly intended to be called from a command line tool.
    """
    if not BundleTrajectory.is_bundle(filename):
        raise ValueError('Not a BundleTrajectory!')
    if BundleTrajectory.is_empty_bundle(filename):
        print(filename, 'is an empty BundleTrajectory.')
        return
    # Read the metadata
    fn = os.path.join(filename, 'metadata.json')
    with open(fn) as fd:
        metadata = jsonio.decode(fd.read())

    print(f'Metadata information of BundleTrajectory "{filename}":')
    for k, v in metadata.items():
        if k != 'datatypes':
            print(f"  {k}: {v}")
    with open(os.path.join(filename, 'frames'), 'rb') as fd:
        nframes = int(fd.read())
    print('Number of frames: %i' % (nframes,))
    print('Data types:')
    for k, v in metadata['datatypes'].items():
        if v == 'once':
            print(f'  {k}: First frame only.')
        elif v:
            print(f'  {k}: All frames.')
    # Look at first frame
    if metadata['backend'] == 'ulm':
        backend = UlmBundleBackend(True, False)
    else:
        raise NotImplementedError(
            f'Backend {metadata["backend"]} not supported.')
    frame = os.path.join(filename, 'F0')
    small = backend.read_small(frame)
    print('Contents of first frame:')
    for k, v in small.items():
        if k == 'constraints':
            if v:
                print(f'  {len(v)} constraints are present')
            else:
                print('  Constraints are absent.')
        elif k == 'pbc':
            print(f'  Periodic boundary conditions: {str(v)}')
        elif k == 'natoms':
            print('  Number of atoms: %i' % (v,))
        elif hasattr(v, 'shape'):
            print(f'  {k}: shape = {str(v.shape)}, type = {str(v.dtype)}')
            if k == 'cell':
                print('    [[%12.6f, %12.6f, %12.6f],' % tuple(v[0]))
                print('     [%12.6f, %12.6f, %12.6f],' % tuple(v[1]))
                print('     [%12.6f, %12.6f, %12.6f]]' % tuple(v[2]))
        else:
            print(f'  {k}: {str(v)}')
    # Read info from separate files.
    if metadata['subtype'] == 'split':
        nsplit = small['fragments']
    else:
        nsplit = False
    for k, v in metadata['datatypes'].items():
        if v and k not in small:
            info = backend.read_info(frame, k, nsplit)
            infoline = f'  {k}: '
            # Use separate names to avoid shadowing the outer loop variables.
            for ik, iv in info.items():
                infoline += f'{ik} = {str(iv)}, '
            infoline = infoline[:-2] + '.'  # Fix punctuation.
            print(infoline)


class PickleBundleBackend:
    # Leave placeholder class so importing asap3 won't crash.
    pass


def main():
    import optparse
    parser = optparse.OptionParser(
        usage='python -m ase.io.bundletrajectory '
        'a.bundle [b.bundle ...]',
        description='Print information about '
        'the contents of one or more bundletrajectories.')
    opts, args = parser.parse_args()
    for name in args:
        print_bundletrajectory_info(name)


if __name__ == '__main__':
    main()