Coverage for /builds/kinetik161/ase/ase/io/bundletrajectory.py: 72.40%
576 statements
coverage.py v7.2.7, created at 2023-12-10 11:04 +0000
1"""bundletrajectory - a module for I/O from large MD simulations.
3The BundleTrajectory class writes trajectory into a directory with the
4following structure, except we now use JSON instead of pickle,
5so this text needs updating::
7 filename.bundle (dir)
8 metadata.json Data about the file format, and about which
9 data is present.
10 frames The number of frames (ascii file)
11 F0 (dir) Frame number 0
12 smalldata.ulm Small data structures in a dictionary
13 (pbc, cell, ...)
14 numbers.ulm Atomic numbers
15 positions.ulm Positions
16 momenta.ulm Momenta
17 ...
18 F1 (dir)
20There is a folder for each frame, and the data is in the ASE Ulm format.
21"""

import os
import shutil
import sys
import time
from pathlib import Path

import numpy as np

from ase import Atoms
from ase.calculators.singlepoint import (PropertyNotImplementedError,
                                         SinglePointCalculator)
# The system json module causes memory leaks! Use ase's own.
# import json
from ase.io import jsonio
from ase.io.ulm import open as ulmopen
from ase.parallel import barrier, paropen, world


class BundleTrajectory:
    """Reads and writes atoms into a .bundle directory.

    The BundleTrajectory is an alternative way of storing
    trajectories, intended for large-scale molecular dynamics
    simulations, where a single flat file becomes unwieldy.  Instead,
    the data is stored in a directory, a 'bundle' (the name is
    inspired by bundles in macOS, which are really just directories
    the user is supposed to think of as a single file-like unit).

    Parameters:

    filename:
        The name of the directory.  Preferably ending in .bundle.

    mode (optional):
        The file opening mode.  'r' means open for reading, 'w' for
        writing and 'a' for appending.  Default: 'r'.  If opening in
        write mode, and the filename already exists, the old file is
        renamed to .bak (any old .bak file is deleted), except if the
        existing file is empty.

    atoms (optional):
        The atoms that will be written.  Can only be specified in
        write or append mode.  If not specified, the atoms must be
        given as an argument to the .write() method instead.

    backup=True:
        Use backup=False to disable renaming of an existing file.

    backend='ulm':
        Request a backend.  Currently only 'ulm' is supported.
        Only honored when writing.

    singleprecision=False:
        Store floating point data in single precision (ulm backend only).
    """
    slavelog = True  # Log from all nodes

    def __init__(self, filename, mode='r', atoms=None, backup=True,
                 backend='ulm', singleprecision=False):
        self.state = 'constructing'
        self.filename = filename
        self.pre_observers = []   # callback functions before write is performed
        self.post_observers = []  # callback functions after write is performed
        self.master = world.rank == 0
        self.extra_data = []
        self.singleprecision = singleprecision
        self._set_defaults()
        if mode == 'r':
            if atoms is not None:
                raise ValueError('You cannot specify atoms in read mode.')
            self._open_read()
        elif mode == 'w':
            self._open_write(atoms, backup, backend)
        elif mode == 'a':
            # Pass the backend along so appending to a missing bundle can
            # fall back to opening for writing.
            self._open_append(atoms, backend)
        else:
            raise ValueError('Unknown mode: ' + str(mode))

    def _set_defaults(self):
        "Set default values for internal parameters."
        self.version = 1
        self.subtype = 'normal'
        self.datatypes = {'positions': True,
                          'numbers': 'once',
                          'tags': 'once',
                          'masses': 'once',
                          'momenta': True,
                          'forces': True,
                          'energy': True,
                          'energies': False,
                          'stress': False,
                          'magmoms': True}

    def _set_backend(self, backend):
        """Set the backend doing the actual I/O."""
        if backend is not None:
            self.backend_name = backend

        if self.backend_name == 'ulm':
            self.backend = UlmBundleBackend(self.master, self.singleprecision)
        else:
            raise NotImplementedError(
                'This version of ASE cannot use BundleTrajectory '
                'with backend "%s"' % self.backend_name)

    def write(self, atoms=None):
        """Write the atoms to the file.

        If the atoms argument is not given, the atoms object specified
        when creating the trajectory object is used.
        """
        # Check that we are in write mode
        if self.state == 'prewrite':
            self.state = 'write'
            assert self.nframes == 0
        elif self.state != 'write':
            raise RuntimeError('Cannot write in ' + self.state + ' mode.')

        if atoms is None:
            atoms = self.atoms

        for image in atoms.iterimages():
            self._write_atoms(image)

    def _write_atoms(self, atoms):
        # OK, it is a real atoms object.  Write it.
        self._call_observers(self.pre_observers)
        self.log('Beginning to write frame ' + str(self.nframes))
        framedir = self._make_framedir(self.nframes)

        # Check which data should be written the first time:
        # Modify datatypes so any element of type 'once' becomes true
        # for the first frame but false for subsequent frames.
        datatypes = {}
        for k, v in self.datatypes.items():
            if v == 'once':
                v = (self.nframes == 0)
            datatypes[k] = v

        # Write 'small' data structures.  They are written jointly.
        smalldata = {'pbc': atoms.get_pbc(),
                     'cell': atoms.get_cell(),
                     'natoms': atoms.get_global_number_of_atoms(),
                     'constraints': atoms.constraints}
        if datatypes.get('energy'):
            try:
                smalldata['energy'] = atoms.get_potential_energy()
            except (RuntimeError, PropertyNotImplementedError):
                self.datatypes['energy'] = False
        if datatypes.get('stress'):
            try:
                smalldata['stress'] = atoms.get_stress()
            except PropertyNotImplementedError:
                self.datatypes['stress'] = False
        self.backend.write_small(framedir, smalldata)

        # Write the large arrays.
        if datatypes.get('positions'):
            self.backend.write(framedir, 'positions', atoms.get_positions())
        if datatypes.get('numbers'):
            self.backend.write(framedir, 'numbers', atoms.get_atomic_numbers())
        if datatypes.get('tags'):
            if atoms.has('tags'):
                self.backend.write(framedir, 'tags', atoms.get_tags())
            else:
                self.datatypes['tags'] = False
        if datatypes.get('masses'):
            if atoms.has('masses'):
                self.backend.write(framedir, 'masses', atoms.get_masses())
            else:
                self.datatypes['masses'] = False
        if datatypes.get('momenta'):
            if atoms.has('momenta'):
                self.backend.write(framedir, 'momenta', atoms.get_momenta())
            else:
                self.datatypes['momenta'] = False
        if datatypes.get('magmoms'):
            if atoms.has('initial_magmoms'):
                self.backend.write(framedir, 'magmoms',
                                   atoms.get_initial_magnetic_moments())
            else:
                self.datatypes['magmoms'] = False
        if datatypes.get('forces'):
            try:
                x = atoms.get_forces()
            except (RuntimeError, PropertyNotImplementedError):
                self.datatypes['forces'] = False
            else:
                self.backend.write(framedir, 'forces', x)
                del x
        if datatypes.get('energies'):
            try:
                x = atoms.get_potential_energies()
            except (RuntimeError, PropertyNotImplementedError):
                self.datatypes['energies'] = False
            else:
                self.backend.write(framedir, 'energies', x)
                del x

        # Write any extra data
        for (label, source, once) in self.extra_data:
            if self.nframes == 0 or not once:
                if source is not None:
                    x = source()
                else:
                    x = atoms.get_array(label)
                self.backend.write(framedir, label, x)
                del x
                if once:
                    self.datatypes[label] = 'once'
                else:
                    self.datatypes[label] = True

        # Finally, write metadata if it is the first frame
        if self.nframes == 0:
            metadata = {'datatypes': self.datatypes}
            self._write_metadata(metadata)
        self._write_nframes(self.nframes + 1)
        self._call_observers(self.post_observers)
        self.log('Done writing frame ' + str(self.nframes))
        self.nframes += 1

    def select_data(self, data, value):
        """Selects if a given data type should be written.

        Data can be written in every frame (specify True), in the
        first frame only (specify 'once') or not at all (specify
        False).  Not all data types support the 'once' keyword; if
        not supported it is interpreted as True.

        The following data types are supported, the letter in parenthesis
        indicates the default:

        positions (T), numbers (O), tags (O), masses (O), momenta (T),
        forces (T), energy (T), energies (F), stress (F), magmoms (T)

        If a given property is not present during the first write, it
        will not be saved at all.
        """
        if value not in (True, False, 'once'):
            raise ValueError('Unknown write mode')
        if data not in self.datatypes:
            raise ValueError('Unsupported data type: ' + data)
        self.datatypes[data] = value
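
    # Illustrative example (not executed): store per-atom energies in
    # every frame and drop forces, configured before any frame is written:
    #
    #     traj = BundleTrajectory('md.bundle', mode='w', atoms=atoms)
    #     traj.select_data('energies', True)
    #     traj.select_data('forces', False)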

    def set_extra_data(self, name, source=None, once=False):
        """Adds extra data to be written.

        Parameters:
        name: The name of the data.

        source (optional): If specified, a callable object returning
        the data to be written.  If not specified it is instead
        assumed that the atoms contains the data as an array of the
        same name.

        once (optional): If specified and True, the data will only be
        written to the first frame.
        """
        self.extra_data.append((name, source, once))
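
    # Illustrative example (not executed): write a custom per-atom array
    # stored on the atoms, and a quantity produced by a callable returning
    # a NumPy array.  The names 'charges' and ``get_temperatures`` are
    # hypothetical:
    #
    #     traj.set_extra_data('charges')   # uses atoms.get_array('charges')
    #     traj.set_extra_data('temperatures', source=get_temperatures)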

    def close(self):
        "Closes the trajectory."
        self.state = 'closed'
        lf = getattr(self, 'logfile', None)
        self.backend.close(log=lf)
        if lf is not None:
            lf.close()
            del self.logfile

    def log(self, text):
        """Write to the log file in the bundle.

        Logging is only possible in write/append mode.

        This function is mainly for internal use, but can also be called by
        the user.
        """
        if not (self.master or self.slavelog):
            return
        text = time.asctime() + ': ' + text
        if hasattr(self, 'logfile'):
            # Logging enabled
            if self.logfile is None:
                # Logfile not yet open; buffer the text until it is.
                try:
                    self.logdata.append(text)
                except AttributeError:
                    self.logdata = [text]
            else:
                self.logfile.write(text + '\n')
                self.logfile.flush()
        else:
            raise RuntimeError('Cannot write to log file in mode ' +
                               self.state)

    # __getitem__ is the main reading method.
    def __getitem__(self, n):
        return self._read(n)

    def _read(self, n):
        """Read an atoms object from the BundleTrajectory."""
        if self.state != 'read':
            raise OSError(f'Cannot read in {self.state} mode')
        if n < 0:
            n += self.nframes
        if n < 0 or n >= self.nframes:
            raise IndexError('Trajectory index %d out of range [0, %d['
                             % (n, self.nframes))

        framedir = os.path.join(self.filename, 'F' + str(n))
        framezero = os.path.join(self.filename, 'F0')
        smalldata = self.backend.read_small(framedir)
        data = {}
        data['pbc'] = smalldata['pbc']
        data['cell'] = smalldata['cell']
        data['constraint'] = smalldata['constraints']
        if self.subtype == 'split':
            self.backend.set_fragments(smalldata['fragments'])
            self.atom_id, dummy = self.backend.read_split(framedir, 'ID')
        else:
            self.atom_id = None
        atoms = Atoms(**data)
        natoms = smalldata['natoms']
        for name in ('positions', 'numbers', 'tags', 'masses',
                     'momenta'):
            if self.datatypes.get(name):
                atoms.arrays[name] = self._read_data(framezero, framedir,
                                                     name, self.atom_id)
                assert len(atoms.arrays[name]) == natoms

        # Attach a single-point calculator if an energy was stored.
        if self.datatypes.get('energy'):
            if self.datatypes.get('forces'):
                forces = self.backend.read(framedir, 'forces')
            else:
                forces = None
            if self.datatypes.get('magmoms'):
                magmoms = self.backend.read(framedir, 'magmoms')
            else:
                magmoms = None
            calc = SinglePointCalculator(atoms,
                                         energy=smalldata.get('energy'),
                                         forces=forces,
                                         stress=smalldata.get('stress'),
                                         magmoms=magmoms)
            atoms.calc = calc
        return atoms
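
    # Illustrative example (not executed): random access into a bundle.
    #
    #     traj = BundleTrajectory('md.bundle')
    #     first, last = traj[0], traj[-1]
    #     for atoms in (traj[i] for i in range(len(traj))):
    #         ...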

    def read_extra_data(self, name, n=0):
        """Read extra data stored alongside the atoms.

        Currently only used to read data stored by an NPT dynamics object.
        The data is not associated with individual atoms.
        """
        if self.state != 'read':
            raise OSError(f'Cannot read extra data in {self.state} mode')
        # Handle negative n.
        if n < 0:
            n += self.nframes
        if n < 0 or n >= self.nframes:
            raise IndexError('Trajectory index %d out of range [0, %d['
                             % (n, self.nframes))
        framedir = os.path.join(self.filename, 'F' + str(n))
        framezero = os.path.join(self.filename, 'F0')
        return self._read_data(framezero, framedir, name, self.atom_id)

    def _read_data(self, f0, f, name, atom_id):
        "Read single data item."

        if self.subtype == 'normal':
            if self.datatypes[name] == 'once':
                d = self.backend.read(f0, name)
            else:
                d = self.backend.read(f, name)
        elif self.subtype == 'split':
            if self.datatypes[name] == 'once':
                d, issplit = self.backend.read_split(f0, name)
                atom_id, dummy = self.backend.read_split(f0, 'ID')
            else:
                d, issplit = self.backend.read_split(f, name)
            if issplit:
                assert atom_id is not None
                assert len(d) == len(atom_id)
                # Scatter into global atom order; np.array(d) makes a copy,
                # so the in-place fancy assignment is safe.
                d[atom_id] = np.array(d)
        return d

    def __len__(self):
        return self.nframes

    def _open_log(self):
        if not (self.master or self.slavelog):
            return
        if self.master:
            lfn = os.path.join(self.filename, 'log.txt')
        else:
            lfn = os.path.join(self.filename, ('log-node%d.txt' %
                                               (world.rank,)))
        self.logfile = open(lfn, 'a', 1)  # Append to log if it exists.
        if hasattr(self, 'logdata'):
            for text in self.logdata:
                self.logfile.write(text + '\n')
            self.logfile.flush()
            del self.logdata

    def _open_write(self, atoms, backup, backend):
        "Open a bundle trajectory for writing."
        self._set_backend(backend)
        self.logfile = None  # enable delayed logging
        self.atoms = atoms
        if os.path.exists(self.filename):
            # The output directory already exists.
            barrier()  # all must have time to see it exists
            if not self.is_bundle(self.filename, allowempty=True):
                raise OSError(
                    'Filename "' + self.filename +
                    '" already exists, but is not a BundleTrajectory.' +
                    ' Cowardly refusing to remove it.')
            if self.is_empty_bundle(self.filename):
                barrier()
                self.log(f'Deleting old "{self.filename}" as it is empty')
                self.delete_bundle(self.filename)
            elif not backup:
                barrier()
                self.log(
                    f'Deleting old "{self.filename}" as backup is turned off.')
                self.delete_bundle(self.filename)
            else:
                barrier()
                # Make a backup file
                bakname = self.filename + '.bak'
                if os.path.exists(bakname):
                    barrier()  # All must see it exists
                    self.log(f'Deleting old backup file "{bakname}"')
                    self.delete_bundle(bakname)
                self.log(f'Renaming "{self.filename}" to "{bakname}"')
                self._rename_bundle(self.filename, bakname)
        # Ready to create a new bundle.
        barrier()
        self.log(f'Creating new "{self.filename}"')
        self._make_bundledir(self.filename)
        self.state = 'prewrite'
        self._write_metadata({})
        self._write_nframes(0)  # Mark new bundle as empty
        self._open_log()
        self.nframes = 0

    def _open_read(self):
        "Open a bundle trajectory for reading."
        if not os.path.exists(self.filename):
            raise OSError('File not found: ' + self.filename)
        if not self.is_bundle(self.filename):
            raise OSError('Not a BundleTrajectory: ' + self.filename)
        self.state = 'read'
        # Read the metadata
        metadata = self._read_metadata()
        self.metadata = metadata
        if metadata['version'] > self.version:
            raise NotImplementedError(
                'This version of ASE cannot read a BundleTrajectory version ' +
                str(metadata['version']))
        if metadata['subtype'] not in ('normal', 'split'):
            raise NotImplementedError(
                'This version of ASE cannot read BundleTrajectory subtype ' +
                metadata['subtype'])
        self.subtype = metadata['subtype']
        if metadata['backend'] == 'ulm':
            self.singleprecision = metadata['ulm.singleprecision']
        self._set_backend(metadata['backend'])
        self.nframes = self._read_nframes()
        if self.nframes == 0:
            raise OSError('Empty BundleTrajectory')
        self.datatypes = metadata['datatypes']
        try:
            self.pythonmajor = metadata['python_ver'][0]
        except KeyError:
            self.pythonmajor = 2  # Assume written with Python 2.
        # We need to know if we are running Python 3.X and try to read
        # a bundle written with Python 2.X
        self.backend.readpy2 = (self.pythonmajor == 2)
        self.state = 'read'

    def _open_append(self, atoms, backend):
        "Open a bundle trajectory for appending."
        if not os.path.exists(self.filename):
            # OK, no old bundle.  Open as for write instead.
            barrier()
            self._open_write(atoms, False, backend)
            return
        if not self.is_bundle(self.filename):
            raise OSError('Not a BundleTrajectory: ' + self.filename)
        self.state = 'read'
        metadata = self._read_metadata()
        self.metadata = metadata
        if metadata['version'] != self.version:
            raise NotImplementedError(
                'Cannot append to a BundleTrajectory version '
                '%s (supported version is %s)' % (str(metadata['version']),
                                                  str(self.version)))
        if metadata['subtype'] not in ('normal', 'split'):
            raise NotImplementedError(
                'This version of ASE cannot append to BundleTrajectory '
                'subtype ' + metadata['subtype'])
        self.subtype = metadata['subtype']
        if metadata['backend'] == 'ulm':
            self.singleprecision = metadata['ulm.singleprecision']
        self._set_backend(metadata['backend'])
        self.nframes = self._read_nframes()
        self._open_log()
        self.log('Opening "%s" in append mode (nframes=%i)' % (self.filename,
                                                               self.nframes))
        self.state = 'write'
        self.atoms = atoms

    @property
    def path(self):
        return Path(self.filename)

    @property
    def metadata_path(self):
        return self.path / 'metadata.json'

    def _write_nframes(self, n):
        "Write the number of frames in the bundle."
        assert self.state == 'write' or self.state == 'prewrite'
        with paropen(self.path / 'frames', 'w') as fd:
            fd.write(str(n) + '\n')

    def _read_nframes(self):
        "Read the number of frames."
        return int((self.path / 'frames').read_text())

    def _write_metadata(self, metadata):
        """Write the metadata file of the bundle.

        Modifies the metadata dictionary!
        """
        # Add standard fields that must always be present.
        assert self.state == 'write' or self.state == 'prewrite'
        metadata['format'] = 'BundleTrajectory'
        metadata['version'] = self.version
        metadata['subtype'] = self.subtype
        metadata['backend'] = self.backend_name
        if self.backend_name == 'ulm':
            metadata['ulm.singleprecision'] = self.singleprecision
        metadata['python_ver'] = tuple(sys.version_info)
        encode = jsonio.MyEncoder(indent=4).encode
        fido = encode(metadata)
        with paropen(self.metadata_path, 'w') as fd:
            fd.write(fido)

    def _read_metadata(self):
        """Read the metadata."""
        assert self.state == 'read'
        return jsonio.decode(self.metadata_path.read_text())

    @staticmethod
    def is_bundle(filename, allowempty=False):
        """Check if a filename exists and is a BundleTrajectory.

        If allowempty=True, an empty folder is regarded as an
        empty BundleTrajectory."""
        filename = Path(filename)
        if not filename.is_dir():
            return False
        if allowempty and not os.listdir(filename):
            return True  # An empty BundleTrajectory
        metaname = filename / 'metadata.json'
        if metaname.is_file():
            mdata = jsonio.decode(metaname.read_text())
        else:
            return False

        try:
            return mdata['format'] == 'BundleTrajectory'
        except KeyError:
            return False

    @staticmethod
    def is_empty_bundle(filename):
        """Check if a filename is an empty bundle.

        Assumes that it is a bundle."""
        if not os.listdir(filename):
            return True  # Empty folders are empty bundles.
        with open(os.path.join(filename, 'frames'), 'rb') as fd:
            nframes = int(fd.read())

        # File may be removed by the master immediately after this.
        barrier()
        return nframes == 0

    @classmethod
    def delete_bundle(cls, filename):
        "Deletes a bundle."
        if world.rank == 0:
            # Only the master deletes
            if not cls.is_bundle(filename, allowempty=True):
                raise OSError(
                    f'Cannot remove "{filename}" as it is not a bundle '
                    'trajectory.'
                )
            if os.path.islink(filename):
                # A symbolic link to a bundle.  Only remove the link.
                os.remove(filename)
            else:
                # A real bundle
                shutil.rmtree(filename)
        else:
            # All other tasks wait for the directory to go away.
            while os.path.exists(filename):
                time.sleep(1)
        # The master may not proceed before all tasks have seen the
        # directory go away, as it might otherwise create a new bundle
        # with the same name, fooling the wait loop in _make_bundledir.
        barrier()

    def _rename_bundle(self, oldname, newname):
        "Rename a bundle.  Used to create the .bak"
        if self.master:
            os.rename(oldname, newname)
        else:
            while os.path.exists(oldname):
                time.sleep(1)
        # The master may not proceed before all tasks have seen the
        # directory go away.
        barrier()

    def _make_bundledir(self, filename):
        """Make the main bundle directory.

        Since all MPI tasks might write to it, all tasks must wait for
        the directory to appear.
        """
        self.log('Making directory ' + filename)
        assert not os.path.isdir(filename)
        barrier()
        if self.master:
            os.mkdir(filename)
        else:
            i = 0
            while not os.path.isdir(filename):
                time.sleep(1)
                i += 1
                if i > 10:
                    self.log('Waiting %d seconds for %s to appear!'
                             % (i, filename))

    def _make_framedir(self, frame):
        """Make subdirectory for the frame.

        As only the master writes to it, no synchronization between
        MPI tasks is necessary.
        """
        framedir = os.path.join(self.filename, 'F' + str(frame))
        if self.master:
            self.log('Making directory ' + framedir)
            os.mkdir(framedir)
        return framedir

    def pre_write_attach(self, function, interval=1, *args, **kwargs):
        """Attach a function to be called before writing begins.

        function: The function or callable object to be called.

        interval: How often the function is called.  Default: every time (1).

        All other arguments are stored, and passed to the function.
        """
        if not callable(function):
            raise ValueError('Callback object must be callable.')
        self.pre_observers.append((function, interval, args, kwargs))

    def post_write_attach(self, function, interval=1, *args, **kwargs):
        """Attach a function to be called after writing ends.

        function: The function or callable object to be called.

        interval: How often the function is called.  Default: every time (1).

        All other arguments are stored, and passed to the function.
        """
        if not callable(function):
            raise ValueError('Callback object must be callable.')
        self.post_observers.append((function, interval, args, kwargs))

    def _call_observers(self, obs):
        "Call pre/post write observers."
        for function, interval, args, kwargs in obs:
            if (self.nframes + 1) % interval == 0:
                function(*args, **kwargs)
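

# Illustrative observer example (not executed): call a user-defined
# ``checkpoint`` function after every 10th written frame.  The name is
# hypothetical:
#
#     def checkpoint():
#         ...  # flush restart data, update a progress file, etc.
#
#     traj.post_write_attach(checkpoint, interval=10)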


class UlmBundleBackend:
    """Backend for BundleTrajectories stored as ASE Ulm files."""

    def __init__(self, master, singleprecision):
        # Store if this backend will actually write anything
        self.writesmall = master
        self.writelarge = master
        self.singleprecision = singleprecision

        # Integer data may be downconverted to the following types
        self.integral_dtypes = ['uint8', 'int8', 'uint16', 'int16',
                                'uint32', 'int32', 'uint64', 'int64']
        self.int_dtype = {k: getattr(np, k)
                          for k in self.integral_dtypes}
        self.int_minval = {k: np.iinfo(self.int_dtype[k]).min
                           for k in self.integral_dtypes}
        self.int_maxval = {k: np.iinfo(self.int_dtype[k]).max
                           for k in self.integral_dtypes}
        self.int_itemsize = {k: np.dtype(self.int_dtype[k]).itemsize
                             for k in self.integral_dtypes}

    def write_small(self, framedir, smalldata):
        "Write small data to be written jointly."
        if self.writesmall:
            with ulmopen(os.path.join(framedir, 'smalldata.ulm'), 'w') as fd:
                fd.write(**smalldata)

    def write(self, framedir, name, data):
        "Write data to separate file."
        if self.writelarge:
            shape = data.shape
            dtype = str(data.dtype)
            stored_as = dtype
            all_identical = False
            # Check if it is a type that can be stored with less space
            if np.issubdtype(data.dtype, np.integer):
                # An integer type, we may want to convert
                minval = data.min()
                maxval = data.max()

                # ulm cannot write np.bool_:
                all_identical = bool(minval == maxval)
                if all_identical:
                    data = int(data.flat[0])  # Convert to standard integer
                else:
                    for typ in self.integral_dtypes:
                        if (minval >= self.int_minval[typ] and
                            maxval <= self.int_maxval[typ] and
                            data.itemsize > self.int_itemsize[typ]):

                            # Convert to smaller type
                            stored_as = typ
                            data = data.astype(self.int_dtype[typ])
            elif data.dtype == np.float32 or data.dtype == np.float64:
                all_identical = bool(data.min() == data.max())
                if all_identical:
                    data = float(data.flat[0])  # Convert to standard float
                elif data.dtype == np.float64 and self.singleprecision:
                    # Downconvert double to single precision
                    stored_as = 'float32'
                    data = data.astype(np.float32)
            fn = os.path.join(framedir, name + '.ulm')
            with ulmopen(fn, 'w') as fd:
                fd.write(shape=shape,
                         dtype=dtype,
                         stored_as=stored_as,
                         all_identical=all_identical,
                         data=data)
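
    # A sketch of the downconversion rules above (illustrative values):
    #
    #     np.array([1, 6, 8], dtype=np.int64)  -> stored_as 'uint8'
    #     np.array([7, 7, 7], dtype=np.int64)  -> all_identical, scalar 7
    #     float64 data with singleprecision    -> stored_as 'float32'
    #
    # On reading, ``read`` below casts back to the original dtype recorded
    # in the 'dtype' field, so the round trip preserves shape and type.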

    def read_small(self, framedir):
        "Read small data."
        with ulmopen(os.path.join(framedir, 'smalldata.ulm'), 'r') as fd:
            return fd.asdict()

    def read(self, framedir, name):
        "Read data from separate file."
        fn = os.path.join(framedir, name + '.ulm')
        with ulmopen(fn, 'r') as fd:
            if fd.all_identical:
                # Only a single data value
                data = np.zeros(fd.shape,
                                dtype=getattr(np, fd.dtype)) + fd.data
            elif fd.dtype == fd.stored_as:
                # Easy, the array can be returned as-is.
                data = fd.data
            else:
                # Cast the data back
                data = fd.data.astype(getattr(np, fd.dtype))
            return data

    def read_info(self, framedir, name, split=None):
        """Read information about file contents without reading the data.

        Information is a dictionary containing at a minimum the shape and
        type.
        """
        fn = os.path.join(framedir, name + '.ulm')
        if split is None or os.path.exists(fn):
            with ulmopen(fn, 'r') as fd:
                info = {}
                info['shape'] = fd.shape
                info['type'] = fd.dtype
                info['stored_as'] = fd.stored_as
                info['identical'] = fd.all_identical
                return info
        else:
            info = {}
            for i in range(split):
                fn = os.path.join(framedir, name + '_' + str(i) + '.ulm')
                with ulmopen(fn, 'r') as fd:
                    if i == 0:
                        info['shape'] = list(fd.shape)
                        info['type'] = fd.dtype
                        info['stored_as'] = fd.stored_as
                        info['identical'] = fd.all_identical
                    else:
                        info['shape'][0] += fd.shape[0]
                        assert info['type'] == fd.dtype
                        info['identical'] = (info['identical']
                                             and fd.all_identical)
            info['shape'] = tuple(info['shape'])
            return info

    def set_fragments(self, nfrag):
        self.nfrag = nfrag

    def read_split(self, framedir, name):
        """Read data from multiple files.

        Falls back to reading from a single file if that is how the data
        is stored.

        Returns the data and an object indicating if the data was really
        read from split files.  The latter object is False if not read
        from split files, but is a list of the segment lengths if split
        files were used.
        """
        data = []
        if os.path.exists(os.path.join(framedir, name + '.ulm')):
            # Not stored in split form!
            return (self.read(framedir, name), False)
        for i in range(self.nfrag):
            suf = '_%d' % (i,)
            data.append(self.read(framedir, name + suf))
        seglengths = [len(d) for d in data]
        return (np.concatenate(data), seglengths)

    def close(self, log=None):
        """Close anything that needs to be closed by the backend.

        The default backend does nothing here.
        """


def read_bundletrajectory(filename, index=-1):
    """Reads one or more atoms objects from a BundleTrajectory.

    Arguments:

    filename: str
        The name of the bundle (really a directory!)
    index: int or slice
        An integer specifying which frame to read, or a slice object
        for reading multiple frames.  Default: -1 (reads the last
        frame).
    """
    traj = BundleTrajectory(filename, mode='r')
    if isinstance(index, int):
        # Normalize a plain integer to a one-frame slice, so the loop
        # below can always rely on slice.indices().
        index = slice(index, index + 1 if index != -1 else None)
    for i in range(*index.indices(len(traj))):
        yield traj[i]
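

# Illustrative example (not executed): the reader is a generator, so wrap
# it in list() to materialize frames, or use next() for a single one.
#
#     frames = list(read_bundletrajectory('md.bundle', slice(0, None)))
#     last = next(read_bundletrajectory('md.bundle'))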


def write_bundletrajectory(filename, images, append=False):
    """Write image(s) to a BundleTrajectory.

    Also writes energy, forces, and stress if they are already
    calculated.
    """
    if append:
        mode = 'a'
    else:
        mode = 'w'
    traj = BundleTrajectory(filename, mode=mode)

    if hasattr(images, 'get_positions'):
        images = [images]

    for atoms in images:
        # Avoid potentially expensive calculations:
        calc = atoms.calc
        if hasattr(calc, 'calculation_required'):
            for quantity in ('energy', 'forces', 'stress', 'magmoms'):
                traj.select_data(quantity,
                                 not calc.calculation_required(atoms,
                                                               [quantity]))
        traj.write(atoms)
    traj.close()
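

# Illustrative example (not executed): write a list of images, then append
# another batch to the same bundle.  ``images`` and ``more_images`` are
# hypothetical lists of Atoms objects:
#
#     write_bundletrajectory('md.bundle', images)
#     write_bundletrajectory('md.bundle', more_images, append=True)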


def print_bundletrajectory_info(filename):
    """Prints information about a BundleTrajectory.

    Mainly intended to be called from a command line tool.
    """
    if not BundleTrajectory.is_bundle(filename):
        raise ValueError('Not a BundleTrajectory!')
    if BundleTrajectory.is_empty_bundle(filename):
        print(filename, 'is an empty BundleTrajectory.')
        return
    # Read the metadata
    fn = os.path.join(filename, 'metadata.json')
    with open(fn) as fd:
        metadata = jsonio.decode(fd.read())

    print(f'Metadata information of BundleTrajectory "{filename}":')
    for k, v in metadata.items():
        if k != 'datatypes':
            print(f"  {k}: {v}")
    with open(os.path.join(filename, 'frames'), 'rb') as fd:
        nframes = int(fd.read())
    print('Number of frames: %i' % (nframes,))
    print('Data types:')
    for k, v in metadata['datatypes'].items():
        if v == 'once':
            print(f'  {k}: First frame only.')
        elif v:
            print(f'  {k}: All frames.')
    # Look at first frame
    if metadata['backend'] == 'ulm':
        backend = UlmBundleBackend(True, False)
    else:
        raise NotImplementedError(
            f'Backend {metadata["backend"]} not supported.')
    frame = os.path.join(filename, 'F0')
    small = backend.read_small(frame)
    print('Contents of first frame:')
    for k, v in small.items():
        if k == 'constraints':
            if v:
                print(f'  {len(v)} constraints are present')
            else:
                print('  Constraints are absent.')
        elif k == 'pbc':
            print(f'  Periodic boundary conditions: {str(v)}')
        elif k == 'natoms':
            print('  Number of atoms: %i' % (v,))
        elif hasattr(v, 'shape'):
            print(f'  {k}: shape = {str(v.shape)}, type = {str(v.dtype)}')
            if k == 'cell':
                print('        [[%12.6f, %12.6f, %12.6f],' % tuple(v[0]))
                print('         [%12.6f, %12.6f, %12.6f],' % tuple(v[1]))
                print('         [%12.6f, %12.6f, %12.6f]]' % tuple(v[2]))
        else:
            print(f'  {k}: {str(v)}')
    # Read info from separate files.
    if metadata['subtype'] == 'split':
        nsplit = small['fragments']
    else:
        nsplit = False
    for k, v in metadata['datatypes'].items():
        if v and k not in small:
            info = backend.read_info(frame, k, nsplit)
            infoline = f'  {k}: '
            for key, value in info.items():
                infoline += f'{key} = {str(value)}, '
            infoline = infoline[:-2] + '.'  # Fix punctuation.
            print(infoline)


class PickleBundleBackend:
    # Leave placeholder class so importing asap3 won't crash.
    pass


def main():
    import optparse
    parser = optparse.OptionParser(
        usage='python -m ase.io.bundletrajectory '
        'a.bundle [b.bundle ...]',
        description='Print information about '
        'the contents of one or more bundletrajectories.')
    opts, args = parser.parse_args()
    for name in args:
        print_bundletrajectory_info(name)


if __name__ == '__main__':
    main()