Coverage for /builds/kinetik161/ase/ase/io/bundlemanipulate.py: 6.56%
122 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-12-10 11:04 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-12-10 11:04 +0000
1"""Functions for in-place manipulation of bundletrajectories.
3This module defines a number of functions that can be used to
4extract and delete data from BundleTrajectories directly on
5disk. The functions are intended for large-scale MD output,
6so they avoid copying the potentially large amounts of data.
7Instead, data is either directly deleted in-place; or copies
8are made by creating a new directory structure, but hardlinking
the data files. Hard links make it possible to delete the
original data without invalidating the copy.
12Usage from command line:
14python -m ase.io.bundlemanipulate inbundle outbundle [start [end [step]]]
15"""
17import json
18import os
19from typing import Optional
21import numpy as np
23from ase.io.bundletrajectory import UlmBundleBackend
def copy_frames(inbundle, outbundle, start=0, end=None, step=1,
                verbose=False):
    """Copy selected frames from one bundle into a newly created bundle.

    The frames ``range(start, end, step)`` of ``inbundle`` are written to
    ``outbundle`` and renumbered consecutively from ``F0``.  Data files are
    hard-linked rather than copied.  If the first selected frame is not
    frame 0 of the input, the small data of input frame 0 is merged into
    the new first frame (values from the selected frame win), and any array
    that exists only in input frame 0 is either linked (non-split bundles)
    or re-read, reordered by ID and rewritten (split bundles).

    Parameters:

    inbundle: path of the existing bundle directory.
    outbundle: path of the bundle directory to create; ``os.mkdir`` is used,
        so it must not exist already.
    start, end, step: integers selecting the frames.  Negative ``start``
        and ``end`` count from the end; ``end=None`` means "up to and
        including the last frame".
    verbose: if true, print progress information.

    Raises:

    TypeError: if start, end or step is not an integer.
    ValueError: if start, end or step selects a frame that does not exist,
        or if step is zero.
    OSError: if the input is not a usable bundle or uses an unsupported
        backend.
    """
    if not (isinstance(start, int) and
            (isinstance(end, int) or end is None) and
            isinstance(step, int)):
        raise TypeError("copy_frames: start, end and step must be integers.")
    metadata, nframes = read_bundle_info(inbundle)

    if metadata['backend'] == 'ulm':
        backend = UlmBundleBackend(True, metadata['ulm.singleprecision'])
    elif metadata['backend'] == 'pickle':
        raise OSError("Input BundleTrajectory uses the 'pickle' backend. " +
                      "This is not longer supported for security reasons")
    else:
        raise OSError("Unknown backend type '{}'".format(metadata['backend']))

    # Normalise negative indices and the open-ended default.
    if start < 0:
        start += nframes
    if end is None:
        end = nframes
    if end < 0:
        end += nframes
    if start < 0 or (start > nframes - 1 and end > 0):
        raise ValueError("copy_frames: Invalid start value.")
    if end < 0:
        raise ValueError("copy_frames: Invalid end value.")
    if step == 0:
        raise ValueError("copy_frames: Invalid step value (zero)")
    frames = list(range(start, end, step))
    # Reject selections that reach past the last frame *before* anything is
    # written, instead of failing half-way through with a partial bundle.
    # frames[0] and frames[-1] are the extremes of the range; the lower
    # bound is already guaranteed by the checks above.
    if frames:
        if frames[0] >= nframes:
            raise ValueError("copy_frames: Invalid start value.")
        if frames[-1] >= nframes:
            raise ValueError("copy_frames: Invalid end value.")
    if verbose:
        print("Copying the frames", frames)

    # Make the new bundle directory
    os.mkdir(outbundle)
    with open(os.path.join(outbundle, 'metadata.json'), 'w') as fd:
        json.dump(metadata, fd, indent=2)

    first_indir = os.path.join(inbundle, "F0")
    for nout, nin in enumerate(frames):
        if verbose:
            print("F%i -> F%i" % (nin, nout))
        indir = os.path.join(inbundle, "F" + str(nin))
        outdir = os.path.join(outbundle, "F" + str(nout))
        os.mkdir(outdir)
        names = os.listdir(indir)
        for name in names:
            fromfile = os.path.join(indir, name)
            tofile = os.path.join(outdir, name)
            os.link(fromfile, tofile)
        if nout == 0 and nin != 0:
            if verbose:
                print("F0 -> F0 (supplemental)")
            # The smalldata.ulm stuff must be updated.
            # At the same time, check if the number of fragments
            # has not changed.
            data0 = backend.read_small(first_indir)
            data1 = backend.read_small(indir)
            split_data = (metadata['subtype'] == 'split')
            if split_data:
                fragments0 = data0['fragments']
                fragments1 = data1['fragments']

            data0.update(data1)  # Data in frame overrides data from frame 0.
            backend.write_small(outdir, data0)

            # If data is written in split mode, it must be reordered
            firstnames = os.listdir(first_indir)
            if not split_data:
                # Simple linking
                for name in firstnames:
                    if name not in names:
                        if verbose:
                            print(" ", name, " (linking)")
                        fromfile = os.path.join(first_indir, name)
                        tofile = os.path.join(outdir, name)
                        os.link(fromfile, tofile)
            else:
                # Must read and rewrite data
                # First we read the ID's from frame 0 and N
                assert 'ID_0.ulm' in firstnames and 'ID_0.ulm' in names
                backend.nfrag = fragments0
                f0_id, _ = backend.read_split(first_indir, "ID")
                backend.nfrag = fragments1
                fn_id, fn_sizes = backend.read_split(indir, "ID")
                for name in firstnames:
                    # Only look at each array, not each file
                    if '_0.' not in name:
                        continue
                    if name not in names:
                        # We need to load this array
                        arrayname = name.split('_')[0]
                        if verbose:
                            print(" Reading", arrayname)
                        backend.nfrag = fragments0
                        f0_data, _ = backend.read_split(first_indir,
                                                        arrayname)
                        # Sort data
                        f0_data[f0_id] = np.array(f0_data)
                        # Unsort with new ordering
                        f0_data = f0_data[fn_id]
                        # Write it
                        if verbose:
                            print(" Writing reshuffled", arrayname)
                        pointer = 0
                        backend.nfrag = fragments1
                        # Cut the array into pieces with the same sizes as
                        # the ID fragments of the selected frame.
                        for i, s in enumerate(fn_sizes):
                            segment = f0_data[pointer:pointer + s]
                            pointer += s
                            backend.write(outdir, f'{arrayname}_{i}',
                                          segment)
    # Finally, write the number of frames
    with open(os.path.join(outbundle, 'frames'), 'w') as fd:
        fd.write(str(len(frames)) + '\n')
142# Helper functions
def read_bundle_info(name):
    """Read global info about a bundle.

    ``name`` is the path of the bundle directory.  The function checks
    that the directory exists, that it holds a ``metadata.json`` file
    describing a version 1 BundleTrajectory, and that the ``frames`` file
    reports at least one frame.

    Returns (metadata, nframes), where metadata is the decoded content of
    ``metadata.json`` and nframes is the integer stored in ``frames``.

    Raises OSError if the directory or metadata is missing, if the metadata
    is in the obsolete pickle format, if the metadata does not describe a
    version 1 BundleTrajectory, or if the bundle is empty.
    """
    if not os.path.isdir(name):
        raise OSError(f"No directory (bundle) named '{name}' found.")

    metaname = os.path.join(name, 'metadata.json')

    if not os.path.isfile(metaname):
        if os.path.isfile(os.path.join(name, 'metadata')):
            raise OSError(
                "Found obsolete metadata in unsecure Pickle format. "
                "Refusing to load.")
        raise OSError("'{}' does not appear to be a BundleTrajectory "
                      "(no {})".format(name, metaname))

    with open(metaname) as fd:
        mdata = json.load(fd)

    # Anything that is not a JSON object with the right format tag is
    # reported uniformly as "not a BundleTrajectory".
    if not isinstance(mdata, dict) or \
            mdata.get('format') != 'BundleTrajectory':
        raise OSError(f"'{name}' does not appear to be a BundleTrajectory")
    # Use .get so that a missing version is reported as an unsupported
    # bundle (OSError) instead of escaping as a bare KeyError.
    version = mdata.get('version')
    if version != 1:
        raise OSError("Cannot manipulate BundleTrajectories with version "
                      "number %s" % (version,))
    with open(os.path.join(name, "frames")) as fd:
        nframes = int(fd.read())
    if nframes == 0:
        raise OSError(f"'{name}' is an empty BundleTrajectory")
    return mdata, nframes
if __name__ == '__main__':
    import sys

    # The input and output bundle names are mandatory; without them just
    # show the usage text from the module docstring.
    if len(sys.argv) < 3:
        print(__doc__)
        sys.exit()
    inname, outname = sys.argv[1:3]

    # Optional positional arguments, in order: start, end, step.
    extra = sys.argv[3:]
    start = int(extra[0]) if len(extra) > 0 else 0
    end: Optional[int] = int(extra[1]) if len(extra) > 1 else None
    step = int(extra[2]) if len(extra) > 2 else 1

    copy_frames(inname, outname, start, end, step, verbose=1)