Coverage for /builds/kinetik161/ase/ase/io/lammpsrun.py: 88.10%
210 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-12-10 11:04 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-12-10 11:04 +0000
1import gzip
2import struct
3from collections import deque
4from os.path import splitext
6import numpy as np
8from ase.atoms import Atoms
9from ase.calculators.lammps import convert
10from ase.calculators.singlepoint import SinglePointCalculator
11from ase.parallel import paropen
12from ase.quaternions import Quaternions
15def read_lammps_dump(infileobj, **kwargs):
16 """Method which reads a LAMMPS dump file.
18 LAMMPS chooses output method depending on the given suffix:
19 - .bin : binary file
20 - .gz : output piped through gzip
21 - .mpiio: using mpiio (should be like cleartext,
22 with different ordering)
23 - else : normal clear-text format
25 :param infileobj: string to file, opened file or file-like stream
27 """
28 # !TODO: add support for lammps-regex naming schemes (output per
29 # processor and timestep wildcards)
31 opened = False
32 if isinstance(infileobj, str):
33 opened = True
34 suffix = splitext(infileobj)[-1]
35 if suffix == ".bin":
36 fileobj = paropen(infileobj, "rb")
37 elif suffix == ".gz":
38 # !TODO: save for parallel execution?
39 fileobj = gzip.open(infileobj, "rb")
40 else:
41 fileobj = paropen(infileobj)
42 else:
43 suffix = splitext(infileobj.name)[-1]
44 fileobj = infileobj
46 if suffix == ".bin":
47 out = read_lammps_dump_binary(fileobj, **kwargs)
48 if opened:
49 fileobj.close()
50 return out
52 out = read_lammps_dump_text(fileobj, **kwargs)
54 if opened:
55 fileobj.close()
57 return out
60def lammps_data_to_ase_atoms(
61 data,
62 colnames,
63 cell,
64 celldisp,
65 pbc=False,
66 atomsobj=Atoms,
67 order=True,
68 specorder=None,
69 prismobj=None,
70 units="metal",
71):
72 """Extract positions and other per-atom parameters and create Atoms
74 :param data: per atom data
75 :param colnames: index for data
76 :param cell: cell dimensions
77 :param celldisp: origin shift
78 :param pbc: periodic boundaries
79 :param atomsobj: function to create ase-Atoms object
80 :param order: sort atoms by id. Might be faster to turn off.
81 Disregarded in case `id` column is not given in file.
82 :param specorder: list of species to map lammps types to ase-species
83 (usually .dump files to not contain type to species mapping)
84 :param prismobj: Coordinate transformation between lammps and ase
85 :type prismobj: Prism
86 :param units: lammps units for unit transformation between lammps and ase
87 :returns: Atoms object
88 :rtype: Atoms
90 """
91 if len(data.shape) == 1:
92 data = data[np.newaxis, :]
94 # read IDs if given and order if needed
95 if "id" in colnames:
96 ids = data[:, colnames.index("id")].astype(int)
97 if order:
98 sort_order = np.argsort(ids)
99 data = data[sort_order, :]
101 # determine the elements
102 if "element" in colnames:
103 # priority to elements written in file
104 elements = data[:, colnames.index("element")]
105 elif "type" in colnames:
106 # fall back to `types` otherwise
107 elements = data[:, colnames.index("type")].astype(int)
109 # reconstruct types from given specorder
110 if specorder:
111 elements = [specorder[t - 1] for t in elements]
112 else:
113 # todo: what if specorder give but no types?
114 # in principle the masses could work for atoms, but that needs
115 # lots of cases and new code I guess
116 raise ValueError("Cannot determine atom types form LAMMPS dump file")
118 def get_quantity(labels, quantity=None):
119 try:
120 cols = [colnames.index(label) for label in labels]
121 if quantity:
122 return convert(data[:, cols].astype(float), quantity,
123 units, "ASE")
125 return data[:, cols].astype(float)
126 except ValueError:
127 return None
129 # Positions
130 positions = None
131 scaled_positions = None
132 if "x" in colnames:
133 # doc: x, y, z = unscaled atom coordinates
134 positions = get_quantity(["x", "y", "z"], "distance")
135 elif "xs" in colnames:
136 # doc: xs,ys,zs = scaled atom coordinates
137 scaled_positions = get_quantity(["xs", "ys", "zs"])
138 elif "xu" in colnames:
139 # doc: xu,yu,zu = unwrapped atom coordinates
140 positions = get_quantity(["xu", "yu", "zu"], "distance")
141 elif "xsu" in colnames:
142 # xsu,ysu,zsu = scaled unwrapped atom coordinates
143 scaled_positions = get_quantity(["xsu", "ysu", "zsu"])
144 else:
145 raise ValueError("No atomic positions found in LAMMPS output")
147 velocities = get_quantity(["vx", "vy", "vz"], "velocity")
148 charges = get_quantity(["q"], "charge")
149 forces = get_quantity(["fx", "fy", "fz"], "force")
150 # !TODO: how need quaternions be converted?
151 quaternions = get_quantity(["c_q[1]", "c_q[2]", "c_q[3]", "c_q[4]"])
153 # convert cell
154 cell = convert(cell, "distance", units, "ASE")
155 celldisp = convert(celldisp, "distance", units, "ASE")
156 if prismobj:
157 celldisp = prismobj.vector_to_ase(celldisp)
158 cell = prismobj.update_cell(cell)
160 if quaternions is not None:
161 out_atoms = Quaternions(
162 symbols=elements,
163 positions=positions,
164 cell=cell,
165 celldisp=celldisp,
166 pbc=pbc,
167 quaternions=quaternions,
168 )
169 elif positions is not None:
170 # reverse coordinations transform to lammps system
171 # (for all vectors = pos, vel, force)
172 if prismobj:
173 positions = prismobj.vector_to_ase(positions, wrap=True)
175 out_atoms = atomsobj(
176 symbols=elements,
177 positions=positions,
178 pbc=pbc,
179 celldisp=celldisp,
180 cell=cell
181 )
182 elif scaled_positions is not None:
183 out_atoms = atomsobj(
184 symbols=elements,
185 scaled_positions=scaled_positions,
186 pbc=pbc,
187 celldisp=celldisp,
188 cell=cell,
189 )
191 if velocities is not None:
192 if prismobj:
193 velocities = prismobj.vector_to_ase(velocities)
194 out_atoms.set_velocities(velocities)
195 if charges is not None:
196 out_atoms.set_initial_charges([charge[0] for charge in charges])
197 if forces is not None:
198 if prismobj:
199 forces = prismobj.vector_to_ase(forces)
200 # !TODO: use another calculator if available (or move forces
201 # to atoms.property) (other problem: synchronizing
202 # parallel runs)
203 calculator = SinglePointCalculator(out_atoms, energy=0.0,
204 forces=forces)
205 out_atoms.calc = calculator
207 # process the extra columns of fixes, variables and computes
208 # that can be dumped, add as additional arrays to atoms object
209 for colname in colnames:
210 # determine if it is a compute or fix (but not the quaternian)
211 if (colname.startswith('f_') or colname.startswith('v_') or
212 (colname.startswith('c_') and not colname.startswith('c_q['))):
213 out_atoms.new_array(colname, get_quantity([colname]),
214 dtype='float')
216 return out_atoms
219def construct_cell(diagdisp, offdiag):
220 """Help function to create an ASE-cell with displacement vector from
221 the lammps coordination system parameters.
223 :param diagdisp: cell dimension convoluted with the displacement vector
224 :param offdiag: off-diagonal cell elements
225 :returns: cell and cell displacement vector
226 :rtype: tuple
227 """
228 xlo, xhi, ylo, yhi, zlo, zhi = diagdisp
229 xy, xz, yz = offdiag
231 # create ase-cell from lammps-box
232 xhilo = (xhi - xlo) - abs(xy) - abs(xz)
233 yhilo = (yhi - ylo) - abs(yz)
234 zhilo = zhi - zlo
235 celldispx = xlo - min(0, xy) - min(0, xz)
236 celldispy = ylo - min(0, yz)
237 celldispz = zlo
238 cell = np.array([[xhilo, 0, 0], [xy, yhilo, 0], [xz, yz, zhilo]])
239 celldisp = np.array([celldispx, celldispy, celldispz])
241 return cell, celldisp
244def get_max_index(index):
245 if np.isscalar(index):
246 return index
247 elif isinstance(index, slice):
248 return index.stop if (index.stop is not None) else float("inf")
251def read_lammps_dump_text(fileobj, index=-1, **kwargs):
252 """Process cleartext lammps dumpfiles
254 :param fileobj: filestream providing the trajectory data
255 :param index: integer or slice object (default: get the last timestep)
256 :returns: list of Atoms objects
257 :rtype: list
258 """
259 # Load all dumped timesteps into memory simultaneously
260 lines = deque(fileobj.readlines())
261 index_end = get_max_index(index)
263 n_atoms = 0
264 images = []
266 # avoid references before assignment in case of incorrect file structure
267 cell, celldisp, pbc = None, None, False
269 while len(lines) > n_atoms:
270 line = lines.popleft()
272 if "ITEM: TIMESTEP" in line:
273 n_atoms = 0
274 line = lines.popleft()
275 # !TODO: pyflakes complains about this line -> do something
276 # ntimestep = int(line.split()[0]) # NOQA
278 if "ITEM: NUMBER OF ATOMS" in line:
279 line = lines.popleft()
280 n_atoms = int(line.split()[0])
282 if "ITEM: BOX BOUNDS" in line:
283 # save labels behind "ITEM: BOX BOUNDS" in triclinic case
284 # (>=lammps-7Jul09)
285 tilt_items = line.split()[3:]
286 celldatarows = [lines.popleft() for _ in range(3)]
287 celldata = np.loadtxt(celldatarows)
288 diagdisp = celldata[:, :2].reshape(6, 1).flatten()
290 # determine cell tilt (triclinic case!)
291 if len(celldata[0]) > 2:
292 # for >=lammps-7Jul09 use labels behind "ITEM: BOX BOUNDS"
293 # to assign tilt (vector) elements ...
294 offdiag = celldata[:, 2]
295 # ... otherwise assume default order in 3rd column
296 # (if the latter was present)
297 if len(tilt_items) >= 3:
298 sort_index = [tilt_items.index(i)
299 for i in ["xy", "xz", "yz"]]
300 offdiag = offdiag[sort_index]
301 else:
302 offdiag = (0.0,) * 3
304 cell, celldisp = construct_cell(diagdisp, offdiag)
306 # Handle pbc conditions
307 if len(tilt_items) == 3:
308 pbc_items = tilt_items
309 elif len(tilt_items) > 3:
310 pbc_items = tilt_items[3:6]
311 else:
312 pbc_items = ["f", "f", "f"]
313 pbc = ["p" in d.lower() for d in pbc_items]
315 if "ITEM: ATOMS" in line:
316 colnames = line.split()[2:]
317 datarows = [lines.popleft() for _ in range(n_atoms)]
318 data = np.loadtxt(datarows, dtype=str)
319 out_atoms = lammps_data_to_ase_atoms(
320 data=data,
321 colnames=colnames,
322 cell=cell,
323 celldisp=celldisp,
324 atomsobj=Atoms,
325 pbc=pbc,
326 **kwargs
327 )
328 images.append(out_atoms)
330 if len(images) > index_end >= 0:
331 break
333 return images[index]
336def read_lammps_dump_binary(
337 fileobj, index=-1, colnames=None, intformat="SMALLBIG", **kwargs
338):
339 """Read binary dump-files (after binary2txt.cpp from lammps/tools)
341 :param fileobj: file-stream containing the binary lammps data
342 :param index: integer or slice object (default: get the last timestep)
343 :param colnames: data is columns and identified by a header
344 :param intformat: lammps support different integer size. Parameter set \
345 at compile-time and can unfortunately not derived from data file
346 :returns: list of Atoms-objects
347 :rtype: list
348 """
349 # depending on the chosen compilation flag lammps uses either normal
350 # integers or long long for its id or timestep numbering
351 # !TODO: tags are cast to double -> missing/double ids (add check?)
352 tagformat, bigformat = dict(
353 SMALLSMALL=("i", "i"), SMALLBIG=("i", "q"), BIGBIG=("q", "q")
354 )[intformat]
356 index_end = get_max_index(index)
358 # Standard columns layout from lammpsrun
359 if not colnames:
360 colnames = ["id", "type", "x", "y", "z",
361 "vx", "vy", "vz", "fx", "fy", "fz"]
363 images = []
365 # wrap struct.unpack to raise EOFError
366 def read_variables(string):
367 obj_len = struct.calcsize(string)
368 data_obj = fileobj.read(obj_len)
369 if obj_len != len(data_obj):
370 raise EOFError
371 return struct.unpack(string, data_obj)
373 while True:
374 try:
375 # Assume that the binary dump file is in the old (pre-29Oct2020)
376 # format
377 magic_string = None
379 # read header
380 ntimestep, = read_variables("=" + bigformat)
382 # In the new LAMMPS binary dump format (version 29Oct2020 and
383 # onward), a negative timestep is used to indicate that the next
384 # few bytes will contain certain metadata
385 if ntimestep < 0:
386 # First bigint was actually encoding the negative of the format
387 # name string length (we call this 'magic_string' to
388 magic_string_len = -ntimestep
390 # The next `magic_string_len` bytes will hold a string
391 # indicating the format of the dump file
392 magic_string = b''.join(read_variables(
393 "=" + str(magic_string_len) + "c"))
395 # Read endianness (integer). For now, we'll disregard the value
396 # and simply use the host machine's endianness (via '='
397 # character used with struct.calcsize).
398 #
399 # TODO: Use the endianness of the dump file in subsequent
400 # read_variables rather than just assuming it will match
401 # that of the host
402 endian, = read_variables("=i")
404 # Read revision number (integer)
405 revision, = read_variables("=i")
407 # Finally, read the actual timestep (bigint)
408 ntimestep, = read_variables("=" + bigformat)
410 n_atoms, triclinic = read_variables("=" + bigformat + "i")
411 boundary = read_variables("=6i")
412 diagdisp = read_variables("=6d")
413 if triclinic != 0:
414 offdiag = read_variables("=3d")
415 else:
416 offdiag = (0.0,) * 3
417 size_one, = read_variables("=i")
419 if len(colnames) != size_one:
420 raise ValueError("Provided columns do not match binary file")
422 if magic_string and revision > 1:
423 # New binary dump format includes units string,
424 # columns string, and time
425 units_str_len, = read_variables("=i")
427 if units_str_len > 0:
428 # Read lammps units style
429 _ = b''.join(
430 read_variables("=" + str(units_str_len) + "c"))
432 flag, = read_variables("=c")
433 if flag != b'\x00':
434 # Flag was non-empty string
435 time, = read_variables("=d")
437 # Length of column string
438 columns_str_len, = read_variables("=i")
440 # Read column string (e.g., "id type x y z vx vy vz fx fy fz")
441 _ = b''.join(read_variables("=" + str(columns_str_len) + "c"))
443 nchunk, = read_variables("=i")
445 # lammps cells/boxes can have different boundary conditions on each
446 # sides (makes mainly sense for different non-periodic conditions
447 # (e.g. [f]ixed and [s]hrink for a irradiation simulation))
448 # periodic case: b 0 = 'p'
449 # non-peridic cases 1: 'f', 2 : 's', 3: 'm'
450 pbc = np.sum(np.array(boundary).reshape((3, 2)), axis=1) == 0
452 cell, celldisp = construct_cell(diagdisp, offdiag)
454 data = []
455 for _ in range(nchunk):
456 # number-of-data-entries
457 n_data, = read_variables("=i")
458 # retrieve per atom data
459 data += read_variables("=" + str(n_data) + "d")
460 data = np.array(data).reshape((-1, size_one))
462 # map data-chunk to ase atoms
463 out_atoms = lammps_data_to_ase_atoms(
464 data=data,
465 colnames=colnames,
466 cell=cell,
467 celldisp=celldisp,
468 pbc=pbc,
469 **kwargs
470 )
472 images.append(out_atoms)
474 # stop if requested index has been found
475 if len(images) > index_end >= 0:
476 break
478 except EOFError:
479 break
481 return images[index]