Coverage for /builds/kinetik161/ase/ase/io/lammpsdata.py: 92.99%
314 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-12-10 11:04 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-12-10 11:04 +0000
1import re
2import warnings
4import numpy as np
6from ase.atoms import Atoms
7from ase.calculators.lammps import Prism, convert
8from ase.data import atomic_masses, atomic_numbers
9from ase.utils import reader, writer
12@reader
13def read_lammps_data(
14 fileobj,
15 Z_of_type: dict = None,
16 sort_by_id: bool = True,
17 read_image_flags: bool = True,
18 units: str = 'metal',
19 atom_style: str = None,
20 style: str = None,
21):
22 """Method which reads a LAMMPS data file.
24 Parameters
25 ----------
26 fileobj : file | str
27 File from which data should be read.
28 Z_of_type : dict[int, int], optional
29 Mapping from LAMMPS atom types (typically starting from 1) to atomic
30 numbers. If None, if there is the "Masses" section, atomic numbers are
31 guessed from the atomic masses. Otherwise, atomic numbers of 1 (H), 2
32 (He), etc. are assigned to atom types of 1, 2, etc. Default is None.
33 sort_by_id : bool, optional
34 Order the particles according to their id. Might be faster to set it
35 False. Default is True.
36 read_image_flags: bool, default True
37 If True, the lattice translation vectors derived from image flags are
38 added to atomic positions.
39 units : str, optional
40 `LAMMPS units <https://docs.lammps.org/units.html>`__. Default is
41 'metal'.
42 atom_style : {'atomic', 'charge', 'full'} etc., optional
43 `LAMMPS atom style <https://docs.lammps.org/atom_style.html>`__.
44 If None, `atom_style` is guessed in the following priority (1) comment
45 after `Atoms` (2) length of fields (valid only `atomic` and `full`).
46 Default is None.
47 """
48 if style is not None:
49 warnings.warn(
50 FutureWarning('"style" is deprecated; please use "atom_style".'),
51 )
52 atom_style = style
53 # begin read_lammps_data
54 file_comment = next(fileobj).rstrip()
56 # default values (https://docs.lammps.org/read_data.html)
57 # in most cases these will be updated below
58 natoms = 0
59 # N_types = 0
60 xlo, xhi = -0.5, 0.5
61 ylo, yhi = -0.5, 0.5
62 zlo, zhi = -0.5, 0.5
63 xy, xz, yz = 0.0, 0.0, 0.0
65 mass_in = {}
66 vel_in = {}
67 bonds_in = []
68 angles_in = []
69 dihedrals_in = []
71 sections = [
72 'Atoms',
73 'Velocities',
74 'Masses',
75 'Charges',
76 'Ellipsoids',
77 'Lines',
78 'Triangles',
79 'Bodies',
80 'Bonds',
81 'Angles',
82 'Dihedrals',
83 'Impropers',
84 'Impropers Pair Coeffs',
85 'PairIJ Coeffs',
86 'Pair Coeffs',
87 'Bond Coeffs',
88 'Angle Coeffs',
89 'Dihedral Coeffs',
90 'Improper Coeffs',
91 'BondBond Coeffs',
92 'BondAngle Coeffs',
93 'MiddleBondTorsion Coeffs',
94 'EndBondTorsion Coeffs',
95 'AngleTorsion Coeffs',
96 'AngleAngleTorsion Coeffs',
97 'BondBond13 Coeffs',
98 'AngleAngle Coeffs',
99 ]
100 header_fields = [
101 'atoms',
102 'bonds',
103 'angles',
104 'dihedrals',
105 'impropers',
106 'atom types',
107 'bond types',
108 'angle types',
109 'dihedral types',
110 'improper types',
111 'extra bond per atom',
112 'extra angle per atom',
113 'extra dihedral per atom',
114 'extra improper per atom',
115 'extra special per atom',
116 'ellipsoids',
117 'lines',
118 'triangles',
119 'bodies',
120 'xlo xhi',
121 'ylo yhi',
122 'zlo zhi',
123 'xy xz yz',
124 ]
125 sections_re = '(' + '|'.join(sections).replace(' ', '\\s+') + ')'
126 header_fields_re = '(' + '|'.join(header_fields).replace(' ', '\\s+') + ')'
128 section = None
129 header = True
130 for line in fileobj:
131 # get string after #; if # does not exist, return ''
132 line_comment = re.sub(r'^.*#|^.*$', '', line).strip()
133 line = re.sub('#.*', '', line).rstrip().lstrip()
134 if re.match('^\\s*$', line): # skip blank lines
135 continue
137 # check for known section names
138 match = re.match(sections_re, line)
139 if match is not None:
140 section = match.group(0).rstrip().lstrip()
141 header = False
142 if section == 'Atoms': # id *
143 # guess `atom_style` from the comment after `Atoms` if exists
144 if atom_style is None and line_comment != '':
145 atom_style = line_comment
146 mol_id_in, type_in, charge_in, pos_in, travel_in = \
147 _read_atoms_section(fileobj, natoms, atom_style)
148 continue
150 if header:
151 field = None
152 val = None
153 match = re.match('(.*)\\s+' + header_fields_re, line)
154 if match is not None:
155 field = match.group(2).lstrip().rstrip()
156 val = match.group(1).lstrip().rstrip()
157 if field is not None and val is not None:
158 if field == 'atoms':
159 natoms = int(val)
160 elif field == 'xlo xhi':
161 (xlo, xhi) = (float(x) for x in val.split())
162 elif field == 'ylo yhi':
163 (ylo, yhi) = (float(x) for x in val.split())
164 elif field == 'zlo zhi':
165 (zlo, zhi) = (float(x) for x in val.split())
166 elif field == 'xy xz yz':
167 (xy, xz, yz) = (float(x) for x in val.split())
169 if section is not None:
170 fields = line.split()
171 if section == 'Velocities': # id vx vy vz
172 atom_id = int(fields[0])
173 vel_in[atom_id] = [float(fields[_]) for _ in (1, 2, 3)]
174 elif section == 'Masses':
175 mass_in[int(fields[0])] = float(fields[1])
176 elif section == 'Bonds': # id type atom1 atom2
177 bonds_in.append([int(fields[_]) for _ in (1, 2, 3)])
178 elif section == 'Angles': # id type atom1 atom2 atom3
179 angles_in.append([int(fields[_]) for _ in (1, 2, 3, 4)])
180 elif section == 'Dihedrals': # id type atom1 atom2 atom3 atom4
181 dihedrals_in.append([int(fields[_]) for _ in (1, 2, 3, 4, 5)])
183 # set cell
184 cell = np.zeros((3, 3))
185 cell[0, 0] = xhi - xlo
186 cell[1, 1] = yhi - ylo
187 cell[2, 2] = zhi - zlo
188 cell[1, 0] = xy
189 cell[2, 0] = xz
190 cell[2, 1] = yz
192 # initialize arrays for per-atom quantities
193 positions = np.zeros((natoms, 3))
194 numbers = np.zeros(natoms, int)
195 ids = np.zeros(natoms, int)
196 types = np.zeros(natoms, int)
197 velocities = np.zeros((natoms, 3)) if len(vel_in) > 0 else None
198 masses = np.zeros(natoms) if len(mass_in) > 0 else None
199 mol_id = np.zeros(natoms, int) if len(mol_id_in) > 0 else None
200 charge = np.zeros(natoms, float) if len(charge_in) > 0 else None
201 travel = np.zeros((natoms, 3), int) if len(travel_in) > 0 else None
202 bonds = [''] * natoms if len(bonds_in) > 0 else None
203 angles = [''] * natoms if len(angles_in) > 0 else None
204 dihedrals = [''] * natoms if len(dihedrals_in) > 0 else None
206 ind_of_id = {}
207 # copy per-atom quantities from read-in values
208 for (i, atom_id) in enumerate(pos_in.keys()):
209 # by id
210 if sort_by_id:
211 ind = atom_id - 1
212 else:
213 ind = i
214 ind_of_id[atom_id] = ind
215 atom_type = type_in[atom_id]
216 positions[ind, :] = pos_in[atom_id]
217 if velocities is not None:
218 velocities[ind, :] = vel_in[atom_id]
219 if travel is not None:
220 travel[ind] = travel_in[atom_id]
221 if mol_id is not None:
222 mol_id[ind] = mol_id_in[atom_id]
223 if charge is not None:
224 charge[ind] = charge_in[atom_id]
225 ids[ind] = atom_id
226 # by type
227 types[ind] = atom_type
228 if Z_of_type is None:
229 numbers[ind] = atom_type
230 else:
231 numbers[ind] = Z_of_type[atom_type]
232 if masses is not None:
233 masses[ind] = mass_in[atom_type]
234 # convert units
235 positions = convert(positions, 'distance', units, 'ASE')
236 cell = convert(cell, 'distance', units, 'ASE')
237 if masses is not None:
238 masses = convert(masses, 'mass', units, 'ASE')
239 if velocities is not None:
240 velocities = convert(velocities, 'velocity', units, 'ASE')
242 # guess atomic numbers from atomic masses
243 # this must be after the above mass-unit conversion
244 if Z_of_type is None and masses is not None:
245 numbers = _masses2numbers(masses)
247 # create ase.Atoms
248 atoms = Atoms(
249 positions=positions,
250 numbers=numbers,
251 masses=masses,
252 cell=cell,
253 pbc=[True, True, True],
254 )
256 # add lattice translation vectors
257 if read_image_flags and travel is not None:
258 scaled_positions = atoms.get_scaled_positions(wrap=False) + travel
259 atoms.set_scaled_positions(scaled_positions)
261 # set velocities (can't do it via constructor)
262 if velocities is not None:
263 atoms.set_velocities(velocities)
265 atoms.arrays['id'] = ids
266 atoms.arrays['type'] = types
267 if mol_id is not None:
268 atoms.arrays['mol-id'] = mol_id
269 if charge is not None:
270 atoms.arrays['initial_charges'] = charge
271 atoms.arrays['mmcharges'] = charge.copy()
273 if bonds is not None:
274 for (atom_type, at1, at2) in bonds_in:
275 i_a1 = ind_of_id[at1]
276 i_a2 = ind_of_id[at2]
277 if len(bonds[i_a1]) > 0:
278 bonds[i_a1] += ','
279 bonds[i_a1] += f'{i_a2:d}({atom_type:d})'
280 for i, bond in enumerate(bonds):
281 if len(bond) == 0:
282 bonds[i] = '_'
283 atoms.arrays['bonds'] = np.array(bonds)
285 if angles is not None:
286 for (atom_type, at1, at2, at3) in angles_in:
287 i_a1 = ind_of_id[at1]
288 i_a2 = ind_of_id[at2]
289 i_a3 = ind_of_id[at3]
290 if len(angles[i_a2]) > 0:
291 angles[i_a2] += ','
292 angles[i_a2] += f'{i_a1:d}-{i_a3:d}({atom_type:d})'
293 for i, angle in enumerate(angles):
294 if len(angle) == 0:
295 angles[i] = '_'
296 atoms.arrays['angles'] = np.array(angles)
298 if dihedrals is not None:
299 for (atom_type, at1, at2, at3, at4) in dihedrals_in:
300 i_a1 = ind_of_id[at1]
301 i_a2 = ind_of_id[at2]
302 i_a3 = ind_of_id[at3]
303 i_a4 = ind_of_id[at4]
304 if len(dihedrals[i_a1]) > 0:
305 dihedrals[i_a1] += ','
306 dihedrals[i_a1] += f'{i_a2:d}-{i_a3:d}-{i_a4:d}({atom_type:d})'
307 for i, dihedral in enumerate(dihedrals):
308 if len(dihedral) == 0:
309 dihedrals[i] = '_'
310 atoms.arrays['dihedrals'] = np.array(dihedrals)
312 atoms.info['comment'] = file_comment
314 return atoms
317def _read_atoms_section(fileobj, natoms: int, style: str = None):
318 type_in = {}
319 mol_id_in = {}
320 charge_in = {}
321 pos_in = {}
322 travel_in = {}
323 next(fileobj) # skip blank line just after `Atoms`
324 for _ in range(natoms):
325 line = next(fileobj)
326 line = re.sub('#.*', '', line).rstrip().lstrip()
327 fields = line.split()
328 if style is None:
329 style = _guess_atom_style(fields)
330 atom_id = int(fields[0])
331 if style == 'full' and len(fields) in (7, 10):
332 # id mol-id type q x y z [tx ty tz]
333 type_in[atom_id] = int(fields[2])
334 pos_in[atom_id] = tuple(float(fields[_]) for _ in (4, 5, 6))
335 mol_id_in[atom_id] = int(fields[1])
336 charge_in[atom_id] = float(fields[3])
337 if len(fields) == 10:
338 travel_in[atom_id] = tuple(int(fields[_]) for _ in (7, 8, 9))
339 elif style == 'atomic' and len(fields) in (5, 8):
340 # id type x y z [tx ty tz]
341 type_in[atom_id] = int(fields[1])
342 pos_in[atom_id] = tuple(float(fields[_]) for _ in (2, 3, 4))
343 if len(fields) == 8:
344 travel_in[atom_id] = tuple(int(fields[_]) for _ in (5, 6, 7))
345 elif style in ('angle', 'bond', 'molecular') and len(fields) in (6, 9):
346 # id mol-id type x y z [tx ty tz]
347 type_in[atom_id] = int(fields[2])
348 pos_in[atom_id] = tuple(float(fields[_]) for _ in (3, 4, 5))
349 mol_id_in[atom_id] = int(fields[1])
350 if len(fields) == 9:
351 travel_in[atom_id] = tuple(int(fields[_]) for _ in (6, 7, 8))
352 elif style == 'charge' and len(fields) in (6, 9):
353 # id type q x y z [tx ty tz]
354 type_in[atom_id] = int(fields[1])
355 pos_in[atom_id] = tuple(float(fields[_]) for _ in (3, 4, 5))
356 charge_in[atom_id] = float(fields[2])
357 if len(fields) == 9:
358 travel_in[atom_id] = tuple(int(fields[_]) for _ in (6, 7, 8))
359 else:
360 raise RuntimeError(
361 f'Style "{style}" not supported or invalid. '
362 f'Number of fields: {len(fields)}'
363 )
364 return mol_id_in, type_in, charge_in, pos_in, travel_in
367def _guess_atom_style(fields):
368 """Guess `atom_sytle` from the length of fields."""
369 if len(fields) in (5, 8):
370 return 'atomic'
371 if len(fields) in (7, 10):
372 return 'full'
373 raise ValueError('atom_style cannot be guessed from len(fields)')
376def _masses2numbers(masses):
377 """Guess atomic numbers from atomic masses."""
378 return np.argmin(np.abs(atomic_masses - masses[:, None]), axis=1)
381@writer
382def write_lammps_data(
383 fd,
384 atoms: Atoms,
385 *,
386 specorder: list = None,
387 reduce_cell: bool = False,
388 force_skew: bool = False,
389 prismobj: Prism = None,
390 write_image_flags: bool = False,
391 masses: bool = False,
392 velocities: bool = False,
393 units: str = 'metal',
394 atom_style: str = 'atomic',
395):
396 """Write atomic structure data to a LAMMPS data file.
398 Parameters
399 ----------
400 fd : file|str
401 File to which the output will be written.
402 atoms : Atoms
403 Atoms to be written.
404 specorder : list[str], optional
405 Chemical symbols in the order of LAMMPS atom types, by default None
406 force_skew : bool, optional
407 Force to write the cell as a
408 `triclinic <https://docs.lammps.org/Howto_triclinic.html>`__ box,
409 by default False
410 reduce_cell : bool, optional
411 Whether the cell shape is reduced or not, by default False
412 prismobj : Prism|None, optional
413 Prism, by default None
414 write_image_flags : bool, default False
415 If True, the image flags, i.e., in which images of the periodic
416 simulation box the atoms are, are written.
417 masses : bool, optional
418 Whether the atomic masses are written or not, by default False
419 velocities : bool, optional
420 Whether the atomic velocities are written or not, by default False
421 units : str, optional
422 `LAMMPS units <https://docs.lammps.org/units.html>`__,
423 by default 'metal'
424 atom_style : {'atomic', 'charge', 'full'}, optional
425 `LAMMPS atom style <https://docs.lammps.org/atom_style.html>`__,
426 by default 'atomic'
427 """
429 # FIXME: We should add a check here that the encoding of the file object
430 # is actually ascii once the 'encoding' attribute of IOFormat objects
431 # starts functioning in implementation (currently it doesn't do
432 # anything).
434 if isinstance(atoms, list):
435 if len(atoms) > 1:
436 raise ValueError(
437 'Can only write one configuration to a lammps data file!'
438 )
439 atoms = atoms[0]
441 fd.write('(written by ASE)\n\n')
443 symbols = atoms.get_chemical_symbols()
444 n_atoms = len(symbols)
445 fd.write(f'{n_atoms} atoms\n')
447 if specorder is None:
448 # This way it is assured that LAMMPS atom types are always
449 # assigned predictably according to the alphabetic order
450 species = sorted(set(symbols))
451 else:
452 # To index elements in the LAMMPS data file
453 # (indices must correspond to order in the potential file)
454 species = specorder
455 n_atom_types = len(species)
456 fd.write(f'{n_atom_types} atom types\n\n')
458 if prismobj is None:
459 prismobj = Prism(atoms.get_cell(), reduce_cell=reduce_cell)
461 # Get cell parameters and convert from ASE units to LAMMPS units
462 xhi, yhi, zhi, xy, xz, yz = convert(
463 prismobj.get_lammps_prism(), 'distance', 'ASE', units)
465 fd.write(f'0.0 {xhi:23.17g} xlo xhi\n')
466 fd.write(f'0.0 {yhi:23.17g} ylo yhi\n')
467 fd.write(f'0.0 {zhi:23.17g} zlo zhi\n')
469 if force_skew or prismobj.is_skewed():
470 fd.write(f'{xy:23.17g} {xz:23.17g} {yz:23.17g} xy xz yz\n')
471 fd.write('\n')
473 if masses:
474 _write_masses(fd, atoms, species, units)
476 # Write (unwrapped) atomic positions. If wrapping of atoms back into the
477 # cell along periodic directions is desired, this should be done manually
478 # on the Atoms object itself beforehand.
479 fd.write(f'Atoms # {atom_style}\n\n')
481 if write_image_flags:
482 scaled_positions = atoms.get_scaled_positions(wrap=False)
483 image_flags = np.floor(scaled_positions).astype(int)
485 # when `write_image_flags` is True, the positions are wrapped while the
486 # unwrapped positions can be recovered from the image flags
487 pos = prismobj.vector_to_lammps(
488 atoms.get_positions(),
489 wrap=write_image_flags,
490 )
492 if atom_style == 'atomic':
493 # Convert position from ASE units to LAMMPS units
494 pos = convert(pos, 'distance', 'ASE', units)
495 for i, r in enumerate(pos):
496 s = species.index(symbols[i]) + 1
497 line = (
498 f'{i+1:>6} {s:>3}'
499 f' {r[0]:23.17g} {r[1]:23.17g} {r[2]:23.17g}'
500 )
501 if write_image_flags:
502 img = image_flags[i]
503 line += f' {img[0]:6d} {img[1]:6d} {img[2]:6d}'
504 line += '\n'
505 fd.write(line)
506 elif atom_style == 'charge':
507 charges = atoms.get_initial_charges()
508 # Convert position and charge from ASE units to LAMMPS units
509 pos = convert(pos, 'distance', 'ASE', units)
510 charges = convert(charges, 'charge', 'ASE', units)
511 for i, (q, r) in enumerate(zip(charges, pos)):
512 s = species.index(symbols[i]) + 1
513 line = (
514 f'{i+1:>6} {s:>3} {q:>5}'
515 f' {r[0]:23.17g} {r[1]:23.17g} {r[2]:23.17g}'
516 )
517 if write_image_flags:
518 img = image_flags[i]
519 line += f' {img[0]:6d} {img[1]:6d} {img[2]:6d}'
520 line += '\n'
521 fd.write(line)
522 elif atom_style == 'full':
523 charges = atoms.get_initial_charges()
524 # The label 'mol-id' has apparenlty been introduced in read earlier,
525 # but so far not implemented here. Wouldn't a 'underscored' label
526 # be better, i.e. 'mol_id' or 'molecule_id'?
527 if atoms.has('mol-id'):
528 molecules = atoms.get_array('mol-id')
529 if not np.issubdtype(molecules.dtype, np.integer):
530 raise TypeError(
531 f'If "atoms" object has "mol-id" array, then '
532 f'mol-id dtype must be subtype of np.integer, and '
533 f'not {str(molecules.dtype):s}.')
534 if (len(molecules) != len(atoms)) or (molecules.ndim != 1):
535 raise TypeError(
536 'If "atoms" object has "mol-id" array, then '
537 'each atom must have exactly one mol-id.')
538 else:
539 # Assigning each atom to a distinct molecule id would seem
540 # preferableabove assigning all atoms to a single molecule
541 # id per default, as done within ase <= v 3.19.1. I.e.,
542 # molecules = np.arange(start=1, stop=len(atoms)+1,
543 # step=1, dtype=int) However, according to LAMMPS default
544 # behavior,
545 molecules = np.zeros(len(atoms), dtype=int)
546 # which is what happens if one creates new atoms within LAMMPS
547 # without explicitly taking care of the molecule id.
548 # Quote from docs at https://lammps.sandia.gov/doc/read_data.html:
549 # The molecule ID is a 2nd identifier attached to an atom.
550 # Normally, it is a number from 1 to N, identifying which
551 # molecule the atom belongs to. It can be 0 if it is a
552 # non-bonded atom or if you don't care to keep track of molecule
553 # assignments.
555 # Convert position and charge from ASE units to LAMMPS units
556 pos = convert(pos, 'distance', 'ASE', units)
557 charges = convert(charges, 'charge', 'ASE', units)
558 for i, (m, q, r) in enumerate(zip(molecules, charges, pos)):
559 s = species.index(symbols[i]) + 1
560 line = (
561 f'{i+1:>6} {m:>3} {s:>3} {q:>5}'
562 f' {r[0]:23.17g} {r[1]:23.17g} {r[2]:23.17g}'
563 )
564 if write_image_flags:
565 img = image_flags[i]
566 line += f' {img[0]:6d} {img[1]:6d} {img[2]:6d}'
567 line += '\n'
568 fd.write(line)
569 else:
570 raise ValueError(atom_style)
572 if velocities and atoms.get_velocities() is not None:
573 fd.write('\n\nVelocities\n\n')
574 vel = prismobj.vector_to_lammps(atoms.get_velocities())
575 # Convert velocity from ASE units to LAMMPS units
576 vel = convert(vel, 'velocity', 'ASE', units)
577 for i, v in enumerate(vel):
578 fd.write(f'{i+1:>6} {v[0]:23.17g} {v[1]:23.17g} {v[2]:23.17g}\n')
580 fd.flush()
583def _write_masses(fd, atoms: Atoms, species: list, units: str):
584 symbols_indices = atoms.symbols.indices()
585 fd.write('Masses\n\n')
586 for i, s in enumerate(species):
587 if s in symbols_indices:
588 # Find the first atom of the element `s` and extract its mass
589 # Cover by `float` to make a new object for safety
590 mass = float(atoms[symbols_indices[s][0]].mass)
591 else:
592 # Fetch from ASE data if the element `s` is not in the system
593 mass = atomic_masses[atomic_numbers[s]]
594 # Convert mass from ASE units to LAMMPS units
595 mass = convert(mass, 'mass', 'ASE', units)
596 atom_type = i + 1
597 fd.write(f'{atom_type:>6} {mass:23.17g} # {s}\n')
598 fd.write('\n')