Coverage for /builds/kinetik161/ase/ase/io/lammpsrun.py: 88.10%

1import gzip

2import struct

3from collections import deque

4from os.path import splitext

6import numpy as np

8from ase.atoms import Atoms

9from ase.calculators.lammps import convert

10from ase.calculators.singlepoint import SinglePointCalculator

11from ase.parallel import paropen

12from ase.quaternions import Quaternions

15def read_lammps_dump(infileobj, **kwargs):

16 """Method which reads a LAMMPS dump file.

18 LAMMPS chooses output method depending on the given suffix:

19 - .bin : binary file

20 - .gz : output piped through gzip

21 - .mpiio: using mpiio (should be like cleartext,

22 with different ordering)

23 - else : normal clear-text format

25 :param infileobj: string to file, opened file or file-like stream

27 """

28 # !TODO: add support for lammps-regex naming schemes (output per

29 # processor and timestep wildcards)

31 opened = False

32 if isinstance(infileobj, str):

33 opened = True

34 suffix = splitext(infileobj)[-1]

35 if suffix == ".bin":

36 fileobj = paropen(infileobj, "rb")

37 elif suffix == ".gz":

38 # !TODO: save for parallel execution?

39 fileobj = gzip.open(infileobj, "rb")

40 else:

41 fileobj = paropen(infileobj)

42 else:

43 suffix = splitext(infileobj.name)[-1]

44 fileobj = infileobj

46 if suffix == ".bin":

47 out = read_lammps_dump_binary(fileobj, **kwargs)

48 if opened:

49 fileobj.close()

50 return out

52 out = read_lammps_dump_text(fileobj, **kwargs)

54 if opened:

55 fileobj.close()

57 return out

60def lammps_data_to_ase_atoms(

61 data,

62 colnames,

63 cell,

64 celldisp,

65 pbc=False,

66 atomsobj=Atoms,

67 order=True,

68 specorder=None,

69 prismobj=None,

70 units="metal",

71):

72 """Extract positions and other per-atom parameters and create Atoms

74 :param data: per atom data

75 :param colnames: index for data

76 :param cell: cell dimensions

77 :param celldisp: origin shift

78 :param pbc: periodic boundaries

79 :param atomsobj: function to create ase-Atoms object

80 :param order: sort atoms by id. Might be faster to turn off.

81 Disregarded in case `id` column is not given in file.

82 :param specorder: list of species to map lammps types to ase-species

83 (usually .dump files to not contain type to species mapping)

84 :param prismobj: Coordinate transformation between lammps and ase

85 :type prismobj: Prism

86 :param units: lammps units for unit transformation between lammps and ase

87 :returns: Atoms object

88 :rtype: Atoms

90 """

91 if len(data.shape) == 1:

92 data = data[np.newaxis, :]

94 # read IDs if given and order if needed

95 if "id" in colnames:

96 ids = data[:, colnames.index("id")].astype(int)

97 if order:

98 sort_order = np.argsort(ids)

99 data = data[sort_order, :]

100

101 # determine the elements

102 if "element" in colnames:

103 # priority to elements written in file

104 elements = data[:, colnames.index("element")]

105 elif "type" in colnames:

106 # fall back to `types` otherwise

107 elements = data[:, colnames.index("type")].astype(int)

108

109 # reconstruct types from given specorder

110 if specorder:

111 elements = [specorder[t - 1] for t in elements]

112 else:

113 # todo: what if specorder give but no types?

114 # in principle the masses could work for atoms, but that needs

115 # lots of cases and new code I guess

116 raise ValueError("Cannot determine atom types form LAMMPS dump file")

117

118 def get_quantity(labels, quantity=None):

119 try:

120 cols = [colnames.index(label) for label in labels]

121 if quantity:

122 return convert(data[:, cols].astype(float), quantity,

123 units, "ASE")

124

125 return data[:, cols].astype(float)

126 except ValueError:

127 return None

128

129 # Positions

130 positions = None

131 scaled_positions = None

132 if "x" in colnames:

133 # doc: x, y, z = unscaled atom coordinates

134 positions = get_quantity(["x", "y", "z"], "distance")

135 elif "xs" in colnames:

136 # doc: xs,ys,zs = scaled atom coordinates

137 scaled_positions = get_quantity(["xs", "ys", "zs"])

138 elif "xu" in colnames:

139 # doc: xu,yu,zu = unwrapped atom coordinates

140 positions = get_quantity(["xu", "yu", "zu"], "distance")

141 elif "xsu" in colnames:

142 # xsu,ysu,zsu = scaled unwrapped atom coordinates

143 scaled_positions = get_quantity(["xsu", "ysu", "zsu"])

144 else:

145 raise ValueError("No atomic positions found in LAMMPS output")

146

147 velocities = get_quantity(["vx", "vy", "vz"], "velocity")

148 charges = get_quantity(["q"], "charge")

149 forces = get_quantity(["fx", "fy", "fz"], "force")

150 # !TODO: how need quaternions be converted?

151 quaternions = get_quantity(["c_q[1]", "c_q[2]", "c_q[3]", "c_q[4]"])

152

153 # convert cell

154 cell = convert(cell, "distance", units, "ASE")

155 celldisp = convert(celldisp, "distance", units, "ASE")

156 if prismobj:

157 celldisp = prismobj.vector_to_ase(celldisp)

158 cell = prismobj.update_cell(cell)

159

160 if quaternions is not None:

161 out_atoms = Quaternions(

162 symbols=elements,

163 positions=positions,

164 cell=cell,

165 celldisp=celldisp,

166 pbc=pbc,

167 quaternions=quaternions,

168 )

169 elif positions is not None:

170 # reverse coordinations transform to lammps system

171 # (for all vectors = pos, vel, force)

172 if prismobj:

173 positions = prismobj.vector_to_ase(positions, wrap=True)

174

175 out_atoms = atomsobj(

176 symbols=elements,

177 positions=positions,

178 pbc=pbc,

179 celldisp=celldisp,

180 cell=cell

181 )

182 elif scaled_positions is not None:

183 out_atoms = atomsobj(

184 symbols=elements,

185 scaled_positions=scaled_positions,

186 pbc=pbc,

187 celldisp=celldisp,

188 cell=cell,

189 )

190

191 if velocities is not None:

192 if prismobj:

193 velocities = prismobj.vector_to_ase(velocities)

194 out_atoms.set_velocities(velocities)

195 if charges is not None:

196 out_atoms.set_initial_charges([charge[0] for charge in charges])

197 if forces is not None:

198 if prismobj:

199 forces = prismobj.vector_to_ase(forces)

200 # !TODO: use another calculator if available (or move forces

201 # to atoms.property) (other problem: synchronizing

202 # parallel runs)

203 calculator = SinglePointCalculator(out_atoms, energy=0.0,

204 forces=forces)

205 out_atoms.calc = calculator

206

207 # process the extra columns of fixes, variables and computes

208 # that can be dumped, add as additional arrays to atoms object

209 for colname in colnames:

210 # determine if it is a compute or fix (but not the quaternian)

211 if (colname.startswith('f_') or colname.startswith('v_') or

212 (colname.startswith('c_') and not colname.startswith('c_q['))):

213 out_atoms.new_array(colname, get_quantity([colname]),

214 dtype='float')

215

216 return out_atoms

217

218

219def construct_cell(diagdisp, offdiag):

220 """Help function to create an ASE-cell with displacement vector from

221 the lammps coordination system parameters.

222

223 :param diagdisp: cell dimension convoluted with the displacement vector

224 :param offdiag: off-diagonal cell elements

225 :returns: cell and cell displacement vector

226 :rtype: tuple

227 """

228 xlo, xhi, ylo, yhi, zlo, zhi = diagdisp

229 xy, xz, yz = offdiag

230

231 # create ase-cell from lammps-box

232 xhilo = (xhi - xlo) - abs(xy) - abs(xz)

233 yhilo = (yhi - ylo) - abs(yz)

234 zhilo = zhi - zlo

235 celldispx = xlo - min(0, xy) - min(0, xz)

236 celldispy = ylo - min(0, yz)

237 celldispz = zlo

238 cell = np.array([[xhilo, 0, 0], [xy, yhilo, 0], [xz, yz, zhilo]])

239 celldisp = np.array([celldispx, celldispy, celldispz])

240

241 return cell, celldisp

242

243

244def get_max_index(index):

245 if np.isscalar(index):

246 return index

247 elif isinstance(index, slice):

248 return index.stop if (index.stop is not None) else float("inf")

249

250

251def read_lammps_dump_text(fileobj, index=-1, **kwargs):

252 """Process cleartext lammps dumpfiles

253

254 :param fileobj: filestream providing the trajectory data

255 :param index: integer or slice object (default: get the last timestep)

256 :returns: list of Atoms objects

257 :rtype: list

258 """

259 # Load all dumped timesteps into memory simultaneously

260 lines = deque(fileobj.readlines())

261 index_end = get_max_index(index)

262

263 n_atoms = 0

264 images = []

265

266 # avoid references before assignment in case of incorrect file structure

267 cell, celldisp, pbc = None, None, False

268

269 while len(lines) > n_atoms:

270 line = lines.popleft()

271

272 if "ITEM: TIMESTEP" in line:

273 n_atoms = 0

274 line = lines.popleft()

275 # !TODO: pyflakes complains about this line -> do something

276 # ntimestep = int(line.split()[0]) # NOQA

277

278 if "ITEM: NUMBER OF ATOMS" in line:

279 line = lines.popleft()

280 n_atoms = int(line.split()[0])

281

282 if "ITEM: BOX BOUNDS" in line:

283 # save labels behind "ITEM: BOX BOUNDS" in triclinic case

284 # (>=lammps-7Jul09)

285 tilt_items = line.split()[3:]

286 celldatarows = [lines.popleft() for _ in range(3)]

287 celldata = np.loadtxt(celldatarows)

288 diagdisp = celldata[:, :2].reshape(6, 1).flatten()

289

290 # determine cell tilt (triclinic case!)

291 if len(celldata[0]) > 2:

292 # for >=lammps-7Jul09 use labels behind "ITEM: BOX BOUNDS"

293 # to assign tilt (vector) elements ...

294 offdiag = celldata[:, 2]

295 # ... otherwise assume default order in 3rd column

296 # (if the latter was present)

297 if len(tilt_items) >= 3:

298 sort_index = [tilt_items.index(i)

299 for i in ["xy", "xz", "yz"]]

300 offdiag = offdiag[sort_index]

301 else:

302 offdiag = (0.0,) * 3

303

304 cell, celldisp = construct_cell(diagdisp, offdiag)

305

306 # Handle pbc conditions

307 if len(tilt_items) == 3:

308 pbc_items = tilt_items

309 elif len(tilt_items) > 3:

310 pbc_items = tilt_items[3:6]

311 else:

312 pbc_items = ["f", "f", "f"]

313 pbc = ["p" in d.lower() for d in pbc_items]

314

315 if "ITEM: ATOMS" in line:

316 colnames = line.split()[2:]

317 datarows = [lines.popleft() for _ in range(n_atoms)]

318 data = np.loadtxt(datarows, dtype=str)

319 out_atoms = lammps_data_to_ase_atoms(

320 data=data,

321 colnames=colnames,

322 cell=cell,

323 celldisp=celldisp,

324 atomsobj=Atoms,

325 pbc=pbc,

326 **kwargs

327 )

328 images.append(out_atoms)

329

330 if len(images) > index_end >= 0:

331 break

332

333 return images[index]

334

335

336def read_lammps_dump_binary(

337 fileobj, index=-1, colnames=None, intformat="SMALLBIG", **kwargs

338):

339 """Read binary dump-files (after binary2txt.cpp from lammps/tools)

340

341 :param fileobj: file-stream containing the binary lammps data

342 :param index: integer or slice object (default: get the last timestep)

343 :param colnames: data is columns and identified by a header

344 :param intformat: lammps support different integer size. Parameter set \

345 at compile-time and can unfortunately not derived from data file

346 :returns: list of Atoms-objects

347 :rtype: list

348 """

349 # depending on the chosen compilation flag lammps uses either normal

350 # integers or long long for its id or timestep numbering

351 # !TODO: tags are cast to double -> missing/double ids (add check?)

352 tagformat, bigformat = dict(

353 SMALLSMALL=("i", "i"), SMALLBIG=("i", "q"), BIGBIG=("q", "q")

354 )[intformat]

355

356 index_end = get_max_index(index)

357

358 # Standard columns layout from lammpsrun

359 if not colnames:

360 colnames = ["id", "type", "x", "y", "z",

361 "vx", "vy", "vz", "fx", "fy", "fz"]

362

363 images = []

364

365 # wrap struct.unpack to raise EOFError

366 def read_variables(string):

367 obj_len = struct.calcsize(string)

368 data_obj = fileobj.read(obj_len)

369 if obj_len != len(data_obj):

370 raise EOFError

371 return struct.unpack(string, data_obj)

372

373 while True:

374 try:

375 # Assume that the binary dump file is in the old (pre-29Oct2020)

376 # format

377 magic_string = None

378

379 # read header

380 ntimestep, = read_variables("=" + bigformat)

381

382 # In the new LAMMPS binary dump format (version 29Oct2020 and

383 # onward), a negative timestep is used to indicate that the next

384 # few bytes will contain certain metadata

385 if ntimestep < 0:

386 # First bigint was actually encoding the negative of the format

387 # name string length (we call this 'magic_string' to

388 magic_string_len = -ntimestep

389

390 # The next `magic_string_len` bytes will hold a string

391 # indicating the format of the dump file

392 magic_string = b''.join(read_variables(

393 "=" + str(magic_string_len) + "c"))

394

395 # Read endianness (integer). For now, we'll disregard the value

396 # and simply use the host machine's endianness (via '='

397 # character used with struct.calcsize).

398 #

399 # TODO: Use the endianness of the dump file in subsequent

400 # read_variables rather than just assuming it will match

401 # that of the host

402 endian, = read_variables("=i")

403

404 # Read revision number (integer)

405 revision, = read_variables("=i")

406

407 # Finally, read the actual timestep (bigint)

408 ntimestep, = read_variables("=" + bigformat)

409

410 n_atoms, triclinic = read_variables("=" + bigformat + "i")

411 boundary = read_variables("=6i")

412 diagdisp = read_variables("=6d")

413 if triclinic != 0:

414 offdiag = read_variables("=3d")

415 else:

416 offdiag = (0.0,) * 3

417 size_one, = read_variables("=i")

418

419 if len(colnames) != size_one:

420 raise ValueError("Provided columns do not match binary file")

421

422 if magic_string and revision > 1:

423 # New binary dump format includes units string,

424 # columns string, and time

425 units_str_len, = read_variables("=i")

426

427 if units_str_len > 0:

428 # Read lammps units style

429 _ = b''.join(

430 read_variables("=" + str(units_str_len) + "c"))

431

432 flag, = read_variables("=c")

433 if flag != b'\x00':

434 # Flag was non-empty string

436

437 # Length of column string

438 columns_str_len, = read_variables("=i")

439

440 # Read column string (e.g., "id type x y z vx vy vz fx fy fz")

441 _ = b''.join(read_variables("=" + str(columns_str_len) + "c"))

442

443 nchunk, = read_variables("=i")

444

445 # lammps cells/boxes can have different boundary conditions on each

446 # sides (makes mainly sense for different non-periodic conditions

447 # (e.g. [f]ixed and [s]hrink for a irradiation simulation))

448 # periodic case: b 0 = 'p'

449 # non-peridic cases 1: 'f', 2 : 's', 3: 'm'

450 pbc = np.sum(np.array(boundary).reshape((3, 2)), axis=1) == 0

451

452 cell, celldisp = construct_cell(diagdisp, offdiag)

453

454 data = []

455 for _ in range(nchunk):

456 # number-of-data-entries

457 n_data, = read_variables("=i")

458 # retrieve per atom data

459 data += read_variables("=" + str(n_data) + "d")

460 data = np.array(data).reshape((-1, size_one))

461

462 # map data-chunk to ase atoms

463 out_atoms = lammps_data_to_ase_atoms(

464 data=data,

465 colnames=colnames,

466 cell=cell,

467 celldisp=celldisp,

468 pbc=pbc,

469 **kwargs

470 )

471

472 images.append(out_atoms)

473

474 # stop if requested index has been found

475 if len(images) > index_end >= 0:

476 break

477

478 except EOFError:

479 break

480

481 return images[index]