Coverage for /builds/kinetik161/ase/ase/io/lammpsrun.py: 88.10%

210 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-12-10 11:04 +0000

1import gzip 

2import struct 

3from collections import deque 

4from os.path import splitext 

5 

6import numpy as np 

7 

8from ase.atoms import Atoms 

9from ase.calculators.lammps import convert 

10from ase.calculators.singlepoint import SinglePointCalculator 

11from ase.parallel import paropen 

12from ase.quaternions import Quaternions 

13 

14 

15def read_lammps_dump(infileobj, **kwargs): 

16 """Method which reads a LAMMPS dump file. 

17 

18 LAMMPS chooses output method depending on the given suffix: 

19 - .bin : binary file 

20 - .gz : output piped through gzip 

21 - .mpiio: using mpiio (should be like cleartext, 

22 with different ordering) 

23 - else : normal clear-text format 

24 

25 :param infileobj: string to file, opened file or file-like stream 

26 

27 """ 

28 # !TODO: add support for lammps-regex naming schemes (output per 

29 # processor and timestep wildcards) 

30 

31 opened = False 

32 if isinstance(infileobj, str): 

33 opened = True 

34 suffix = splitext(infileobj)[-1] 

35 if suffix == ".bin": 

36 fileobj = paropen(infileobj, "rb") 

37 elif suffix == ".gz": 

38 # !TODO: save for parallel execution? 

39 fileobj = gzip.open(infileobj, "rb") 

40 else: 

41 fileobj = paropen(infileobj) 

42 else: 

43 suffix = splitext(infileobj.name)[-1] 

44 fileobj = infileobj 

45 

46 if suffix == ".bin": 

47 out = read_lammps_dump_binary(fileobj, **kwargs) 

48 if opened: 

49 fileobj.close() 

50 return out 

51 

52 out = read_lammps_dump_text(fileobj, **kwargs) 

53 

54 if opened: 

55 fileobj.close() 

56 

57 return out 

58 

59 

60def lammps_data_to_ase_atoms( 

61 data, 

62 colnames, 

63 cell, 

64 celldisp, 

65 pbc=False, 

66 atomsobj=Atoms, 

67 order=True, 

68 specorder=None, 

69 prismobj=None, 

70 units="metal", 

71): 

72 """Extract positions and other per-atom parameters and create Atoms 

73 

74 :param data: per atom data 

75 :param colnames: index for data 

76 :param cell: cell dimensions 

77 :param celldisp: origin shift 

78 :param pbc: periodic boundaries 

79 :param atomsobj: function to create ase-Atoms object 

80 :param order: sort atoms by id. Might be faster to turn off. 

81 Disregarded in case `id` column is not given in file. 

82 :param specorder: list of species to map lammps types to ase-species 

83 (usually .dump files to not contain type to species mapping) 

84 :param prismobj: Coordinate transformation between lammps and ase 

85 :type prismobj: Prism 

86 :param units: lammps units for unit transformation between lammps and ase 

87 :returns: Atoms object 

88 :rtype: Atoms 

89 

90 """ 

91 if len(data.shape) == 1: 

92 data = data[np.newaxis, :] 

93 

94 # read IDs if given and order if needed 

95 if "id" in colnames: 

96 ids = data[:, colnames.index("id")].astype(int) 

97 if order: 

98 sort_order = np.argsort(ids) 

99 data = data[sort_order, :] 

100 

101 # determine the elements 

102 if "element" in colnames: 

103 # priority to elements written in file 

104 elements = data[:, colnames.index("element")] 

105 elif "type" in colnames: 

106 # fall back to `types` otherwise 

107 elements = data[:, colnames.index("type")].astype(int) 

108 

109 # reconstruct types from given specorder 

110 if specorder: 

111 elements = [specorder[t - 1] for t in elements] 

112 else: 

113 # todo: what if specorder give but no types? 

114 # in principle the masses could work for atoms, but that needs 

115 # lots of cases and new code I guess 

116 raise ValueError("Cannot determine atom types form LAMMPS dump file") 

117 

118 def get_quantity(labels, quantity=None): 

119 try: 

120 cols = [colnames.index(label) for label in labels] 

121 if quantity: 

122 return convert(data[:, cols].astype(float), quantity, 

123 units, "ASE") 

124 

125 return data[:, cols].astype(float) 

126 except ValueError: 

127 return None 

128 

129 # Positions 

130 positions = None 

131 scaled_positions = None 

132 if "x" in colnames: 

133 # doc: x, y, z = unscaled atom coordinates 

134 positions = get_quantity(["x", "y", "z"], "distance") 

135 elif "xs" in colnames: 

136 # doc: xs,ys,zs = scaled atom coordinates 

137 scaled_positions = get_quantity(["xs", "ys", "zs"]) 

138 elif "xu" in colnames: 

139 # doc: xu,yu,zu = unwrapped atom coordinates 

140 positions = get_quantity(["xu", "yu", "zu"], "distance") 

141 elif "xsu" in colnames: 

142 # xsu,ysu,zsu = scaled unwrapped atom coordinates 

143 scaled_positions = get_quantity(["xsu", "ysu", "zsu"]) 

144 else: 

145 raise ValueError("No atomic positions found in LAMMPS output") 

146 

147 velocities = get_quantity(["vx", "vy", "vz"], "velocity") 

148 charges = get_quantity(["q"], "charge") 

149 forces = get_quantity(["fx", "fy", "fz"], "force") 

150 # !TODO: how need quaternions be converted? 

151 quaternions = get_quantity(["c_q[1]", "c_q[2]", "c_q[3]", "c_q[4]"]) 

152 

153 # convert cell 

154 cell = convert(cell, "distance", units, "ASE") 

155 celldisp = convert(celldisp, "distance", units, "ASE") 

156 if prismobj: 

157 celldisp = prismobj.vector_to_ase(celldisp) 

158 cell = prismobj.update_cell(cell) 

159 

160 if quaternions is not None: 

161 out_atoms = Quaternions( 

162 symbols=elements, 

163 positions=positions, 

164 cell=cell, 

165 celldisp=celldisp, 

166 pbc=pbc, 

167 quaternions=quaternions, 

168 ) 

169 elif positions is not None: 

170 # reverse coordinations transform to lammps system 

171 # (for all vectors = pos, vel, force) 

172 if prismobj: 

173 positions = prismobj.vector_to_ase(positions, wrap=True) 

174 

175 out_atoms = atomsobj( 

176 symbols=elements, 

177 positions=positions, 

178 pbc=pbc, 

179 celldisp=celldisp, 

180 cell=cell 

181 ) 

182 elif scaled_positions is not None: 

183 out_atoms = atomsobj( 

184 symbols=elements, 

185 scaled_positions=scaled_positions, 

186 pbc=pbc, 

187 celldisp=celldisp, 

188 cell=cell, 

189 ) 

190 

191 if velocities is not None: 

192 if prismobj: 

193 velocities = prismobj.vector_to_ase(velocities) 

194 out_atoms.set_velocities(velocities) 

195 if charges is not None: 

196 out_atoms.set_initial_charges([charge[0] for charge in charges]) 

197 if forces is not None: 

198 if prismobj: 

199 forces = prismobj.vector_to_ase(forces) 

200 # !TODO: use another calculator if available (or move forces 

201 # to atoms.property) (other problem: synchronizing 

202 # parallel runs) 

203 calculator = SinglePointCalculator(out_atoms, energy=0.0, 

204 forces=forces) 

205 out_atoms.calc = calculator 

206 

207 # process the extra columns of fixes, variables and computes 

208 # that can be dumped, add as additional arrays to atoms object 

209 for colname in colnames: 

210 # determine if it is a compute or fix (but not the quaternian) 

211 if (colname.startswith('f_') or colname.startswith('v_') or 

212 (colname.startswith('c_') and not colname.startswith('c_q['))): 

213 out_atoms.new_array(colname, get_quantity([colname]), 

214 dtype='float') 

215 

216 return out_atoms 

217 

218 

219def construct_cell(diagdisp, offdiag): 

220 """Help function to create an ASE-cell with displacement vector from 

221 the lammps coordination system parameters. 

222 

223 :param diagdisp: cell dimension convoluted with the displacement vector 

224 :param offdiag: off-diagonal cell elements 

225 :returns: cell and cell displacement vector 

226 :rtype: tuple 

227 """ 

228 xlo, xhi, ylo, yhi, zlo, zhi = diagdisp 

229 xy, xz, yz = offdiag 

230 

231 # create ase-cell from lammps-box 

232 xhilo = (xhi - xlo) - abs(xy) - abs(xz) 

233 yhilo = (yhi - ylo) - abs(yz) 

234 zhilo = zhi - zlo 

235 celldispx = xlo - min(0, xy) - min(0, xz) 

236 celldispy = ylo - min(0, yz) 

237 celldispz = zlo 

238 cell = np.array([[xhilo, 0, 0], [xy, yhilo, 0], [xz, yz, zhilo]]) 

239 celldisp = np.array([celldispx, celldispy, celldispz]) 

240 

241 return cell, celldisp 

242 

243 

244def get_max_index(index): 

245 if np.isscalar(index): 

246 return index 

247 elif isinstance(index, slice): 

248 return index.stop if (index.stop is not None) else float("inf") 

249 

250 

251def read_lammps_dump_text(fileobj, index=-1, **kwargs): 

252 """Process cleartext lammps dumpfiles 

253 

254 :param fileobj: filestream providing the trajectory data 

255 :param index: integer or slice object (default: get the last timestep) 

256 :returns: list of Atoms objects 

257 :rtype: list 

258 """ 

259 # Load all dumped timesteps into memory simultaneously 

260 lines = deque(fileobj.readlines()) 

261 index_end = get_max_index(index) 

262 

263 n_atoms = 0 

264 images = [] 

265 

266 # avoid references before assignment in case of incorrect file structure 

267 cell, celldisp, pbc = None, None, False 

268 

269 while len(lines) > n_atoms: 

270 line = lines.popleft() 

271 

272 if "ITEM: TIMESTEP" in line: 

273 n_atoms = 0 

274 line = lines.popleft() 

275 # !TODO: pyflakes complains about this line -> do something 

276 # ntimestep = int(line.split()[0]) # NOQA 

277 

278 if "ITEM: NUMBER OF ATOMS" in line: 

279 line = lines.popleft() 

280 n_atoms = int(line.split()[0]) 

281 

282 if "ITEM: BOX BOUNDS" in line: 

283 # save labels behind "ITEM: BOX BOUNDS" in triclinic case 

284 # (>=lammps-7Jul09) 

285 tilt_items = line.split()[3:] 

286 celldatarows = [lines.popleft() for _ in range(3)] 

287 celldata = np.loadtxt(celldatarows) 

288 diagdisp = celldata[:, :2].reshape(6, 1).flatten() 

289 

290 # determine cell tilt (triclinic case!) 

291 if len(celldata[0]) > 2: 

292 # for >=lammps-7Jul09 use labels behind "ITEM: BOX BOUNDS" 

293 # to assign tilt (vector) elements ... 

294 offdiag = celldata[:, 2] 

295 # ... otherwise assume default order in 3rd column 

296 # (if the latter was present) 

297 if len(tilt_items) >= 3: 

298 sort_index = [tilt_items.index(i) 

299 for i in ["xy", "xz", "yz"]] 

300 offdiag = offdiag[sort_index] 

301 else: 

302 offdiag = (0.0,) * 3 

303 

304 cell, celldisp = construct_cell(diagdisp, offdiag) 

305 

306 # Handle pbc conditions 

307 if len(tilt_items) == 3: 

308 pbc_items = tilt_items 

309 elif len(tilt_items) > 3: 

310 pbc_items = tilt_items[3:6] 

311 else: 

312 pbc_items = ["f", "f", "f"] 

313 pbc = ["p" in d.lower() for d in pbc_items] 

314 

315 if "ITEM: ATOMS" in line: 

316 colnames = line.split()[2:] 

317 datarows = [lines.popleft() for _ in range(n_atoms)] 

318 data = np.loadtxt(datarows, dtype=str) 

319 out_atoms = lammps_data_to_ase_atoms( 

320 data=data, 

321 colnames=colnames, 

322 cell=cell, 

323 celldisp=celldisp, 

324 atomsobj=Atoms, 

325 pbc=pbc, 

326 **kwargs 

327 ) 

328 images.append(out_atoms) 

329 

330 if len(images) > index_end >= 0: 

331 break 

332 

333 return images[index] 

334 

335 

336def read_lammps_dump_binary( 

337 fileobj, index=-1, colnames=None, intformat="SMALLBIG", **kwargs 

338): 

339 """Read binary dump-files (after binary2txt.cpp from lammps/tools) 

340 

341 :param fileobj: file-stream containing the binary lammps data 

342 :param index: integer or slice object (default: get the last timestep) 

343 :param colnames: data is columns and identified by a header 

344 :param intformat: lammps support different integer size. Parameter set \ 

345 at compile-time and can unfortunately not derived from data file 

346 :returns: list of Atoms-objects 

347 :rtype: list 

348 """ 

349 # depending on the chosen compilation flag lammps uses either normal 

350 # integers or long long for its id or timestep numbering 

351 # !TODO: tags are cast to double -> missing/double ids (add check?) 

352 tagformat, bigformat = dict( 

353 SMALLSMALL=("i", "i"), SMALLBIG=("i", "q"), BIGBIG=("q", "q") 

354 )[intformat] 

355 

356 index_end = get_max_index(index) 

357 

358 # Standard columns layout from lammpsrun 

359 if not colnames: 

360 colnames = ["id", "type", "x", "y", "z", 

361 "vx", "vy", "vz", "fx", "fy", "fz"] 

362 

363 images = [] 

364 

365 # wrap struct.unpack to raise EOFError 

366 def read_variables(string): 

367 obj_len = struct.calcsize(string) 

368 data_obj = fileobj.read(obj_len) 

369 if obj_len != len(data_obj): 

370 raise EOFError 

371 return struct.unpack(string, data_obj) 

372 

373 while True: 

374 try: 

375 # Assume that the binary dump file is in the old (pre-29Oct2020) 

376 # format 

377 magic_string = None 

378 

379 # read header 

380 ntimestep, = read_variables("=" + bigformat) 

381 

382 # In the new LAMMPS binary dump format (version 29Oct2020 and 

383 # onward), a negative timestep is used to indicate that the next 

384 # few bytes will contain certain metadata 

385 if ntimestep < 0: 

386 # First bigint was actually encoding the negative of the format 

387 # name string length (we call this 'magic_string' to 

388 magic_string_len = -ntimestep 

389 

390 # The next `magic_string_len` bytes will hold a string 

391 # indicating the format of the dump file 

392 magic_string = b''.join(read_variables( 

393 "=" + str(magic_string_len) + "c")) 

394 

395 # Read endianness (integer). For now, we'll disregard the value 

396 # and simply use the host machine's endianness (via '=' 

397 # character used with struct.calcsize). 

398 # 

399 # TODO: Use the endianness of the dump file in subsequent 

400 # read_variables rather than just assuming it will match 

401 # that of the host 

402 endian, = read_variables("=i") 

403 

404 # Read revision number (integer) 

405 revision, = read_variables("=i") 

406 

407 # Finally, read the actual timestep (bigint) 

408 ntimestep, = read_variables("=" + bigformat) 

409 

410 n_atoms, triclinic = read_variables("=" + bigformat + "i") 

411 boundary = read_variables("=6i") 

412 diagdisp = read_variables("=6d") 

413 if triclinic != 0: 

414 offdiag = read_variables("=3d") 

415 else: 

416 offdiag = (0.0,) * 3 

417 size_one, = read_variables("=i") 

418 

419 if len(colnames) != size_one: 

420 raise ValueError("Provided columns do not match binary file") 

421 

422 if magic_string and revision > 1: 

423 # New binary dump format includes units string, 

424 # columns string, and time 

425 units_str_len, = read_variables("=i") 

426 

427 if units_str_len > 0: 

428 # Read lammps units style 

429 _ = b''.join( 

430 read_variables("=" + str(units_str_len) + "c")) 

431 

432 flag, = read_variables("=c") 

433 if flag != b'\x00': 

434 # Flag was non-empty string 

435 time, = read_variables("=d") 

436 

437 # Length of column string 

438 columns_str_len, = read_variables("=i") 

439 

440 # Read column string (e.g., "id type x y z vx vy vz fx fy fz") 

441 _ = b''.join(read_variables("=" + str(columns_str_len) + "c")) 

442 

443 nchunk, = read_variables("=i") 

444 

445 # lammps cells/boxes can have different boundary conditions on each 

446 # sides (makes mainly sense for different non-periodic conditions 

447 # (e.g. [f]ixed and [s]hrink for a irradiation simulation)) 

448 # periodic case: b 0 = 'p' 

449 # non-peridic cases 1: 'f', 2 : 's', 3: 'm' 

450 pbc = np.sum(np.array(boundary).reshape((3, 2)), axis=1) == 0 

451 

452 cell, celldisp = construct_cell(diagdisp, offdiag) 

453 

454 data = [] 

455 for _ in range(nchunk): 

456 # number-of-data-entries 

457 n_data, = read_variables("=i") 

458 # retrieve per atom data 

459 data += read_variables("=" + str(n_data) + "d") 

460 data = np.array(data).reshape((-1, size_one)) 

461 

462 # map data-chunk to ase atoms 

463 out_atoms = lammps_data_to_ase_atoms( 

464 data=data, 

465 colnames=colnames, 

466 cell=cell, 

467 celldisp=celldisp, 

468 pbc=pbc, 

469 **kwargs 

470 ) 

471 

472 images.append(out_atoms) 

473 

474 # stop if requested index has been found 

475 if len(images) > index_end >= 0: 

476 break 

477 

478 except EOFError: 

479 break 

480 

481 return images[index]