Coverage for /builds/kinetik161/ase/ase/io/rmc6f.py: 95.85%

193 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-12-10 11:04 +0000

1import re 

2import time 

3 

4import numpy as np 

5 

6from ase.atoms import Atoms 

7from ase.cell import Cell 

8from ase.utils import reader, writer 

9 

10__all__ = ['read_rmc6f', 'write_rmc6f'] 

11 

12ncols2style = {9: 'no_labels', 

13 10: 'labels', 

14 11: 'magnetic'} 

15 

16 

17def _read_construct_regex(lines): 

18 """ 

19 Utility for constructing regular expressions used by reader. 

20 """ 

21 lines = [line.strip() for line in lines] 

22 lines_re = '|'.join(lines) 

23 lines_re = lines_re.replace(' ', r'\s+') 

24 lines_re = lines_re.replace('(', r'\(') 

25 lines_re = lines_re.replace(')', r'\)') 

26 return f'({lines_re})' 

27 

28 

29def _read_line_of_atoms_section(fields): 

30 """ 

31 Process `fields` line of Atoms section in rmc6f file and output extracted 

32 info as atom id (key) and list of properties for Atoms object (values). 

33 

34 Parameters 

35 ---------- 

36 fields: list[str] 

37 List of columns from line in rmc6f file. 

38 

39 

40 Returns 

41 ------ 

42 atom_id: int 

43 Atom ID 

44 properties: list[str|float] 

45 List of Atoms properties based on rmc6f style. 

46 Basically, have 1) element and fractional coordinates for 'labels' 

47 or 'no_labels' style and 2) same for 'magnetic' style except adds 

48 the spin. 

49 Examples for 1) 'labels' or 'no_labels' styles or 2) 'magnetic' style: 

50 1) [element, xf, yf, zf] 

51 2) [element, xf, yf, zf, spin] 

52 """ 

53 # Atom id 

54 atom_id = int(fields[0]) 

55 

56 # Get style used for rmc6f from file based on number of columns 

57 ncols = len(fields) 

58 style = ncols2style[ncols] 

59 

60 # Create the position dictionary 

61 properties = [] 

62 element = str(fields[1]) 

63 if style == 'no_labels': 

64 # id element xf yf zf ref_num ucell_x ucell_y ucell_z 

65 xf = float(fields[2]) 

66 yf = float(fields[3]) 

67 zf = float(fields[4]) 

68 properties = [element, xf, yf, zf] 

69 

70 elif style == 'labels': 

71 # id element label xf yf zf ref_num ucell_x ucell_y ucell_z 

72 xf = float(fields[3]) 

73 yf = float(fields[4]) 

74 zf = float(fields[5]) 

75 properties = [element, xf, yf, zf] 

76 

77 elif style == 'magnetic': 

78 # id element xf yf zf ref_num ucell_x ucell_y ucell_z M: spin 

79 xf = float(fields[2]) 

80 yf = float(fields[3]) 

81 zf = float(fields[4]) 

82 spin = float(fields[10].strip("M:")) 

83 properties = [element, xf, yf, zf, spin] 

84 else: 

85 raise Exception("Unsupported style for parsing rmc6f file format.") 

86 

87 return atom_id, properties 

88 

89 

90def _read_process_rmc6f_lines_to_pos_and_cell(lines): 

91 """ 

92 Processes the lines of rmc6f file to atom position dictionary and cell 

93 

94 Parameters 

95 ---------- 

96 lines: list[str] 

97 List of lines from rmc6f file. 

98 

99 Returns 

100 ------ 

101 pos : dict{int:list[str|float]} 

102 Dict for each atom id and Atoms properties based on rmc6f style. 

103 Basically, have 1) element and fractional coordinates for 'labels' 

104 or 'no_labels' style and 2) same for 'magnetic' style except adds 

105 the spin. 

106 Examples for 1) 'labels' or 'no_labels' styles or 2) 'magnetic' style: 

107 1) pos[aid] = [element, xf, yf, zf] 

108 2) pos[aid] = [element, xf, yf, zf, spin] 

109 cell: Cell object 

110 The ASE Cell object created from cell parameters read from the 'Cell' 

111 section of rmc6f file. 

112 """ 

113 

114 # Inititalize output pos dictionary 

115 pos = {} 

116 

117 # Defined the header an section lines we process 

118 header_lines = [ 

119 "Number of atoms:", 

120 "Supercell dimensions:", 

121 "Cell (Ang/deg):", 

122 "Lattice vectors (Ang):"] 

123 sections = ["Atoms"] 

124 

125 # Construct header and sections regex 

126 header_lines_re = _read_construct_regex(header_lines) 

127 sections_re = _read_construct_regex(sections) 

128 

129 section = None 

130 header = True 

131 

132 # Remove any lines that are blank 

133 lines = [line for line in lines if line != ''] 

134 

135 # Process each line of rmc6f file 

136 pos = {} 

137 for line in lines: 

138 

139 # check if in a section 

140 m = re.match(sections_re, line) 

141 if m is not None: 

142 section = m.group(0).strip() 

143 header = False 

144 continue 

145 

146 # header section 

147 if header: 

148 field = None 

149 val = None 

150 

151 # Regex that matches whitespace-separated floats 

152 float_list_re = r'\s+(\d[\d|\s\.]+[\d|\.])' 

153 m = re.search(header_lines_re + float_list_re, line) 

154 if m is not None: 

155 field = m.group(1) 

156 val = m.group(2) 

157 

158 if field is not None and val is not None: 

159 

160 if field == "Number of atoms:": 

161 """ 

162 NOTE: Can just capture via number of atoms ingested. 

163 Maybe use in future for a check. 

164 code: natoms = int(val) 

165 """ 

166 

167 if field.startswith('Supercell'): 

168 """ 

169 NOTE: wrapping back down to unit cell is not 

170 necessarily needed for ASE object. 

171 

172 code: supercell = [int(x) for x in val.split()] 

173 """ 

174 

175 if field.startswith('Cell'): 

176 cellpar = [float(x) for x in val.split()] 

177 cell = Cell.fromcellpar(cellpar) 

178 

179 if field.startswith('Lattice'): 

180 """ 

181 NOTE: Have questions about RMC fractionalization matrix for 

182 conversion in data2config vs. the ASE matrix. 

183 Currently, just support the Cell section. 

184 """ 

185 

186 # main body section 

187 if section is not None: 

188 if section == 'Atoms': 

189 atom_id, atom_props = _read_line_of_atoms_section(line.split()) 

190 pos[atom_id] = atom_props 

191 

192 return pos, cell 

193 

194 

195def _write_output_column_format(columns, arrays): 

196 """ 

197 Helper function to build output for data columns in rmc6f format 

198 

199 Parameters 

200 ---------- 

201 columns: list[str] 

202 List of keys in arrays. Will be columns in the output file. 

203 arrays: dict{str:np.array} 

204 Dict with arrays for each column of rmc6f file that are 

205 property of Atoms object. 

206 

207 Returns 

208 ------ 

209 property_ncols : list[int] 

210 Number of columns for each property. 

211 dtype_obj: np.dtype 

212 Data type object for the columns. 

213 formats_as_str: str 

214 The format for printing the columns. 

215 

216 """ 

217 fmt_map = {'d': ('R', '%14.6f '), 

218 'f': ('R', '%14.6f '), 

219 'i': ('I', '%8d '), 

220 'O': ('S', '%s'), 

221 'S': ('S', '%s'), 

222 'U': ('S', '%s'), 

223 'b': ('L', ' %.1s ')} 

224 

225 property_types = [] 

226 property_ncols = [] 

227 dtypes = [] 

228 formats = [] 

229 

230 # Take each column and setup formatting vectors 

231 for column in columns: 

232 array = arrays[column] 

233 dtype = array.dtype 

234 

235 property_type, fmt = fmt_map[dtype.kind] 

236 property_types.append(property_type) 

237 

238 # Flags for 1d vectors 

239 is_1d = len(array.shape) == 1 

240 is_1d_as_2d = len(array.shape) == 2 and array.shape[1] == 1 

241 

242 # Construct the list of key pairs of column with dtype 

243 if (is_1d or is_1d_as_2d): 

244 ncol = 1 

245 dtypes.append((column, dtype)) 

246 else: 

247 ncol = array.shape[1] 

248 for c in range(ncol): 

249 dtypes.append((column + str(c), dtype)) 

250 

251 # Add format and number of columns for this property to output array 

252 formats.extend([fmt] * ncol) 

253 property_ncols.append(ncol) 

254 

255 # Prepare outputs to correct data structure 

256 dtype_obj = np.dtype(dtypes) 

257 formats_as_str = ''.join(formats) + '\n' 

258 

259 return property_ncols, dtype_obj, formats_as_str 

260 

261 

262def _write_output(filename, header_lines, data, fmt, order=None): 

263 """ 

264 Helper function to write information to the filename 

265 

266 Parameters 

267 ---------- 

268 filename : file|str 

269 A file like object or filename 

270 header_lines : list[str] 

271 Header section of output rmc6f file 

272 data: np.array[len(atoms)] 

273 Array for the Atoms section to write to file. Has 

274 the columns that need to be written on each row 

275 fmt: str 

276 The format string to use for writing each column in 

277 the rows of data. 

278 order : list[str] 

279 If not None, gives a list of atom types for the order 

280 to write out each. 

281 """ 

282 fd = filename 

283 

284 # Write header section 

285 for line in header_lines: 

286 fd.write(f"{line} \n") 

287 

288 # If specifying the order, fix the atom id and write to file 

289 natoms = data.shape[0] 

290 if order is not None: 

291 new_id = 0 

292 for atype in order: 

293 for i in range(natoms): 

294 if atype == data[i][1]: 

295 new_id += 1 

296 data[i][0] = new_id 

297 fd.write(fmt % tuple(data[i])) 

298 # ...just write rows to file 

299 else: 

300 for i in range(natoms): 

301 fd.write(fmt % tuple(data[i])) 

302 

303 

304@reader 

305def read_rmc6f(filename, atom_type_map=None): 

306 """ 

307 Parse a RMCProfile rmc6f file into ASE Atoms object 

308 

309 Parameters 

310 ---------- 

311 filename : file|str 

312 A file like object or filename. 

313 atom_type_map: dict{str:str} 

314 Map of atom types for conversions. Mainly used if there is 

315 an atom type in the file that is not supported by ASE but 

316 want to map to a supported atom type instead. 

317 

318 Example to map deuterium to hydrogen: 

319 atom_type_map = { 'D': 'H' } 

320 

321 Returns 

322 ------ 

323 structure : Atoms 

324 The Atoms object read in from the rmc6f file. 

325 """ 

326 

327 fd = filename 

328 lines = fd.readlines() 

329 

330 # Process the rmc6f file to extract positions and cell 

331 pos, cell = _read_process_rmc6f_lines_to_pos_and_cell(lines) 

332 

333 # create an atom type map if one does not exist from unique atomic symbols 

334 if atom_type_map is None: 

335 symbols = [atom[0] for atom in pos.values()] 

336 atom_type_map = {atype: atype for atype in symbols} 

337 

338 # Use map of tmp symbol to actual symbol 

339 for atom in pos.values(): 

340 atom[0] = atom_type_map[atom[0]] 

341 

342 # create Atoms from read-in data 

343 symbols = [] 

344 scaled_positions = [] 

345 spin = None 

346 magmoms = [] 

347 for atom in pos.values(): 

348 if len(atom) == 4: 

349 element, x, y, z = atom 

350 else: 

351 element, x, y, z, spin = atom 

352 

353 element = atom_type_map[element] 

354 symbols.append(element) 

355 scaled_positions.append([x, y, z]) 

356 if spin is not None: 

357 magmoms.append(spin) 

358 

359 atoms = Atoms(scaled_positions=scaled_positions, 

360 symbols=symbols, 

361 cell=cell, 

362 magmoms=magmoms, 

363 pbc=[True, True, True]) 

364 

365 return atoms 

366 

367 

368@writer 

369def write_rmc6f(filename, atoms, order=None, atom_type_map=None): 

370 """ 

371 Write output in rmc6f format - RMCProfile v6 fractional coordinates 

372 

373 Parameters 

374 ---------- 

375 filename : file|str 

376 A file like object or filename. 

377 atoms: Atoms object 

378 The Atoms object to be written. 

379 

380 order : list[str] 

381 If not None, gives a list of atom types for the order 

382 to write out each. 

383 atom_type_map: dict{str:str} 

384 Map of atom types for conversions. Mainly used if there is 

385 an atom type in the Atoms object that is a placeholder 

386 for a different atom type. This is used when the atom type 

387 is not supported by ASE but is in RMCProfile. 

388 

389 Example to map hydrogen to deuterium: 

390 atom_type_map = { 'H': 'D' } 

391 """ 

392 

393 # get atom types and how many of each (and set to order if passed) 

394 atom_types = set(atoms.symbols) 

395 if order is not None: 

396 if set(atom_types) != set(order): 

397 raise Exception("The order is not a set of the atom types.") 

398 atom_types = order 

399 

400 atom_count_dict = atoms.symbols.formula.count() 

401 natom_types = [str(atom_count_dict[atom_type]) for atom_type in atom_types] 

402 

403 # create an atom type map if one does not exist from unique atomic symbols 

404 if atom_type_map is None: 

405 symbols = set(np.array(atoms.symbols)) 

406 atom_type_map = {atype: atype for atype in symbols} 

407 

408 # HEADER SECTION 

409 

410 # get type and number of each atom type 

411 atom_types_list = [atom_type_map[atype] for atype in atom_types] 

412 atom_types_present = ' '.join(atom_types_list) 

413 natom_types_present = ' '.join(natom_types) 

414 

415 header_lines = [ 

416 "(Version 6f format configuration file)", 

417 "(Generated by ASE - Atomic Simulation Environment https://wiki.fysik.dtu.dk/ase/ )", # noqa: E501 

418 "Metadata date: " + time.strftime('%d-%m-%Y'), 

419 f"Number of types of atoms: {len(atom_types)} ", 

420 f"Atom types present: {atom_types_present}", 

421 f"Number of each atom type: {natom_types_present}", 

422 "Number of moves generated: 0", 

423 "Number of moves tried: 0", 

424 "Number of moves accepted: 0", 

425 "Number of prior configuration saves: 0", 

426 f"Number of atoms: {len(atoms)}", 

427 "Supercell dimensions: 1 1 1"] 

428 

429 # magnetic moments 

430 if atoms.has('magmoms'): 

431 spin_str = "Number of spins: {}" 

432 spin_line = spin_str.format(len(atoms.get_initial_magnetic_moments())) 

433 header_lines.extend([spin_line]) 

434 

435 density_str = "Number density (Ang^-3): {}" 

436 density_line = density_str.format(len(atoms) / atoms.get_volume()) 

437 cell_angles = [str(x) for x in atoms.cell.cellpar()] 

438 cell_line = "Cell (Ang/deg): " + ' '.join(cell_angles) 

439 header_lines.extend([density_line, cell_line]) 

440 

441 # get lattice vectors from cell lengths and angles 

442 # NOTE: RMCProfile uses a different convention for the fractionalization 

443 # matrix 

444 

445 cell_parameters = atoms.cell.cellpar() 

446 cell = Cell.fromcellpar(cell_parameters).T 

447 x_line = ' '.join([f'{i:12.6f}' for i in cell[0]]) 

448 y_line = ' '.join([f'{i:12.6f}' for i in cell[1]]) 

449 z_line = ' '.join([f'{i:12.6f}' for i in cell[2]]) 

450 lat_lines = ["Lattice vectors (Ang):", x_line, y_line, z_line] 

451 header_lines.extend(lat_lines) 

452 header_lines.extend(['Atoms:']) 

453 

454 # ATOMS SECTION 

455 

456 # create columns of data for atoms (fr_cols) 

457 fr_cols = ['id', 'symbols', 'scaled_positions', 'ref_num', 'ref_cell'] 

458 if atoms.has('magmoms'): 

459 fr_cols.extend('magmom') 

460 

461 # Collect data to be written out 

462 natoms = len(atoms) 

463 

464 arrays = {} 

465 arrays['id'] = np.array(range(1, natoms + 1, 1), int) 

466 arrays['symbols'] = np.array(atoms.symbols) 

467 arrays['ref_num'] = np.zeros(natoms, int) 

468 arrays['ref_cell'] = np.zeros((natoms, 3), int) 

469 arrays['scaled_positions'] = np.array(atoms.get_scaled_positions()) 

470 

471 # get formatting for writing output based on atom arrays 

472 ncols, dtype_obj, fmt = _write_output_column_format(fr_cols, arrays) 

473 

474 # Pack fr_cols into record array 

475 data = np.zeros(natoms, dtype_obj) 

476 for column, ncol in zip(fr_cols, ncols): 

477 value = arrays[column] 

478 if ncol == 1: 

479 data[column] = np.squeeze(value) 

480 else: 

481 for c in range(ncol): 

482 data[column + str(c)] = value[:, c] 

483 

484 # Use map of tmp symbol to actual symbol 

485 for i in range(natoms): 

486 data[i][1] = atom_type_map[data[i][1]] 

487 

488 # Write the output 

489 _write_output(filename, header_lines, data, fmt, order=order)