Coverage for /builds/kinetik161/ase/ase/io/rmc6f.py: 95.85%

1import re

2import time

4import numpy as np

6from ase.atoms import Atoms

7from ase.cell import Cell

8from ase.utils import reader, writer

10__all__ = ['read_rmc6f', 'write_rmc6f']

12ncols2style = {9: 'no_labels',

13 10: 'labels',

14 11: 'magnetic'}

17def _read_construct_regex(lines):

18 """

19 Utility for constructing regular expressions used by reader.

20 """

21 lines = [line.strip() for line in lines]

22 lines_re = '|'.join(lines)

23 lines_re = lines_re.replace(' ', r'\s+')

24 lines_re = lines_re.replace('(', r'\(')

25 lines_re = lines_re.replace(')', r'\)')

26 return f'({lines_re})'

29def _read_line_of_atoms_section(fields):

30 """

31 Process `fields` line of Atoms section in rmc6f file and output extracted

32 info as atom id (key) and list of properties for Atoms object (values).

34 Parameters

35 ----------

36 fields: list[str]

37 List of columns from line in rmc6f file.

40 Returns

41 ------

42 atom_id: int

43 Atom ID

44 properties: list[str|float]

45 List of Atoms properties based on rmc6f style.

46 Basically, have 1) element and fractional coordinates for 'labels'

47 or 'no_labels' style and 2) same for 'magnetic' style except adds

48 the spin.

49 Examples for 1) 'labels' or 'no_labels' styles or 2) 'magnetic' style:

50 1) [element, xf, yf, zf]

51 2) [element, xf, yf, zf, spin]

52 """

53 # Atom id

54 atom_id = int(fields[0])

56 # Get style used for rmc6f from file based on number of columns

57 ncols = len(fields)

58 style = ncols2style[ncols]

60 # Create the position dictionary

61 properties = []

62 element = str(fields[1])

63 if style == 'no_labels':

64 # id element xf yf zf ref_num ucell_x ucell_y ucell_z

65 xf = float(fields[2])

66 yf = float(fields[3])

67 zf = float(fields[4])

68 properties = [element, xf, yf, zf]

70 elif style == 'labels':

71 # id element label xf yf zf ref_num ucell_x ucell_y ucell_z

72 xf = float(fields[3])

73 yf = float(fields[4])

74 zf = float(fields[5])

75 properties = [element, xf, yf, zf]

77 elif style == 'magnetic':

78 # id element xf yf zf ref_num ucell_x ucell_y ucell_z M: spin

79 xf = float(fields[2])

80 yf = float(fields[3])

81 zf = float(fields[4])

82 spin = float(fields[10].strip("M:"))

83 properties = [element, xf, yf, zf, spin]

84 else:

85 raise Exception("Unsupported style for parsing rmc6f file format.")

87 return atom_id, properties

90def _read_process_rmc6f_lines_to_pos_and_cell(lines):

91 """

92 Processes the lines of rmc6f file to atom position dictionary and cell

94 Parameters

95 ----------

96 lines: list[str]

97 List of lines from rmc6f file.

99 Returns

100 ------

101 pos : dict{int:list[str|float]}

102 Dict for each atom id and Atoms properties based on rmc6f style.

103 Basically, have 1) element and fractional coordinates for 'labels'

104 or 'no_labels' style and 2) same for 'magnetic' style except adds

105 the spin.

106 Examples for 1) 'labels' or 'no_labels' styles or 2) 'magnetic' style:

107 1) pos[aid] = [element, xf, yf, zf]

108 2) pos[aid] = [element, xf, yf, zf, spin]

109 cell: Cell object

110 The ASE Cell object created from cell parameters read from the 'Cell'

111 section of rmc6f file.

112 """

113

114 # Inititalize output pos dictionary

115 pos = {}

116

117 # Defined the header an section lines we process

118 header_lines = [

119 "Number of atoms:",

120 "Supercell dimensions:",

121 "Cell (Ang/deg):",

122 "Lattice vectors (Ang):"]

123 sections = ["Atoms"]

124

125 # Construct header and sections regex

126 header_lines_re = _read_construct_regex(header_lines)

127 sections_re = _read_construct_regex(sections)

128

129 section = None

130 header = True

131

132 # Remove any lines that are blank

133 lines = [line for line in lines if line != '']

134

135 # Process each line of rmc6f file

136 pos = {}

137 for line in lines:

138

139 # check if in a section

140 m = re.match(sections_re, line)

141 if m is not None:

142 section = m.group(0).strip()

143 header = False

144 continue

145

146 # header section

147 if header:

148 field = None

149 val = None

150

151 # Regex that matches whitespace-separated floats

152 float_list_re = r'\s+(\d[\d|\s\.]+[\d|\.])'

153 m = re.search(header_lines_re + float_list_re, line)

154 if m is not None:

155 field = m.group(1)

156 val = m.group(2)

157

158 if field is not None and val is not None:

159

160 if field == "Number of atoms:":

161 """

162 NOTE: Can just capture via number of atoms ingested.

163 Maybe use in future for a check.

164 code: natoms = int(val)

165 """

166

167 if field.startswith('Supercell'):

168 """

169 NOTE: wrapping back down to unit cell is not

170 necessarily needed for ASE object.

171

172 code: supercell = [int(x) for x in val.split()]

173 """

174

175 if field.startswith('Cell'):

176 cellpar = [float(x) for x in val.split()]

177 cell = Cell.fromcellpar(cellpar)

178

179 if field.startswith('Lattice'):

180 """

181 NOTE: Have questions about RMC fractionalization matrix for

182 conversion in data2config vs. the ASE matrix.

183 Currently, just support the Cell section.

184 """

185

186 # main body section

187 if section is not None:

188 if section == 'Atoms':

189 atom_id, atom_props = _read_line_of_atoms_section(line.split())

190 pos[atom_id] = atom_props

191

192 return pos, cell

193

194

195def _write_output_column_format(columns, arrays):

196 """

197 Helper function to build output for data columns in rmc6f format

198

199 Parameters

200 ----------

201 columns: list[str]

202 List of keys in arrays. Will be columns in the output file.

203 arrays: dict{str:np.array}

204 Dict with arrays for each column of rmc6f file that are

205 property of Atoms object.

206

207 Returns

208 ------

209 property_ncols : list[int]

210 Number of columns for each property.

211 dtype_obj: np.dtype

212 Data type object for the columns.

213 formats_as_str: str

214 The format for printing the columns.

215

216 """

217 fmt_map = {'d': ('R', '%14.6f '),

218 'f': ('R', '%14.6f '),

219 'i': ('I', '%8d '),

220 'O': ('S', '%s'),

221 'S': ('S', '%s'),

222 'U': ('S', '%s'),

223 'b': ('L', ' %.1s ')}

224

225 property_types = []

226 property_ncols = []

227 dtypes = []

228 formats = []

229

230 # Take each column and setup formatting vectors

231 for column in columns:

232 array = arrays[column]

233 dtype = array.dtype

234

235 property_type, fmt = fmt_map[dtype.kind]

236 property_types.append(property_type)

237

238 # Flags for 1d vectors

239 is_1d = len(array.shape) == 1

240 is_1d_as_2d = len(array.shape) == 2 and array.shape[1] == 1

241

242 # Construct the list of key pairs of column with dtype

243 if (is_1d or is_1d_as_2d):

244 ncol = 1

245 dtypes.append((column, dtype))

246 else:

247 ncol = array.shape[1]

248 for c in range(ncol):

249 dtypes.append((column + str(c), dtype))

250

251 # Add format and number of columns for this property to output array

252 formats.extend([fmt] * ncol)

253 property_ncols.append(ncol)

254

255 # Prepare outputs to correct data structure

256 dtype_obj = np.dtype(dtypes)

257 formats_as_str = ''.join(formats) + '\n'

258

259 return property_ncols, dtype_obj, formats_as_str

260

261

262def _write_output(filename, header_lines, data, fmt, order=None):

263 """

264 Helper function to write information to the filename

265

266 Parameters

267 ----------

268 filename : file|str

269 A file like object or filename

270 header_lines : list[str]

271 Header section of output rmc6f file

272 data: np.array[len(atoms)]

273 Array for the Atoms section to write to file. Has

274 the columns that need to be written on each row

275 fmt: str

276 The format string to use for writing each column in

277 the rows of data.

278 order : list[str]

279 If not None, gives a list of atom types for the order

280 to write out each.

281 """

282 fd = filename

283

284 # Write header section

285 for line in header_lines:

286 fd.write(f"{line} \n")

287

288 # If specifying the order, fix the atom id and write to file

289 natoms = data.shape[0]

290 if order is not None:

291 new_id = 0

292 for atype in order:

293 for i in range(natoms):

294 if atype == data[i][1]:

295 new_id += 1

296 data[i][0] = new_id

297 fd.write(fmt % tuple(data[i]))

298 # ...just write rows to file

299 else:

300 for i in range(natoms):

301 fd.write(fmt % tuple(data[i]))

302

303

304@reader

305def read_rmc6f(filename, atom_type_map=None):

306 """

307 Parse a RMCProfile rmc6f file into ASE Atoms object

308

309 Parameters

310 ----------

311 filename : file|str

312 A file like object or filename.

313 atom_type_map: dict{str:str}

314 Map of atom types for conversions. Mainly used if there is

315 an atom type in the file that is not supported by ASE but

316 want to map to a supported atom type instead.

317

318 Example to map deuterium to hydrogen:

319 atom_type_map = { 'D': 'H' }

320

321 Returns

322 ------

323 structure : Atoms

324 The Atoms object read in from the rmc6f file.

325 """

326

327 fd = filename

328 lines = fd.readlines()

329

330 # Process the rmc6f file to extract positions and cell

331 pos, cell = _read_process_rmc6f_lines_to_pos_and_cell(lines)

332

333 # create an atom type map if one does not exist from unique atomic symbols

334 if atom_type_map is None:

335 symbols = [atom[0] for atom in pos.values()]

336 atom_type_map = {atype: atype for atype in symbols}

337

338 # Use map of tmp symbol to actual symbol

339 for atom in pos.values():

340 atom[0] = atom_type_map[atom[0]]

341

342 # create Atoms from read-in data

343 symbols = []

344 scaled_positions = []

345 spin = None

346 magmoms = []

347 for atom in pos.values():

348 if len(atom) == 4:

349 element, x, y, z = atom

350 else:

351 element, x, y, z, spin = atom

352

353 element = atom_type_map[element]

354 symbols.append(element)

355 scaled_positions.append([x, y, z])

356 if spin is not None:

357 magmoms.append(spin)

358

359 atoms = Atoms(scaled_positions=scaled_positions,

360 symbols=symbols,

361 cell=cell,

362 magmoms=magmoms,

363 pbc=[True, True, True])

364

365 return atoms

366

367

368@writer

369def write_rmc6f(filename, atoms, order=None, atom_type_map=None):

370 """

371 Write output in rmc6f format - RMCProfile v6 fractional coordinates

372

373 Parameters

374 ----------

375 filename : file|str

376 A file like object or filename.

377 atoms: Atoms object

378 The Atoms object to be written.

379

380 order : list[str]

381 If not None, gives a list of atom types for the order

382 to write out each.

383 atom_type_map: dict{str:str}

384 Map of atom types for conversions. Mainly used if there is

385 an atom type in the Atoms object that is a placeholder

386 for a different atom type. This is used when the atom type

387 is not supported by ASE but is in RMCProfile.

388

389 Example to map hydrogen to deuterium:

390 atom_type_map = { 'H': 'D' }

391 """

392

393 # get atom types and how many of each (and set to order if passed)

394 atom_types = set(atoms.symbols)

395 if order is not None:

396 if set(atom_types) != set(order):

397 raise Exception("The order is not a set of the atom types.")

398 atom_types = order

399

400 atom_count_dict = atoms.symbols.formula.count()

401 natom_types = [str(atom_count_dict[atom_type]) for atom_type in atom_types]

402

403 # create an atom type map if one does not exist from unique atomic symbols

404 if atom_type_map is None:

405 symbols = set(np.array(atoms.symbols))

406 atom_type_map = {atype: atype for atype in symbols}

407

408 # HEADER SECTION

409

410 # get type and number of each atom type

411 atom_types_list = [atom_type_map[atype] for atype in atom_types]

412 atom_types_present = ' '.join(atom_types_list)

413 natom_types_present = ' '.join(natom_types)

414

415 header_lines = [

416 "(Version 6f format configuration file)",

417 "(Generated by ASE - Atomic Simulation Environment https://wiki.fysik.dtu.dk/ase/ )", # noqa: E501

418 "Metadata date: " + time.strftime('%d-%m-%Y'),

419 f"Number of types of atoms: {len(atom_types)} ",

420 f"Atom types present: {atom_types_present}",

421 f"Number of each atom type: {natom_types_present}",

422 "Number of moves generated: 0",

423 "Number of moves tried: 0",

424 "Number of moves accepted: 0",

425 "Number of prior configuration saves: 0",

426 f"Number of atoms: {len(atoms)}",

427 "Supercell dimensions: 1 1 1"]

428

429 # magnetic moments

430 if atoms.has('magmoms'):

431 spin_str = "Number of spins: {}"

432 spin_line = spin_str.format(len(atoms.get_initial_magnetic_moments()))

433 header_lines.extend([spin_line])

434

435 density_str = "Number density (Ang^-3): {}"

436 density_line = density_str.format(len(atoms) / atoms.get_volume())

437 cell_angles = [str(x) for x in atoms.cell.cellpar()]

438 cell_line = "Cell (Ang/deg): " + ' '.join(cell_angles)

439 header_lines.extend([density_line, cell_line])

440

441 # get lattice vectors from cell lengths and angles

442 # NOTE: RMCProfile uses a different convention for the fractionalization

443 # matrix

444

445 cell_parameters = atoms.cell.cellpar()

446 cell = Cell.fromcellpar(cell_parameters).T

447 x_line = ' '.join([f'{i:12.6f}' for i in cell[0]])

448 y_line = ' '.join([f'{i:12.6f}' for i in cell[1]])

449 z_line = ' '.join([f'{i:12.6f}' for i in cell[2]])

450 lat_lines = ["Lattice vectors (Ang):", x_line, y_line, z_line]

451 header_lines.extend(lat_lines)

452 header_lines.extend(['Atoms:'])

453

454 # ATOMS SECTION

455

456 # create columns of data for atoms (fr_cols)

457 fr_cols = ['id', 'symbols', 'scaled_positions', 'ref_num', 'ref_cell']

458 if atoms.has('magmoms'):

459 fr_cols.extend('magmom')

460

461 # Collect data to be written out

462 natoms = len(atoms)

463

464 arrays = {}

465 arrays['id'] = np.array(range(1, natoms + 1, 1), int)

466 arrays['symbols'] = np.array(atoms.symbols)

467 arrays['ref_num'] = np.zeros(natoms, int)

468 arrays['ref_cell'] = np.zeros((natoms, 3), int)

469 arrays['scaled_positions'] = np.array(atoms.get_scaled_positions())

470

471 # get formatting for writing output based on atom arrays

472 ncols, dtype_obj, fmt = _write_output_column_format(fr_cols, arrays)

473

474 # Pack fr_cols into record array

475 data = np.zeros(natoms, dtype_obj)

476 for column, ncol in zip(fr_cols, ncols):

477 value = arrays[column]

478 if ncol == 1:

479 data[column] = np.squeeze(value)

480 else:

481 for c in range(ncol):

482 data[column + str(c)] = value[:, c]

483

484 # Use map of tmp symbol to actual symbol

485 for i in range(natoms):

486 data[i][1] = atom_type_map[data[i][1]]

487

488 # Write the output

489 _write_output(filename, header_lines, data, fmt, order=order)