Coverage for /builds/kinetik161/ase/ase/io/rmc6f.py: 95.85%
193 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-12-10 11:04 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-12-10 11:04 +0000
1import re
2import time
4import numpy as np
6from ase.atoms import Atoms
7from ase.cell import Cell
8from ase.utils import reader, writer
10__all__ = ['read_rmc6f', 'write_rmc6f']
12ncols2style = {9: 'no_labels',
13 10: 'labels',
14 11: 'magnetic'}
17def _read_construct_regex(lines):
18 """
19 Utility for constructing regular expressions used by reader.
20 """
21 lines = [line.strip() for line in lines]
22 lines_re = '|'.join(lines)
23 lines_re = lines_re.replace(' ', r'\s+')
24 lines_re = lines_re.replace('(', r'\(')
25 lines_re = lines_re.replace(')', r'\)')
26 return f'({lines_re})'
29def _read_line_of_atoms_section(fields):
30 """
31 Process `fields` line of Atoms section in rmc6f file and output extracted
32 info as atom id (key) and list of properties for Atoms object (values).
34 Parameters
35 ----------
36 fields: list[str]
37 List of columns from line in rmc6f file.
40 Returns
41 ------
42 atom_id: int
43 Atom ID
44 properties: list[str|float]
45 List of Atoms properties based on rmc6f style.
46 Basically, have 1) element and fractional coordinates for 'labels'
47 or 'no_labels' style and 2) same for 'magnetic' style except adds
48 the spin.
49 Examples for 1) 'labels' or 'no_labels' styles or 2) 'magnetic' style:
50 1) [element, xf, yf, zf]
51 2) [element, xf, yf, zf, spin]
52 """
53 # Atom id
54 atom_id = int(fields[0])
56 # Get style used for rmc6f from file based on number of columns
57 ncols = len(fields)
58 style = ncols2style[ncols]
60 # Create the position dictionary
61 properties = []
62 element = str(fields[1])
63 if style == 'no_labels':
64 # id element xf yf zf ref_num ucell_x ucell_y ucell_z
65 xf = float(fields[2])
66 yf = float(fields[3])
67 zf = float(fields[4])
68 properties = [element, xf, yf, zf]
70 elif style == 'labels':
71 # id element label xf yf zf ref_num ucell_x ucell_y ucell_z
72 xf = float(fields[3])
73 yf = float(fields[4])
74 zf = float(fields[5])
75 properties = [element, xf, yf, zf]
77 elif style == 'magnetic':
78 # id element xf yf zf ref_num ucell_x ucell_y ucell_z M: spin
79 xf = float(fields[2])
80 yf = float(fields[3])
81 zf = float(fields[4])
82 spin = float(fields[10].strip("M:"))
83 properties = [element, xf, yf, zf, spin]
84 else:
85 raise Exception("Unsupported style for parsing rmc6f file format.")
87 return atom_id, properties
90def _read_process_rmc6f_lines_to_pos_and_cell(lines):
91 """
92 Processes the lines of rmc6f file to atom position dictionary and cell
94 Parameters
95 ----------
96 lines: list[str]
97 List of lines from rmc6f file.
99 Returns
100 ------
101 pos : dict{int:list[str|float]}
102 Dict for each atom id and Atoms properties based on rmc6f style.
103 Basically, have 1) element and fractional coordinates for 'labels'
104 or 'no_labels' style and 2) same for 'magnetic' style except adds
105 the spin.
106 Examples for 1) 'labels' or 'no_labels' styles or 2) 'magnetic' style:
107 1) pos[aid] = [element, xf, yf, zf]
108 2) pos[aid] = [element, xf, yf, zf, spin]
109 cell: Cell object
110 The ASE Cell object created from cell parameters read from the 'Cell'
111 section of rmc6f file.
112 """
114 # Inititalize output pos dictionary
115 pos = {}
117 # Defined the header an section lines we process
118 header_lines = [
119 "Number of atoms:",
120 "Supercell dimensions:",
121 "Cell (Ang/deg):",
122 "Lattice vectors (Ang):"]
123 sections = ["Atoms"]
125 # Construct header and sections regex
126 header_lines_re = _read_construct_regex(header_lines)
127 sections_re = _read_construct_regex(sections)
129 section = None
130 header = True
132 # Remove any lines that are blank
133 lines = [line for line in lines if line != '']
135 # Process each line of rmc6f file
136 pos = {}
137 for line in lines:
139 # check if in a section
140 m = re.match(sections_re, line)
141 if m is not None:
142 section = m.group(0).strip()
143 header = False
144 continue
146 # header section
147 if header:
148 field = None
149 val = None
151 # Regex that matches whitespace-separated floats
152 float_list_re = r'\s+(\d[\d|\s\.]+[\d|\.])'
153 m = re.search(header_lines_re + float_list_re, line)
154 if m is not None:
155 field = m.group(1)
156 val = m.group(2)
158 if field is not None and val is not None:
160 if field == "Number of atoms:":
161 """
162 NOTE: Can just capture via number of atoms ingested.
163 Maybe use in future for a check.
164 code: natoms = int(val)
165 """
167 if field.startswith('Supercell'):
168 """
169 NOTE: wrapping back down to unit cell is not
170 necessarily needed for ASE object.
172 code: supercell = [int(x) for x in val.split()]
173 """
175 if field.startswith('Cell'):
176 cellpar = [float(x) for x in val.split()]
177 cell = Cell.fromcellpar(cellpar)
179 if field.startswith('Lattice'):
180 """
181 NOTE: Have questions about RMC fractionalization matrix for
182 conversion in data2config vs. the ASE matrix.
183 Currently, just support the Cell section.
184 """
186 # main body section
187 if section is not None:
188 if section == 'Atoms':
189 atom_id, atom_props = _read_line_of_atoms_section(line.split())
190 pos[atom_id] = atom_props
192 return pos, cell
195def _write_output_column_format(columns, arrays):
196 """
197 Helper function to build output for data columns in rmc6f format
199 Parameters
200 ----------
201 columns: list[str]
202 List of keys in arrays. Will be columns in the output file.
203 arrays: dict{str:np.array}
204 Dict with arrays for each column of rmc6f file that are
205 property of Atoms object.
207 Returns
208 ------
209 property_ncols : list[int]
210 Number of columns for each property.
211 dtype_obj: np.dtype
212 Data type object for the columns.
213 formats_as_str: str
214 The format for printing the columns.
216 """
217 fmt_map = {'d': ('R', '%14.6f '),
218 'f': ('R', '%14.6f '),
219 'i': ('I', '%8d '),
220 'O': ('S', '%s'),
221 'S': ('S', '%s'),
222 'U': ('S', '%s'),
223 'b': ('L', ' %.1s ')}
225 property_types = []
226 property_ncols = []
227 dtypes = []
228 formats = []
230 # Take each column and setup formatting vectors
231 for column in columns:
232 array = arrays[column]
233 dtype = array.dtype
235 property_type, fmt = fmt_map[dtype.kind]
236 property_types.append(property_type)
238 # Flags for 1d vectors
239 is_1d = len(array.shape) == 1
240 is_1d_as_2d = len(array.shape) == 2 and array.shape[1] == 1
242 # Construct the list of key pairs of column with dtype
243 if (is_1d or is_1d_as_2d):
244 ncol = 1
245 dtypes.append((column, dtype))
246 else:
247 ncol = array.shape[1]
248 for c in range(ncol):
249 dtypes.append((column + str(c), dtype))
251 # Add format and number of columns for this property to output array
252 formats.extend([fmt] * ncol)
253 property_ncols.append(ncol)
255 # Prepare outputs to correct data structure
256 dtype_obj = np.dtype(dtypes)
257 formats_as_str = ''.join(formats) + '\n'
259 return property_ncols, dtype_obj, formats_as_str
262def _write_output(filename, header_lines, data, fmt, order=None):
263 """
264 Helper function to write information to the filename
266 Parameters
267 ----------
268 filename : file|str
269 A file like object or filename
270 header_lines : list[str]
271 Header section of output rmc6f file
272 data: np.array[len(atoms)]
273 Array for the Atoms section to write to file. Has
274 the columns that need to be written on each row
275 fmt: str
276 The format string to use for writing each column in
277 the rows of data.
278 order : list[str]
279 If not None, gives a list of atom types for the order
280 to write out each.
281 """
282 fd = filename
284 # Write header section
285 for line in header_lines:
286 fd.write(f"{line} \n")
288 # If specifying the order, fix the atom id and write to file
289 natoms = data.shape[0]
290 if order is not None:
291 new_id = 0
292 for atype in order:
293 for i in range(natoms):
294 if atype == data[i][1]:
295 new_id += 1
296 data[i][0] = new_id
297 fd.write(fmt % tuple(data[i]))
298 # ...just write rows to file
299 else:
300 for i in range(natoms):
301 fd.write(fmt % tuple(data[i]))
304@reader
305def read_rmc6f(filename, atom_type_map=None):
306 """
307 Parse a RMCProfile rmc6f file into ASE Atoms object
309 Parameters
310 ----------
311 filename : file|str
312 A file like object or filename.
313 atom_type_map: dict{str:str}
314 Map of atom types for conversions. Mainly used if there is
315 an atom type in the file that is not supported by ASE but
316 want to map to a supported atom type instead.
318 Example to map deuterium to hydrogen:
319 atom_type_map = { 'D': 'H' }
321 Returns
322 ------
323 structure : Atoms
324 The Atoms object read in from the rmc6f file.
325 """
327 fd = filename
328 lines = fd.readlines()
330 # Process the rmc6f file to extract positions and cell
331 pos, cell = _read_process_rmc6f_lines_to_pos_and_cell(lines)
333 # create an atom type map if one does not exist from unique atomic symbols
334 if atom_type_map is None:
335 symbols = [atom[0] for atom in pos.values()]
336 atom_type_map = {atype: atype for atype in symbols}
338 # Use map of tmp symbol to actual symbol
339 for atom in pos.values():
340 atom[0] = atom_type_map[atom[0]]
342 # create Atoms from read-in data
343 symbols = []
344 scaled_positions = []
345 spin = None
346 magmoms = []
347 for atom in pos.values():
348 if len(atom) == 4:
349 element, x, y, z = atom
350 else:
351 element, x, y, z, spin = atom
353 element = atom_type_map[element]
354 symbols.append(element)
355 scaled_positions.append([x, y, z])
356 if spin is not None:
357 magmoms.append(spin)
359 atoms = Atoms(scaled_positions=scaled_positions,
360 symbols=symbols,
361 cell=cell,
362 magmoms=magmoms,
363 pbc=[True, True, True])
365 return atoms
368@writer
369def write_rmc6f(filename, atoms, order=None, atom_type_map=None):
370 """
371 Write output in rmc6f format - RMCProfile v6 fractional coordinates
373 Parameters
374 ----------
375 filename : file|str
376 A file like object or filename.
377 atoms: Atoms object
378 The Atoms object to be written.
380 order : list[str]
381 If not None, gives a list of atom types for the order
382 to write out each.
383 atom_type_map: dict{str:str}
384 Map of atom types for conversions. Mainly used if there is
385 an atom type in the Atoms object that is a placeholder
386 for a different atom type. This is used when the atom type
387 is not supported by ASE but is in RMCProfile.
389 Example to map hydrogen to deuterium:
390 atom_type_map = { 'H': 'D' }
391 """
393 # get atom types and how many of each (and set to order if passed)
394 atom_types = set(atoms.symbols)
395 if order is not None:
396 if set(atom_types) != set(order):
397 raise Exception("The order is not a set of the atom types.")
398 atom_types = order
400 atom_count_dict = atoms.symbols.formula.count()
401 natom_types = [str(atom_count_dict[atom_type]) for atom_type in atom_types]
403 # create an atom type map if one does not exist from unique atomic symbols
404 if atom_type_map is None:
405 symbols = set(np.array(atoms.symbols))
406 atom_type_map = {atype: atype for atype in symbols}
408 # HEADER SECTION
410 # get type and number of each atom type
411 atom_types_list = [atom_type_map[atype] for atype in atom_types]
412 atom_types_present = ' '.join(atom_types_list)
413 natom_types_present = ' '.join(natom_types)
415 header_lines = [
416 "(Version 6f format configuration file)",
417 "(Generated by ASE - Atomic Simulation Environment https://wiki.fysik.dtu.dk/ase/ )", # noqa: E501
418 "Metadata date: " + time.strftime('%d-%m-%Y'),
419 f"Number of types of atoms: {len(atom_types)} ",
420 f"Atom types present: {atom_types_present}",
421 f"Number of each atom type: {natom_types_present}",
422 "Number of moves generated: 0",
423 "Number of moves tried: 0",
424 "Number of moves accepted: 0",
425 "Number of prior configuration saves: 0",
426 f"Number of atoms: {len(atoms)}",
427 "Supercell dimensions: 1 1 1"]
429 # magnetic moments
430 if atoms.has('magmoms'):
431 spin_str = "Number of spins: {}"
432 spin_line = spin_str.format(len(atoms.get_initial_magnetic_moments()))
433 header_lines.extend([spin_line])
435 density_str = "Number density (Ang^-3): {}"
436 density_line = density_str.format(len(atoms) / atoms.get_volume())
437 cell_angles = [str(x) for x in atoms.cell.cellpar()]
438 cell_line = "Cell (Ang/deg): " + ' '.join(cell_angles)
439 header_lines.extend([density_line, cell_line])
441 # get lattice vectors from cell lengths and angles
442 # NOTE: RMCProfile uses a different convention for the fractionalization
443 # matrix
445 cell_parameters = atoms.cell.cellpar()
446 cell = Cell.fromcellpar(cell_parameters).T
447 x_line = ' '.join([f'{i:12.6f}' for i in cell[0]])
448 y_line = ' '.join([f'{i:12.6f}' for i in cell[1]])
449 z_line = ' '.join([f'{i:12.6f}' for i in cell[2]])
450 lat_lines = ["Lattice vectors (Ang):", x_line, y_line, z_line]
451 header_lines.extend(lat_lines)
452 header_lines.extend(['Atoms:'])
454 # ATOMS SECTION
456 # create columns of data for atoms (fr_cols)
457 fr_cols = ['id', 'symbols', 'scaled_positions', 'ref_num', 'ref_cell']
458 if atoms.has('magmoms'):
459 fr_cols.extend('magmom')
461 # Collect data to be written out
462 natoms = len(atoms)
464 arrays = {}
465 arrays['id'] = np.array(range(1, natoms + 1, 1), int)
466 arrays['symbols'] = np.array(atoms.symbols)
467 arrays['ref_num'] = np.zeros(natoms, int)
468 arrays['ref_cell'] = np.zeros((natoms, 3), int)
469 arrays['scaled_positions'] = np.array(atoms.get_scaled_positions())
471 # get formatting for writing output based on atom arrays
472 ncols, dtype_obj, fmt = _write_output_column_format(fr_cols, arrays)
474 # Pack fr_cols into record array
475 data = np.zeros(natoms, dtype_obj)
476 for column, ncol in zip(fr_cols, ncols):
477 value = arrays[column]
478 if ncol == 1:
479 data[column] = np.squeeze(value)
480 else:
481 for c in range(ncol):
482 data[column + str(c)] = value[:, c]
484 # Use map of tmp symbol to actual symbol
485 for i in range(natoms):
486 data[i][1] = atom_type_map[data[i][1]]
488 # Write the output
489 _write_output(filename, header_lines, data, fmt, order=order)