Coverage for /builds/kinetik161/ase/ase/io/qbox.py: 96.67%

90 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-12-10 11:04 +0000

1"""This module contains functions to read from QBox output files""" 

2 

3import re 

4import xml.etree.ElementTree as ET 

5 

6from ase import Atom, Atoms 

7from ase.calculators.singlepoint import SinglePointCalculator 

8from ase.utils import reader 

9 

10# Compile regexs for fixing XML 

11re_find_bad_xml = re.compile(r'<(/?)([A-z]+) expectation ([a-z]+)') 

12 

13 

14@reader 

15def read_qbox(f, index=-1): 

16 """Read data from QBox output file 

17 

18 Inputs: 

19 f - str or fileobj, path to file or file object to read from 

20 index - int or slice, which frames to return 

21 

22 Returns: 

23 list of Atoms or atoms, requested frame(s) 

24 """ 

25 

26 # Check whether this is a QB@all output 

27 version = None 

28 for line in f: 

29 if '<release>' in line: 

30 version = ET.fromstring(line) 

31 break 

32 if version is None: 

33 raise Exception('Parse Error: Version not found') 

34 is_qball = 'qb@LL' in version.text or 'qball' in version.text 

35 

36 # Load in atomic species 

37 species = {} 

38 if is_qball: 

39 # Read all of the lines between release and the first call to `run` 

40 species_data = [] 

41 for line in f: 

42 if '<run' in line: 

43 break 

44 species_data.append(line) 

45 species_data = '\n'.join(species_data) 

46 

47 # Read out the species information with regular expressions 

48 symbols = re.findall('symbol_ = ([A-Z][a-z]?)', species_data) 

49 masses = re.findall('mass_ = ([0-9.]+)', species_data) 

50 names = re.findall('name_ = ([a-z]+)', species_data) 

51 numbers = re.findall('atomic_number_ = ([0-9]+)', species_data) 

52 

53 # Compile them into a dictionary 

54 for name, symbol, mass, number in zip(names, symbols, masses, numbers): 

55 spec_data = dict( 

56 symbol=symbol, 

57 mass=float(mass), 

58 number=float(number) 

59 ) 

60 species[name] = spec_data 

61 else: 

62 # Find all species 

63 species_blocks = _find_blocks(f, 'species', '<cmd>run') 

64 

65 for spec in species_blocks: 

66 name = spec.get('name') 

67 spec_data = dict( 

68 symbol=spec.find('symbol').text, 

69 mass=float(spec.find('mass').text), 

70 number=int(spec.find('atomic_number').text)) 

71 species[name] = spec_data 

72 

73 # Find all of the frames 

74 frames = _find_blocks(f, 'iteration', None) 

75 

76 # If index is an int, return one frame 

77 if isinstance(index, int): 

78 return _parse_frame(frames[index], species) 

79 else: 

80 return [_parse_frame(frame, species) for frame in frames[index]] 

81 

82 

83def _find_blocks(fp, tag, stopwords='[qbox]'): 

84 """Find and parse a certain block of the file. 

85 

86 Reads a file sequentially and stops when it either encounters the 

87 end of the file, or until the it encounters a line that contains a 

88 user-defined string *after it has already found at least one 

89 desired block*. Use the stopwords ``[qbox]`` to read until the next 

90 command is issued. 

91 

92 Groups the text between the first line that contains <tag> and the 

93 next line that contains </tag>, inclusively. The function then 

94 parses the XML and returns the Element object. 

95 

96 Inputs: 

97 fp - file-like object, file to be read from 

98 tag - str, tag to search for (e.g., 'iteration'). 

99 `None` if you want to read until the end of the file 

100 stopwords - str, halt parsing if a line containing this string 

101 is encountered 

102 

103 Returns: 

104 list of xml.ElementTree, parsed XML blocks found by this class 

105 """ 

106 

107 start_tag = f'<{tag}' 

108 end_tag = f'</{tag}>' 

109 

110 blocks = [] # Stores all blocks 

111 cur_block = [] # Block being filled 

112 in_block = False # Whether we are currently parsing 

113 for line in fp: 

114 

115 # Check if the block has started 

116 if start_tag in line: 

117 if in_block: 

118 raise Exception('Parsing failed: Encountered nested block') 

119 else: 

120 in_block = True 

121 

122 # Add data to block 

123 if in_block: 

124 cur_block.append(line) 

125 

126 # Check for stopping conditions 

127 if stopwords is not None: 

128 if stopwords in line and len(blocks) > 0: 

129 break 

130 

131 if end_tag in line: 

132 if in_block: 

133 blocks.append(cur_block) 

134 cur_block = [] 

135 in_block = False 

136 else: 

137 raise Exception('Parsing failed: End tag found before start ' 

138 'tag') 

139 

140 # Join strings in a block into a single string 

141 blocks = [''.join(b) for b in blocks] 

142 

143 # Ensure XML compatibility. There are two specific tags in QBall that are 

144 # not valid XML, so we need to run a 

145 blocks = [re_find_bad_xml.sub(r'<\1\2_expectation_\3', b) for b in blocks] 

146 

147 # Parse the blocks 

148 return [ET.fromstring(b) for b in blocks] 

149 

150 

151def _parse_frame(tree, species): 

152 """Parse a certain frame from QBOX output 

153 

154 Inputs: 

155 tree - ElementTree, <iteration> block from output file 

156 species - dict, data about species. Key is name of atom type, 

157 value is data about that type 

158 Return: 

159 Atoms object describing this iteration""" 

160 

161 # Load in data about the system 

162 energy = float(tree.find("etotal").text) 

163 

164 # Load in data about the cell 

165 unitcell = tree.find('atomset').find('unit_cell') 

166 cell = [] 

167 for d in ['a', 'b', 'c']: 

168 cell.append([float(x) for x in unitcell.get(d).split()]) 

169 

170 stress_tree = tree.find('stress_tensor') 

171 if stress_tree is None: 

172 stresses = None 

173 else: 

174 stresses = [float(stress_tree.find(f'sigma_{x}').text) 

175 for x in ['xx', 'yy', 'zz', 'yz', 'xz', 'xy']] 

176 

177 # Create the Atoms object 

178 atoms = Atoms(pbc=True, cell=cell) 

179 

180 # Load in the atom information 

181 forces = [] 

182 for atom in tree.find('atomset').findall('atom'): 

183 # Load data about the atom type 

184 spec = atom.get('species') 

185 symbol = species[spec]['symbol'] 

186 mass = species[spec]['mass'] 

187 

188 # Get data about position / velocity / force 

189 pos = [float(x) for x in atom.find('position').text.split()] 

190 force = [float(x) for x in atom.find('force').text.split()] 

191 momentum = [float(x) * mass 

192 for x in atom.find('velocity').text.split()] 

193 

194 # Create the objects 

195 atom = Atom(symbol=symbol, mass=mass, position=pos, momentum=momentum) 

196 atoms += atom 

197 forces.append(force) 

198 

199 # Create the calculator object that holds energy/forces 

200 calc = SinglePointCalculator(atoms, 

201 energy=energy, forces=forces, stress=stresses) 

202 atoms.calc = calc 

203 

204 return atoms