Coverage for /builds/kinetik161/ase/ase/io/gaussian.py: 92.89%

1import logging

2import re

3import warnings

4from collections.abc import Iterable

5from copy import deepcopy

7import numpy as np

9from ase import Atoms

10from ase.calculators.calculator import Calculator, InputError

11from ase.calculators.gaussian import Gaussian

12from ase.calculators.singlepoint import SinglePointCalculator

13from ase.data import atomic_masses_iupac2016, chemical_symbols

14from ase.io import ParseError

15from ase.io.zmatrix import parse_zmatrix

16from ase.units import Bohr, Hartree

18logger = logging.getLogger(__name__)

20_link0_keys = [

21 'mem',

22 'chk',

23 'oldchk',

24 'schk',

25 'rwf',

26 'oldmatrix',

27 'oldrawmatrix',

28 'int',

29 'd2e',

30 'save',

31 'nosave',

32 'errorsave',

33 'cpu',

34 'nprocshared',

35 'gpucpu',

36 'lindaworkers',

37 'usessh',

38 'ssh',

39 'debuglinda',

40]

43_link0_special = [

44 'kjob',

45 'subst',

46]

49# Certain problematic methods do not provide well-defined potential energy

50# surfaces, because these "composite" methods involve geometry optimization

51# and/or vibrational frequency analysis. In addition, the "energy" calculated

52# by these methods are typically ZPVE corrected and/or temperature dependent

53# free energies.

54_problem_methods = [

55 'cbs-4m', 'cbs-qb3', 'cbs-apno',

56 'g1', 'g2', 'g3', 'g4', 'g2mp2', 'g3mp2', 'g3b3', 'g3mp2b3', 'g4mp4',

57 'w1', 'w1u', 'w1bd', 'w1ro',

58]

61_xc_to_method = dict(

62 pbe='pbepbe',

63 pbe0='pbe1pbe',

64 hse06='hseh1pbe',

65 hse03='ohse2pbe',

66 lda='svwn', # gaussian "knows about" LSDA, but maybe not LDA.

67 tpss='tpsstpss',

68 revtpss='revtpssrevtpss',

69)

71_nuclear_prop_names = ['spin', 'zeff', 'qmom', 'nmagm', 'znuc',

72 'radnuclear', 'iso']

75def _get_molecule_spec(atoms, nuclear_props):

76 ''' Generate the molecule specification section to write

77 to the Gaussian input file, from the Atoms object and dict

78 of nuclear properties'''

79 molecule_spec = []

80 for i, atom in enumerate(atoms):

81 symbol_section = atom.symbol + '('

82 # Check whether any nuclear properties of the atom have been set,

83 # and if so, add them to the symbol section.

84 nuclear_props_set = False

85 for keyword, array in nuclear_props.items():

86 if array is not None and array[i] is not None:

87 string = keyword + '=' + str(array[i]) + ', '

88 symbol_section += string

89 nuclear_props_set = True

91 # Check whether the mass of the atom has been modified,

92 # and if so, add it to the symbol section:

93 mass_set = False

94 symbol = atom.symbol

95 expected_mass = atomic_masses_iupac2016[chemical_symbols.index(

96 symbol)]

97 if expected_mass != atoms[i].mass:

98 mass_set = True

99 string = 'iso' + '=' + str(atoms[i].mass)

100 symbol_section += string

101

102 if nuclear_props_set or mass_set:

103 symbol_section = symbol_section.strip(', ')

104 symbol_section += ')'

105 else:

106 symbol_section = symbol_section.strip('(')

107

108 # Then attach the properties appropriately

109 # this formatting was chosen for backwards compatibility reasons, but

110 # it would probably be better to

111 # 1) Ensure proper spacing between entries with explicit spaces

112 # 2) Use fewer columns for the element

113 # 3) Use 'e' (scientific notation) instead of 'f' for positions

114 molecule_spec.append('{:<10s}{:20.10f}{:20.10f}{:20.10f}'.format(

115 symbol_section, *atom.position))

116

117 # unit cell vectors, in case of periodic boundary conditions

118 for ipbc, tv in zip(atoms.pbc, atoms.cell):

119 if ipbc:

120 molecule_spec.append('TV {:20.10f}{:20.10f}{:20.10f}'.format(*tv))

121

122 molecule_spec.append('')

123

124 return molecule_spec

125

126

127def _format_output_type(output_type):

128 ''' Given a letter: output_type, return

129 a string formatted for a gaussian input file'''

130 # Change output type to P if it has been set to T, because ASE

131 # does not support reading info from 'terse' output files.

132 if output_type is None or output_type == '' or 't' in output_type.lower():

133 output_type = 'P'

134

135 return f'#{output_type}'

136

137

138def _check_problem_methods(method):

139 ''' Check method string for problem methods and warn appropriately'''

140 if method.lower() in _problem_methods:

141 warnings.warn(

142 'The requested method, {}, is a composite method. Composite '

143 'methods do not have well-defined potential energy surfaces, '

144 'so the energies, forces, and other properties returned by '

145 'ASE may not be meaningful, or they may correspond to a '

146 'different geometry than the one provided. '

147 'Please use these methods with caution.'.format(method)

148 )

149

150

151def _pop_link0_params(params):

152 '''Takes the params dict and returns a dict with the link0 keywords

153 removed, and a list containing the link0 keywords and options

154 to be written to the gaussian input file'''

155 params = deepcopy(params)

156 out = []

157 # Remove keywords from params, so they are not set again later in

158 # route section

159 for key in _link0_keys:

160 if key not in params:

161 continue

162 val = params.pop(key)

163 if not val or (isinstance(val, str) and key.lower() == val.lower()):

164 out.append(f'%{key}')

165 else:

166 out.append(f'%{key}={val}')

167

168 # These link0 keywords have a slightly different syntax

169 for key in _link0_special:

170 if key not in params:

171 continue

172 val = params.pop(key)

173 if not isinstance(val, str) and isinstance(val, Iterable):

174 val = ' '.join(val)

175 out.append(f'%{key} L{val}')

176

177 return params, out

178

179

180def _format_method_basis(output_type, method, basis, fitting_basis):

181 output_string = ""

182 if basis and method and fitting_basis:

183 output_string = '{} {}/{}/{} ! ASE formatted method and basis'.format(

184 output_type, method, basis, fitting_basis)

185 elif basis and method:

186 output_string = '{} {}/{} ! ASE formatted method and basis'.format(

187 output_type, method, basis)

188 else:

189 output_string = f'{output_type}'

190 for value in [method, basis]:

191 if value is not None:

192 output_string += f' {value}'

193 return output_string

194

195

196def _format_route_params(params):

197 '''Get keywords and values from the params dictionary and return

198 as a list of lines to add to the gaussian input file'''

199 out = []

200 for key, val in params.items():

201 # assume bare keyword if val is falsey, i.e. '', None, False, etc.

202 # also, for backwards compatibility: assume bare keyword if key and

203 # val are the same

204 if not val or (isinstance(val, str) and key.lower() == val.lower()):

205 out.append(key)

206 elif not isinstance(val, str) and isinstance(val, Iterable):

207 out.append('{}({})'.format(key, ','.join(val)))

208 else:

209 out.append(f'{key}({val})')

210 return out

211

212

213def _format_addsec(addsec):

214 '''Format addsec string as a list of lines to be added to the gaussian

215 input file.'''

216 out = []

217 if addsec is not None:

218 out.append('')

219 if isinstance(addsec, str):

220 out.append(addsec)

221 elif isinstance(addsec, Iterable):

222 out += list(addsec)

223 return out

224

225

226def _format_basis_set(basis, basisfile, basis_set):

227 '''Format either: the basis set filename (basisfile), the basis set file

228 contents (from reading basisfile), or the basis_set text as a list of

229 strings to be added to the gaussian input file.'''

230 out = []

231 # if basis='gen', set basisfile. Either give a path to a basisfile, or

232 # read in the provided file and paste it verbatim

233 if basisfile is not None:

234 if basisfile[0] == '@':

235 out.append(basisfile)

236 else:

237 with open(basisfile) as fd:

238 out.append(fd.read())

239 elif basis_set is not None:

240 out.append(basis_set)

241 else:

242 if basis is not None and basis.lower() == 'gen':

243 raise InputError('Please set basisfile or basis_set')

244 return out

245

246

247def write_gaussian_in(fd, atoms, properties=['energy'],

248 method=None, basis=None, fitting_basis=None,

249 output_type='P', basisfile=None, basis_set=None,

250 xc=None, charge=None, mult=None, extra=None,

251 ioplist=None, addsec=None, spinlist=None,

252 zefflist=None, qmomlist=None, nmagmlist=None,

253 znuclist=None, radnuclearlist=None,

254 **params):

255 '''

256 Generates a Gaussian input file

257

258 Parameters

259 -----------

260 fd: file-like

261 where the Gaussian input file will be written

262 atoms: Atoms

263 Structure to write to the input file

264 properties: list

265 Properties to calculate

266 method: str

267 Level of theory to use, e.g. ``hf``, ``ccsd``, ``mp2``, or ``b3lyp``.

268 Overrides ``xc`` (see below).

269 xc: str

270 Level of theory to use. Translates several XC functionals from

271 their common name (e.g. ``PBE``) to their internal Gaussian name

272 (e.g. ``PBEPBE``).

273 basis: str

274 The basis set to use. If not provided, no basis set will be requested,

275 which usually results in ``STO-3G``. Maybe omitted if basisfile is set

276 (see below).

277 fitting_basis: str

278 The name of the fitting basis set to use.

279 output_type: str

280 Level of output to record in the Gaussian

281 output file - this may be ``N``- normal or ``P`` -

282 additional.

283 basisfile: str

284 The name of the basis file to use. If a value is provided, basis may

285 be omitted (it will be automatically set to 'gen')

286 basis_set: str

287 The basis set definition to use. This is an alternative

288 to basisfile, and would be the same as the contents

289 of such a file.

290 charge: int

291 The system charge. If not provided, it will be automatically

292 determined from the ``Atoms`` object’s initial_charges.

293 mult: int

294 The system multiplicity (``spin + 1``). If not provided, it will be

295 automatically determined from the ``Atoms`` object’s

296 ``initial_magnetic_moments``.

297 extra: str

298 Extra lines to be included in the route section verbatim.

299 It should not be necessary to use this, but it is included for

300 backwards compatibility.

301 ioplist: list

302 A collection of IOPs definitions to be included in the route line.

303 addsec: str

304 Text to be added after the molecular geometry specification, e.g. for

305 defining masses with ``freq=ReadIso``.

306 spinlist: list

307 A list of nuclear spins to be added into the nuclear

308 propeties section of the molecule specification.

309 zefflist: list

310 A list of effective charges to be added into the nuclear

311 propeties section of the molecule specification.

312 qmomlist: list

313 A list of nuclear quadropole moments to be added into

314 the nuclear propeties section of the molecule

315 specification.

316 nmagmlist: list

317 A list of nuclear magnetic moments to be added into

318 the nuclear propeties section of the molecule

319 specification.

320 znuclist: list

321 A list of nuclear charges to be added into the nuclear

322 propeties section of the molecule specification.

323 radnuclearlist: list

324 A list of nuclear radii to be added into the nuclear

325 propeties section of the molecule specification.

326 params: dict

327 Contains any extra keywords and values that will be included in either

328 the link0 section or route section of the gaussian input file.

329 To be included in the link0 section, the keyword must be one of the

330 following: ``mem``, ``chk``, ``oldchk``, ``schk``, ``rwf``,

331 ``oldmatrix``, ``oldrawmatrix``, ``int``, ``d2e``, ``save``,

332 ``nosave``, ``errorsave``, ``cpu``, ``nprocshared``, ``gpucpu``,

333 ``lindaworkers``, ``usessh``, ``ssh``, ``debuglinda``.

334 Any other keywords will be placed (along with their values) in the

335 route section.

336 '''

337

338 params = deepcopy(params)

339

340 if properties is None:

341 properties = ['energy']

342

343 output_type = _format_output_type(output_type)

344

345 # basis can be omitted if basisfile is provided

346 if basis is None:

347 if basisfile is not None or basis_set is not None:

348 basis = 'gen'

349

350 # determine method from xc if it is provided

351 if method is None:

352 if xc is not None:

353 method = _xc_to_method.get(xc.lower(), xc)

354

355 # If the user requests a problematic method, rather than raising an error

356 # or proceeding blindly, give the user a warning that the results parsed

357 # by ASE may not be meaningful.

358 if method is not None:

359 _check_problem_methods(method)

360

361 # determine charge from initial charges if not passed explicitly

362 if charge is None:

363 charge = atoms.get_initial_charges().sum()

364

365 # determine multiplicity from initial magnetic moments

366 # if not passed explicitly

367 if mult is None:

368 mult = atoms.get_initial_magnetic_moments().sum() + 1

369

370 # set up link0 arguments

371 out = []

372 params, link0_list = _pop_link0_params(params)

373 out.extend(link0_list)

374

375 # begin route line

376 # note: unlike in old calculator, each route keyword is put on its own

377 # line.

378 out.append(_format_method_basis(output_type, method, basis, fitting_basis))

379

380 # If the calculator's parameter dictionary contains an isolist, we ignore

381 # this - it is up to the user to attach this info as the atoms' masses

382 # if they wish for it to be used:

383 params.pop('isolist', None)

384

385 # Any params left will belong in the route section of the file:

386 out.extend(_format_route_params(params))

387

388 if ioplist is not None:

389 out.append('IOP(' + ', '.join(ioplist) + ')')

390

391 # raw list of explicit keywords for backwards compatibility

392 if extra is not None:

393 out.append(extra)

394

395 # Add 'force' iff the user requested forces, since Gaussian crashes when

396 # 'force' is combined with certain other keywords such as opt and irc.

397 if 'forces' in properties and 'force' not in params:

398 out.append('force')

399

400 # header, charge, and mult

401 out += ['', 'Gaussian input prepared by ASE', '',

402 f'{charge:.0f} {mult:.0f}']

403

404 # make dict of nuclear properties:

405 nuclear_props = {'spin': spinlist, 'zeff': zefflist, 'qmom': qmomlist,

406 'nmagm': nmagmlist, 'znuc': znuclist,

407 'radnuclear': radnuclearlist}

408 nuclear_props = {k: v for k, v in nuclear_props.items() if v is not None}

409

410 # atomic positions and nuclear properties:

411 molecule_spec = _get_molecule_spec(atoms, nuclear_props)

412 for line in molecule_spec:

413 out.append(line)

414

415 out.extend(_format_basis_set(basis, basisfile, basis_set))

416

417 out.extend(_format_addsec(addsec))

418

419 out += ['', '']

420 fd.write('\n'.join(out))

421

422

423# Regexp for reading an input file:

424

425_re_link0 = re.compile(r'^\s*%([^\=\)$!]+)=?([^\=$\(!]+)?(!.+)?')

426# Link0 lines are in the format:

427# '% keyword = value' or '% keyword'

428# (with or without whitespaces)

429

430_re_output_type = re.compile(r'^\s*#\s*([NPTnpt]?)\s*')

431# The start of the route section begins with a '#', and then may

432# be followed by the desired level of output in the output file: P, N or T.

433

434_re_method_basis = re.compile(

435 r"\s*([\w-]+)\s*\/([^/=!]+)([\/]([^!]+))?\s*(!.+)?")

436# Matches method, basis and optional fitting basis in the format:

437# method/basis/fitting_basis ! comment

438# They will appear in this format if the Gaussian file has been generated

439# by ASE using a calculator with the basis and method keywords set.

440

441_re_chgmult = re.compile(r'^\s*[+-]?\d+(?:,\s*|\s+)[+-]?\d+\s*$')

442# This is a bit more complex of a regex than we typically want, but it

443# can be difficult to determine whether a line contains the charge and

444# multiplicity, rather than just another route keyword. By making sure

445# that the line contains exactly two *integers*, separated by either

446# a comma (and possibly whitespace) or some amount of whitespace, we

447# can be more confident that we've actually found the charge and multiplicity.

448

449_re_nuclear_props = re.compile(r'$([^$]+)\)')

450# Matches the nuclear properties, which are contained in parantheses.

451

452# The following methods are used in GaussianConfiguration's

453# parse_gaussian_input method:

454

455

456def _get_link0_param(link0_match):

457 '''Gets link0 keyword and option from a re.Match and returns them

458 in a dictionary format'''

459 value = link0_match.group(2)

460 if value is not None:

461 value = value.strip()

462 else:

463 value = ''

464 return {link0_match.group(1).lower().strip(): value.lower()}

465

466

467def _get_all_link0_params(link0_section):

468 ''' Given a string link0_section which contains the link0

469 section of a gaussian input file, returns a dictionary of

470 keywords and values'''

471 parameters = {}

472 for line in link0_section:

473 link0_match = _re_link0.match(line)

474 link0_param = _get_link0_param(link0_match)

475 if link0_param is not None:

476 parameters.update(link0_param)

477 return parameters

478

479

480def _convert_to_symbol(string):

481 '''Converts an input string into a format

482 that can be input to the 'symbol' parameter of an

483 ASE Atom object (can be a chemical symbol (str)

484 or an atomic number (int)).

485 This is achieved by either stripping any

486 integers from the string, or converting a string

487 containing an atomic number to integer type'''

488 symbol = _validate_symbol_string(string)

489 if symbol.isnumeric():

490 atomic_number = int(symbol)

491 symbol = chemical_symbols[atomic_number]

492 else:

493 match = re.match(r'([A-Za-z]+)', symbol)

494 symbol = match.group(1)

495 return symbol

496

497

498def _validate_symbol_string(string):

499 if "-" in string:

500 raise ParseError("ERROR: Could not read the Gaussian input file, as"

501 " molecule specifications for molecular mechanics "

502 "calculations are not supported.")

503 return string

504

505

506def _get_key_value_pairs(line):

507 '''Read line of a gaussian input file with keywords and options

508 separated according to the rules of the route section.

509

510 Parameters

511 ----------

512 line (string)

513 A line of a gaussian input file.

514

515 Returns

516 ---------

517 params (dict)

518 Contains the keywords and options found in the line.

519 '''

520 params = {}

521 line = line.strip(' #')

522 line = line.split('!')[0] # removes any comments

523 # First, get the keywords and options sepatated with

524 # parantheses:

525 match_iterator = re.finditer(r'$([^$]+)\)', line)

526 index_ranges = []

527 for match in match_iterator:

528 index_range = [match.start(0), match.end(0)]

529 options = match.group(1)

530 # keyword is last word in previous substring:

531 keyword_string = line[:match.start(0)]

532 keyword_match_iter = [k for k in re.finditer(

533 r'[^\,/\s]+', keyword_string) if k.group() != '=']

534 keyword = keyword_match_iter[-1].group().strip(' =')

535 index_range[0] = keyword_match_iter[-1].start()

536 params.update({keyword.lower(): options.lower()})

537 index_ranges.append(index_range)

538

539 # remove from the line the keywords and options that we have saved:

540 index_ranges.reverse()

541 for index_range in index_ranges:

542 start = index_range[0]

543 stop = index_range[1]

544 line = line[0: start:] + line[stop + 1::]

545

546 # Next, get the keywords and options separated with

547 # an equals sign, and those without an equals sign

548 # must be keywords without options:

549

550 # remove any whitespaces around '=':

551 line = re.sub(r'\s*=\s*', '=', line)

552 line = [x for x in re.split(r'[\s,\/]', line) if x != '']

553

554 for s in line:

555 if '=' in s:

556 s = s.split('=')

557 keyword = s.pop(0)

558 options = s.pop(0)

559 params.update({keyword.lower(): options.lower()})

560 else:

561 if len(s) > 0:

562 params.update({s.lower(): None})

563

564 return params

565

566

567def _get_route_params(line):

568 '''Reads keywords and values from a line in

569 a Gaussian input file's route section,

570 and returns them as a dictionary'''

571 method_basis_match = _re_method_basis.match(line)

572 if method_basis_match:

573 params = {}

574 ase_gen_comment = '! ASE formatted method and basis'

575 if method_basis_match.group(5) == ase_gen_comment:

576 params['method'] = method_basis_match.group(1).strip().lower()

577 params['basis'] = method_basis_match.group(2).strip().lower()

578 if method_basis_match.group(4):

579 params['fitting_basis'] = method_basis_match.group(

580 4).strip().lower()

581 return params

582

583 return _get_key_value_pairs(line)

584

585

586def _get_all_route_params(route_section):

587 ''' Given a string: route_section which contains the route

588 section of a gaussian input file, returns a dictionary of

589 keywords and values'''

590 parameters = {}

591

592 for line in route_section:

593 output_type_match = _re_output_type.match(line)

594 if not parameters.get('output_type') and output_type_match:

595 line = line.strip(output_type_match.group(0))

596 parameters.update(

597 {'output_type': output_type_match.group(1).lower()})

598 # read route section

599 route_params = _get_route_params(line)

600 if route_params is not None:

601 parameters.update(route_params)

602 return parameters

603

604

605def _get_charge_mult(chgmult_section):

606 '''return a dict with the charge and multiplicity from

607 a list chgmult_section that contains the charge and multiplicity

608 line, read from a gaussian input file'''

609 chgmult_match = _re_chgmult.match(str(chgmult_section))

610 try:

611 chgmult = chgmult_match.group(0).split()

612 return {'charge': int(chgmult[0]), 'mult': int(chgmult[1])}

613 except (IndexError, AttributeError):

614 raise ParseError("ERROR: Could not read the charge and multiplicity "

615 "from the Gaussian input file. These must be 2 "

616 "integers separated with whitespace or a comma.")

617

618

619def _get_nuclear_props(line):

620 ''' Reads any info in parantheses in the line and returns

621 a dictionary of the nuclear properties.'''

622 nuclear_props_match = _re_nuclear_props.search(line)

623 nuclear_props = {}

624 if nuclear_props_match:

625 nuclear_props = _get_key_value_pairs(nuclear_props_match.group(1))

626 updated_nuclear_props = {}

627 for key, value in nuclear_props.items():

628 if value.isnumeric():

629 value = int(value)

630 else:

631 value = float(value)

632 if key not in _nuclear_prop_names:

633 if "fragment" in key:

634 warnings.warn("Fragments are not "

635 "currently supported.")

636 warnings.warn("The following nuclear properties "

637 "could not be saved: {}".format(

638 {key: value}))

639 else:

640 updated_nuclear_props[key] = value

641 nuclear_props = updated_nuclear_props

642

643 for k in _nuclear_prop_names:

644 if k not in nuclear_props:

645 nuclear_props[k] = None

646

647 return nuclear_props

648

649

650def _get_atoms_info(line):

651 '''Returns the symbol and position of an atom from a line

652 in the molecule specification section'''

653 nuclear_props_match = _re_nuclear_props.search(line)

654 if nuclear_props_match:

655 line = line.replace(nuclear_props_match.group(0), '')

656 tokens = line.split()

657 symbol = _convert_to_symbol(tokens[0])

658 pos = list(tokens[1:])

659

660 return symbol, pos

661

662

663def _get_cartesian_atom_coords(symbol, pos):

664 '''Returns the coordinates: pos as a list of floats if they

665 are cartesian, and not in z-matrix format'''

666 if len(pos) < 3 or (pos[0] == '0' and symbol != 'TV'):

667 # In this case, we have a z-matrix definition, so

668 # no cartesian coords.

669 return

670 elif len(pos) > 3:

671 raise ParseError("ERROR: Gaussian input file could "

672 "not be read as freeze codes are not"

673 " supported. If using cartesian "

674 "coordinates, these must be "

675 "given as 3 numbers separated "

676 "by whitespace.")

677 else:

678 try:

679 return list(map(float, pos))

680 except ValueError:

681 raise ParseError(

682 "ERROR: Molecule specification in"

683 "Gaussian input file could not be read")

684

685

686def _get_zmatrix_line(line):

687 ''' Converts line into the format needed for it to

688 be added to the z-matrix contents '''

689 line_list = line.split()

690 if len(line_list) == 8 and line_list[7] == '1':

691 raise ParseError(

692 "ERROR: Could not read the Gaussian input file"

693 ", as the alternative Z-matrix format using "

694 "two bond angles instead of a bond angle and "

695 "a dihedral angle is not supported.")

696 return line.strip() + '\n'

697

698

699def _read_zmatrix(zmatrix_contents, zmatrix_vars=None):

700 ''' Reads a z-matrix (zmatrix_contents) using its list of variables

701 (zmatrix_vars), and returns atom positions and symbols '''

702 try:

703 atoms = parse_zmatrix(zmatrix_contents, defs=zmatrix_vars)

704 except (ValueError, AssertionError) as e:

705 raise ParseError("Failed to read Z-matrix from "

706 "Gaussian input file: ", e)

707 except KeyError as e:

708 raise ParseError("Failed to read Z-matrix from "

709 "Gaussian input file, as symbol: {}"

710 "could not be recognised. Please make "

711 "sure you use element symbols, not "

712 "atomic numbers in the element labels.".format(e))

713 positions = atoms.positions

714 symbols = atoms.get_chemical_symbols()

715 return positions, symbols

716

717

718def _get_nuclear_props_for_all_atoms(nuclear_props):

719 ''' Returns the nuclear properties for all atoms as a dictionary,

720 in the format needed for it to be added to the parameters dictionary.'''

721 params = {k + 'list': [] for k in _nuclear_prop_names}

722 for dictionary in nuclear_props:

723 for key, value in dictionary.items():

724 params[key + 'list'].append(value)

725

726 for key, array in params.items():

727 values_set = False

728 for value in array:

729 if value is not None:

730 values_set = True

731 if not values_set:

732 params[key] = None

733 return params

734

735

736def _get_atoms_from_molspec(molspec_section):

737 ''' Takes a string: molspec_section which contains the molecule

738 specification section of a gaussian input file, and returns an atoms

739 object that represents this.'''

740 # These will contain info that will be attached to the Atoms object:

741 symbols = []

742 positions = []

743 pbc = np.zeros(3, dtype=bool)

744 cell = np.zeros((3, 3))

745 npbc = 0

746

747 # Will contain a dictionary of nuclear properties for each atom,

748 # that will later be saved to the parameters dict:

749 nuclear_props = []

750

751 # Info relating to the z-matrix definition (if set)

752 zmatrix_type = False

753 zmatrix_contents = ""

754 zmatrix_var_section = False

755 zmatrix_vars = ""

756

757 for line in molspec_section:

758 # Remove any comments and replace '/' and ',' with whitespace,

759 # as these are equivalent:

760 line = line.split('!')[0].replace('/', ' ').replace(',', ' ')

761 if (line.split()):

762 if zmatrix_type:

763 # Save any variables set when defining the z-matrix:

764 if zmatrix_var_section:

765 zmatrix_vars += line.strip() + '\n'

766 continue

767 elif 'variables' in line.lower():

768 zmatrix_var_section = True

769 continue

770 elif 'constants' in line.lower():

771 zmatrix_var_section = True

772 warnings.warn("Constants in the optimisation are "

773 "not currently supported. Instead "

774 "setting constants as variables.")

775 continue

776

777 symbol, pos = _get_atoms_info(line)

778 current_nuclear_props = _get_nuclear_props(line)

779

780 if not zmatrix_type:

781 pos = _get_cartesian_atom_coords(symbol, pos)

782 if pos is None:

783 zmatrix_type = True

784

785 if symbol.upper() == 'TV' and pos is not None:

786 pbc[npbc] = True

787 cell[npbc] = pos

788 npbc += 1

789 else:

790 nuclear_props.append(current_nuclear_props)

791 if not zmatrix_type:

792 symbols.append(symbol)

793 positions.append(pos)

794

795 if zmatrix_type:

796 zmatrix_contents += _get_zmatrix_line(line)

797

798 # Now that we are past the molecule spec. section, we can read

799 # the entire z-matrix (if set):

800 if len(positions) == 0:

801 if zmatrix_type:

802 if zmatrix_vars == '':

803 zmatrix_vars = None

804 positions, symbols = _read_zmatrix(

805 zmatrix_contents, zmatrix_vars)

806

807 try:

808 atoms = Atoms(symbols, positions, pbc=pbc, cell=cell)

809 except (IndexError, ValueError, KeyError) as e:

810 raise ParseError("ERROR: Could not read the Gaussian input file, "

811 "due to a problem with the molecule "

812 "specification: {}".format(e))

813

814 nuclear_props = _get_nuclear_props_for_all_atoms(nuclear_props)

815

816 return atoms, nuclear_props

817

818

819def _get_readiso_param(parameters):

820 ''' Returns a tuple containing the frequency

821 keyword and its options, if the frequency keyword is

822 present in parameters and ReadIso is one of its options'''

823 freq_options = parameters.get('freq', None)

824 if freq_options:

825 freq_name = 'freq'

826 else:

827 freq_options = parameters.get('frequency', None)

828 freq_name = 'frequency'

829 if freq_options is not None:

830 if ('readiso' or 'readisotopes') in freq_options:

831 return freq_name, freq_options

832 return None, None

833

834

835def _get_readiso_info(line, parameters):

836 '''Reads the temperature, pressure and scale from the first line

837 of a ReadIso section of a Gaussian input file. Returns these in

838 a dictionary.'''

839 readiso_params = {}

840 if _get_readiso_param(parameters)[0] is not None:

841 # when count_iso is 0 we are in the line where

842 # temperature, pressure, [scale] is saved

843 line = line.replace(

844 '[', '').replace(']', '')

845 tokens = line.strip().split()

846 try:

847 readiso_params['temperature'] = tokens[0]

848 readiso_params['pressure'] = tokens[1]

849 readiso_params['scale'] = tokens[2]

850 except IndexError:

851 pass

852 if readiso_params != {}:

853

854 return readiso_params

855

856

857def _delete_readiso_param(parameters):

858 '''Removes the readiso parameter from the parameters dict'''

859 parameters = deepcopy(parameters)

860 freq_name, freq_options = _get_readiso_param(parameters)

861 if freq_name is not None:

862 if 'readisotopes' in freq_options:

863 iso_name = 'readisotopes'

864 else:

865 iso_name = 'readiso'

866 freq_options = [v.group() for v in re.finditer(

867 r'[^\,/\s]+', freq_options)]

868 freq_options.remove(iso_name)

869 new_freq_options = ''

870 for v in freq_options:

871 new_freq_options += v + ' '

872 if new_freq_options == '':

873 new_freq_options = None

874 else:

875 new_freq_options = new_freq_options.strip()

876 parameters[freq_name] = new_freq_options

877 return parameters

878

879

880def _update_readiso_params(parameters, symbols):

881 ''' Deletes the ReadIso option from the route section as we

882 write out the masses in the nuclear properties section

883 instead of the ReadIso section.

884 Ensures the masses array is the same length as the

885 symbols array. This is necessary due to the way the

886 ReadIso section is defined:

887 The mass of each atom is listed on a separate line, in

888 order of appearance in the molecule spec. A blank line

889 indicates not to modify the mass for that atom.

890 But you do not have to leave blank lines equal to the

891 remaining atoms after you finsihed setting masses.

892 E.g. if you had 10 masses and only want to set the mass

893 for the first atom, you don't have to leave 9 blank lines

894 after it.

895 '''

896 parameters = _delete_readiso_param(parameters)

897 if parameters.get('isolist') is not None:

898 if len(parameters['isolist']) < len(symbols):

899 for i in range(0, len(symbols) - len(parameters['isolist'])):

900 parameters['isolist'].append(None)

901 if all(m is None for m in parameters['isolist']):

902 parameters['isolist'] = None

903

904 return parameters

905

906

907def _validate_params(parameters):

908 '''Checks whether all of the required parameters exist in the

909 parameters dict and whether it contains any unsupported settings

910 '''

911 # Check for unsupported settings

912 unsupported_settings = {

913 "z-matrix", "modredun", "modredundant", "addredundant", "addredun",

914 "readopt", "rdopt"}

915

916 for s in unsupported_settings:

917 for v in parameters.values():

918 if v is not None and s in str(v):

919 raise ParseError(

920 "ERROR: Could not read the Gaussian input file"

921 ", as the option: {} is currently unsupported."

922 .format(s))

923

924 for k in list(parameters.keys()):

925 if "popt" in k:

926 parameters["opt"] = parameters.pop(k)

927 warnings.warn("The option {} is currently unsupported. "

928 "This has been replaced with {}."

929 .format("POpt", "opt"))

930 return

931

932

933def _get_extra_section_params(extra_section, parameters, atoms):

934 ''' Takes a list of strings: extra_section, which contains

935 the 'extra' lines in a gaussian input file. Also takes the parameters

936 that have been read so far, and the atoms that have been read from the

937 file.

938 Returns an updated version of the parameters dict, containing details from

939 this extra section. This may include the basis set definition or filename,

940 and/or the readiso section.'''

941

942 new_parameters = deepcopy(parameters)

943 # Will store the basis set definition (if set):

944 basis_set = ""

945 # This will indicate whether we have a readiso section:

946 readiso = _get_readiso_param(new_parameters)[0]

947 # This will indicate which line of the readiso section we are reading:

948 count_iso = 0

949 readiso_masses = []

950

951 for line in extra_section:

952 if line.split():

953 # check that the line isn't just a comment

954 if line.split()[0] == '!':

955 continue

956 line = line.strip().split('!')[0]

957

958 if len(line) > 0 and line[0] == '@':

959 # If the name of a basis file is specified, this line

960 # begins with a '@'

961 new_parameters['basisfile'] = line

962 elif readiso and count_iso < len(atoms.symbols) + 1:

963 # The ReadIso section has 1 more line than the number of

964 # symbols

965 if count_iso == 0 and line != '\n':

966 # The first line in the readiso section contains the

967 # temperature, pressure, scale. Here we save this:

968 readiso_info = _get_readiso_info(line, new_parameters)

969 if readiso_info is not None:

970 new_parameters.update(readiso_info)

971 # If the atom masses were set in the nuclear properties

972 # section, they will be overwritten by the ReadIso

973 # section

974 readiso_masses = []

975 count_iso += 1

976 elif count_iso > 0:

977 # The remaining lines in the ReadIso section are

978 # the masses of the atoms

979 try:

980 readiso_masses.append(float(line))

981 except ValueError:

982 readiso_masses.append(None)

983 count_iso += 1

984 else:

985 # If the rest of the file is not the ReadIso section,

986 # then it must be the definition of the basis set.

987 if new_parameters.get('basis', '') == 'gen' \

988 or 'gen' in new_parameters:

989 if line.strip() != '':

990 basis_set += line + '\n'

991

992 if readiso:

993 new_parameters['isolist'] = readiso_masses

994 new_parameters = _update_readiso_params(new_parameters, atoms.symbols)

995

996 # Saves the basis set definition to the parameters array if

997 # it has been set:

998 if basis_set != '':

999 new_parameters['basis_set'] = basis_set

1000 new_parameters['basis'] = 'gen'

1001 new_parameters.pop('gen', None)

1002

1003 return new_parameters

1004

1005

1006def _get_gaussian_in_sections(fd):

1007 ''' Reads a gaussian input file and returns

1008 a dictionary of the sections of the file - each dictionary

1009 value is a list of strings - each string is a line in that

1010 section. '''

1011 # These variables indicate which section of the

1012 # input file is currently being read:

1013 route_section = False

1014 atoms_section = False

1015 atoms_saved = False

1016 # Here we will store the sections of the file in a dictionary,

1017 # as lists of strings

1018 gaussian_sections = {'link0': [], 'route': [],

1019 'charge_mult': [], 'mol_spec': [], 'extra': []}

1020

1021 for line in fd:

1022 line = line.strip(' ')

1023 link0_match = _re_link0.match(line)

1024 output_type_match = _re_output_type.match(line)

1025 chgmult_match = _re_chgmult.match(line)

1026 if link0_match:

1027 gaussian_sections['link0'].append(line)

1028 # The first blank line appears at the end of the route section

1029 # and a blank line appears at the end of the atoms section:

1030 elif line == '\n' and (route_section or atoms_section):

1031 route_section = False

1032 atoms_section = False

1033 elif output_type_match or route_section:

1034 route_section = True

1035 gaussian_sections['route'].append(line)

1036 elif chgmult_match:

1037 gaussian_sections['charge_mult'] = line

1038 # After the charge and multiplicty have been set, the

1039 # molecule specification section of the input file begins:

1040 atoms_section = True

1041 elif atoms_section:

1042 gaussian_sections['mol_spec'].append(line)

1043 atoms_saved = True

1044 elif atoms_saved:

1045 # Next we read the other sections below the molecule spec.

1046 # This may include the ReadIso section and the basis set

1047 # definition or filename

1048 gaussian_sections['extra'].append(line)

1049

1050 return gaussian_sections

1051

1052

1053class GaussianConfiguration:

1054

1055 def __init__(self, atoms, parameters):

1056 self.atoms = atoms.copy()

1057 self.parameters = deepcopy(parameters)

1058

1059 def get_atoms(self):

1060 return self.atoms

1061

1062 def get_parameters(self):

1063 return self.parameters

1064

1065 def get_calculator(self):

1066 # Explicitly set parameters that must for security reasons not be

1067 # taken from file:

1068 calc = Gaussian(atoms=self.atoms, command=None, restart=None,

1069 ignore_bad_restart_file=Calculator._deprecated,

1070 label='Gaussian', directory='.', **self.parameters)

1071 return calc

1072

1073 @ staticmethod

1074 def parse_gaussian_input(fd):

1075 '''Reads a gaussian input file into an atoms object and

1076 parameters dictionary.

1077

1078 Parameters

1079 ----------

1080 fd: file-like

1081 Contains the contents of a gaussian input file

1082

1083 Returns

1084 ---------

1085 GaussianConfiguration

1086 Contains an atoms object created using the structural

1087 information from the input file.

1088 Contains a parameters dictionary, which stores any

1089 keywords and options found in the link-0 and route

1090 sections of the input file.

1091 '''

1092 # The parameters dict to attach to the calculator

1093 parameters = {}

1094

1095 file_sections = _get_gaussian_in_sections(fd)

1096

1097 # Update the parameters dictionary with the keywords and values

1098 # from each section of the input file:

1099 parameters.update(_get_all_link0_params(file_sections['link0']))

1100 parameters.update(_get_all_route_params(file_sections['route']))

1101 parameters.update(_get_charge_mult(file_sections['charge_mult']))

1102 atoms, nuclear_props = _get_atoms_from_molspec(

1103 file_sections['mol_spec'])

1104 parameters.update(nuclear_props)

1105 parameters.update(_get_extra_section_params(

1106 file_sections['extra'], parameters, atoms))

1107

1108 _validate_params(parameters)

1109

1110 return GaussianConfiguration(atoms, parameters)

1111

1112

1113def read_gaussian_in(fd, attach_calculator=False):

1114 '''

1115 Reads a gaussian input file and returns an Atoms object.

1116

1117 Parameters

1118 ----------

1119 fd: file-like

1120 Contains the contents of a gaussian input file

1121

1122 attach_calculator: bool

1123 When set to ``True``, a Gaussian calculator will be

1124 attached to the Atoms object which is returned.

1125 This will mean that additional information is read

1126 from the input file (see below for details).

1127

1128 Returns

1129 ----------

1130 Atoms

1131 An Atoms object containing the following information that has been

1132 read from the input file: symbols, positions, cell.

1133

1134 Notes

1135 ----------

1136 Gaussian input files can be read where the atoms' locations are set with

1137 cartesian coordinates or as a z-matrix. Variables may be used in the

1138 z-matrix definition, but currently setting constants for constraining

1139 a geometry optimisation is not supported. Note that the `alternative`

1140 z-matrix definition where two bond angles are set instead of a bond angle

1141 and a dihedral angle is not currently supported.

1142

1143 If the parameter ``attach_calculator`` is set to ``True``, then the Atoms

1144 object is returned with a Gaussian calculator attached.

1145

1146 This Gaussian calculator will contain a parameters dictionary which will

1147 contain the Link 0 commands and the options and keywords set in the route

1148 section of the Gaussian input file, as well as:

1149

1150 • The charge and multiplicity

1151

1152 • The selected level of output

1153

1154 • The method, basis set and (optional) fitting basis set.

1155

1156 • Basis file name/definition if set

1157

1158 • Nuclear properties

1159

1160 • Masses set in the nuclear properties section or in the ReadIsotopes

1161 section (if ``freq=ReadIso`` is set). These will be saved in an array

1162 with keyword ``isolist``, in the parameters dictionary. For these to

1163 actually be used in calculations and/or written out to input files,

1164 the Atoms object's masses must be manually updated to these values

1165 (this is not done automatically)

1166

1167 If the Gaussian input file has been generated by ASE's

1168 ``write_gaussian_in`` method, then the basis set, method and fitting

1169 basis will be saved under the ``basis``, ``method`` and ``fitting_basis``

1170 keywords in the parameters dictionary. Otherwise they are saved as

1171 keywords in the parameters dict.

1172

1173 Currently this does not support reading of any other sections which would

1174 be found below the molecule specification that have not been mentioned

1175 here (such as the ModRedundant section).

1176 '''

1177 gaussian_input = GaussianConfiguration.parse_gaussian_input(fd)

1178 atoms = gaussian_input.get_atoms()

1179

1180 if attach_calculator:

1181 atoms.calc = gaussian_input.get_calculator()

1182

1183 return atoms

1184

1185

1186# In the interest of using the same RE for both atomic positions and forces,

1187# we make one of the columns optional. That's because atomic positions have

1188# 6 columns, while forces only has 5 columns. Otherwise they are very similar.

1189_re_atom = re.compile(

1190 r'^\s*\S+\s+(\S+)\s+(?:\S+\s+)?(\S+)\s+(\S+)\s+(\S+)\s*$'

1191)

1192_re_forceblock = re.compile(r'^\s*Center\s+Atomic\s+Forces\s+\S+\s*$')

1193_re_l716 = re.compile(r'^\s*$Enter .+l716.exe$$')

1194

1195

1196def _compare_merge_configs(configs, new):

1197 """Append new to configs if it contains a new geometry or new data.

1198

1199 Gaussian sometimes repeats a geometry, for example at the end of an

1200 optimization, or when a user requests vibrational frequency

1201 analysis in the same calculation as a geometry optimization.

1202

1203 In those cases, rather than repeating the structure in the list of

1204 returned structures, try to merge results if doing so doesn't change

1205 any previously calculated values. If that's not possible, then create

1206 a new "image" with the new results.

1207 """

1208 if not configs:

1209 configs.append(new)

1210 return

1211

1212 old = configs[-1]

1213

1214 if old != new:

1215 configs.append(new)

1216 return

1217

1218 oldres = old.calc.results

1219 newres = new.calc.results

1220 common_keys = set(oldres).intersection(newres)

1221

1222 for key in common_keys:

1223 if np.any(oldres[key] != newres[key]):

1224 configs.append(new)

1225 return

1226 else:

1227 oldres.update(newres)

1228

1229

1230def read_gaussian_out(fd, index=-1):

1231 """Reads a gaussian output file and returns an Atoms object."""

1232 configs = []

1233 atoms = None

1234 energy = None

1235 dipole = None

1236 forces = None

1237 orientation = None # Orientation of the coordinates stored in atoms

1238 for line in fd:

1239 line = line.strip()

1240 if line.startswith(r'1\1\GINC'):

1241 # We've reached the "archive" block at the bottom, stop parsing

1242 break

1243

1244 if (line == 'Input orientation:'

1245 or line == 'Z-Matrix orientation:'

1246 or line == "Standard orientation:"):

1247 if atoms is not None:

1248 # Add configuration to the currently-parsed list

1249 # only after an energy or force has been parsed

1250 # (we assume these are in the output file)

1251 if energy is None:

1252 continue

1253 # "atoms" should store the first geometry encountered

1254 # in the input file which is often the input orientation,

1255 # which is the orientation for forces.

1256 # If there are forces and the orientation of atoms is not

1257 # the input coordinate system, warn the user

1258 if orientation != "Input" and forces is not None:

1259 logger.warning('Configuration geometry is not in the input'

1260 f'orientation. It is {orientation}')

1261 atoms.calc = SinglePointCalculator(

1262 atoms, energy=energy, dipole=dipole, forces=forces,

1263 )

1264 _compare_merge_configs(configs, atoms)

1265 atoms = None

1266 orientation = line.split()[0] # Store the orientation

1267 energy = None

1268 dipole = None

1269 forces = None

1270

1271 numbers = []

1272 positions = []

1273 pbc = np.zeros(3, dtype=bool)

1274 cell = np.zeros((3, 3))

1275 npbc = 0

1276 # skip 4 irrelevant lines

1277 for _ in range(4):

1278 fd.readline()

1279 while True:

1280 match = _re_atom.match(fd.readline())

1281 if match is None:

1282 break

1283 number = int(match.group(1))

1284 pos = list(map(float, match.group(2, 3, 4)))

1285 if number == -2:

1286 pbc[npbc] = True

1287 cell[npbc] = pos

1288 npbc += 1

1289 else:

1290 numbers.append(max(number, 0))

1291 positions.append(pos)

1292 atoms = Atoms(numbers, positions, pbc=pbc, cell=cell)

1293 elif (line.startswith('Energy=')

1294 or line.startswith('SCF Done:')):

1295 # Some semi-empirical methods (Huckel, MINDO3, etc.),

1296 # or SCF methods (HF, DFT, etc.)

1297 energy = float(line.split('=')[1].split()[0].replace('D', 'e'))

1298 energy *= Hartree

1299 elif (line.startswith('E2 =') or line.startswith('E3 =')

1300 or line.startswith('E4(') or line.startswith('DEMP5 =')

1301 or line.startswith('E2(')):

1302 # MP{2,3,4,5} energy

1303 # also some double hybrid calculations, like B2PLYP

1304 energy = float(line.split('=')[-1].strip().replace('D', 'e'))

1305 energy *= Hartree

1306 elif line.startswith('Wavefunction amplitudes converged. E(Corr)'):

1307 # "correlated method" energy, e.g. CCSD

1308 energy = float(line.split('=')[-1].strip().replace('D', 'e'))

1309 energy *= Hartree

1310 elif line.startswith('CCSD(T)='):

1311 # CCSD(T) energy

1312 energy = float(line.split('=')[-1].strip().replace('D', 'e'))

1313 energy *= Hartree

1314 elif _re_l716.match(line):

1315 # Sometimes Gaussian will print "Rotating derivatives to

1316 # standard orientation" after the matched line (which looks like

1317 # "(Enter /opt/gaussian/g16/l716.exe)", though the exact path

1318 # depends on where Gaussian is installed). We *skip* the dipole

1319 # in this case, because it might be rotated relative to the input

1320 # orientation (and also it is numerically different even if the

1321 # standard orientation is the same as the input orientation).

1322 line = fd.readline().strip()

1323 if not line.startswith('Dipole'):

1324 continue

1325 dip = line.split('=')[1].replace('D', 'e')

1326 tokens = dip.split()

1327 dipole = []

1328 # dipole elements can run together, depending on what method was

1329 # used to calculate them. First see if there is a space between

1330 # values.

1331 if len(tokens) == 3:

1332 dipole = list(map(float, tokens))

1333 elif len(dip) % 3 == 0:

1334 # next, check if the number of tokens is divisible by 3

1335 nchars = len(dip) // 3

1336 for i in range(3):

1337 dipole.append(float(dip[nchars * i:nchars * (i + 1)]))

1338 else:

1339 # otherwise, just give up on trying to parse it.

1340 dipole = None

1341 continue

1342 # this dipole moment is printed in atomic units, e-Bohr

1343 # ASE uses e-Angstrom for dipole moments.

1344 dipole = np.array(dipole) * Bohr

1345 elif _re_forceblock.match(line):

1346 # skip 2 irrelevant lines

1347 fd.readline()

1348 fd.readline()

1349 forces = []

1350 while True:

1351 match = _re_atom.match(fd.readline())

1352 if match is None:

1353 break

1354 forces.append(list(map(float, match.group(2, 3, 4))))

1355 forces = np.array(forces) * Hartree / Bohr

1356 if atoms is not None:

1357 atoms.calc = SinglePointCalculator(

1358 atoms, energy=energy, dipole=dipole, forces=forces,

1359 )

1360 _compare_merge_configs(configs, atoms)

1361 return configs[index]