Coverage for /builds/kinetik161/ase/ase/io/formats.py: 89.75%

"""File formats.

This module implements the read(), iread() and write() functions in ase.io.
For each file format there is an IOFormat object.

There is a dict, ioformats, which stores the objects.

Example
=======

The xyz format is implemented in the ase/io/xyz.py file which has a
read_xyz() generator and a write_xyz() function. This and other
information can be obtained from ioformats['xyz'].
"""

16import functools

17import inspect

18import io

19import numbers

20import os

21import re

22import sys

23import warnings

24from pathlib import Path, PurePath

25from typing import (IO, Any, Dict, Iterable, List, Optional, Sequence, Tuple,

26 Union)

28from importlib.metadata import entry_points

29from importlib import import_module

31from ase.atoms import Atoms

32from ase.parallel import parallel_function, parallel_generator

33from ase.utils.plugins import ExternalIOFormat

35PEEK_BYTES = 50000

class UnknownFileTypeError(Exception):
    """Raised when a file format is unknown or cannot be determined."""

class IOFormat:
    """Description of a single file format and access to its reader/writer.

    The two-character ``code`` encodes the format's properties:

    * first character: ``'1'`` (single Atoms object) or ``'+'`` (multiple
      Atoms objects);
    * second character: ``'F'`` (accepts a file descriptor), ``'S'`` (needs
      a file-name string) or ``'B'`` (like ``'F'`` but opened in binary
      mode).

    The reader and writer are looked up lazily as ``read_<name>`` and
    ``write_<name>`` (dashes in the name replaced by underscores) in the
    module called ``module_name``.
    """

    def __init__(self, name: str, desc: str, code: str, module_name: str,
                 encoding: str = None) -> None:
        self.name = name
        self.description = desc
        assert len(code) == 2
        assert code[0] in list('+1')
        assert code[1] in list('BFS')
        self.code = code
        self.module_name = module_name
        self.encoding = encoding

        # (To be set by define_io_format())
        self.extensions: List[str] = []
        self.globs: List[str] = []
        self.magic: List[str] = []
        self.magic_regex: Optional[bytes] = None

    def open(self, fname, mode: str = 'r') -> IO:
        """Open ``fname`` for this format in mode 'r', 'w' or 'a'.

        Raises ValueError for any other mode and NotImplementedError if the
        format has no reader/writer/append support for the requested mode.
        Binary formats get a 'b' appended to the mode.
        """
        # We might want append mode, too
        # We can allow more flags as needed (buffering etc.)
        if mode not in list('rwa'):
            raise ValueError("Only modes allowed are 'r', 'w', and 'a'")
        if mode == 'r' and not self.can_read:
            raise NotImplementedError('No reader implemented for {} format'
                                      .format(self.name))
        if mode == 'w' and not self.can_write:
            raise NotImplementedError('No writer implemented for {} format'
                                      .format(self.name))
        if mode == 'a' and not self.can_append:
            raise NotImplementedError('Appending not supported by {} format'
                                      .format(self.name))

        if self.isbinary:
            mode += 'b'

        path = Path(fname)
        return path.open(mode, encoding=self.encoding)

    def _buf_as_filelike(self, data: Union[str, bytes]) -> IO:
        """Wrap ``data`` in a BytesIO/StringIO matching this format.

        ``data`` is encoded or decoded as needed, using the format's
        encoding or utf-8 if none is set.
        """
        encoding = self.encoding
        if encoding is None:
            encoding = 'utf-8'  # Best hacky guess.

        if self.isbinary:
            if isinstance(data, str):
                data = data.encode(encoding)
        else:
            if isinstance(data, bytes):
                data = data.decode(encoding)

        return self._ioclass(data)

    @property
    def _ioclass(self):
        """In-memory file class: BytesIO for binary formats, else StringIO."""
        if self.isbinary:
            return io.BytesIO
        else:
            return io.StringIO

    def parse_images(self, data: Union[str, bytes],
                     **kwargs) -> Sequence[Atoms]:
        """Parse ``data`` (file contents) and return a list of images.

        The read wrapper always returns a generator (plain read functions
        are wrapped into one), so the result is collected into a list for
        single-image and multi-image formats alike.
        """
        with self._buf_as_filelike(data) as fd:
            # The generator must be exhausted before the buffer is closed.
            return list(self.read(fd, **kwargs))

    def parse_atoms(self, data: Union[str, bytes], **kwargs) -> Atoms:
        """Parse ``data`` and return the last image."""
        images = self.parse_images(data, **kwargs)
        return images[-1]

    @property
    def can_read(self) -> bool:
        """Whether the format's module defines a read function."""
        return self._readfunc() is not None

    @property
    def can_write(self) -> bool:
        """Whether the format's module defines a write function."""
        return self._writefunc() is not None

    @property
    def can_append(self) -> bool:
        """Whether the write function has a variable named ``append``."""
        writefunc = self._writefunc()
        return self.can_write and 'append' in writefunc.__code__.co_varnames

    def __repr__(self) -> str:
        tokens = [f'{name}={repr(value)}'
                  for name, value in vars(self).items()]
        return 'IOFormat({})'.format(', '.join(tokens))

    def __getitem__(self, i):
        # For compatibility.
        #
        # Historically, the ioformats were listed as tuples
        # with (description, code).  We look like such a tuple.
        return (self.description, self.code)[i]

    @property
    def single(self) -> bool:
        """Whether this format is for a single Atoms object."""
        return self.code[0] == '1'

    @property
    def _formatname(self) -> str:
        """Format name with dashes replaced by underscores."""
        return self.name.replace('-', '_')

    def _readfunc(self):
        """Return the module's ``read_<formatname>`` attribute or None."""
        return getattr(self.module, 'read_' + self._formatname, None)

    def _writefunc(self):
        """Return the module's ``write_<formatname>`` attribute or None."""
        return getattr(self.module, 'write_' + self._formatname, None)

    @property
    def read(self):
        """Callable that reads images, or None (with a FutureWarning)."""
        if not self.can_read:
            self._warn_none('read')
            return None

        return self._read_wrapper

    def _read_wrapper(self, *args, **kwargs):
        """Call the read function so that the result is always a generator.

        Read functions that are not generator functions are wrapped with
        wrap_read_function.
        """
        function = self._readfunc()
        if function is None:
            self._warn_none('read')
            return None
        if not inspect.isgeneratorfunction(function):
            function = functools.partial(wrap_read_function, function)
        return function(*args, **kwargs)

    def _warn_none(self, action):
        """Emit the FutureWarning about read/write properties being None."""
        msg = ('Accessing the IOFormat.{action} property on a format '
               'without {action} support will change behaviour in the '
               'future and return a callable instead of None. '
               'Use IOFormat.can_{action} to check whether {action} '
               'is supported.')
        warnings.warn(msg.format(action=action), FutureWarning)

    @property
    def write(self):
        """Callable that writes images, or None (with a FutureWarning)."""
        if not self.can_write:
            self._warn_none('write')
            return None

        return self._write_wrapper

    def _write_wrapper(self, *args, **kwargs):
        """Call the write function; raise ValueError if there is none."""
        function = self._writefunc()
        if function is None:
            raise ValueError(f'Cannot write to {self.name}-format')
        return function(*args, **kwargs)

    @property
    def modes(self) -> str:
        """String containing 'r' and/or 'w' for the supported directions."""
        modes = ''
        if self.can_read:
            modes += 'r'
        if self.can_write:
            modes += 'w'
        return modes

    def full_description(self) -> str:
        """Return a multi-line, human-readable summary of the format."""
        lines = [f'Name: {self.name}',
                 f'Description: {self.description}',
                 f'Modes: {self.modes}',
                 f'Encoding: {self.encoding}',
                 f'Module: {self.module_name}',
                 f'Code: {self.code}',
                 f'Extensions: {self.extensions}',
                 f'Globs: {self.globs}',
                 f'Magic: {self.magic}']
        return '\n'.join(lines)

    @property
    def acceptsfd(self) -> bool:
        """Whether the format works on file objects (code is not 'S')."""
        return self.code[1] != 'S'

    @property
    def isbinary(self) -> bool:
        """Whether files of this format are opened in binary mode."""
        return self.code[1] == 'B'

    @property
    def module(self):
        """Import and return the module implementing this format.

        Raises UnknownFileTypeError (chained to the ImportError) if the
        module cannot be imported.
        """
        try:
            return import_module(self.module_name)
        except ImportError as err:
            raise UnknownFileTypeError(
                f'File format not recognized: {self.name}. Error: {err}'
            ) from err

    def match_name(self, basename: str) -> bool:
        """Whether ``basename`` matches any of the format's glob patterns."""
        from fnmatch import fnmatch
        return any(fnmatch(basename, pattern)
                   for pattern in self.globs)

    def match_magic(self, data: bytes) -> bool:
        """Whether ``data`` matches the magic regex or a magic pattern."""
        if self.magic_regex:
            assert not self.magic, 'Define only one of magic and magic_regex'
            match = re.match(self.magic_regex, data, re.M | re.S)
            return match is not None

        from fnmatch import fnmatchcase
        return any(
            fnmatchcase(data, magic + b'*')  # type: ignore[operator, type-var]
            for magic in self.magic
        )

248

249

# Registry of all known formats, keyed by format name.
ioformats: Dict[str, IOFormat] = {}  # These will be filled at run-time.
# Maps a file extension to the IOFormat registered for it.
extension2format = {}


all_formats = ioformats  # Aliased for compatibility only. Please do not use.
# Maps a format name to its module name (without any 'ase.io.' prefix).
format2modulename = {}  # Left for compatibility only.

256

257

def define_io_format(name, desc, code, *, module=None, ext=None,
                     glob=None, magic=None, encoding=None,
                     magic_regex=None, external=False):
    """Create an IOFormat, register it in the module registries, return it.

    ``module`` defaults to the format name with dashes replaced by
    underscores; unless ``external`` is true it is prefixed with 'ase.io.'.
    ``ext``, ``glob`` and ``magic`` may each be None, a single str/bytes or
    an iterable of patterns.  Raises ValueError if one of the extensions is
    already registered for another format.
    """
    def as_list(patterns):
        # Accept None, a single pattern, or any iterable of patterns.
        if patterns is None:
            return []
        if isinstance(patterns, (str, bytes)):
            return [patterns]
        return list(patterns)

    module_basename = name.replace('-', '_') if module is None else module
    format2modulename[name] = module_basename

    if external:
        module_path = module_basename
    else:
        module_path = 'ase.io.' + module_basename

    ioformat = IOFormat(name, desc, code, module_name=module_path,
                        encoding=encoding)
    ioformat.extensions = as_list(ext)
    ioformat.globs = as_list(glob)
    ioformat.magic = as_list(magic)
    if magic_regex is not None:
        ioformat.magic_regex = magic_regex

    for extension in ioformat.extensions:
        if extension in extension2format:
            raise ValueError(f'extension "{extension}" already registered')
        extension2format[extension] = ioformat

    ioformats[name] = ioformat
    return ioformat

293

294

def get_ioformat(name: str) -> IOFormat:
    """Look up the IOFormat called ``name``.

    Raises UnknownFileTypeError if no such format is registered; accessing
    the format's module may raise as well if it cannot be imported.
    """
    ioformat = ioformats.get(name)
    if ioformat is None:
        raise UnknownFileTypeError(name)
    # Touch the module so that an import problem surfaces here.
    ioformat.module
    return ioformat

303

304

def register_external_io_formats(group):
    """Register every IO format advertised under entry point ``group``.

    A format that fails to register produces a warning instead of an
    exception.
    """
    discovered = entry_points()
    # Use whichever lookup API the entry_points() result offers.
    if hasattr(discovered, 'select'):
        candidates = discovered.select(group=group)
    else:
        candidates = discovered.get(group, ())

    for entry_point in candidates:
        try:
            define_external_io_format(entry_point)
        except Exception as exc:
            warnings.warn(
                'Failed to register external '
                f'IO format {entry_point.name}: {exc}'
            )

319

320

def define_external_io_format(entry_point):
    """Load ``entry_point`` and register the format it describes.

    Raises ValueError if a format of that name already exists and
    TypeError if the loaded object is not an ExternalIOFormat.
    """
    loaded = entry_point.load()
    name = entry_point.name

    if name in ioformats:
        raise ValueError(f'Format {name} already defined')

    if not isinstance(loaded, ExternalIOFormat):
        raise TypeError('Wrong type for registering external IO formats '
                        f'in format {name}, expected '
                        'ExternalIOFormat')

    F(name, **loaded._asdict(), external=True)

331

332

# We define all the IO formats below.  Each IO format has a code,
# such as '1F', which defines some of the format's properties:
#
# 1=single atoms object
# +=multiple atoms objects
# F=accepts a file-descriptor
# S=needs a file-name str
# B=like F, but opens in binary mode
#
# Keyword arguments (see define_io_format): ``module`` is the module name
# below ase.io, ``ext`` the file extension(s), ``glob`` the file-name
# pattern(s), ``magic``/``magic_regex`` the content signature(s).

F = define_io_format
F('abinit-gsr', 'ABINIT GSR file', '1S',
  module='abinit', glob='*o_GSR.nc')
F('abinit-in', 'ABINIT input file', '1F',
  module='abinit', magic=b'*znucl *')
F('abinit-out', 'ABINIT output file', '1F',
  module='abinit', magic=b'*.Version * of ABINIT')
F('aims', 'FHI-aims geometry file', '1S', ext='in')
F('aims-output', 'FHI-aims output', '+S',
  module='aims', magic=b'*Invoking FHI-aims ...')
F('bundletrajectory', 'ASE bundle trajectory', '+S')
F('castep-castep', 'CASTEP output file', '+F',
  module='castep', ext='castep')
F('castep-cell', 'CASTEP geom file', '1F',
  module='castep', ext='cell')
F('castep-geom', 'CASTEP trajectory file', '+F',
  module='castep', ext='geom')
F('castep-md', 'CASTEP molecular dynamics file', '+F',
  module='castep', ext='md')
F('castep-phonon', 'CASTEP phonon file', '1F',
  module='castep', ext='phonon')
F('cfg', 'AtomEye configuration', '1F')
F('cif', 'CIF-file', '+B', ext='cif')
F('cmdft', 'CMDFT-file', '1F', glob='*I_info')
F('cjson', 'Chemical json file', '1F', ext='cjson')
F('cp2k-dcd', 'CP2K DCD file', '+B',
  module='cp2k', ext='dcd')
F('cp2k-restart', 'CP2K restart file', '1F',
  module='cp2k', ext='restart')
F('crystal', 'Crystal fort.34 format', '1F',
  ext=['f34', '34'], glob=['f34', '34'])
F('cube', 'CUBE file', '1F', ext='cube')
F('dacapo-text', 'Dacapo text output', '1F',
  module='dacapo', magic=b'*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n')
F('db', 'ASE SQLite database file', '+S')
F('dftb', 'DftbPlus input file', '1S', magic=b'Geometry')
F('dlp4', 'DL_POLY_4 CONFIG file', '1F',
  module='dlp4', ext='config', glob=['*CONFIG*'])
F('dlp-history', 'DL_POLY HISTORY file', '+F',
  module='dlp4', glob='HISTORY')
F('dmol-arc', 'DMol3 arc file', '+S',
  module='dmol', ext='arc')
F('dmol-car', 'DMol3 structure file', '1S',
  module='dmol', ext='car')
F('dmol-incoor', 'DMol3 structure file', '1S',
  module='dmol')
F('elk', 'ELK atoms definition from GEOMETRY.OUT', '1F',
  glob=['GEOMETRY.OUT'])
F('elk-in', 'ELK input file', '1F', module='elk')
F('eon', 'EON CON file', '+F',
  ext='con')
F('eps', 'Encapsulated Postscript', '1S')
F('espresso-in', 'Quantum espresso in file', '1F',
  module='espresso', ext='pwi', magic=[b'*\n&system', b'*\n&SYSTEM'])
F('espresso-out', 'Quantum espresso out file', '+F',
  module='espresso', ext=['pwo', 'out'], magic=b'*Program PWSCF')
# NOTE(review): 'exciting' is defined twice.  define_io_format stores
# formats by name (ioformats[name] = fmt), so the second definition
# replaces the first in ioformats -- confirm that this is intended.
F('exciting', 'exciting input', '1F', module='exciting', glob='input.xml')
F('exciting', 'exciting output', '1F', module='exciting', glob='INFO.out')
F('extxyz', 'Extended XYZ file', '+F', ext='xyz')
F('findsym', 'FINDSYM-format', '+F')
F('gamess-us-out', 'GAMESS-US output file', '1F',
  module='gamess_us', magic=b'*GAMESS')
F('gamess-us-in', 'GAMESS-US input file', '1F',
  module='gamess_us')
F('gamess-us-punch', 'GAMESS-US punchcard file', '1F',
  module='gamess_us', magic=b' $DATA', ext='dat')
F('gaussian-in', 'Gaussian com (input) file', '1F',
  module='gaussian', ext=['com', 'gjf'])
F('gaussian-out', 'Gaussian output file', '+F',
  module='gaussian', ext='log', magic=b'*Entering Gaussian System')
F('acemolecule-out', 'ACE output file', '1S',
  module='acemolecule')
F('acemolecule-input', 'ACE input file', '1S',
  module='acemolecule')
F('gen', 'DFTBPlus GEN format', '1F')
F('gif', 'Graphics interchange format', '+S',
  module='animation')
F('gpaw-out', 'GPAW text output', '+F',
  magic=b'* ___ ___ ___ _ _ _')
F('gpumd', 'GPUMD input file', '1F', glob='xyz.in')
F('gpw', 'GPAW restart-file', '1S',
  magic=[b'- of UlmGPAW', b'AFFormatGPAW'])
F('gromacs', 'Gromacs coordinates', '1F',
  ext='gro')
F('gromos', 'Gromos96 geometry file', '1F', ext='g96')
F('html', 'X3DOM HTML', '1F', module='x3d')
F('json', 'ASE JSON database file', '+F', ext='json', module='db')
F('jsv', 'JSV file format', '1F')
F('lammps-dump-text', 'LAMMPS text dump file', '+F',
  module='lammpsrun', magic_regex=b'.*?^ITEM: TIMESTEP$')
F('lammps-dump-binary', 'LAMMPS binary dump file', '+B',
  module='lammpsrun')
F('lammps-data', 'LAMMPS data file', '1F', module='lammpsdata',
  encoding='ascii')
F('magres', 'MAGRES ab initio NMR data file', '1F')
F('mol', 'MDL Molfile', '1F')
F('mp4', 'MP4 animation', '+S',
  module='animation')
F('mustem', 'muSTEM xtl file', '1F',
  ext='xtl')
F('mysql', 'ASE MySQL database file', '+S',
  module='db')
F('netcdftrajectory', 'AMBER NetCDF trajectory file', '+S',
  magic=b'CDF')
F('nomad-json', 'JSON from Nomad archive', '+F',
  ext='nomad-json')
F('nwchem-in', 'NWChem input file', '1F',
  module='nwchem', ext='nwi')
F('nwchem-out', 'NWChem output file', '+F',
  module='nwchem', ext='nwo',
  magic=b'*Northwest Computational Chemistry Package')
F('octopus-in', 'Octopus input file', '1F',
  module='octopus', glob='inp')
F('onetep-out', 'ONETEP output file', '+F',
  module='onetep',
  magic=b'*Linear-Scaling Ab Initio Total Energy Program*')
F('onetep-in', 'ONETEP input file', '1F',
  module='onetep',
  magic=[b'*lock species ',
         b'*LOCK SPECIES ',
         b'*--- INPUT FILE ---*'])
F('proteindatabank', 'Protein Data Bank', '+F',
  ext='pdb')
F('png', 'Portable Network Graphics', '1B')
F('postgresql', 'ASE PostgreSQL database file', '+S', module='db')
F('pov', 'Persistance of Vision', '1S')
# prismatic: Should have ext='xyz' if/when multiple formats can have the same
# extension
F('prismatic', 'prismatic and computem XYZ-file', '1F')
F('py', 'Python file', '+F')
F('sys', 'qball sys file', '1F')
F('qbox', 'QBOX output file', '+F',
  magic=b'*:simulation xmlns:')
F('res', 'SHELX format', '1S', ext='shelx')
F('rmc6f', 'RMCProfile', '1S', ext='rmc6f')
F('sdf', 'SDF format', '1F')
F('siesta-xv', 'Siesta .XV file', '1F',
  glob='*.XV', module='siesta')
F('struct', 'WIEN2k structure file', '1S', module='wien2k')
F('struct_out', 'SIESTA STRUCT file', '1F', module='siesta')
F('traj', 'ASE trajectory', '+B', module='trajectory', ext='traj',
  magic=[b'- of UlmASE-Trajectory', b'AFFormatASE-Trajectory'])
F('turbomole', 'TURBOMOLE coord file', '1F', glob='coord',
  magic=b'$coord')
F('turbomole-gradient', 'TURBOMOLE gradient file', '+F',
  module='turbomole', glob='gradient', magic=b'$grad')
F('v-sim', 'V_Sim ascii file', '1F', ext='ascii')
F('vasp', 'VASP POSCAR/CONTCAR', '1F',
  ext='poscar', glob=['*POSCAR*', '*CONTCAR*', '*CENTCAR*'])
F('vasp-out', 'VASP OUTCAR file', '+F',
  module='vasp', glob='*OUTCAR*')
F('vasp-xdatcar', 'VASP XDATCAR file', '+F',
  module='vasp', glob='*XDATCAR*')
F('vasp-xml', 'VASP vasprun.xml file', '+F',
  module='vasp', glob='*vasp*.xml')
F('vti', 'VTK XML Image Data', '1F', module='vtkxml')
F('vtu', 'VTK XML Unstructured Grid', '1F', module='vtkxml', ext='vtu')
F('wout', 'Wannier90 output', '1F', module='wannier90')
F('x3d', 'X3D', '1S')
F('xsd', 'Materials Studio file', '1F')
F('xsf', 'XCrySDen Structure File', '+F',
  magic=[b'*\nANIMSTEPS', b'*\nCRYSTAL', b'*\nSLAB', b'*\nPOLYMER',
         b'*\nMOLECULE', b'*\nATOMS'])
F('xtd', 'Materials Studio file', '+F')
# xyz: No `ext='xyz'` in the definition below.
# The .xyz files are handled by the extxyz module by default.
F('xyz', 'XYZ-file', '+F')

# Register IO formats exposed through the ase.ioformats entry point
register_external_io_formats('ase.ioformats')

512

513

def get_compression(filename: str) -> Tuple[str, Optional[str]]:
    """
    Split a recognised compression suffix off a filename.

    Recognises ``.gz``, ``.bz2`` and ``.xz``.

    >>> get_compression('H2O.pdb.gz')
    ('H2O.pdb', 'gz')
    >>> get_compression('crystal.cif')
    ('crystal.cif', None)

    Parameters
    ==========
    filename: str
        Full filename including extension.

    Returns
    =======
    (root, extension): (str, str or None)
        The filename without the compression suffix and the name of the
        compression format.  If the last suffix is not a known
        compression format the filename is returned unchanged together
        with None.
    """
    # Update if anything is added
    known_compressions = ('gz', 'bz2', 'xz')

    # os.path.splitext keeps the leading '.' on the extension
    stem, dotted_suffix = os.path.splitext(filename)
    suffix = dotted_suffix.strip('.')

    if suffix not in known_compressions:
        return filename, None
    return stem, suffix

548

549

def open_with_compression(filename: str, mode: str = 'r') -> IO:
    """
    Open a file, transparently handling compression.

    The compression is guessed from the filename; ``gz`` (gzip),
    ``bz2`` (bzip2) and ``xz`` (lzma) are handled, anything else goes to
    the builtin ``open``.

    Supported modes are:
       * 'r', 'rt', 'w', 'wt' for text mode read and write.
       * 'rb', 'wb' for binary read and write.

    Parameters
    ==========
    filename: str
        Path to the file to open, including any extensions that indicate
        the compression used.
    mode: str
        Mode to open the file, same as for builtin ``open``, e.g 'r', 'w'.

    Returns
    =======
    fd: file
        File-like object open with the specified mode.
    """
    # Compressed formats sometimes default to binary, so force text mode.
    explicit_text_modes = {'r': 'rt', 'w': 'wt', 'a': 'at'}
    mode = explicit_text_modes.get(mode, mode)

    _, compression = get_compression(filename)

    # The compression modules are imported only when actually needed.
    if compression == 'gz':
        import gzip
        return gzip.open(filename, mode=mode)  # type: ignore[return-value]
    if compression == 'bz2':
        import bz2
        return bz2.open(filename, mode=mode)
    if compression == 'xz':
        import lzma
        return lzma.open(filename, mode)

    # Either None or unknown string
    return open(filename, mode)

598

599

def is_compressed(fd: io.BufferedIOBase) -> bool:
    """Tell whether ``fd`` is a gzip, bz2 or lzma file object."""
    # Only modules that are already imported are consulted: we do not
    # want to trigger imports, and Python can be compiled without
    # e.g. lzma.
    if 'gzip' in sys.modules:
        import gzip
        if isinstance(fd, gzip.GzipFile):
            return True
    if 'bz2' in sys.modules:
        import bz2
        if isinstance(fd, bz2.BZ2File):
            return True
    if 'lzma' in sys.modules:
        import lzma
        if isinstance(fd, lzma.LZMAFile):
            return True
    return False

617

618

def wrap_read_function(read, filename, index=None, **kwargs):
    """Turn a plain read function into a generator.

    With an index, whatever ``read(filename, index, ...)`` returns is
    iterated over; without one, the single return value of
    ``read(filename, ...)`` is yielded.
    """
    if index is not None:
        yield from read(filename, index, **kwargs)
        return
    yield read(filename, **kwargs)

625

626

# Type of the ``filename`` argument of read(), iread(), write() and
# filetype(): a path given as str or PurePath, or an open file object.
NameOrFile = Union[str, PurePath, IO]

628

629

def write(
        filename: NameOrFile,
        images: Union[Atoms, Sequence[Atoms]],
        format: str = None,
        parallel: bool = True,
        append: bool = False,
        **kwargs: Any
) -> None:
    """Write Atoms object(s) to file.

    filename: str or file
        Name of the file to write to or a file descriptor.  The name '-'
        means standard output.
    images: Atoms object or list of Atoms objects
        A single Atoms object or a list of Atoms objects.
    format: str
        Used to specify the file-format.  If not given, the
        file-format is guessed from the filename; if nothing can be
        determined, json is used.
    parallel: bool
        Default is to write on master only.  Use parallel=False to write
        from all slaves.
    append: bool
        Default is to open files in 'w' or 'wb' mode, overwriting
        existing files.  In some cases opening the file in 'a' or 'ab'
        mode (appending) is useful,
        e.g. writing trajectories or saving multiple Atoms objects in one
        file.
        WARNING: If the file format does not support multiple entries
        without additional keywords/headers, files created using
        'append=True' might not be readable by any program! They will
        nevertheless be written without error message.

    The use of additional keywords is format specific. write() may
    return an object after writing certain formats, but this behaviour
    may change in the future.

    """
    if isinstance(filename, PurePath):
        filename = str(filename)

    if not isinstance(filename, str):
        # An open file object was passed in.
        fd = filename  # type: ignore[assignment]
        if format is None:
            try:
                format = filetype(filename, read=False)
                assert isinstance(format, str)
            except UnknownFileTypeError:
                format = None
        filename = None  # type: ignore[assignment]
    elif filename == '-':
        fd = sys.stdout
        filename = None  # type: ignore[assignment]
    else:
        fd = None
        if format is None:
            format = filetype(filename, read=False)
            assert isinstance(format, str)

    format = format or 'json'  # default is json

    ioformat = get_ioformat(format)

    return _write(filename, fd, format, ioformat, images,
                  parallel=parallel, append=append, **kwargs)

694

695

@parallel_function
def _write(filename, fd, format, io, images, parallel=None, append=False,
           **kwargs):
    """Write ``images`` with the IOFormat ``io``.

    Exactly one of ``filename`` and ``fd`` is expected to be set by
    write().  Raises ValueError if the format cannot write, cannot hold
    the number of images given, or cannot honour ``fd``/``append``.
    """
    if isinstance(images, Atoms):
        images = [images]

    if io.single:
        if len(images) > 1:
            raise ValueError('{}-format can only store 1 Atoms object.'
                             .format(format))
        images = images[0]

    if not io.can_write:
        raise ValueError(f"Can't write to {format}-format")

    # Special case for json-format:
    if format == 'json' and (len(images) > 1 or append):
        if filename is None:
            raise ValueError("Can't write more than one image to "
                             'file-descriptor using json-format.')
        return io.write(filename, images, append=append, **kwargs)

    if not io.acceptsfd:
        # The writer needs a file name and handles opening itself.
        if fd is not None:
            raise ValueError("Can't write {}-format to file-descriptor"
                             .format(format))
        if io.can_append:
            return io.write(filename, images, append=append, **kwargs)
        if append:
            raise ValueError("Cannot append to {}-format, write-function "
                             "does not support the append keyword."
                             .format(format))
        return io.write(filename, images, **kwargs)

    # Only a file opened here is closed here.
    open_new = (fd is None)
    try:
        if open_new:
            mode = ('a' if append else 'w') + ('b' if io.isbinary else '')
            fd = open_with_compression(filename, mode)
            # XXX remember to re-enable compressed open
            # fd = io.open(filename, mode)
        return io.write(fd, images, **kwargs)
    finally:
        if open_new and fd is not None:
            fd.close()

744

745

def read(
        filename: NameOrFile,
        index: Any = None,
        format: str = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Union[Atoms, List[Atoms]]:
    """Read Atoms object(s) from file.

    filename: str or file
        Name of the file to read from or a file descriptor.
    index: int, slice or str
        The last configuration will be returned by default.  Examples:

            * ``index=0``: first configuration
            * ``index=-2``: second to last
            * ``index=':'`` or ``index=slice(None)``: all
            * ``index='-3:'`` or ``index=slice(-3, None)``: three last
            * ``index='::2'`` or ``index=slice(0, None, 2)``: even
            * ``index='1::2'`` or ``index=slice(1, None, 2)``: odd
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.
    parallel: bool
        Default is to read on master and broadcast to slaves.  Use
        parallel=False to read on all slaves.
    do_not_split_by_at_sign: bool
        If False (default) ``filename`` is split at the at sign ``@``.

    Many formats allow an open file-like object to be passed instead
    of ``filename``. In this case the format cannot be auto-detected,
    so the ``format`` argument should be explicitly given."""

    if isinstance(filename, PurePath):
        filename = str(filename)
    if filename == '-':
        filename = sys.stdin
    if isinstance(index, str):
        try:
            index = string2index(index)
        except ValueError:
            # Not an int or slice; keep the string as it is.
            pass

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)
    if index is None:
        index = -1
    format = format or filetype(filename, read=isinstance(filename, str))

    ioformat = get_ioformat(format)

    # A slice or string index yields a list, anything else a single image.
    wants_list = isinstance(index, (slice, str))
    selection = index if wants_list else slice(index, None)
    images = _iread(filename, selection, format, ioformat,
                    parallel=parallel, **kwargs)
    if wants_list:
        return list(images)
    return next(images)

802

803

def iread(
        filename: NameOrFile,
        index: Any = None,
        format: str = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Iterable[Atoms]:
    """Iterate over the Atoms objects in a file.

    Takes the same arguments as the `read` function, but yields the
    Atoms objects one at a time instead of returning them all at once.
    Without an index, all images are yielded."""

    if isinstance(filename, PurePath):
        filename = str(filename)

    if isinstance(index, str):
        index = string2index(index)

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)

    if index is None or index == ':':
        index = slice(None, None, None)
    elif not isinstance(index, (slice, str)):
        # Single integer: select just that image (-1 means "up to the end").
        index = slice(index, (index + 1) or None)

    format = format or filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    yield from _iread(filename, index, format, ioformat, parallel=parallel,
                      **kwargs)

836

837

@parallel_generator
def _iread(filename, index, format, io, parallel=None, full_output=False,
           **kwargs):
    """Yield images read from ``filename`` with the IOFormat ``io``.

    Each item produced by the reader is either a dict with an 'atoms'
    entry or the image itself.  With ``full_output`` the dict form is
    yielded, otherwise only the 'atoms' entry.
    """
    if not io.can_read:
        raise ValueError(f"Can't read from {format}-format")

    if io.single:
        # A single-image format can only serve the first/last image.
        start = index.start
        assert start is None or start == 0 or start == -1
        reader_args = ()
    else:
        reader_args = (index,)

    must_close_fd = isinstance(filename, str) and io.acceptsfd
    if must_close_fd:
        fd = open_with_compression(filename, 'rb' if io.isbinary else 'r')
    else:
        # Either a file name for a reader that wants names, or a file
        # object, which requires a reader that accepts file objects.
        assert isinstance(filename, str) or io.acceptsfd
        fd = filename

    # Make sure fd is closed in case loop doesn't finish:
    try:
        for item in io.read(fd, *reader_args, **kwargs):
            record = item if isinstance(item, dict) else {'atoms': item}
            if full_output:
                yield record
            else:
                yield record['atoms']
    finally:
        if must_close_fd:
            fd.close()

876

877

def parse_filename(filename, index=None, do_not_split_by_at_sign=False):
    """Split a ``name@index`` filename into name and index.

    Non-string filenames, filenames without '@' in their basename, and
    calls with ``do_not_split_by_at_sign`` are returned unchanged together
    with ``index``.  Otherwise the part after the last '@' is removed; a
    slice passed as ``index`` takes precedence over it.  If the suffix
    cannot be parsed a warning is issued and it is returned as a string.
    """
    if not isinstance(filename, str):
        return filename, index

    has_at_sign = '@' in os.path.basename(filename)
    if do_not_split_by_at_sign or not has_at_sign:
        return filename, index

    path, _, index_text = filename.rpartition('@')

    if isinstance(index, slice):
        return path, index

    try:
        return path, string2index(index_text)
    except ValueError:
        warnings.warn('Can not parse index for path \n'
                      ' "%s" \nConsider set '
                      'do_not_split_by_at_sign=True \nif '
                      'there is no index.' % filename)
    return path, index_text

899

900

def match_magic(data: bytes) -> IOFormat:
    """Return the first registered format whose magic matches ``data``.

    Only the first PEEK_BYTES bytes of ``data`` are examined.  Raises
    UnknownFileTypeError if no format matches.
    """
    head = data[:PEEK_BYTES]
    matched = next((candidate for candidate in ioformats.values()
                    if candidate.match_magic(head)), None)
    if matched is None:
        raise UnknownFileTypeError('Cannot guess file type from contents')
    return matched

907

908

def string2index(string: str) -> Union[int, slice, str]:
    """Convert index string to either int or slice.

    A string without ':' is returned as an int if possible, otherwise it
    is returned unchanged (it may contain a database accessor).  A string
    with ':' is converted to a slice, empty fields meaning None.

    Raises ValueError if a slice field is not an integer or if there are
    more than three fields.
    """
    if ':' not in string:
        # may contain database accessor
        try:
            return int(string)
        except ValueError:
            return string

    fields = string.split(':')
    if len(fields) > 3:
        # slice() would raise TypeError here; callers handle ValueError
        # for strings that cannot be parsed.
        raise ValueError(f'Too many fields in slice string: {string!r}')

    parts: List[Optional[int]] = [int(field) if field else None
                                  for field in fields]
    parts += (3 - len(parts)) * [None]
    return slice(*parts)

925

926

def filetype(
        filename: NameOrFile,
        read: bool = True,
        guess: bool = True,
) -> str:
    """Try to guess the type of the file.

    First, special signatures in the filename will be checked for.  If that
    does not identify the file type, then the first PEEK_BYTES bytes of
    the file will be read and analysed.  Turn off this second part by using
    read=False.

    filename: str or file
        File name or open file object.  For a file object with a ``name``
        attribute, that name is used for the filename-based checks.
    read: bool
        Whether the file contents may be inspected.
    guess: bool
        Whether an unregistered file extension may be returned as the
        format name when nothing else matched.

    A file opened here is closed again; a file object passed in by the
    caller is left open and, when possible, rewound to the start.

    Raises UnknownFileTypeError if the type cannot be determined.

    Can be used from the command-line also::

        $ ase info filename ...
    """

    orig_filename = filename
    if hasattr(filename, 'name'):
        filename = filename.name

    ext = None
    opened_here = False
    if isinstance(filename, str):
        if os.path.isdir(filename):
            if os.path.basename(os.path.normpath(filename)) == 'states':
                return 'eon'
            return 'bundletrajectory'

        if filename.startswith('postgres'):
            return 'postgresql'

        if filename.startswith('mysql') or filename.startswith('mariadb'):
            return 'mysql'

        # strip any compression extensions that can be read
        root, _ = get_compression(filename)
        basename = os.path.basename(root)

        if '.' in basename:
            ext = os.path.splitext(basename)[1].strip('.').lower()

        for fmt in ioformats.values():
            if fmt.match_name(basename):
                return fmt.name

        if not read:
            if ext is None:
                raise UnknownFileTypeError('Could not guess file type')
            ioformat = extension2format.get(ext)
            if ioformat:
                return ioformat.name

            # askhl: This is strange, we don't know if ext is a format:
            return ext

        if orig_filename == filename:
            fd = open_with_compression(filename, 'rb')
            opened_here = True
        else:
            # A file object with a name: peek into the object itself.
            fd = orig_filename  # type: ignore[assignment]
    else:
        fd = filename
        if fd is sys.stdin:
            return 'json'

    data = fd.read(PEEK_BYTES)
    if opened_here:
        fd.close()
    elif fd is filename:
        fd.seek(0)
    else:
        # Caller-owned file object that has a name: do not close it; undo
        # the peek if the object says it can seek.
        seekable = getattr(fd, 'seekable', None)
        if seekable is not None and seekable():
            fd.seek(0)

    if len(data) == 0:
        # filename may be a file object here, so do not concatenate.
        raise UnknownFileTypeError(f'Empty file: {filename}')

    try:
        return match_magic(data).name
    except UnknownFileTypeError:
        pass

    guessed = None
    if ext in extension2format:
        guessed = extension2format[ext].name

    if guessed is None and guess:
        guessed = ext
    if guessed is None:
        # Do quick xyz check:
        lines = data.splitlines()
        if lines and lines[0].strip().isdigit():
            return extension2format['xyz'].name

        raise UnknownFileTypeError('Could not guess file type')
    assert isinstance(guessed, str)
    return guessed

1020

1021

def index2range(index, length):
    """Translate an integer or slice into a range over ``length`` items.

    An integer index gives a range containing just that (normalised)
    integer."""
    selected = range(length)[index]
    if not isinstance(selected, numbers.Integral):
        return selected
    return range(selected, selected + 1)

1029 return obj