Coverage for /builds/kinetik161/ase/ase/io/formats.py: 89.75%
556 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-12-10 11:04 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-12-10 11:04 +0000
1"""File formats.
3This module implements the read(), iread() and write() functions in ase.io.
4For each file format there is an IOFormat object.
6There is a dict, ioformats, which stores the objects.
8Example
9=======
11The xyz format is implemented in the ase/io/xyz.py file which has a
12read_xyz() generator and a write_xyz() function. This and other
13information can be obtained from ioformats['xyz'].
14"""
16import functools
17import inspect
18import io
19import numbers
20import os
21import re
22import sys
23import warnings
24from pathlib import Path, PurePath
25from typing import (IO, Any, Dict, Iterable, List, Optional, Sequence, Tuple,
26 Union)
28from importlib.metadata import entry_points
29from importlib import import_module
31from ase.atoms import Atoms
32from ase.parallel import parallel_function, parallel_generator
33from ase.utils.plugins import ExternalIOFormat
# Maximum number of leading bytes inspected when guessing a file format
# from its contents: filetype() reads this many bytes and match_magic()
# truncates its input to this length.
PEEK_BYTES = 50000
class UnknownFileTypeError(Exception):
    """Raised when a file format is unknown or cannot be determined."""
class IOFormat:
    """Description of one file format plus lazy access to its reader/writer.

    The reader and writer are looked up in the module ``module_name`` under
    the names ``read_<name>`` and ``write_<name>``, where ``<name>`` is the
    format name with '-' replaced by '_'.  The module is only imported when
    one of them is needed.
    """

    def __init__(self, name: str, desc: str, code: str, module_name: str,
                 encoding: Optional[str] = None) -> None:
        """Create a format description.

        name: str
            Name of the format, e.g. 'xyz'.
        desc: str
            Human-readable description.
        code: str
            Two characters.  The first is '1' (single Atoms object) or
            '+' (multiple Atoms objects); the second is 'F' (accepts a
            file object), 'B' (like 'F' but binary) or 'S' (needs a
            filename string).
        module_name: str
            Importable name of the module implementing the format.
        encoding: str or None
            Encoding handed to Path.open() for text formats and used by
            parse_images() to convert between str and bytes.
        """
        self.name = name
        self.description = desc
        assert len(code) == 2
        assert code[0] in list('+1')
        assert code[1] in list('BFS')
        self.code = code
        self.module_name = module_name
        self.encoding = encoding

        # (To be set by define_io_format())
        self.extensions: List[str] = []
        self.globs: List[str] = []
        self.magic: List[str] = []
        self.magic_regex: Optional[bytes] = None

    def open(self, fname, mode: str = 'r') -> IO:
        """Open ``fname`` for reading ('r'), writing ('w') or appending ('a').

        Binary formats are opened in binary mode.  Raises ValueError for any
        other mode and NotImplementedError if the format lacks the
        corresponding reader, writer or append support.
        """
        # We can allow more flags as needed (buffering etc.)
        if mode not in list('rwa'):
            raise ValueError("Only modes allowed are 'r', 'w', and 'a'")
        if mode == 'r' and not self.can_read:
            raise NotImplementedError('No reader implemented for {} format'
                                      .format(self.name))
        if mode == 'w' and not self.can_write:
            raise NotImplementedError('No writer implemented for {} format'
                                      .format(self.name))
        if mode == 'a' and not self.can_append:
            raise NotImplementedError('Appending not supported by {} format'
                                      .format(self.name))

        encoding = self.encoding
        if self.isbinary:
            mode += 'b'
            # Binary mode rejects an explicit encoding with ValueError.
            encoding = None

        path = Path(fname)
        return path.open(mode, encoding=encoding)

    def _buf_as_filelike(self, data: Union[str, bytes]) -> IO:
        """Wrap ``data`` in BytesIO or StringIO as the format requires.

        The data is encoded or decoded first if its type does not match.
        """
        encoding = self.encoding
        if encoding is None:
            encoding = 'utf-8'  # Best hacky guess.

        if self.isbinary:
            if isinstance(data, str):
                data = data.encode(encoding)
        else:
            if isinstance(data, bytes):
                data = data.decode(encoding)

        return self._ioclass(data)

    @property
    def _ioclass(self):
        """In-memory file class: BytesIO if binary, otherwise StringIO."""
        if self.isbinary:
            return io.BytesIO
        else:
            return io.StringIO

    def parse_images(self, data: Union[str, bytes],
                     **kwargs) -> Sequence['Atoms']:
        """Read all images from in-memory ``data`` and return them as a list.

        Keyword arguments are forwarded to the reader.
        """
        with self._buf_as_filelike(data) as fd:
            # self.read() always returns an iterator (plain read functions
            # are wrapped into generators), so it must be consumed here,
            # before the buffer is closed, for single- and multi-image
            # formats alike.
            return list(self.read(fd, **kwargs))

    def parse_atoms(self, data: Union[str, bytes], **kwargs) -> 'Atoms':
        """Read in-memory ``data`` and return the last image."""
        images = self.parse_images(data, **kwargs)
        return images[-1]

    @property
    def can_read(self) -> bool:
        """Whether the module defines a read function for this format."""
        return self._readfunc() is not None

    @property
    def can_write(self) -> bool:
        """Whether the module defines a write function for this format."""
        return self._writefunc() is not None

    @property
    def can_append(self) -> bool:
        """Whether the write function's code object has a name 'append'."""
        writefunc = self._writefunc()
        return self.can_write and 'append' in writefunc.__code__.co_varnames

    def __repr__(self) -> str:
        tokens = [f'{name}={repr(value)}'
                  for name, value in vars(self).items()]
        return 'IOFormat({})'.format(', '.join(tokens))

    def __getitem__(self, i):
        # For compatibility.
        #
        # Historically, the ioformats were listed as tuples
        # with (description, code).  We look like such a tuple.
        return (self.description, self.code)[i]

    @property
    def single(self) -> bool:
        """Whether this format is for a single Atoms object."""
        return self.code[0] == '1'

    @property
    def _formatname(self) -> str:
        """Format name with '-' replaced by '_' (usable in identifiers)."""
        return self.name.replace('-', '_')

    def _readfunc(self):
        """Return the module's ``read_<name>`` function, or None."""
        return getattr(self.module, 'read_' + self._formatname, None)

    def _writefunc(self):
        """Return the module's ``write_<name>`` function, or None."""
        return getattr(self.module, 'write_' + self._formatname, None)

    @property
    def read(self):
        """Callable that reads this format, or None (with a FutureWarning)."""
        if not self.can_read:
            self._warn_none('read')
            return None

        return self._read_wrapper

    def _read_wrapper(self, *args, **kwargs):
        """Call the reader, wrapping plain functions so a generator results."""
        function = self._readfunc()
        if function is None:
            self._warn_none('read')
            return None
        if not inspect.isgeneratorfunction(function):
            function = functools.partial(wrap_read_function, function)
        return function(*args, **kwargs)

    def _warn_none(self, action):
        """Warn that accessing an unsupported read/write property is changing."""
        msg = ('Accessing the IOFormat.{action} property on a format '
               'without {action} support will change behaviour in the '
               'future and return a callable instead of None.  '
               'Use IOFormat.can_{action} to check whether {action} '
               'is supported.')
        warnings.warn(msg.format(action=action), FutureWarning)

    @property
    def write(self):
        """Callable that writes this format, or None (with a FutureWarning)."""
        if not self.can_write:
            self._warn_none('write')
            return None

        return self._write_wrapper

    def _write_wrapper(self, *args, **kwargs):
        """Call the writer; raise ValueError if the format has none."""
        function = self._writefunc()
        if function is None:
            raise ValueError(f'Cannot write to {self.name}-format')
        return function(*args, **kwargs)

    @property
    def modes(self) -> str:
        """Supported modes: 'r' and/or 'w' concatenated, possibly empty."""
        modes = ''
        if self.can_read:
            modes += 'r'
        if self.can_write:
            modes += 'w'
        return modes

    def full_description(self) -> str:
        """Return a multi-line, human-readable summary of the format."""
        lines = [f'Name:        {self.name}',
                 f'Description: {self.description}',
                 f'Modes:       {self.modes}',
                 f'Encoding:    {self.encoding}',
                 f'Module:      {self.module_name}',
                 f'Code:        {self.code}',
                 f'Extensions:  {self.extensions}',
                 f'Globs:       {self.globs}',
                 f'Magic:       {self.magic}']
        return '\n'.join(lines)

    @property
    def acceptsfd(self) -> bool:
        """Whether reader/writer take a file object (code is not 'S')."""
        return self.code[1] != 'S'

    @property
    def isbinary(self) -> bool:
        """Whether files of this format are opened in binary mode."""
        return self.code[1] == 'B'

    @property
    def module(self):
        """Import and return the implementing module.

        Raises UnknownFileTypeError if the import fails.
        """
        try:
            return import_module(self.module_name)
        except ImportError as err:
            raise UnknownFileTypeError(
                f'File format not recognized: {self.name}.  Error: {err}'
            ) from err

    def match_name(self, basename: str) -> bool:
        """Whether ``basename`` matches any of the format's glob patterns."""
        from fnmatch import fnmatch
        return any(fnmatch(basename, pattern)
                   for pattern in self.globs)

    def match_magic(self, data: bytes) -> bool:
        """Whether ``data`` matches the magic regex or a magic pattern.

        If magic_regex is set it alone decides; otherwise each magic
        pattern is glob-matched against the start of ``data``.
        """
        if self.magic_regex:
            assert not self.magic, 'Define only one of magic and magic_regex'
            match = re.match(self.magic_regex, data, re.M | re.S)
            return match is not None

        from fnmatch import fnmatchcase
        return any(
            fnmatchcase(data, magic + b'*')  # type: ignore[operator, type-var]
            for magic in self.magic
        )
# Registry of all defined formats, keyed by format name.
ioformats: Dict[str, IOFormat] = {}  # These will be filled at run-time.
# Maps a registered filename extension to the format that claimed it.
extension2format: Dict[str, IOFormat] = {}


all_formats = ioformats  # Aliased for compatibility only.  Please do not use.
# Maps format name to its module name as given to define_io_format(),
# i.e. before any 'ase.io.' prefix is added.
format2modulename: Dict[str, str] = {}  # Left for compatibility only.
def define_io_format(name, desc, code, *, module=None, ext=None,
                     glob=None, magic=None, encoding=None,
                     magic_regex=None, external=False):
    """Create an IOFormat and add it to the module-level registries.

    name, desc, code: str
        Passed on to IOFormat.
    module: str
        Module implementing the format.  Defaults to ``name`` with '-'
        replaced by '_'.  Unless ``external`` is true, 'ase.io.' is
        prepended.
    ext, glob, magic: None, a single str/bytes, or an iterable of them
        Filename extensions, filename glob patterns and content signatures
        used to recognize the format.
    encoding: str
        Passed on to IOFormat.
    magic_regex: bytes
        Regular expression matched against the file contents.
    external: bool
        If true, ``module`` is used as given, without the 'ase.io.' prefix.

    Returns the new IOFormat.  Raises ValueError if one of the extensions
    is already registered; in that case no registry is modified.
    """
    def normalize_patterns(strings):
        if strings is None:
            return []
        if isinstance(strings, (str, bytes)):
            return [strings]
        return list(strings)

    extensions = normalize_patterns(ext)

    # Check every extension before touching any registry, so that a failed
    # definition does not leave a half-registered format behind.
    claimed = set()
    for extension in extensions:
        if extension in extension2format or extension in claimed:
            raise ValueError(f'extension "{extension}" already registered')
        claimed.add(extension)

    if module is None:
        module = name.replace('-', '_')
    module_name = module if external else 'ase.io.' + module

    fmt = IOFormat(name, desc, code, module_name=module_name,
                   encoding=encoding)
    fmt.extensions = extensions
    fmt.globs = normalize_patterns(glob)
    fmt.magic = normalize_patterns(magic)

    if magic_regex is not None:
        fmt.magic_regex = magic_regex

    # The compatibility mapping stores the module name without the prefix.
    format2modulename[name] = module
    for extension in extensions:
        extension2format[extension] = fmt
    ioformats[name] = fmt
    return fmt
def get_ioformat(name: str) -> IOFormat:
    """Look up a format by name, raising UnknownFileTypeError if unusable.

    The error is raised both when the name is not registered and when the
    module implementing the format cannot be imported.
    """
    fmt = ioformats.get(name)
    if fmt is None:
        raise UnknownFileTypeError(name)
    # Accessing the property imports the module; an import failure
    # surfaces here rather than later.
    fmt.module
    return fmt
def register_external_io_formats(group):
    """Define an IO format for every entry point in ``group``.

    A failure to register one entry point is reported as a warning and
    does not stop the remaining ones from being processed.
    """
    discovered = entry_points()
    # Newer importlib.metadata offers select(); older versions return a
    # dict-like object keyed by group name.
    if hasattr(discovered, 'select'):
        candidates = discovered.select(group=group)
    else:
        candidates = discovered.get(group, ())

    for entry_point in candidates:
        try:
            define_external_io_format(entry_point)
        except Exception as exc:
            warnings.warn(
                'Failed to register external '
                f'IO format {entry_point.name}: {exc}'
            )
def define_external_io_format(entry_point):
    """Register the IO format described by one entry point.

    The entry point is loaded and must yield an ExternalIOFormat; its
    fields become the keyword arguments of the format definition, and the
    entry point name becomes the format name.

    Raises ValueError if a format of that name already exists and
    TypeError if the loaded object is not an ExternalIOFormat.
    """
    loaded = entry_point.load()
    format_name = entry_point.name
    if format_name in ioformats:
        raise ValueError(f'Format {entry_point.name} already defined')
    if not isinstance(loaded, ExternalIOFormat):
        raise TypeError('Wrong type for registering external IO formats '
                        f'in format {entry_point.name}, expected '
                        'ExternalIOFormat')
    F(format_name, **loaded._asdict(), external=True)
# We define all the IO formats below.  Each IO format has a code,
# such as '1F', which defines some of the format's properties:
#
# 1=single atoms object
# +=multiple atoms objects
# F=accepts a file-descriptor
# S=needs a file-name str
# B=like F, but opens in binary mode
#
# Keyword arguments: module (implementing module if it differs from the
# format name), ext (filename extensions), glob (filename patterns),
# magic / magic_regex (content signatures), encoding.

F = define_io_format  # Short alias to keep the table below compact.
F('abinit-gsr', 'ABINIT GSR file', '1S',
  module='abinit', glob='*o_GSR.nc')
F('abinit-in', 'ABINIT input file', '1F',
  module='abinit', magic=b'*znucl *')
F('abinit-out', 'ABINIT output file', '1F',
  module='abinit', magic=b'*.Version * of ABINIT')
F('aims', 'FHI-aims geometry file', '1S', ext='in')
F('aims-output', 'FHI-aims output', '+S',
  module='aims', magic=b'*Invoking FHI-aims ...')
F('bundletrajectory', 'ASE bundle trajectory', '+S')
F('castep-castep', 'CASTEP output file', '+F',
  module='castep', ext='castep')
F('castep-cell', 'CASTEP geom file', '1F',
  module='castep', ext='cell')
F('castep-geom', 'CASTEP trajectory file', '+F',
  module='castep', ext='geom')
F('castep-md', 'CASTEP molecular dynamics file', '+F',
  module='castep', ext='md')
F('castep-phonon', 'CASTEP phonon file', '1F',
  module='castep', ext='phonon')
F('cfg', 'AtomEye configuration', '1F')
F('cif', 'CIF-file', '+B', ext='cif')
F('cmdft', 'CMDFT-file', '1F', glob='*I_info')
F('cjson', 'Chemical json file', '1F', ext='cjson')
F('cp2k-dcd', 'CP2K DCD file', '+B',
  module='cp2k', ext='dcd')
F('cp2k-restart', 'CP2K restart file', '1F',
  module='cp2k', ext='restart')
F('crystal', 'Crystal fort.34 format', '1F',
  ext=['f34', '34'], glob=['f34', '34'])
F('cube', 'CUBE file', '1F', ext='cube')
F('dacapo-text', 'Dacapo text output', '1F',
  module='dacapo', magic=b'*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n')
F('db', 'ASE SQLite database file', '+S')
F('dftb', 'DftbPlus input file', '1S', magic=b'Geometry')
F('dlp4', 'DL_POLY_4 CONFIG file', '1F',
  module='dlp4', ext='config', glob=['*CONFIG*'])
F('dlp-history', 'DL_POLY HISTORY file', '+F',
  module='dlp4', glob='HISTORY')
F('dmol-arc', 'DMol3 arc file', '+S',
  module='dmol', ext='arc')
F('dmol-car', 'DMol3 structure file', '1S',
  module='dmol', ext='car')
F('dmol-incoor', 'DMol3 structure file', '1S',
  module='dmol')
F('elk', 'ELK atoms definition from GEOMETRY.OUT', '1F',
  glob=['GEOMETRY.OUT'])
F('elk-in', 'ELK input file', '1F', module='elk')
F('eon', 'EON CON file', '+F',
  ext='con')
F('eps', 'Encapsulated Postscript', '1S')
F('espresso-in', 'Quantum espresso in file', '1F',
  module='espresso', ext='pwi', magic=[b'*\n&system', b'*\n&SYSTEM'])
F('espresso-out', 'Quantum espresso out file', '+F',
  module='espresso', ext=['pwo', 'out'], magic=b'*Program PWSCF')
# NOTE(review): the next two entries share the name 'exciting'.  Since
# define_io_format() stores formats by name, the second call replaces the
# first in ioformats -- confirm that this is intended.
F('exciting', 'exciting input', '1F', module='exciting', glob='input.xml')
F('exciting', 'exciting output', '1F', module='exciting', glob='INFO.out')
F('extxyz', 'Extended XYZ file', '+F', ext='xyz')
F('findsym', 'FINDSYM-format', '+F')
F('gamess-us-out', 'GAMESS-US output file', '1F',
  module='gamess_us', magic=b'*GAMESS')
F('gamess-us-in', 'GAMESS-US input file', '1F',
  module='gamess_us')
F('gamess-us-punch', 'GAMESS-US punchcard file', '1F',
  module='gamess_us', magic=b' $DATA', ext='dat')
F('gaussian-in', 'Gaussian com (input) file', '1F',
  module='gaussian', ext=['com', 'gjf'])
F('gaussian-out', 'Gaussian output file', '+F',
  module='gaussian', ext='log', magic=b'*Entering Gaussian System')
F('acemolecule-out', 'ACE output file', '1S',
  module='acemolecule')
F('acemolecule-input', 'ACE input file', '1S',
  module='acemolecule')
F('gen', 'DFTBPlus GEN format', '1F')
F('gif', 'Graphics interchange format', '+S',
  module='animation')
F('gpaw-out', 'GPAW text output', '+F',
  magic=b'* ___ ___ ___ _ _ _')
F('gpumd', 'GPUMD input file', '1F', glob='xyz.in')
F('gpw', 'GPAW restart-file', '1S',
  magic=[b'- of UlmGPAW', b'AFFormatGPAW'])
F('gromacs', 'Gromacs coordinates', '1F',
  ext='gro')
F('gromos', 'Gromos96 geometry file', '1F', ext='g96')
F('html', 'X3DOM HTML', '1F', module='x3d')
F('json', 'ASE JSON database file', '+F', ext='json', module='db')
F('jsv', 'JSV file format', '1F')
F('lammps-dump-text', 'LAMMPS text dump file', '+F',
  module='lammpsrun', magic_regex=b'.*?^ITEM: TIMESTEP$')
F('lammps-dump-binary', 'LAMMPS binary dump file', '+B',
  module='lammpsrun')
F('lammps-data', 'LAMMPS data file', '1F', module='lammpsdata',
  encoding='ascii')
F('magres', 'MAGRES ab initio NMR data file', '1F')
F('mol', 'MDL Molfile', '1F')
F('mp4', 'MP4 animation', '+S',
  module='animation')
F('mustem', 'muSTEM xtl file', '1F',
  ext='xtl')
F('mysql', 'ASE MySQL database file', '+S',
  module='db')
F('netcdftrajectory', 'AMBER NetCDF trajectory file', '+S',
  magic=b'CDF')
F('nomad-json', 'JSON from Nomad archive', '+F',
  ext='nomad-json')
F('nwchem-in', 'NWChem input file', '1F',
  module='nwchem', ext='nwi')
F('nwchem-out', 'NWChem output file', '+F',
  module='nwchem', ext='nwo',
  magic=b'*Northwest Computational Chemistry Package')
F('octopus-in', 'Octopus input file', '1F',
  module='octopus', glob='inp')
F('onetep-out', 'ONETEP output file', '+F',
  module='onetep',
  magic=b'*Linear-Scaling Ab Initio Total Energy Program*')
F('onetep-in', 'ONETEP input file', '1F',
  module='onetep',
  magic=[b'*lock species ',
         b'*LOCK SPECIES ',
         b'*--- INPUT FILE ---*'])
F('proteindatabank', 'Protein Data Bank', '+F',
  ext='pdb')
F('png', 'Portable Network Graphics', '1B')
F('postgresql', 'ASE PostgreSQL database file', '+S', module='db')
F('pov', 'Persistance of Vision', '1S')
# prismatic: Should have ext='xyz' if/when multiple formats can have the same
# extension
F('prismatic', 'prismatic and computem XYZ-file', '1F')
F('py', 'Python file', '+F')
F('sys', 'qball sys file', '1F')
F('qbox', 'QBOX output file', '+F',
  magic=b'*:simulation xmlns:')
F('res', 'SHELX format', '1S', ext='shelx')
F('rmc6f', 'RMCProfile', '1S', ext='rmc6f')
F('sdf', 'SDF format', '1F')
F('siesta-xv', 'Siesta .XV file', '1F',
  glob='*.XV', module='siesta')
F('struct', 'WIEN2k structure file', '1S', module='wien2k')
F('struct_out', 'SIESTA STRUCT file', '1F', module='siesta')
F('traj', 'ASE trajectory', '+B', module='trajectory', ext='traj',
  magic=[b'- of UlmASE-Trajectory', b'AFFormatASE-Trajectory'])
F('turbomole', 'TURBOMOLE coord file', '1F', glob='coord',
  magic=b'$coord')
F('turbomole-gradient', 'TURBOMOLE gradient file', '+F',
  module='turbomole', glob='gradient', magic=b'$grad')
F('v-sim', 'V_Sim ascii file', '1F', ext='ascii')
F('vasp', 'VASP POSCAR/CONTCAR', '1F',
  ext='poscar', glob=['*POSCAR*', '*CONTCAR*', '*CENTCAR*'])
F('vasp-out', 'VASP OUTCAR file', '+F',
  module='vasp', glob='*OUTCAR*')
F('vasp-xdatcar', 'VASP XDATCAR file', '+F',
  module='vasp', glob='*XDATCAR*')
F('vasp-xml', 'VASP vasprun.xml file', '+F',
  module='vasp', glob='*vasp*.xml')
F('vti', 'VTK XML Image Data', '1F', module='vtkxml')
F('vtu', 'VTK XML Unstructured Grid', '1F', module='vtkxml', ext='vtu')
F('wout', 'Wannier90 output', '1F', module='wannier90')
F('x3d', 'X3D', '1S')
F('xsd', 'Materials Studio file', '1F')
F('xsf', 'XCrySDen Structure File', '+F',
  magic=[b'*\nANIMSTEPS', b'*\nCRYSTAL', b'*\nSLAB', b'*\nPOLYMER',
         b'*\nMOLECULE', b'*\nATOMS'])
F('xtd', 'Materials Studio file', '+F')
# xyz: No `ext='xyz'` in the definition below.
#      The .xyz files are handled by the extxyz module by default.
F('xyz', 'XYZ-file', '+F')

# Register IO formats exposed through the ase.ioformats entry point
register_external_io_formats('ase.ioformats')
def get_compression(filename: str) -> Tuple[str, Optional[str]]:
    """
    Split a recognised compression extension off a filename.

    The extensions ``.gz``, ``.bz2`` and ``.xz`` are recognised.

    >>> get_compression('H2O.pdb.gz')
    ('H2O.pdb', 'gz')
    >>> get_compression('crystal.cif')
    ('crystal.cif', None)

    Parameters
    ==========
    filename: str
        Full filename including extension.

    Returns
    =======
    (root, extension): (str, str or None)
        The filename without the compression extension, and that
        extension without its dot.  If the last extension is not a
        recognised compression format, the filename is returned unchanged
        together with None.
    """
    # Update if anything is added
    known = ('gz', 'bz2', 'xz')

    # The stdlib splitter keeps the leading '.' on the extension.
    stem, suffix = os.path.splitext(filename)
    kind = suffix.strip('.')

    if kind not in known:
        return filename, None
    return stem, kind
def open_with_compression(filename: str, mode: str = 'r') -> IO:
    """
    Open a file, transparently handling compression guessed from its name.

    Implemented for ``gz`` (gzip), ``bz2`` (bzip2) and ``xz`` (lzma); any
    other filename is opened with the builtin ``open``.

    Supported modes are:
       * 'r', 'rt', 'w', 'wt' for text mode read and write.
       * 'rb', 'wb' for binary read and write.

    Parameters
    ==========
    filename: str
        Path to the file to open, including any extensions that indicate
        the compression used.
    mode: str
        Mode to open the file, same as for builtin ``open``, e.g 'r', 'w'.

    Returns
    =======
    fd: file
        File-like object open with the specified mode.
    """
    # Compressed formats sometimes default to binary, so force text mode
    # for the bare 'r', 'w' and 'a' modes.
    text_modes = {'r': 'rt', 'w': 'wt', 'a': 'at'}
    mode = text_modes.get(mode, mode)

    _, compression = get_compression(filename)

    # The compression modules are imported only when actually needed.
    if compression == 'gz':
        import gzip
        return gzip.open(filename, mode=mode)  # type: ignore[return-value]
    if compression == 'bz2':
        import bz2
        return bz2.open(filename, mode=mode)
    if compression == 'xz':
        import lzma
        return lzma.open(filename, mode)

    # No recognised compression extension.
    return open(filename, mode)
def is_compressed(fd: io.BufferedIOBase) -> bool:
    """Check if the file object is in a compressed format.

    Only compression modules that have already been imported are
    considered.
    """
    # We'd like to avoid triggering imports unless already imported.
    # Also, Python can be compiled without e.g. lzma so we need to
    # protect against that:
    loaded = sys.modules
    if 'gzip' in loaded:
        import gzip
        if isinstance(fd, gzip.GzipFile):
            return True
    if 'bz2' in loaded:
        import bz2
        if isinstance(fd, bz2.BZ2File):
            return True
    if 'lzma' in loaded:
        import lzma
        if isinstance(fd, lzma.LZMAFile):
            return True
    return False
def wrap_read_function(read, filename, index=None, **kwargs):
    """Turn a plain read function into a generator.

    With an index, the items of ``read(filename, index, **kwargs)`` are
    yielded one by one; without one, the single return value of
    ``read(filename, **kwargs)`` is yielded.
    """
    if index is not None:
        yield from read(filename, index, **kwargs)
        return
    yield read(filename, **kwargs)
# What the public read/write functions accept as their file argument:
# a path given as str or PurePath, or a file object.
NameOrFile = Union[str, PurePath, IO]
def write(
    filename: NameOrFile,
    images: Union[Atoms, Sequence[Atoms]],
    format: str = None,
    parallel: bool = True,
    append: bool = False,
    **kwargs: Any
) -> None:
    """Write Atoms object(s) to file.

    filename: str or file
        Name of the file to write to or a file descriptor.  The name '-'
        means standard output.
    images: Atoms object or list of Atoms objects
        A single Atoms object or a list of Atoms objects.
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be taken from suffix of the filename.
        If it cannot be determined for a file descriptor (or for '-'),
        'json' is used.
    parallel: bool
        Default is to write on master only.  Use parallel=False to write
        from all slaves.
    append: bool
        Default is to open files in 'w' or 'wb' mode, overwriting
        existing files.  In some cases opening the file in 'a' or 'ab'
        mode (appending) is useful,
        e.g. writing trajectories or saving multiple Atoms objects in one file.
        WARNING: If the file format does not support multiple entries without
        additional keywords/headers, files created using 'append=True'
        might not be readable by any program! They will nevertheless be
        written without error message.

    The use of additional keywords is format specific. write() may
    return an object after writing certain formats, but this behaviour
    may change in the future.

    """

    if isinstance(filename, PurePath):
        filename = str(filename)

    fd = None
    if not isinstance(filename, str):
        # A file object was passed; the format may still be guessable
        # from its name, but failing to do so is not an error here.
        fd = filename  # type: ignore[assignment]
        if format is None:
            try:
                format = filetype(filename, read=False)
                assert isinstance(format, str)
            except UnknownFileTypeError:
                format = None
        filename = None  # type: ignore[assignment]
    elif filename == '-':
        fd = sys.stdout
        filename = None  # type: ignore[assignment]
    elif format is None:
        format = filetype(filename, read=False)
        assert isinstance(format, str)

    if not format:
        format = 'json'  # default is json

    ioformat = get_ioformat(format)

    return _write(filename, fd, format, ioformat, images,
                  parallel=parallel, append=append, **kwargs)
@parallel_function
def _write(filename, fd, format, io, images, parallel=None, append=False,
           **kwargs):
    """Write ``images`` with the format object ``io``.

    Exactly one of ``filename`` and ``fd`` is expected to be set by the
    caller.  Raises ValueError when the format cannot store the given
    images, cannot be written, cannot be written to a file object, or
    cannot be appended to.
    """
    if isinstance(images, Atoms):
        images = [images]

    if io.single:
        if len(images) > 1:
            raise ValueError(
                f'{format}-format can only store 1 Atoms object.')
        images = images[0]

    if not io.can_write:
        raise ValueError(f"Can't write to {format}-format")

    # Special case for json-format:
    if format == 'json' and (len(images) > 1 or append):
        if filename is None:
            raise ValueError("Can't write more than one image to "
                             "file-descriptor using json-format.")
        return io.write(filename, images, append=append, **kwargs)

    if not io.acceptsfd:
        # The writer wants a filename and opens the file itself.
        if fd is not None:
            raise ValueError(
                f"Can't write {format}-format to file-descriptor")
        if io.can_append:
            return io.write(filename, images, append=append, **kwargs)
        if append:
            raise ValueError(f"Cannot append to {format}-format, "
                             "write-function does not support the append "
                             "keyword.")
        return io.write(filename, images, **kwargs)

    # The writer takes a file object; open one here unless it was given.
    open_new = fd is None
    try:
        if open_new:
            mode = ('a' if append else 'w') + ('b' if io.isbinary else '')
            fd = open_with_compression(filename, mode)
            # XXX remember to re-enable compressed open
            # fd = io.open(filename, mode)
        return io.write(fd, images, **kwargs)
    finally:
        # Only close what was opened here, never the caller's object.
        if open_new and fd is not None:
            fd.close()
def read(
        filename: NameOrFile,
        index: Any = None,
        format: str = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Union[Atoms, List[Atoms]]:
    """Read Atoms object(s) from file.

    filename: str or file
        Name of the file to read from or a file descriptor.  The name '-'
        means standard input.
    index: int, slice or str
        The last configuration will be returned by default.  Examples:

            * ``index=0``: first configuration
            * ``index=-2``: second to last
            * ``index=':'`` or ``index=slice(None)``: all
            * ``index='-3:'`` or ``index=slice(-3, None)``: three last
            * ``index='::2'`` or ``index=slice(0, None, 2)``: even
            * ``index='1::2'`` or ``index=slice(1, None, 2)``: odd
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.
    parallel: bool
        Default is to read on master and broadcast to slaves.  Use
        parallel=False to read on all slaves.
    do_not_split_by_at_sign: bool
        If False (default) ``filename`` is split at the last at sign ``@``
        in its basename, and the part after it is parsed as the index.

    Many formats allow an open file-like object to be passed instead
    of ``filename``. In this case the format cannot be auto-detected,
    so the ``format`` argument should be explicitly given."""

    if isinstance(filename, PurePath):
        filename = str(filename)
    if filename == '-':
        filename = sys.stdin

    if isinstance(index, str):
        # A string that is not a valid index is passed on unchanged.
        try:
            index = string2index(index)
        except ValueError:
            pass

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)
    if index is None:
        index = -1

    if not format:
        format = filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    if not isinstance(index, (slice, str)):
        # A single image was requested: take the first one from there on.
        images = _iread(filename, slice(index, None), format, ioformat,
                        parallel=parallel, **kwargs)
        return next(images)

    return list(_iread(filename, index, format, ioformat,
                       parallel=parallel, **kwargs))
def iread(
        filename: NameOrFile,
        index: Any = None,
        format: str = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Iterable[Atoms]:
    """Iterator for reading Atoms objects from file.

    Works as the `read` function, but yields one Atoms object at a time
    instead of all at once.  Without an index, all images are yielded."""

    if isinstance(filename, PurePath):
        filename = str(filename)

    if isinstance(index, str):
        index = string2index(index)

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)

    if index is None or index == ':':
        index = slice(None, None, None)
    elif not isinstance(index, (slice, str)):
        # Integer: select just that image.  For -1 the stop would be 0,
        # so it is replaced by None ("up to the end").
        index = slice(index, (index + 1) or None)

    if not format:
        format = filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    yield from _iread(filename, index, format, ioformat, parallel=parallel,
                      **kwargs)
@parallel_generator
def _iread(filename, index, format, io, parallel=None, full_output=False,
           **kwargs):
    """Yield images read from ``filename`` with the format object ``io``.

    Items produced by the reader that are not dicts are treated as the
    atoms themselves.  With ``full_output`` the dicts (``{'atoms': ...}``
    for plain items) are yielded; otherwise only their 'atoms' entry.

    Raises ValueError if the format has no reader.
    """
    if not io.can_read:
        raise ValueError(f"Can't read from {format}-format")

    if io.single:
        # Single-image readers receive no index argument.
        first = index.start
        assert first is None or first == 0 or first == -1
        reader_args = ()
    else:
        reader_args = (index,)

    opened_here = isinstance(filename, str) and io.acceptsfd
    if opened_here:
        fd = open_with_compression(filename, 'rb' if io.isbinary else 'r')
    else:
        # Either a filename for a reader that wants one, or an object
        # supplied by the caller, which the reader must accept.
        assert isinstance(filename, str) or io.acceptsfd
        fd = filename

    # Make sure fd is closed in case loop doesn't finish:
    try:
        for item in io.read(fd, *reader_args, **kwargs):
            record = item if isinstance(item, dict) else {'atoms': item}
            yield record if full_output else record['atoms']
    finally:
        if opened_here:
            fd.close()
def parse_filename(filename, index=None, do_not_split_by_at_sign=False):
    """Split an index given as ``name@index`` off a filename.

    Returns ``(filename, index)``.  The input is returned unchanged if
    ``filename`` is not a str, if splitting is disabled, or if its
    basename contains no '@'.  Otherwise the filename is split at its last
    '@'; an ``index`` passed as a slice takes precedence over the part
    after the '@'.  If that part cannot be parsed, a warning is issued and
    it is returned as a plain string.
    """
    if not isinstance(filename, str):
        return filename, index
    if do_not_split_by_at_sign or '@' not in os.path.basename(filename):
        return filename, index

    stem, _, suffix = filename.rpartition('@')

    if isinstance(index, slice):
        return stem, index

    try:
        parsed = string2index(suffix)
    except ValueError:
        warnings.warn('Can not parse index for path \n'
                      ' "%s" \nConsider set '
                      'do_not_split_by_at_sign=True \nif '
                      'there is no index.' % filename)
        parsed = suffix
    return stem, parsed
def match_magic(data: bytes) -> IOFormat:
    """Return the first registered format whose signature matches ``data``.

    Only the first PEEK_BYTES bytes are examined.  Raises
    UnknownFileTypeError if no format matches.
    """
    head = data[:PEEK_BYTES]
    found = next((candidate for candidate in ioformats.values()
                  if candidate.match_magic(head)), None)
    if found is None:
        raise UnknownFileTypeError('Cannot guess file type from contents')
    return found
def string2index(string: str) -> Union[int, slice, str]:
    """Convert index string to either int or slice.

    A string without ':' is returned as an int if it parses as one and is
    otherwise returned unchanged (it may contain a database accessor).
    A string with ':' is parsed as ``start:stop:step``; empty fields mean
    None.

    Raises ValueError if a field is not an integer or if there are more
    than three fields.
    """
    if ':' not in string:
        # may contain database accessor
        try:
            return int(string)
        except ValueError:
            return string

    fields = string.split(':')
    if len(fields) > 3:
        # slice() itself would fail with TypeError, which callers that
        # expect ValueError for a malformed index would not catch.
        raise ValueError(f'Too many fields in slice string: {string!r}')

    parts: List[Optional[int]] = [int(field) if field != '' else None
                                  for field in fields]
    parts += (3 - len(parts)) * [None]
    return slice(*parts)
def filetype(
    filename: NameOrFile,
    read: bool = True,
    guess: bool = True,
) -> str:
    """Try to guess the type of the file.

    First, special signatures in the filename will be checked for.  If that
    does not identify the file type, then up to PEEK_BYTES bytes from the
    start of the file will be read and analysed.  Turn off this second part
    by using read=False.

    filename: str or file
        Name of the file or a file object.  For a file object with a
        ``name`` attribute, that name is used for the filename checks.
    read: bool
        Whether the file contents may be inspected.  With read=False the
        result is based on the filename only; an extension that is not
        registered is then returned as is.
    guess: bool
        If the contents match no format and the extension is not
        registered, return the bare extension as the format name.

    Raises UnknownFileTypeError if the type cannot be determined or the
    file is empty.

    Can be used from the command-line also::

        $ ase info filename ...
    """

    orig_filename = filename
    if hasattr(filename, 'name'):
        filename = filename.name

    ext = None
    if isinstance(filename, str):
        if os.path.isdir(filename):
            if os.path.basename(os.path.normpath(filename)) == 'states':
                return 'eon'
            return 'bundletrajectory'

        if filename.startswith('postgres'):
            return 'postgresql'

        if filename.startswith('mysql') or filename.startswith('mariadb'):
            return 'mysql'

        # strip any compression extensions that can be read
        root, compression = get_compression(filename)
        basename = os.path.basename(root)

        if '.' in basename:
            ext = os.path.splitext(basename)[1].strip('.').lower()

        for fmt in ioformats.values():
            if fmt.match_name(basename):
                return fmt.name

        if not read:
            if ext is None:
                raise UnknownFileTypeError('Could not guess file type')
            ioformat = extension2format.get(ext)
            if ioformat:
                return ioformat.name

            # askhl: This is strange, we don't know if ext is a format:
            return ext

        if orig_filename == filename:
            fd = open_with_compression(filename, 'rb')
        else:
            fd = orig_filename  # type: ignore[assignment]
    else:
        fd = filename
        if fd is sys.stdin:
            return 'json'

    if fd is filename:
        # File object without a usable name: peek, then rewind it.
        data = fd.read(PEEK_BYTES)
        fd.seek(0)
    else:
        # NOTE(review): besides files opened above, this branch also closes
        # a named file object passed in by the caller -- confirm intended.
        try:
            data = fd.read(PEEK_BYTES)
        finally:
            fd.close()

    if len(data) == 0:
        # filename may be a file object here, so do not concatenate.
        raise UnknownFileTypeError(f'Empty file: {filename}')

    try:
        return match_magic(data).name
    except UnknownFileTypeError:
        pass

    format = None
    if ext in extension2format:
        format = extension2format[ext].name

    if format is None and guess:
        format = ext
    if format is None:
        # Do quick xyz check:
        lines = data.splitlines()
        if lines and lines[0].strip().isdigit():
            return extension2format['xyz'].name

        raise UnknownFileTypeError('Could not guess file type')
    assert isinstance(format, str)
    return format
def index2range(index, length):
    """Convert slice or integer to range.

    The result refers to positions in a sequence of the given length.
    If index is an integer, range will contain only that integer."""
    selected = range(length)[index]
    if not isinstance(selected, numbers.Integral):
        return selected
    return range(selected, selected + 1)