Coverage for /builds/kinetik161/ase/ase/io/formats.py: 89.75%

556 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-12-10 11:04 +0000

1"""File formats. 

2 

3This module implements the read(), iread() and write() functions in ase.io. 

4For each file format there is an IOFormat object. 

5 

6There is a dict, ioformats, which stores the objects. 

7 

8Example 

9======= 

10 

11The xyz format is implemented in the ase/io/xyz.py file which has a 

12read_xyz() generator and a write_xyz() function. This and other 

13information can be obtained from ioformats['xyz']. 

14""" 

15 

16import functools 

17import inspect 

18import io 

19import numbers 

20import os 

21import re 

22import sys 

23import warnings 

24from pathlib import Path, PurePath 

25from typing import (IO, Any, Dict, Iterable, List, Optional, Sequence, Tuple, 

26 Union) 

27 

28from importlib.metadata import entry_points 

29from importlib import import_module 

30 

31from ase.atoms import Atoms 

32from ase.parallel import parallel_function, parallel_generator 

33from ase.utils.plugins import ExternalIOFormat 

34 

# Maximum number of leading bytes that filetype() reads from a file, and that
# match_magic() inspects, when guessing a format from file contents.
PEEK_BYTES = 50000

36 

37 

class UnknownFileTypeError(Exception):
    """Raised when a file format is not known or cannot be guessed."""

40 

41 

class IOFormat:
    """Description of one file format plus lazy access to its reader/writer.

    The reader and writer are looked up on demand as ``read_<name>`` and
    ``write_<name>`` (dashes in the name replaced by underscores) in the
    module called ``module_name``.

    ``code`` is a two-character string.  The first character is '1'
    (single Atoms object) or '+' (multiple Atoms objects).  The second is
    'F' (accepts a file object), 'B' (like 'F', but binary mode) or 'S'
    (needs a file name).
    """

    def __init__(self, name: str, desc: str, code: str, module_name: str,
                 encoding: Optional[str] = None) -> None:
        self.name = name
        self.description = desc
        assert len(code) == 2
        assert code[0] in list('+1')
        assert code[1] in list('BFS')
        self.code = code
        self.module_name = module_name
        self.encoding = encoding

        # (To be set by define_io_format())
        self.extensions: List[str] = []
        self.globs: List[str] = []
        self.magic: List[str] = []
        self.magic_regex: Optional[bytes] = None

    def open(self, fname, mode: str = 'r') -> IO:
        """Open ``fname`` for this format.

        ``mode`` must be 'r', 'w' or 'a'; 'b' is appended automatically for
        binary formats.  Raises ValueError for any other mode and
        NotImplementedError if the format lacks the requested capability.
        """
        # We can allow more flags as needed (buffering etc.)
        if mode not in list('rwa'):
            raise ValueError("Only modes allowed are 'r', 'w', and 'a'")
        if mode == 'r' and not self.can_read:
            raise NotImplementedError('No reader implemented for {} format'
                                      .format(self.name))
        if mode == 'w' and not self.can_write:
            raise NotImplementedError('No writer implemented for {} format'
                                      .format(self.name))
        if mode == 'a' and not self.can_append:
            raise NotImplementedError('Appending not supported by {} format'
                                      .format(self.name))

        if self.isbinary:
            mode += 'b'
            # Binary mode does not accept an encoding argument; passing one
            # would raise ValueError.
            encoding = None
        else:
            encoding = self.encoding

        path = Path(fname)
        return path.open(mode, encoding=encoding)

    def _buf_as_filelike(self, data: Union[str, bytes]) -> IO:
        """Wrap ``data`` in BytesIO or StringIO, converting str/bytes as
        needed to match the format's binary/text nature."""
        encoding = self.encoding
        if encoding is None:
            encoding = 'utf-8'  # Best hacky guess.

        if self.isbinary:
            if isinstance(data, str):
                data = data.encode(encoding)
        else:
            if isinstance(data, bytes):
                data = data.decode(encoding)

        return self._ioclass(data)

    @property
    def _ioclass(self):
        """In-memory file class matching the format: BytesIO or StringIO."""
        if self.isbinary:
            return io.BytesIO
        else:
            return io.StringIO

    def parse_images(self, data: Union[str, bytes],
                     **kwargs) -> Sequence['Atoms']:
        """Parse ``data`` with this format's reader; return all images.

        The read wrapper always returns a generator (plain read functions
        are wrapped into one), so the result is materialised the same way
        for single-image and multi-image formats.
        """
        with self._buf_as_filelike(data) as fd:
            return list(self.read(fd, **kwargs))

    def parse_atoms(self, data: Union[str, bytes], **kwargs) -> 'Atoms':
        """Parse ``data`` and return the last image."""
        images = self.parse_images(data, **kwargs)
        return images[-1]

    @property
    def can_read(self) -> bool:
        """Whether the format's module defines a read function."""
        return self._readfunc() is not None

    @property
    def can_write(self) -> bool:
        """Whether the format's module defines a write function."""
        return self._writefunc() is not None

    @property
    def can_append(self) -> bool:
        """Whether a write function exists and has a variable called
        ``append`` (detected through its code object's variable names)."""
        writefunc = self._writefunc()
        if writefunc is None:
            return False
        return 'append' in writefunc.__code__.co_varnames

    def __repr__(self) -> str:
        tokens = [f'{name}={repr(value)}'
                  for name, value in vars(self).items()]
        return 'IOFormat({})'.format(', '.join(tokens))

    def __getitem__(self, i):
        # For compatibility.
        #
        # Historically, the ioformats were listed as tuples
        # with (description, code).  We look like such a tuple.
        return (self.description, self.code)[i]

    @property
    def single(self) -> bool:
        """Whether this format is for a single Atoms object."""
        return self.code[0] == '1'

    @property
    def _formatname(self) -> str:
        """Format name with dashes replaced by underscores."""
        return self.name.replace('-', '_')

    def _readfunc(self):
        """Return the module's ``read_<formatname>`` attribute or None."""
        return getattr(self.module, 'read_' + self._formatname, None)

    def _writefunc(self):
        """Return the module's ``write_<formatname>`` attribute or None."""
        return getattr(self.module, 'write_' + self._formatname, None)

    @property
    def read(self):
        """Read callable, or None (with a FutureWarning) if unsupported."""
        if not self.can_read:
            self._warn_none('read')
            return None

        return self._read_wrapper

    def _read_wrapper(self, *args, **kwargs):
        """Call the read function so that the result is always a generator.

        Read functions that are not generator functions are routed through
        wrap_read_function().
        """
        function = self._readfunc()
        if function is None:
            self._warn_none('read')
            return None
        if not inspect.isgeneratorfunction(function):
            function = functools.partial(wrap_read_function, function)
        return function(*args, **kwargs)

    def _warn_none(self, action):
        """Emit the FutureWarning about ``read``/``write`` returning None."""
        msg = ('Accessing the IOFormat.{action} property on a format '
               'without {action} support will change behaviour in the '
               'future and return a callable instead of None.  '
               'Use IOFormat.can_{action} to check whether {action} '
               'is supported.')
        warnings.warn(msg.format(action=action), FutureWarning)

    @property
    def write(self):
        """Write callable, or None (with a FutureWarning) if unsupported."""
        if not self.can_write:
            self._warn_none('write')
            return None

        return self._write_wrapper

    def _write_wrapper(self, *args, **kwargs):
        """Call the write function; raise ValueError if there is none."""
        function = self._writefunc()
        if function is None:
            raise ValueError(f'Cannot write to {self.name}-format')
        return function(*args, **kwargs)

    @property
    def modes(self) -> str:
        """'r', 'w', 'rw' or '' depending on read/write support."""
        modes = ''
        if self.can_read:
            modes += 'r'
        if self.can_write:
            modes += 'w'
        return modes

    def full_description(self) -> str:
        """Return a multi-line, human-readable summary of the format."""
        lines = [f'Name:        {self.name}',
                 f'Description: {self.description}',
                 f'Modes:       {self.modes}',
                 f'Encoding:    {self.encoding}',
                 f'Module:      {self.module_name}',
                 f'Code:        {self.code}',
                 f'Extensions:  {self.extensions}',
                 f'Globs:       {self.globs}',
                 f'Magic:       {self.magic}']
        return '\n'.join(lines)

    @property
    def acceptsfd(self) -> bool:
        """Whether the reader/writer takes a file object (code is not 'S')."""
        return self.code[1] != 'S'

    @property
    def isbinary(self) -> bool:
        """Whether files of this format are opened in binary mode."""
        return self.code[1] == 'B'

    @property
    def module(self):
        """Import and return the implementing module.

        Raises UnknownFileTypeError (chained to the ImportError) if the
        module cannot be imported.
        """
        try:
            return import_module(self.module_name)
        except ImportError as err:
            raise UnknownFileTypeError(
                f'File format not recognized: {self.name}.  Error: {err}'
            ) from err

    def match_name(self, basename: str) -> bool:
        """Whether ``basename`` matches any of the format's glob patterns."""
        from fnmatch import fnmatch
        return any(fnmatch(basename, pattern)
                   for pattern in self.globs)

    def match_magic(self, data: bytes) -> bool:
        """Whether ``data`` matches the format's magic regex or patterns.

        If ``magic_regex`` is set it is matched from the start of ``data``;
        otherwise each magic pattern, with ``*`` appended, is matched
        case-sensitively against ``data``.
        """
        if self.magic_regex:
            assert not self.magic, 'Define only one of magic and magic_regex'
            match = re.match(self.magic_regex, data, re.M | re.S)
            return match is not None

        from fnmatch import fnmatchcase
        return any(
            fnmatchcase(data, magic + b'*')  # type: ignore[operator, type-var]
            for magic in self.magic
        )

248 

249 

# Registry mapping format name -> IOFormat; populated by define_io_format().
ioformats: Dict[str, IOFormat] = {}  # These will be filled at run-time.
# Registry mapping file extension -> IOFormat; populated by
# define_io_format().
extension2format = {}


all_formats = ioformats  # Aliased for compatibility only.  Please do not use.
# Maps format name -> module name as given to define_io_format() (without
# the 'ase.io.' prefix).
format2modulename = {}  # Left for compatibility only.

256 

257 

def define_io_format(name, desc, code, *, module=None, ext=None,
                     glob=None, magic=None, encoding=None,
                     magic_regex=None, external=False):
    """Create an IOFormat and register it in the module-level registries.

    ``module`` defaults to ``name`` with dashes replaced by underscores and
    is prefixed with ``'ase.io.'`` unless ``external`` is true.  ``ext``,
    ``glob`` and ``magic`` may each be None, a single str/bytes, or an
    iterable of them.

    Raises ValueError if one of the extensions is already registered (or is
    listed twice).  All validation happens before any registry is modified,
    so a failed call leaves ``ioformats``, ``extension2format`` and
    ``format2modulename`` untouched.

    Returns the new IOFormat.
    """
    if module is None:
        module = name.replace('-', '_')
    module_name = module if external else 'ase.io.' + module

    def normalize_patterns(strings):
        if strings is None:
            strings = []
        elif isinstance(strings, (str, bytes)):
            strings = [strings]
        else:
            strings = list(strings)
        return strings

    fmt = IOFormat(name, desc, code, module_name=module_name,
                   encoding=encoding)
    fmt.extensions = normalize_patterns(ext)
    fmt.globs = normalize_patterns(glob)
    fmt.magic = normalize_patterns(magic)

    if magic_regex is not None:
        fmt.magic_regex = magic_regex

    # Check every extension first so that a conflict cannot leave the
    # registries half-updated.
    seen = set()
    for extension in fmt.extensions:
        if extension in extension2format or extension in seen:
            raise ValueError(f'extension "{extension}" already registered')
        seen.add(extension)

    format2modulename[name] = module
    for extension in fmt.extensions:
        extension2format[extension] = fmt

    ioformats[name] = fmt
    return fmt

293 

294 

def get_ioformat(name: str) -> IOFormat:
    """Look up a registered format by name.

    Raises UnknownFileTypeError if the name is not registered.  The
    format's module is also accessed so that an import failure surfaces
    here.
    """
    if name not in ioformats:
        raise UnknownFileTypeError(name)
    ioformat = ioformats[name]
    # Touch the module property purely for its side effect: it raises if
    # the implementing module cannot be imported.
    ioformat.module
    return ioformat

303 

304 

def register_external_io_formats(group):
    """Register every IO format exposed under entry-point group ``group``.

    A failure to register one entry point is reported as a warning and
    does not stop the remaining ones from being processed.
    """
    discovered = entry_points()
    # Use the selection interface when the returned object has one,
    # otherwise fall back to mapping-style access.
    if hasattr(discovered, 'select'):
        candidates = discovered.select(group=group)
    else:
        candidates = discovered.get(group, ())

    for entry_point in candidates:
        try:
            define_external_io_format(entry_point)
        except Exception as exc:
            message = ('Failed to register external '
                       f'IO format {entry_point.name}: {exc}')
            warnings.warn(message)

319 

320 

def define_external_io_format(entry_point):
    """Load ``entry_point`` and register it as an external IO format.

    Raises ValueError if a format with the entry point's name already
    exists, and TypeError if the loaded object is not an ExternalIOFormat.
    """
    loaded = entry_point.load()
    format_name = entry_point.name

    if format_name in ioformats:
        raise ValueError(f'Format {entry_point.name} already defined')

    if not isinstance(loaded, ExternalIOFormat):
        raise TypeError('Wrong type for registering external IO formats '
                        f'in format {entry_point.name}, expected '
                        'ExternalIOFormat')

    # F is the module-level alias of define_io_format.
    options = loaded._asdict()
    F(format_name, **options, external=True)

331 

332 

333# We define all the IO formats below. Each IO format has a code, 

334# such as '1F', which defines some of the format's properties: 

335# 

336# 1=single atoms object 

337# +=multiple atoms objects 

338# F=accepts a file-descriptor 

339# S=needs a file-name str 

340# B=like F, but opens in binary mode 

341 

# Short alias for define_io_format, used for the registrations below and by
# define_external_io_format().
F = define_io_format
F('abinit-gsr', 'ABINIT GSR file', '1S',
  module='abinit', glob='*o_GSR.nc')
F('abinit-in', 'ABINIT input file', '1F',
  module='abinit', magic=b'*znucl *')
F('abinit-out', 'ABINIT output file', '1F',
  module='abinit', magic=b'*.Version * of ABINIT')
F('aims', 'FHI-aims geometry file', '1S', ext='in')
F('aims-output', 'FHI-aims output', '+S',
  module='aims', magic=b'*Invoking FHI-aims ...')
F('bundletrajectory', 'ASE bundle trajectory', '+S')
F('castep-castep', 'CASTEP output file', '+F',
  module='castep', ext='castep')
F('castep-cell', 'CASTEP geom file', '1F',
  module='castep', ext='cell')
F('castep-geom', 'CASTEP trajectory file', '+F',
  module='castep', ext='geom')
F('castep-md', 'CASTEP molecular dynamics file', '+F',
  module='castep', ext='md')
F('castep-phonon', 'CASTEP phonon file', '1F',
  module='castep', ext='phonon')
F('cfg', 'AtomEye configuration', '1F')
F('cif', 'CIF-file', '+B', ext='cif')
F('cmdft', 'CMDFT-file', '1F', glob='*I_info')
F('cjson', 'Chemical json file', '1F', ext='cjson')
F('cp2k-dcd', 'CP2K DCD file', '+B',
  module='cp2k', ext='dcd')
F('cp2k-restart', 'CP2K restart file', '1F',
  module='cp2k', ext='restart')
F('crystal', 'Crystal fort.34 format', '1F',
  ext=['f34', '34'], glob=['f34', '34'])
F('cube', 'CUBE file', '1F', ext='cube')
F('dacapo-text', 'Dacapo text output', '1F',
  module='dacapo', magic=b'*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n')
F('db', 'ASE SQLite database file', '+S')
F('dftb', 'DftbPlus input file', '1S', magic=b'Geometry')
F('dlp4', 'DL_POLY_4 CONFIG file', '1F',
  module='dlp4', ext='config', glob=['*CONFIG*'])
F('dlp-history', 'DL_POLY HISTORY file', '+F',
  module='dlp4', glob='HISTORY')
F('dmol-arc', 'DMol3 arc file', '+S',
  module='dmol', ext='arc')
F('dmol-car', 'DMol3 structure file', '1S',
  module='dmol', ext='car')
F('dmol-incoor', 'DMol3 structure file', '1S',
  module='dmol')
F('elk', 'ELK atoms definition from GEOMETRY.OUT', '1F',
  glob=['GEOMETRY.OUT'])
F('elk-in', 'ELK input file', '1F', module='elk')
F('eon', 'EON CON file', '+F',
  ext='con')
F('eps', 'Encapsulated Postscript', '1S')
F('espresso-in', 'Quantum espresso in file', '1F',
  module='espresso', ext='pwi', magic=[b'*\n&system', b'*\n&SYSTEM'])
F('espresso-out', 'Quantum espresso out file', '+F',
  module='espresso', ext=['pwo', 'out'], magic=b'*Program PWSCF')
# NOTE(review): 'exciting' is registered twice.  define_io_format() stores
# formats by name, so the second call replaces the first entry in ioformats
# and the 'input.xml' glob is no longer consulted by filetype().  Confirm
# whether the two should be merged or given distinct names.
F('exciting', 'exciting input', '1F', module='exciting', glob='input.xml')
F('exciting', 'exciting output', '1F', module='exciting', glob='INFO.out')
F('extxyz', 'Extended XYZ file', '+F', ext='xyz')
F('findsym', 'FINDSYM-format', '+F')
F('gamess-us-out', 'GAMESS-US output file', '1F',
  module='gamess_us', magic=b'*GAMESS')
F('gamess-us-in', 'GAMESS-US input file', '1F',
  module='gamess_us')
F('gamess-us-punch', 'GAMESS-US punchcard file', '1F',
  module='gamess_us', magic=b' $DATA', ext='dat')
F('gaussian-in', 'Gaussian com (input) file', '1F',
  module='gaussian', ext=['com', 'gjf'])
F('gaussian-out', 'Gaussian output file', '+F',
  module='gaussian', ext='log', magic=b'*Entering Gaussian System')
F('acemolecule-out', 'ACE output file', '1S',
  module='acemolecule')
F('acemolecule-input', 'ACE input file', '1S',
  module='acemolecule')
F('gen', 'DFTBPlus GEN format', '1F')
F('gif', 'Graphics interchange format', '+S',
  module='animation')
F('gpaw-out', 'GPAW text output', '+F',
  magic=b'* ___ ___ ___ _ _ _')
F('gpumd', 'GPUMD input file', '1F', glob='xyz.in')
F('gpw', 'GPAW restart-file', '1S',
  magic=[b'- of UlmGPAW', b'AFFormatGPAW'])
F('gromacs', 'Gromacs coordinates', '1F',
  ext='gro')
F('gromos', 'Gromos96 geometry file', '1F', ext='g96')
F('html', 'X3DOM HTML', '1F', module='x3d')
F('json', 'ASE JSON database file', '+F', ext='json', module='db')
F('jsv', 'JSV file format', '1F')
F('lammps-dump-text', 'LAMMPS text dump file', '+F',
  module='lammpsrun', magic_regex=b'.*?^ITEM: TIMESTEP$')
F('lammps-dump-binary', 'LAMMPS binary dump file', '+B',
  module='lammpsrun')
F('lammps-data', 'LAMMPS data file', '1F', module='lammpsdata',
  encoding='ascii')
F('magres', 'MAGRES ab initio NMR data file', '1F')
F('mol', 'MDL Molfile', '1F')
F('mp4', 'MP4 animation', '+S',
  module='animation')
F('mustem', 'muSTEM xtl file', '1F',
  ext='xtl')
F('mysql', 'ASE MySQL database file', '+S',
  module='db')
F('netcdftrajectory', 'AMBER NetCDF trajectory file', '+S',
  magic=b'CDF')
F('nomad-json', 'JSON from Nomad archive', '+F',
  ext='nomad-json')
F('nwchem-in', 'NWChem input file', '1F',
  module='nwchem', ext='nwi')
F('nwchem-out', 'NWChem output file', '+F',
  module='nwchem', ext='nwo',
  magic=b'*Northwest Computational Chemistry Package')
F('octopus-in', 'Octopus input file', '1F',
  module='octopus', glob='inp')
F('onetep-out', 'ONETEP output file', '+F',
  module='onetep',
  magic=b'*Linear-Scaling Ab Initio Total Energy Program*')
F('onetep-in', 'ONETEP input file', '1F',
  module='onetep',
  magic=[b'*lock species ',
         b'*LOCK SPECIES ',
         b'*--- INPUT FILE ---*'])
F('proteindatabank', 'Protein Data Bank', '+F',
  ext='pdb')
F('png', 'Portable Network Graphics', '1B')
F('postgresql', 'ASE PostgreSQL database file', '+S', module='db')
F('pov', 'Persistance of Vision', '1S')
# prismatic: Should have ext='xyz' if/when multiple formats can have the same
# extension
F('prismatic', 'prismatic and computem XYZ-file', '1F')
F('py', 'Python file', '+F')
F('sys', 'qball sys file', '1F')
F('qbox', 'QBOX output file', '+F',
  magic=b'*:simulation xmlns:')
F('res', 'SHELX format', '1S', ext='shelx')
F('rmc6f', 'RMCProfile', '1S', ext='rmc6f')
F('sdf', 'SDF format', '1F')
F('siesta-xv', 'Siesta .XV file', '1F',
  glob='*.XV', module='siesta')
F('struct', 'WIEN2k structure file', '1S', module='wien2k')
F('struct_out', 'SIESTA STRUCT file', '1F', module='siesta')
F('traj', 'ASE trajectory', '+B', module='trajectory', ext='traj',
  magic=[b'- of UlmASE-Trajectory', b'AFFormatASE-Trajectory'])
F('turbomole', 'TURBOMOLE coord file', '1F', glob='coord',
  magic=b'$coord')
F('turbomole-gradient', 'TURBOMOLE gradient file', '+F',
  module='turbomole', glob='gradient', magic=b'$grad')
F('v-sim', 'V_Sim ascii file', '1F', ext='ascii')
F('vasp', 'VASP POSCAR/CONTCAR', '1F',
  ext='poscar', glob=['*POSCAR*', '*CONTCAR*', '*CENTCAR*'])
F('vasp-out', 'VASP OUTCAR file', '+F',
  module='vasp', glob='*OUTCAR*')
F('vasp-xdatcar', 'VASP XDATCAR file', '+F',
  module='vasp', glob='*XDATCAR*')
F('vasp-xml', 'VASP vasprun.xml file', '+F',
  module='vasp', glob='*vasp*.xml')
F('vti', 'VTK XML Image Data', '1F', module='vtkxml')
F('vtu', 'VTK XML Unstructured Grid', '1F', module='vtkxml', ext='vtu')
F('wout', 'Wannier90 output', '1F', module='wannier90')
F('x3d', 'X3D', '1S')
F('xsd', 'Materials Studio file', '1F')
F('xsf', 'XCrySDen Structure File', '+F',
  magic=[b'*\nANIMSTEPS', b'*\nCRYSTAL', b'*\nSLAB', b'*\nPOLYMER',
         b'*\nMOLECULE', b'*\nATOMS'])
F('xtd', 'Materials Studio file', '+F')
# xyz: No `ext='xyz'` in the definition below.
#      The .xyz files are handled by the extxyz module by default.
F('xyz', 'XYZ-file', '+F')

# Register IO formats exposed through the ase.ioformats entry point
register_external_io_formats('ase.ioformats')

512 

513 

def get_compression(filename: str) -> Tuple[str, Optional[str]]:
    """
    Split a recognised compression suffix off a filename.

    The suffixes ``.gz``, ``.bz2`` and ``.xz`` are recognised.  Any other
    filename is returned unchanged, paired with ``None``.

    >>> get_compression('H2O.pdb.gz')
    ('H2O.pdb', 'gz')
    >>> get_compression('crystal.cif')
    ('crystal.cif', None)

    Parameters
    ==========
    filename: str
        Full filename including extension.

    Returns
    =======
    (root, extension): (str, str or None)
        The filename without its compression suffix, and the suffix
        (without the dot).  If the last suffix is not a recognised
        compression format, the full filename and ``None``.
    """
    # Update if anything is added
    recognised = ('gz', 'bz2', 'xz')

    # os.path.splitext keeps the leading '.' on the suffix; strip it
    # before comparing.
    stem, suffix = os.path.splitext(filename)
    kind = suffix.strip('.')

    if kind in recognised:
        return stem, kind
    return filename, None

548 

549 

def open_with_compression(filename: str, mode: str = 'r') -> IO:
    """
    Open a file, transparently handling compression guessed from its name.

    The compression is inferred from the filename suffix: ``gz`` (gzip),
    ``bz2`` (bzip2) and ``xz`` (lzma) are handled; anything else is opened
    with the builtin ``open``.

    The bare modes 'r', 'w' and 'a' are turned into the explicit text
    modes 'rt', 'wt' and 'at'; every other mode (e.g. 'rb', 'wb') is
    passed through unchanged.

    Parameters
    ==========
    filename: str
        Path to the file to open, including any extensions that indicate
        the compression used.
    mode: str
        Mode to open the file, same as for builtin ``open``, e.g 'r', 'w'.

    Returns
    =======
    fd: file
        File-like object open with the specified mode.
    """
    # Compressed formats sometimes default to binary, so force text mode.
    explicit_text_modes = {'r': 'rt', 'w': 'wt', 'a': 'at'}
    mode = explicit_text_modes.get(mode, mode)

    _, compression = get_compression(filename)

    # The compression modules are imported lazily, only when needed.
    if compression == 'gz':
        import gzip
        return gzip.open(filename, mode=mode)  # type: ignore[return-value]
    if compression == 'bz2':
        import bz2
        return bz2.open(filename, mode=mode)
    if compression == 'xz':
        import lzma
        return lzma.open(filename, mode)

    # Either None or unknown string
    return open(filename, mode)

598 

599 

def is_compressed(fd: io.BufferedIOBase) -> bool:
    """Return whether ``fd`` is a gzip, bz2 or lzma file object."""
    # Only modules that are already imported are consulted: this avoids
    # triggering imports, and Python can be compiled without e.g. lzma.
    known_wrappers = (
        ('gzip', 'GzipFile'),
        ('bz2', 'BZ2File'),
        ('lzma', 'LZMAFile'),
    )
    for module_name, class_name in known_wrappers:
        if module_name not in sys.modules:
            continue
        wrapper_class = getattr(sys.modules[module_name], class_name)
        if isinstance(fd, wrapper_class):
            return True
    return False

617 

618 

def wrap_read_function(read, filename, index=None, **kwargs):
    """Adapt a plain read function so that it behaves like a generator.

    Without ``index`` the single result of ``read(filename, **kwargs)`` is
    yielded; with an index, every item of ``read(filename, index,
    **kwargs)`` is yielded in turn.
    """
    if index is not None:
        yield from read(filename, index, **kwargs)
        return
    result = read(filename, **kwargs)
    yield result

625 

626 

# Type of the ``filename`` argument accepted by read(), iread(), write() and
# filetype(): a path given as str or PurePath, or an open file object.
NameOrFile = Union[str, PurePath, IO]

628 

629 

def write(
        filename: NameOrFile,
        images: Union[Atoms, Sequence[Atoms]],
        format: str = None,
        parallel: bool = True,
        append: bool = False,
        **kwargs: Any
) -> None:
    """Write Atoms object(s) to file.

    filename: str or file
        Name of the file to write to or a file descriptor.  The name '-'
        means standard output.
    images: Atoms object or list of Atoms objects
        A single Atoms object or a list of Atoms objects.
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be taken from suffix of the filename; if it
        still cannot be determined (standard output, or a file object
        whose type is unknown), 'json' is used.
    parallel: bool
        Default is to write on master only.  Use parallel=False to write
        from all slaves.
    append: bool
        Default is to open files in 'w' or 'wb' mode, overwriting
        existing files.  In some cases opening the file in 'a' or 'ab'
        mode (appending) is useful,
        e.g. writing trajectories or saving multiple Atoms objects in one file.
        WARNING: If the file format does not support multiple entries without
        additional keywords/headers, files created using 'append=True'
        might not be readable by any program! They will nevertheless be
        written without error message.

    The use of additional keywords is format specific. write() may
    return an object after writing certain formats, but this behaviour
    may change in the future.

    """
    target = str(filename) if isinstance(filename, PurePath) else filename

    # Exactly one of ``path`` and ``fd`` ends up set.
    path = None
    fd = None

    if not isinstance(target, str):
        # A file object: guess the format if possible, but tolerate an
        # unknown type and fall back to the default below.
        fd = target
        if format is None:
            try:
                format = filetype(target, read=False)
                assert isinstance(format, str)
            except UnknownFileTypeError:
                format = None
    elif target == '-':
        fd = sys.stdout
    else:
        path = target
        if format is None:
            format = filetype(path, read=False)
            assert isinstance(format, str)

    if not format:
        format = 'json'  # default is json

    ioformat = get_ioformat(format)

    return _write(path, fd, format, ioformat, images,
                  parallel=parallel, append=append, **kwargs)

694 

695 

@parallel_function
def _write(filename, fd, format, io, images, parallel=None, append=False,
           **kwargs):
    """Write ``images`` with the IOFormat ``io`` to ``filename`` or ``fd``.

    Raises ValueError if the format cannot write, if a single-image format
    is given more than one image, if a file-name-only format is given a
    file object, or if appending is requested but not supported.
    """
    if isinstance(images, Atoms):
        images = [images]

    if io.single:
        if len(images) > 1:
            raise ValueError(f'{format}-format can only store 1 Atoms object.')
        images = images[0]

    if not io.can_write:
        raise ValueError(f"Can't write to {format}-format")

    # Special case for json-format:
    if format == 'json' and (len(images) > 1 or append):
        if filename is None:
            raise ValueError(
                "Can't write more than one image to file-descriptor "
                'using json-format.')
        return io.write(filename, images, append=append, **kwargs)

    if not io.acceptsfd:
        # The writer needs a file name.
        if fd is not None:
            raise ValueError(f"Can't write {format}-format to file-descriptor")
        if io.can_append:
            return io.write(filename, images, append=append, **kwargs)
        if append:
            raise ValueError(f"Cannot append to {format}-format, "
                             "write-function "
                             "does not support the append keyword.")
        return io.write(filename, images, **kwargs)

    if fd is not None:
        # Caller-supplied file object: write to it and leave it open.
        return io.write(fd, images, **kwargs)

    mode = ('a' if append else 'w') + ('b' if io.isbinary else '')
    fd = open_with_compression(filename, mode)
    # XXX remember to re-enable compressed open
    # fd = io.open(filename, mode)
    try:
        return io.write(fd, images, **kwargs)
    finally:
        fd.close()

744 

745 

def read(
        filename: NameOrFile,
        index: Any = None,
        format: str = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Union[Atoms, List[Atoms]]:
    """Read Atoms object(s) from file.

    filename: str or file
        Name of the file to read from or a file descriptor.  The name '-'
        means standard input.
    index: int, slice or str
        The last configuration will be returned by default.  Examples:

            * ``index=0``: first configuration
            * ``index=-2``: second to last
            * ``index=':'`` or ``index=slice(None)``: all
            * ``index='-3:'`` or ``index=slice(-3, None)``: three last
            * ``index='::2'`` or ``index=slice(0, None, 2)``: even
            * ``index='1::2'`` or ``index=slice(1, None, 2)``: odd
    format: str
        Used to specify the file-format.  If not given, the
        file-format will be guessed by the *filetype* function.
    parallel: bool
        Default is to read on master and broadcast to slaves.  Use
        parallel=False to read on all slaves.
    do_not_split_by_at_sign: bool
        If False (default), ``filename`` is split at the last at sign
        ``@`` and the part after it is used as the index.

    Many formats allow an open file-like object to be passed instead
    of ``filename``.  In this case the format cannot be auto-detected,
    so the ``format`` argument should be explicitly given.

    A slice or str index returns a list of images; any other index
    returns a single image."""

    if isinstance(filename, PurePath):
        filename = str(filename)
    if filename == '-':
        filename = sys.stdin

    if isinstance(index, str):
        # An index string that cannot be converted is passed on unchanged.
        try:
            index = string2index(index)
        except ValueError:
            pass

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)
    if index is None:
        index = -1

    if not format:
        # File contents are only inspected when a file name was given.
        format = filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    wants_list = isinstance(index, (slice, str))
    selection = index if wants_list else slice(index, None)
    images = _iread(filename, selection, format, ioformat,
                    parallel=parallel, **kwargs)
    if wants_list:
        return list(images)
    return next(images)

802 

803 

def iread(
        filename: NameOrFile,
        index: Any = None,
        format: str = None,
        parallel: bool = True,
        do_not_split_by_at_sign: bool = False,
        **kwargs
) -> Iterable[Atoms]:
    """Iterator for reading Atoms objects from file.

    Takes the same arguments as the `read` function, but yields one Atoms
    object at a time instead of returning them all at once.  Without an
    index, all images are iterated over."""

    if isinstance(filename, PurePath):
        filename = str(filename)

    if isinstance(index, str):
        index = string2index(index)

    filename, index = parse_filename(filename, index, do_not_split_by_at_sign)

    if index is None or index == ':':
        index = slice(None, None, None)
    if not isinstance(index, (slice, str)):
        # Integer index -> one-element slice.  For index -1 the stop would
        # be 0, which is replaced by None so the last image is selected.
        stop = (index + 1) or None
        index = slice(index, stop)

    if not format:
        format = filetype(filename, read=isinstance(filename, str))
    ioformat = get_ioformat(format)

    yield from _iread(filename, index, format, ioformat, parallel=parallel,
                      **kwargs)

836 

837 

@parallel_generator
def _iread(filename, index, format, io, parallel=None, full_output=False,
           **kwargs):
    """Yield images read from ``filename`` with the IOFormat ``io``.

    With ``full_output`` true, the dictionaries produced by the reader are
    yielded (non-dict results are wrapped as ``{'atoms': result}``);
    otherwise only their ``'atoms'`` entries are yielded.
    """
    if not io.can_read:
        raise ValueError(f"Can't read from {format}-format")

    if io.single:
        # Single-image readers take no index argument; only indices that
        # select the one image are acceptable.
        start = index.start
        assert start is None or start == 0 or start == -1
    reader_args = () if io.single else (index,)

    opened_here = False
    if not isinstance(filename, str):
        # A file object was passed in, so the format must accept one.
        assert io.acceptsfd
        fd = filename
    elif io.acceptsfd:
        fd = open_with_compression(filename, 'rb' if io.isbinary else 'r')
        opened_here = True
    else:
        fd = filename

    # Make sure fd is closed in case loop doesn't finish:
    try:
        for item in io.read(fd, *reader_args, **kwargs):
            record = item if isinstance(item, dict) else {'atoms': item}
            yield record if full_output else record['atoms']
    finally:
        if opened_here:
            fd.close()

876 

877 

def parse_filename(filename, index=None, do_not_split_by_at_sign=False):
    """Split a trailing ``@index`` off ``filename``.

    Returns ``(filename, index)``.  Nothing is split when ``filename`` is
    not a str, when ``do_not_split_by_at_sign`` is true, or when the base
    name contains no ``@``.  Otherwise the text after the last ``@`` is
    removed from the filename and, unless ``index`` is already a slice,
    converted with string2index() and returned as the index.  If that
    conversion fails, a warning is issued and the raw text is returned.
    """
    if not isinstance(filename, str):
        return filename, index

    keep_whole = (do_not_split_by_at_sign
                  or '@' not in os.path.basename(filename))
    if keep_whole:
        return filename, index

    # '@' is known to be present, so the separator is always found.
    path, _, index_text = filename.rpartition('@')

    if isinstance(index, slice):
        # An explicit slice takes precedence over the embedded index.
        return path, index

    parsed_index = index_text
    try:
        parsed_index = string2index(index_text)
    except ValueError:
        warnings.warn('Can not parse index for path \n'
                      ' "%s" \nConsider set '
                      'do_not_split_by_at_sign=True \nif '
                      'there is no index.' % filename)
    return path, parsed_index

899 

900 

def match_magic(data: bytes) -> IOFormat:
    """Return the first registered format whose magic matches ``data``.

    Only the first PEEK_BYTES bytes of ``data`` are considered.  Raises
    UnknownFileTypeError if no format matches.
    """
    head = data[:PEEK_BYTES]
    matching = (candidate for candidate in ioformats.values()
                if candidate.match_magic(head))
    found = next(matching, None)
    if found is None:
        raise UnknownFileTypeError('Cannot guess file type from contents')
    return found

907 

908 

def string2index(string: str) -> Union[int, slice, str]:
    """Convert an index string to an int or a slice.

    A string without ':' is converted to int if possible and otherwise
    returned unchanged (it may contain a database accessor).  A string
    with ':' is split into start:stop:step, where empty fields become
    None; a non-integer field raises ValueError.
    """
    if ':' not in string:
        # may contain database accessor
        try:
            return int(string)
        except ValueError:
            return string

    fields: List[Optional[int]] = [
        int(token) if token != '' else None
        for token in string.split(':')
    ]
    # Pad with None up to (start, stop, step).
    fields.extend([None] * (3 - len(fields)))
    return slice(*fields)

925 

926 

def filetype(
        filename: NameOrFile,
        read: bool = True,
        guess: bool = True,
) -> str:
    """Try to guess the type of the file.

    First, special signatures in the filename will be checked for.  If that
    does not identify the file type, then up to ``PEEK_BYTES`` bytes from
    the start of the file will be read and analysed.  Turn off this second
    part by using read=False.

    ``filename`` may be a file name or a file object.  If a file object
    has a ``name`` attribute, that name is used for the name-based checks.

    If the contents do not identify the format either, the extension is
    looked up among the registered extensions.  With ``guess=True``
    (default) an unregistered extension is returned as the format name;
    with ``guess=False`` a quick xyz check is tried instead.

    Raises UnknownFileTypeError if the type cannot be determined or the
    file is empty.

    Can be used from the command-line also::

        $ ase info filename ...
    """

    orig_filename = filename
    if hasattr(filename, 'name'):
        filename = filename.name

    ext = None
    if isinstance(filename, str):
        if os.path.isdir(filename):
            if os.path.basename(os.path.normpath(filename)) == 'states':
                return 'eon'
            return 'bundletrajectory'

        if filename.startswith('postgres'):
            return 'postgresql'

        if filename.startswith(('mysql', 'mariadb')):
            return 'mysql'

        # strip any compression extensions that can be read
        root, _ = get_compression(filename)
        basename = os.path.basename(root)

        if '.' in basename:
            ext = os.path.splitext(basename)[1].strip('.').lower()

        for fmt in ioformats.values():
            if fmt.match_name(basename):
                return fmt.name

        if not read:
            if ext is None:
                raise UnknownFileTypeError('Could not guess file type')
            ioformat = extension2format.get(ext)
            if ioformat:
                return ioformat.name

            # askhl: This is strange, we don't know if ext is a format:
            return ext

        if orig_filename == filename:
            fd = open_with_compression(filename, 'rb')
        else:
            fd = orig_filename  # type: ignore[assignment]
    else:
        fd = filename
        if fd is sys.stdin:
            return 'json'

    # NOTE(review): a caller-supplied file object that has a ``name`` is
    # also closed here (fd is not filename in that case) -- existing
    # behaviour, kept as is; confirm whether it is intended.
    try:
        data = fd.read(PEEK_BYTES)
    finally:
        # Close even if read() fails, so a file opened above cannot leak.
        if fd is not filename:
            fd.close()
    if fd is filename:
        # Nameless caller-supplied file object: rewind it for the caller.
        fd.seek(0)

    if len(data) == 0:
        # ``filename`` may be a file object here, so format it instead of
        # concatenating it to a str (which would raise TypeError).
        raise UnknownFileTypeError(f'Empty file: {filename}')

    try:
        return match_magic(data).name
    except UnknownFileTypeError:
        pass

    format = None
    if ext in extension2format:
        format = extension2format[ext].name

    if format is None and guess:
        format = ext
    if format is None:
        # Do quick xyz check:
        lines = data.splitlines()
        if lines and lines[0].strip().isdigit():
            return extension2format['xyz'].name

        raise UnknownFileTypeError('Could not guess file type')
    assert isinstance(format, str)
    return format

1020 

1021 

def index2range(index, length):
    """Translate an index into a range over a sequence of ``length`` items.

    A slice gives the corresponding range; an integer gives a range
    containing only that (normalised, non-negative) integer."""
    selected = range(length)[index]
    if not isinstance(selected, numbers.Integral):
        return selected
    return range(selected, selected + 1)

1029 return obj