Source code for sisl.io.siesta.stdout

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
from __future__ import annotations

import os
from collections import namedtuple
from dataclasses import dataclass, field
from functools import lru_cache
from typing import Literal, Optional

import numpy as np

import sisl._array as _a
from sisl import Atom, Atoms, Geometry, Lattice
from sisl._common import Opt
from sisl._help import voigt_matrix
from sisl._internal import set_module
from sisl.messages import deprecation, warn
from sisl.physics import Spin
from sisl.physics.brillouinzone import MonkhorstPack
from sisl.unit.siesta import unit_convert
from sisl.utils import PropertyDict
from sisl.utils.cmd import *

from .._multiple import SileBinder, postprocess_tuple
from ..sile import SileError, add_sile, sile_fh_open
from .sile import SileSiesta

__all__ = ["stdoutSileSiesta", "outSileSiesta"]


Bohr2Ang = unit_convert("Bohr", "Ang")


def _ensure_atoms(atoms):
    """Ensures that the atoms list is a list with entries (converts `None` to a list)."""
    if atoms is None:
        return [Atom(i) for i in range(150)]
    elif len(atoms) == 0:
        return [Atom(i) for i in range(150)]
    return atoms


def _parse_spin(attr, instance, match):
    """Parse 'redata: Spin configuration *= <value>'"""
    opt = match.string.split("=")[-1].strip()

    if opt.startswith("nambu"):
        return Spin("nambu")
    if opt.startswith("spin-orbit"):
        return Spin("spin-orbit")
    if opt.startswith("collinear") or opt.startswith("colinear"):
        return Spin("polarized")
    if opt.startswith("non-col"):
        return Spin("non-colinear")
    return Spin()


def _read_scf_empty(scf):
    if isinstance(scf, tuple):
        return len(scf[0]) == 0
    return len(scf) == 0


def _read_scf_md_process(scfs):

    if len(scfs) == 0:
        return None

    if not isinstance(scfs, list):
        # single MD request either as:
        #  - np.ndarray
        #  - np.ndarray, tuple
        #  - pd.DataFrame
        return scfs

    has_props = isinstance(scfs[0], tuple)
    if has_props:
        my_len = lambda scf: len(scf[0])
    else:
        my_len = len

    scf_len1 = np.all(_a.fromiterd(map(my_len, scfs)) == 1)
    if isinstance(scfs[0], (np.ndarray, tuple)):

        if has_props:
            props = scfs[0][1]
            scfs = [scf[0] for scf in scfs]

        if scf_len1:
            scfs = np.array(scfs)
        if has_props:
            return scfs, props
        return scfs

    # We are dealing with a dataframe
    import pandas as pd

    df = pd.concat(
        scfs,
        keys=_a.arangei(1, len(scfs) + 1),
        names=["imd"],
    )
    if scf_len1:
        df.reset_index("iscf", inplace=True)
    return df


def _parse_in_dynamics(attr, instance, match):
    """Determines whether we are in the dynamics section or in the *Final* section.

    Basically it returns ``not instance.info._in_final_analysis``.
    """
    return not instance.info._in_final_analysis()


def _parse_version(attr, instance, match):
    opt = match.string.split(":", maxsplit=1)[-1].strip()

    version, *spec = opt.split("-", maxsplit=1)
    try:
        version = tuple(int(v) for v in version.split("."))
    except Exception:
        version = (0, 0, 0)

    # Convert version to a tuple
    Version = namedtuple("Version", "version spec")

    version = Version(version, spec)

    return version


def _in_final(self):
    return self.fh.tell() >= self.info._in_final_analysis_tell


@dataclass
class Toggler:
    """Allows simpler toggling for when a key is found or not.

    When executing `toggle`, then they key is added if not present,
    and return False.
    If the key is present, it will reset the `keys` attribute, add
    the `key`, and return True (because it got re-initialized).
    """

    keys: set[str] = field(init=False, default_factory=set)

    def __bool__(self) -> bool:
        return len(self) > 0

    def __len__(self) -> int:
        return len(self.keys)

    def toggle(self, key: str) -> bool:
        """Toggle a key in the object. Return true if it was already present."""
        ret = False
        if key in self.keys:
            # easier to just reset it!
            self.keys = set()
            ret = True
        self.keys.add(key)
        return ret


@set_module("sisl.io.siesta")
class stdoutSileSiesta(SileSiesta):
    """Output file from Siesta

    This enables reading the output quantities from the Siesta output.
    """

    _info_attributes_ = [
        dict(
            name="version",
            searcher=r"^Version[ ]*: ",
            parser=_parse_version,
            not_found="warn",
        ),
        dict(
            name="na",
            searcher=r"^initatomlists: Number of atoms",
            parser=lambda attr, instance, match: int(match.string.split()[-3]),
            not_found="warn",
        ),
        dict(
            name="no",
            searcher=r"^initatomlists: Number of atoms",
            parser=lambda attr, instance, match: int(match.string.split()[-2]),
            not_found="warn",
        ),
        dict(
            name="nspecies",
            searcher=r"^redata: Number of Atomic Species",
            parser=lambda attr, instance, match: int(match.string.split("=")[-1]),
            not_found="warn",
        ),
        dict(
            name="completed",
            searcher=r".*Job completed",
            parser=lambda attr, instance, match: lambda: True,
            default=lambda: False,
            not_found="warn",
        ),
        dict(
            name="spin",
            searcher=r"^redata: Spin configuration",
            parser=_parse_spin,
        ),
        dict(
            name="_has_forces_in_dynamics",
            searcher=r"^siesta: Atomic forces",
            parser=_parse_in_dynamics,
        ),
        dict(
            name="_has_stress_in_dynamics",
            searcher=r"^siesta: Stress tensor",
            parser=_parse_in_dynamics,
        ),
        dict(
            name="_in_final_analysis_tell",
            searcher=r"^siesta: Final energy",
            parser=lambda attr, instance, match: instance.fh.tell(),
            default=1e12,
        ),
        dict(
            name="_in_final_analysis",
            searcher=None,
            default=_in_final,
            found=True,
        ),
    ]


[docs]
    @deprecation(
        "stdoutSileSiesta.completed is deprecated in favor of stdoutSileSiesta.info.completed",
        "0.15",
        "0.17",
    )
    def completed(self):
        """True if the full file has been read and "Job completed" was found."""
        return self.info.completed()



[docs]
    @lru_cache(1)
    @sile_fh_open(True)
    def read_basis(self) -> Atoms:
        """Reads the basis as found in the output file

        This parses 3 things:

        1. At the start of the file there are some initatom output
           specifying which species in the calculation.
        2. Reading the <basis_specs> entries for the masses
        3. Reading the PAO.Basis block output for orbital information
        """
        found, line = self.step_to("Species number:")
        if not found:
            return []

        atoms = {}
        order = []
        while "Species number:" in line:
            ls = line.split()
            if ls[3] == "Atomic":
                atoms[ls[7]] = {"Z": int(ls[5]), "tag": ls[7]}
                order.append(ls[7])
            else:
                atoms[ls[4]] = {"Z": int(ls[7]), "tag": ls[4]}
                order.append(ls[4])
            line = self.readline()

            # Now go down to basis_specs
        found, line = self.step_to("<basis_specs>")
        while found:
            # =====
            self.readline()
            # actual line
            line = self.readline().split("=")
            tag = line[0].split()[0]
            atoms[tag]["mass"] = float(line[2].split()[0])
            found, line = self.step_to("<basis_specs>", allow_reread=False)

        block = []
        found, line = self.step_to("%block PAO.Basis")
        line = self.readline()
        while not line.startswith("%endblock PAO.Basis"):
            block.append(line)
            line = self.readline()

        from .fdf import fdfSileSiesta

        atom_orbs = fdfSileSiesta._parse_pao_basis(block)
        for atom, orbs in atom_orbs.items():
            atoms[atom]["orbitals"] = orbs

        return Atoms([Atom(**atoms[tag]) for tag in order])


    def _r_lattice_outcell(self):
        """Wrapper for reading the unit-cell from the outcoor block"""

        # Read until outcell is found
        found, line = self.step_to("outcell: Unit cell vectors")
        if not found:
            raise ValueError(
                f"{self.__class__.__name__}._r_lattice_outcell did not find outcell key"
            )

        Ang = "Ang" in line

        # We read the unit-cell vectors (in Ang)
        cell = []
        line = self.readline()
        while len(line.strip()) > 0:
            line = line.split()
            cell.append([float(x) for x in line[:3]])
            line = self.readline()

        cell = _a.arrayd(cell)

        if not Ang:
            cell *= Bohr2Ang

        return Lattice(cell)

    def _r_geometry_outcoor(self, line, atoms=None):
        """Wrapper for reading the geometry as in the outcoor output"""
        atoms_order = _ensure_atoms(atoms)
        is_final = "Relaxed" in line or "Final (unrelaxed)" in line

        # Now we have outcoor
        scaled = "scaled" in line
        fractional = "fractional" in line
        Ang = "Ang" in line

        # Read in data
        xyz = []
        atoms = []
        line = self.readline()
        while len(line.strip()) > 0:
            line = line.split()
            if len(line) != 6:
                break
            xyz.append(line[:3])
            atoms.append(atoms_order[int(line[3]) - 1])
            line = self.readline()

        # in outcoor we know it is always just after
        # But not if not variable cell.
        # Retrieve the unit-cell (but do not skip file-descriptor position)
        # This is because the current unit-cell is not always written.
        pos = self.fh.tell()
        cell = self._r_lattice_outcell()
        if is_final and self.fh.tell() < pos:
            # we have wrapped around the file
            self.fh.seek(pos, os.SEEK_SET)
        xyz = _a.arrayd(xyz)

        # Now create the geometry
        if scaled:
            # The output file for siesta does not
            # contain the lattice constant.
            # So... :(
            raise ValueError(
                "Could not read the lattice-constant for the scaled geometry"
            )
        elif fractional:
            xyz = xyz.dot(cell.cell)
        elif not Ang:
            xyz *= Bohr2Ang

        return Geometry(xyz, atoms, lattice=cell)

    def _r_geometry_atomic(self, line, atoms=None):
        """Wrapper for reading the geometry as in the outcoor output"""
        atoms_order = _ensure_atoms(atoms)

        # Now we have outcoor
        Ang = "Ang" in line

        # Read in data
        xyz = []
        atoms = []
        line = self.readline()
        while len(line.strip()) > 0:
            line = line.split()
            xyz.append([float(x) for x in line[1:4]])
            atoms.append(atoms_order[int(line[4]) - 1])
            line = self.readline()

        # Retrieve the unit-cell (but do not skip file-descriptor position)
        # This is because the current unit-cell is not always written.
        pos = self.fh.tell()
        cell = self._r_lattice_outcell()
        self.fh.seek(pos, os.SEEK_SET)

        # Convert xyz
        xyz = _a.arrayd(xyz)
        if not Ang:
            xyz *= Bohr2Ang

        return Geometry(xyz, atoms, lattice=cell)


[docs]
    @SileBinder()
    @sile_fh_open()
    def read_geometry(self, skip_input: bool = True) -> Geometry:
        """Reads the geometry from the Siesta output file

        Parameters
        ----------
        skip_input :
            the input geometry may be contained as a print-out.
            This is not part of an MD calculation, and hence is by
            default not returned.

        Returns
        -------
        geometries: list or Geometry or None
             if all is False only one geometry will be returned (or None). Otherwise
             a list of geometries corresponding to the MD-runs.
        """
        atoms = self.read_basis()

        def func(*args, **kwargs):
            """Wrapper to return None"""
            return None

        while line := self.readline():
            if "outcoor" in line and "coordinates" in line:
                func = self._r_geometry_outcoor
                break
            elif "siesta: Atomic coordinates" in line and not skip_input:
                func = self._r_geometry_atomic
                break

        return func(line, atoms)



[docs]
    @SileBinder(postprocess=postprocess_tuple(_a.arrayd))
    @sile_fh_open()
    def read_force(
        self,
        total: bool = False,
        max: bool = False,
        key: Literal["siesta", "ts"] = "siesta",
        skip_final: Optional[bool] = None,
    ):
        """Reads the forces from the Siesta output file

        Parameters
        ----------
        total:
            return the total forces instead of the atomic forces.
        max:
            whether only the maximum atomic force should be returned for each step.

            Setting it to `True` is equivalent to `max(outSile.read_force())` in case atomic forces
            are written in the output file (`WriteForces .true.` in the fdf file)

            Note that this is not the same as doing `max(outSile.read_force(total=True))` since
            the forces returned in that case are averages on each axis.
        key:
            Specifies the indicator string for the forces that are to be read.
            The function will look for a line containing ``f'{key}: Atomic forces'``
            to start reading forces.
        skip_final:
            the final output of the forces is duplicated when the *final*
            output is written.
            By default, this method will return the final forces, but
            only **if** no other forces are found.
            If forces from dynamics are found, then the final forces
            will not be returned, unless explicitly requested through this flag.

        Returns
        -------
        numpy.ndarray or None
            returns ``None`` if the forces are not found in the
            output, otherwise forces will be returned

            The shape of the array will be different depending on the type of forces requested:
                - atomic (default): (nMDsteps, nAtoms, 3)
                - total: (nMDsteps, 3)
                - max: (nMDsteps, )

            If `total` and `max` are both `True`, they are returned separately as a tuple: ``(total, max)``
        """
        # Read until forces are found
        found, line = self.step_to(f"{key}: Atomic forces", allow_reread=False)
        if not found:
            return None

        if skip_final is None:
            # If we have final forces, default to skip the
            # final forces when we have forces in the dynamics
            # sections.
            skip_final = self.info._has_forces_in_dynamics

        # Now read data
        if skip_final and self.info._in_final_analysis():
            # This is the final summary, we don't need to read it as it does not contain new information
            # and also it make break things since max forces are not written there
            return None

        F = []
        # First, we encounter the atomic forces
        while line := self.readline():
            if "---" in line:
                break
            line = line.split()
            if not (total or max):
                F.append([float(x) for x in line[-3:]])

        if not F:
            F = None

        # Parse total forces if requested
        line = self.readline()
        if total and (line := line.split()):
            F = _a.arrayd([float(x) for x in line[-3:]])

        # And after that we can read the max force
        line = self.readline()
        if max and (line := self.readline().split()):
            maxF = _a.arrayd(float(line[1]))

            # In case total is also requested, we are going to
            # store it all in the same variable.
            # It will be separated later
            if total:
                # create tuple
                F = (F, maxF)
            else:
                F = maxF

        return F



[docs]
    @SileBinder(postprocess=_a.arrayd)
    @sile_fh_open()
    def read_stress(
        self,
        key: Literal["static", "total", "Voigt"] = "static",
        skip_final: Optional[bool] = None,
    ) -> np.ndarray:
        """Reads the stresses from the Siesta output file

        Parameters
        ----------
        key :
           which stress to read from the output.
        skip_final:
            the final output of the stress is duplicated when the *final*
            output is written.
            By default, this method will return the final stress, but
            only **if** no other stresses are found.
            If stresses from dynamics are found, then the final stress
            will not be returned, unless explicitly requested through this flag.

        Returns
        -------
        numpy.ndarray or None
            returns ``None`` if the stresses are not found in the
            output, otherwise stresses will be returned
        """

        is_voigt = key.lower() == "voigt"
        if is_voigt:
            key = "Voigt"
            search = "Stress tensor Voigt"
        else:
            search = "siesta: Stress tensor"

        found = False
        line = " "
        while not found and line != "":
            found, line = self.step_to(search, allow_reread=False)
            found = found and key in line

        if skip_final is None:
            # If we have final stress, default to skip the
            # final stress when we have stress in the dynamics
            # sections.
            skip_final = self.info._has_stress_in_dynamics

        if skip_final and self.info._in_final_analysis():
            # we are in the final section, and don't want to do
            # anything
            return None

        if not found:
            return None

        # Now read data
        if is_voigt:
            Svoigt = _a.arrayd([float(x) for x in line.split()[-6:]])
            S = voigt_matrix(Svoigt, False)
        else:
            S = []
            for _ in range(3):
                line = self.readline().split()
                S.append([float(x) for x in line[-3:]])
            S = _a.arrayd(S)

        return S



[docs]
    @SileBinder(postprocess=_a.arrayd)
    @sile_fh_open()
    def read_moment(
        self, orbitals: bool = False, quantity: Literal["S", "L"] = "S"
    ) -> np.ndarray:
        """Reads the moments from the Siesta output file

        These will only be present in case of spin-orbit coupling.

        Parameters
        ----------
        orbitals:
           return a table with orbitally resolved
           moments.
        quantity:
           return the spin-moments or the L moments
        """
        # Read until outcoor is found
        if not self.step_to("moments: Atomic", allow_reread=False)[0]:
            return None

        # The moments are printed in SPECIES list
        itt = iter(self)
        next(itt)  # empty
        next(itt)  # empty

        na = 0
        # Loop the species
        tbl = []
        # Read the species label
        while True:
            next(itt)  # ""
            next(itt)  # Atom    Orb ...

            # Loop atoms in this species list
            while True:
                line = next(itt)
                if line.startswith("Species") or line.startswith("--"):
                    break
                line = " "
                atom = []
                ia = 0
                while not line.startswith("--"):
                    line = next(itt).split()
                    if ia == 0:
                        ia = int(line[0])
                    elif ia != int(line[0]):
                        raise ValueError("Error in moments formatting.")

                    # Track maximum number of atoms
                    na = max(ia, na)
                    if quantity == "S":
                        atom.append([float(x) for x in line[4:7]])
                    elif quantity == "L":
                        atom.append([float(x) for x in line[7:10]])

                line = next(itt).split()  # Total ...
                if not orbitals:
                    ia = int(line[0])
                    if quantity == "S":
                        atom.append([float(x) for x in line[4:7]])
                    elif quantity == "L":
                        atom.append([float(x) for x in line[8:11]])
                tbl.append((ia, atom))
            if line.startswith("--"):
                break

        # Sort according to the atomic index
        moments = [] * na

        # Insert in the correct atomic
        for ia, atom in tbl:
            moments[ia - 1] = atom

        return _a.arrayd(moments)



[docs]
    @sile_fh_open(True)
    def read_energy(self) -> PropertyDict:
        """Reads the final energy distribution

        Currently the energies translated are:

        ``band``
             band structure energy
        ``kinetic``
             electronic kinetic energy
        ``hartree``
             electronic electrostatic Hartree energy
        ``dftu``
             DFT+U energy
        ``spin_orbit``
             spin-orbit energy
        ``extE``
             external field energy
        ``xc``
             exchange-correlation energy
        ``exchange``
             exchange energy
        ``correlation``
             correlation energy
        ``bulkV``
             bulk-bias correction energy
        ``total``
             total energy
        ``negf``
             NEGF energy
        ``fermi``
             Fermi energy
        ``ion.electron``
             ion-electron interaction energy
        ``ion.ion``
             ion-ion interaction energy
        ``ion.kinetic``
             kinetic ion energy
        ``basis.enthalpy``
             enthalpy of basis sets, Free + p_basis*V_orbitals


        Any unrecognized key gets added *as is*.

        Examples
        --------
        >>> energies = sisl.get_sile("RUN.out").read_energy()
        >>> ion_energies = energies.ion
        >>> ion_energies.ion # ion-ion interaction energy
        >>> ion_energies.kinetic # ion kinetic energy
        >>> energies.fermi # fermi energy

        Returns
        -------
        PropertyDict : dictionary like lookup table ionic energies are stored in a nested `PropertyDict` at the key ``ion`` (all energies in eV)
        """
        found = self.step_to("siesta: Final energy", allow_reread=False)[0]
        out = PropertyDict()

        if not found:
            return out
        itt = iter(self)

        # Read data
        line = next(itt)
        name_conv = {
            "Band Struct.": "band",
            "Kinetic": "kinetic",
            "Hartree": "hartree",
            "Edftu": "dftu",
            "Eldau": "dftu",
            "Eso": "spin_orbit",
            "Ext. field": "extE",
            "Exch.-corr.": "xc",
            "Exch.": "exchange",
            "Corr.": "correlation",
            "Ekinion": "ion.kinetic",
            "Ion-electron": "ion.electron",
            "Ion-ion": "ion.ion",
            "Bulk bias": "bulkV",
            "Total": "total",
            "Fermi": "fermi",
            "Enegf": "negf",
            "(Free)E+ p_basis*V_orbitals": "basis.enthalpy",
            "(Free)E + p_basis*V_orbitals": "basis.enthalpy",  # we may correct the missing space
        }

        def assign(out, key, val):
            key = name_conv.get(key, key)
            try:
                val = float(val)
            except ValueError:
                warn(
                    f"Could not convert energy '{key}' ({val.strip()}) to a float, assigning nan."
                )
                val = np.nan

            if "." in key:
                loc, key = key.split(".")
                if not hasattr(out, loc):
                    out[loc] = PropertyDict()
                loc = out[loc]
            else:
                loc = out
            loc[key] = val

        while len(line.strip()) > 0:
            key, val = line.split("=")
            key = key.split(":")[1].strip()
            assign(out, key, val.split()[0])
            line = next(itt)

        # now skip to the pressure
        found, line = self.step_to(
            ["(Free)E + p_basis*V_orbitals", "(Free)E+ p_basis*V_orbitals"],
            allow_reread=False,
        )
        if found:
            key, val = line.split("=")
            assign(out, key.strip(), val.split()[0])

        return out



[docs]
    @SileBinder()
    @sile_fh_open()
    def read_brillouinzone(self, trs: bool = True) -> MonkhorstPack:
        r"""Parses the k-grid section if present, otherwise returns the
        :math:`\Gamma`-point"""

        # store position, read geometry, then reposition file.
        tell = self.fh.tell()
        geom = self.read_geometry()
        self.fh.seek(tell)

        found, line = self.step_to(
            "siesta: k-grid: Number of k-points", allow_reread=False
        )

        if not found:
            return MonkhorstPack(geom, 1, trs=trs)

        nk = int(line.split("=")[-1])

        # default kcell and kdispl
        kcell = np.diag([1, 1, 1])
        kdispl = np.zeros(3)

        # if next line also contains 'siesta: k-grid:' then we can step_to
        line = self.readline()
        if line.startswith("siesta: k-grid:"):
            if self.step_to(
                "siesta: k-grid: Supercell and displacements", allow_reread=False
            )[0]:
                for i in range(3):
                    line = self.readline().split()
                    kdispl[i] = float(line[-1])
                    kcell[i, 2] = int(line[-2])
                    kcell[i, 1] = int(line[-3])
                    kcell[i, 0] = int(line[-4])

        return MonkhorstPack(geom, kcell, displacement=kdispl, trs=trs)



[docs]
    def read_data(self, *args, **kwargs) -> Any:
        """Read specific content in the Siesta out file

        The currently implemented things are denoted in
        the parameters list.
        Note that the returned quantities are in the order
        of keywords, so:

        >>> read_data(geometry=True, force=True)
        <geometry>, <force>
        >>> read_data(force=True, geometry=True)
        <force>, <geometry>

        Parameters
        ----------
        geometry: bool, optional
           read geometry, args are passed to `read_geometry`
        force: bool, optional
           read force, args are passed to `read_force`
        stress: bool, optional
           read stress, args are passed to `read_stress`
        moment: bool, optional
           read moment, args are passed to `read_moment` (only for spin-orbit calculations)
        energy: bool, optional
           read final energies, args are passed to `read_energy`
        """
        run = []
        # This loops ensures that we preserve the order of arguments
        # From Py3.6 and onwards the **kwargs is an OrderedDictionary
        for kw in kwargs.keys():
            if kw in ("geometry", "force", "moment", "stress", "energy"):
                if kwargs[kw]:
                    run.append(kw)

        # Clean running names
        for name in run:
            kwargs.pop(name)

        slice = kwargs.pop("slice", None)
        val = []
        for name in run:
            if slice is None:
                val.append(getattr(self, f"read_{name.lower()}")(*args, **kwargs))
            else:
                val.append(
                    getattr(self, f"read_{name.lower()}")[slice](*args, **kwargs)
                )

        if len(val) == 0:
            return None
        elif len(val) == 1:
            val = val[0]
        return val



[docs]
    @SileBinder(
        default_slice=-1, check_empty=_read_scf_empty, postprocess=_read_scf_md_process
    )
    @sile_fh_open()
    def read_scf(
        self,
        key: Literal["scf", "ts-scf"] = "scf",
        iscf: Optional[Union[int, Ellipsis]] = -1,
        as_dataframe: bool = False,
        ret_header: bool = False,
    ):
        r"""Parse SCF information and return a table of SCF information depending on what is requested

        Parameters
        ----------
        key :
            parse SCF information from Siesta SCF or TranSiesta SCF
        iscf :
            which SCF cycle should be stored. If ``-1`` only the final SCF step is stored,
            for `...`/`None` *all* SCF cycles are returned. When `iscf` values queried are not found they
            will be truncated to the nearest SCF step.
        as_dataframe:
            whether the information should be returned as a `pandas.DataFrame`. The advantage of this
            format is that everything is indexed and therefore you know what each value means.You can also
            perform operations very easily on a dataframe.
        ret_header:
            whether to also return the headers that define each value in the returned array,
            will have no effect if `as_dataframe` is true.
        """

        # These are the properties that are written in SIESTA scf
        if iscf is None:
            iscf = Ellipsis
        elif iscf is not Ellipsis:
            if iscf == 0:
                raise ValueError(
                    f"{self.__class__.__name__}.read_scf requires iscf argument to *not* be 0!"
                )

        def reset_d(d, line):
            """Determine if the SCF is done, and signal what it found."""
            if line.startswith("SCF cycle converged") or line.startswith(
                "SCF_NOT_CONV"
            ):
                if d["_toggle"]:  # means it did find some data
                    d["_final_iscf"] = 1

            elif line.startswith("SCF cycle continued"):
                d["_final_iscf"] = 0

        def construct_data(d):
            """Concatenate the data from the `order` and keys"""
            try:
                data = []
                for key in d["order"]:
                    data.extend(d[key])
                    del d[key]
                return data
            except KeyError:
                return None

        def do_finalize(d, key):
            """Check whether we are ready to return data (a similar key is found)."""
            if d["_toggle"].toggle(key):
                return construct_data(d)
            return None

        def add_order(d, key, prop=None):
            if prop is None:
                prop = key

            if key not in d["order"]:
                d["order"].append(key)
                if isinstance(prop, str):
                    d["props"].append(prop)
                else:
                    d["props"].extend(prop)

        def p_scf_fix_spin(line):
            assert len(line) == 97
            data = [
                int(line[5:9]),
                float(line[9:25]),
                float(line[25:41]),
                float(line[41:57]),
                float(line[57:67]),
                float(line[67:77]),
                float(line[77:87]),
                float(line[87:97]),
            ]
            return data

        def p_scf(line):
            assert len(line) == 87
            data = [
                int(line[5:9]),
                float(line[9:25]),
                float(line[25:41]),
                float(line[41:57]),
                float(line[57:67]),
                float(line[67:77]),
                float(line[77:87]),
            ]
            return data

        def p_ts_scf_fix_spin(line):
            assert len(line) == 100
            data = [
                int(line[8:12]),
                float(line[12:28]),
                float(line[28:44]),
                float(line[44:60]),
                float(line[60:70]),
                float(line[70:80]),
                float(line[80:90]),
                float(line[90:100]),
            ]
            return data

        def p_ts_scf(line):
            assert len(line) == 90
            data = [
                int(line[8:12]),
                float(line[12:28]),
                float(line[28:44]),
                float(line[44:60]),
                float(line[60:70]),
                float(line[70:80]),
                float(line[80:90]),
            ]
            return data

        def p_default(line):
            # Populate DATA by splitting
            data = line.split()
            data = [int(data[1])] + list(map(float, data[2:]))
            return data

        # For getting the parser according to len(line)
        scf_parser = {}
        scf_parser[87] = p_scf
        scf_parser[97] = p_scf_fix_spin
        scf_parser[90] = p_ts_scf
        scf_parser[100] = p_ts_scf_fix_spin

        def parse_next(line, d):
            nonlocal key, scf_parser, p_default
            line = line.strip().replace("*", "0")
            reset_d(d, line)
            ret = None

            # Start parsing based on keys
            if line.startswith("ts-Vha:"):
                ret = do_finalize(d, "ts-Vha")
                d["ts-Vha"] = [float(line.split()[1])]
                add_order(d, "ts-Vha")
            elif line.startswith("spin moment: S"):
                ret = do_finalize(d, "S")
                # 4.1 and earlier
                d["S"] = list(map(float, line.split("=")[1].split()[1:]))
                add_order(d, "S", ["Sx", "Sy", "Sz"])
            elif line.startswith("spin moment: {S}"):
                ret = do_finalize(d, "S")
                # 4.2 and later
                d["S"] = list(map(float, line.split("= {")[1].split()[:3]))
                add_order(d, "S", ["Sx", "Sy", "Sz"])
            elif line.startswith("bulk-bias: |v"):
                ret = do_finalize(d, "bb-v")
                # TODO old version should be removed once released
                d["bb-v"] = list(map(float, line.split()[-3:]))
                add_order(d, "bb-v", ["BB-vx", "BB-vy", "BB-vz"])
            elif line.startswith("bulk-bias: {v}"):
                idx = line.index("{v}")
                if line[idx + 3] == "_":
                    # we are in a subset
                    lbl = f"BB-{line[idx + 4:idx + 6]}"
                else:
                    lbl = "BB"

                v = line.split("] {")[1].split()
                v = list(map(float, v[:3]))
                ret = do_finalize(d, lbl)
                d[lbl] = v
                add_order(d, lbl, [f"{lbl}-vx", f"{lbl}-vy", f"{lbl}-vz"])
            elif line.startswith("bulk-bias: dq"):
                ret = do_finalize(d, "BB-q")
                d["BB-q"] = list(map(float, line.split()[-2:]))
                add_order(d, "BB-q", ["BB-dq", "BB-q0"])
            elif line.startswith("ts-q:"):
                ret = do_finalize(d, "ts-q")
                # even for key == scf, this won't be there.
                # So it will just fail...
                data = line.split()[1:]
                try:
                    d["ts-q"] = list(map(float, data))
                except Exception:
                    # We are probably reading a device list
                    add_order(d, "ts-q", data)

            elif line.startswith(key):
                ret = do_finalize(d, "scf")
                parser = scf_parser.get(len(line), p_default)
                d["scf"] = parser(line)
                # we don't need to add_order here, because it starts with scf

            return ret

        # A temporary dictionary to hold information while reading the output file
        d = {
            "_final_iscf": 0,
            "_toggle": Toggler(),
            # The properties defaults to these keys
            # TODO fix for fix-spin calculations (Efup/Efdown)
            "props": ["iscf", "Eharris", "E_KS", "FreeEng", "dDmax", "Ef", "dHmax"],
            # Default the order, start with SCF
            "order": ["scf"],
        }

        # Jump to the `iscf` which is uniquely found for start of SCF cycles.
        _, _ = self.step_to("iscf", allow_reread=False)
        scf = []
        while line := self.readline():
            data = parse_next(line, d)
            if data is None and d["_final_iscf"] == 2:
                # Catch the case where this is the final
                # case, so we can add the data to the list of SCF segments
                data = construct_data(d)

            if data is not None:
                # we have found a new key
                if iscf is Ellipsis or iscf < 0:
                    scf.append(data)

                elif data[0] <= iscf:
                    # this ensures we will retain the latest iscf in
                    # case the requested iscf is too big
                    scf = data

            if d["_final_iscf"] == 1:
                # step to next line!
                d["_final_iscf"] = 2

            elif d["_final_iscf"] == 2:
                # The line after SCF cycle converged
                # Reset, for MD reads
                d["_final_iscf"] = 0

                if len(scf) == 0:
                    # this traps cases where final_iscf has
                    # been trickered but we haven't collected anything.
                    # I.e. if key == scf but ts-scf also exists.
                    continue

                # First figure out which iscf we should store
                if iscf is Ellipsis:  # or iscf > 0
                    # scf is correct
                    pass
                elif iscf < 0:
                    # truncate to 0
                    scf = scf[max(len(scf) + iscf, 0)]

                # found a full MD
                break

        # Define the function that is going to convert the information of a MDstep to a Dataset
        if as_dataframe:
            import pandas as pd

            # Easier
            props = d["props"][1:]

            if len(scf) == 0:
                return pd.DataFrame(index=pd.Index([], name="iscf"), columns=props)

            scf = np.atleast_2d(scf)
            return pd.DataFrame(
                scf[..., 1:],
                index=pd.Index(scf[..., 0].ravel().astype(np.int32), name="iscf"),
                columns=props,
            )

        # Convert to numpy array
        scf = np.array(scf)
        if ret_header:
            return scf, d["props"]
        return scf



[docs]
    @sile_fh_open(True)
    def read_charge(
        self,
        name: Literal["voronoi", "hirshfeld", "mulliken", "mulliken:<5.2"],
        iscf: Union[Opt, int, Ellipsis] = Opt.ANY,
        imd: Union[Opt, int, Ellipsis] = Opt.ANY,
        key_scf: str = "scf",
        as_dataframe: bool = False,
    ):
        r"""Read charges calculated in SCF loop or MD loop (or both)

        Siesta enables many different modes of writing out charges.

        The below table shows a list of different cases that
        may be encountered, the letters are referred to in the
        return section to indicate what is returned.

        +-----------+-----+-----+--------+-------+------------------+
        | Case      | *A* | *B* | *C*    | *D*   | *E*              |
        +-----------+-----+-----+--------+-------+------------------+
        | Charge    | MD  | SCF | MD+SCF | Final | Orbital resolved |
        +-----------+-----+-----+--------+-------+------------------+
        | Voronoi   | +   | +   | +      | +     | -                |
        +-----------+-----+-----+--------+-------+------------------+
        | Hirshfeld | +   | +   | +      | +     | -                |
        +-----------+-----+-----+--------+-------+------------------+
        | Mulliken  | +   | +   | +      | +     | -                |
        |   (>=5.2) |     |     |        |       |                  |
        +-----------+-----+-----+--------+-------+------------------+
        | Mulliken  | +   | +   | +      | +     | (+)              |
        |     <5.2  |     |     |        |       |                  |
        +-----------+-----+-----+--------+-------+------------------+

        Notes
        -----
        Errors will be raised if one requests information not present. I.e.
        passing an integer or `Opt.ALL` for `iscf` will raise an error if
        the SCF charges are not present. For `Opt.ANY` it will return
        the most information, effectively SCF will be returned if present.

        Currently orbitally-resolved Mulliken is not implemented, any help in
        reading this would be very welcome.

        Parameters
        ----------
        name:
            the name of the charges that you want to read
        iscf:
            index (0-based) of the scf iteration you want the charges for.
            If the enum specifier `Opt.ANY` or `Opt.ALL`/`...` are used, then
            the returned quantities depend on what is present.
            If ``None/Opt.NONE`` it will not return any SCF charges.
            If both `imd` and `iscf` are ``None`` then only the final charges will be returned.
        imd:
            index (0-based) of the md step you want the charges for.
            If the enum specifier `Opt.ANY` or `Opt.ALL`/`...` are used, then
            the returned quantities depend on what is present.
            If ``None/Opt.NONE`` it will not return any MD charges.
            If both `imd` and `iscf` are ``None`` then only the final charges will be returned.
        key_scf :
            the key lookup for the scf iterations (a ":" will automatically be appended)
        as_dataframe:
            whether charges should be returned as a pandas dataframe.

        Returns
        -------
        numpy.ndarray
            if a specific MD+SCF index is requested (or special cases where output is
            not complete)
        list of numpy.ndarray
            if `iscf` or `imd` is different from ``None/Opt.NONE``.
        pandas.DataFrame
            if `as_dataframe` is requested. The dataframe will have multi-indices if multiple
            SCF or MD steps are requested.
        """
        namel = name.lower()
        if as_dataframe:
            import pandas as pd

            def _empty_charge():
                # build a fake dataframe with no indices
                return pd.DataFrame(
                    index=pd.Index([], name="atom", dtype=np.int32), dtype=np.float32
                )

        else:
            pd = None

            def _empty_charge():
                # return for single value with nan values
                return _a.arrayf([[None]])

        # define helper function for reading voronoi+hirshfeld charges
        def _charges():
            """Read output from Voronoi/Hirshfeld/Mulliken charges"""
            nonlocal pd

            # Expecting something like this (NC/SOC)
            # Voronoi Atomic Populations:
            # Atom #     dQatom  Atom pop         S        Sx        Sy        Sz  Species
            #      1   -0.02936   4.02936   0.00000  -0.00000   0.00000   0.00000  C
            # or (polarized)
            # Voronoi Atomic Populations:
            # Atom #     dQatom  Atom pop        Sz  Species
            #      1   -0.02936   4.02936   0.00000  C
            # In 5.2 this now looks like this:
            # Voronoi Atomic Populations:
            # Atom #  charge [q]  valence [e]        Sz  Species
            #      1    -0.02936      4.02936   0.00000  C

            # first line is the header
            header = (
                self.readline()
                .replace("charge [q]", "dq")  # charge [q] in 5.2
                .replace("valence [e]", "e")  # in 5.2
                .replace("dQatom", "dq")  # dQatom in 5.0
                .replace(" Qatom", " dq")  # Qatom in 4.1
                .replace("Atom pop", "e")  # not found in 4.1
                .split()
            )[2:-1]

            # Define the function that parses the charges
            def _parse_charge(line):
                atom_idx, *vals, symbol = line.split()
                # assert that this is a proper line
                # this should catch cases where the following line of charge output
                # is still parseable
                # atom_idx = int(atom_idx)
                return list(map(float, vals))

            # We have found the header, prepare a list to read the charges
            atom_charges = []
            while (line := self.readline()) != "":
                try:
                    charge_vals = _parse_charge(line)
                    atom_charges.append(charge_vals)
                except Exception:
                    # We already have the charge values and we reached a line that can't be parsed,
                    # this means we have reached the end.
                    break

            if pd is None:
                # not as_dataframe
                return _a.arrayf(atom_charges)

            # determine how many columns we have
            # this will remove atom indices and species, so only inside
            ncols = len(atom_charges[0])
            assert ncols == len(header)

            # the precision is limited, so no need for double precision
            return pd.DataFrame(
                atom_charges,
                columns=header,
                dtype=np.float32,
                index=pd.RangeIndex(stop=len(atom_charges), name="atom"),
            )

        # define helper function for reading voronoi+hirshfeld charges
        def _mulliken_charges_pre52():
            """Read output from Mulliken charges"""
            nonlocal pd

            # Expecting something like this (NC/SOC)
            # mulliken: Atomic and Orbital Populations:

            # Species: Cl

            # Atom      Orb        Charge      Spin       Svec
            # ----------------------------------------------------------------
            #     1  1 3s         1.75133   0.00566      0.004   0.004  -0.000
            #     1  2 3s         0.09813   0.00658     -0.005  -0.005   0.000
            #     1  3 3py        1.69790   0.21531     -0.161  -0.142   0.018
            #     1  4 3pz        1.72632   0.15770     -0.086  -0.132  -0.008
            #     1  5 3px        1.81369   0.01618     -0.004   0.015  -0.004
            #     1  6 3py       -0.04663   0.02356     -0.017  -0.016  -0.000
            #     1  7 3pz       -0.04167   0.01560     -0.011  -0.011   0.001
            #     1  8 3px       -0.02977   0.00920     -0.006  -0.007   0.000
            #     1  9 3Pdxy      0.00595   0.00054     -0.000  -0.000  -0.000
            #     1 10 3Pdyz      0.00483   0.00073     -0.001  -0.001  -0.000
            #     1 11 3Pdz2      0.00515   0.00098     -0.001  -0.001  -0.000
            #     1 12 3Pdxz      0.00604   0.00039     -0.000  -0.000   0.000
            #     1 13 3Pdx2-y2   0.00607   0.00099     -0.001  -0.001   0.000
            #     1     Total     6.99733   0.41305     -0.288  -0.296   0.007

            # Define the function that parses the charges
            def _parse_charge_total_nc(line):  # non-colinear and soc
                atom_idx, _, *vals = line.split()
                # assert that this is a proper line
                # this should catch cases where the following line of charge output
                # is still parseable
                # atom_idx = int(atom_idx)
                return int(atom_idx), list(map(float, vals))

            def _parse_charge_total(line):  # unpolarized and colinear spin
                atom_idx, val, *_ = line.split()
                return int(atom_idx), float(val)

            # Define the function that parses a single species
            def _parse_species_nc():  # non-colinear and soc
                nonlocal header, atom_idx, atom_charges

                # The mulliken charges are organized per species where the charges
                # for each species are enclosed by dashes (-----)

                # Step to header
                _, line = self.step_to("Atom", allow_reread=False)
                if header is None:
                    header = (
                        line.replace("Charge", "e")
                        .replace("Spin", "S")
                        .replace(
                            "Svec", "Sx Sy Sz"
                        )  # Split Svec into Cartesian components
                        .split()
                    )[2:]

                # Skip over the starting ---- line
                self.readline()

                # Read until closing ---- line
                while "----" not in (line := self.readline()):
                    if "Total" in line:
                        ia, charge_vals = _parse_charge_total_nc(line)
                        atom_idx.append(ia)
                        atom_charges.append(charge_vals)

            def _parse_spin_pol():  # unpolarized and colinear spin
                nonlocal atom_idx, atom_charges

                # The mulliken charges are organized per spin
                # polarization (UP/DOWN). The end of a spin
                # block is marked by a Qtot

                # Read until we encounter "mulliken: Qtot"
                def try_parse_int(s):
                    try:
                        int(s)
                    except ValueError:
                        return False
                    else:
                        return True

                while "mulliken: Qtot" not in (line := self.readline()):
                    words = line.split()
                    if len(words) > 0 and try_parse_int(words[0]):
                        # This should be a line containing the total charge for an atom
                        ia, charge = _parse_charge_total(line)
                        atom_idx.append(ia)
                        atom_charges.append([charge])

            # Determine with which spin type we are dealing
            if self.info.spin.is_unpolarized:  # UNPOLARIZED
                # No spin components so just parse charge
                atom_charges = []
                atom_idx = []
                header = ["e"]
                _parse_spin_pol()

            elif self.info.spin.is_polarized:
                # Parse both spin polarizations
                atom_charges_pol = []
                header = ["e", "Sz"]
                for s in ("UP", "DOWN"):
                    atom_charges = []
                    atom_idx = []
                    self.step_to(f"mulliken: Spin {s}", allow_reread=False)
                    _parse_spin_pol()
                    atom_charges_pol.append(atom_charges)

                # Compute the charge and spin of each atom
                atom_charges_pol_array = _a.arrayf(atom_charges_pol)
                atom_q = (
                    atom_charges_pol_array[0, :, 0] + atom_charges_pol_array[1, :, 0]
                )
                atom_s = (
                    atom_charges_pol_array[0, :, 0] - atom_charges_pol_array[1, :, 0]
                )
                atom_charges[:] = np.stack((atom_q, atom_s), axis=-1)

            elif not self.info.spin.is_diagonal:
                # Parse each species
                atom_charges = []
                atom_idx = []
                header = None
                for _ in range(self.info.nspecies):
                    found, _ = self.step_to("Species:", allow_reread=False)
                    _parse_species_nc()

            else:
                raise NotImplementedError(
                    "Something went wrong... Couldn't parse file."
                )

            # Convert to array and sort in the order of the atoms
            sort_idx = np.argsort(atom_idx)
            atom_charges_array = _a.arrayf(atom_charges)[sort_idx]

            if pd is None:
                # not as_dataframe
                return atom_charges_array

            # determine how many columns we have
            # this will remove atom indices and species, so only inside
            ncols = len(atom_charges[0])
            assert ncols == len(header)

            # the precision is limited, so no need for double precision
            return pd.DataFrame(
                atom_charges_array,
                columns=header,
                dtype=np.float32,
                index=pd.RangeIndex(stop=len(atom_charges), name="atom"),
            )

        # split method to retrieve options
        namel, *opts = namel.split(":")

        # Check that a known charge has been requested
        if namel == "voronoi":
            _r_charge = _charges
            charge_keys = [
                "Voronoi Atomic Populations",
                "Voronoi Net Atomic Populations",
            ]
        elif namel == "hirshfeld":
            _r_charge = _charges
            charge_keys = [
                "Hirshfeld Atomic Populations",
                "Hirshfeld Net Atomic Populations",
            ]
        elif namel == "mulliken":
            _r_charge = _mulliken_charges_pre52
            charge_keys = ["mulliken: Atomic and Orbital Populations"]
            if "<5.2" in opts:
                pass

            else:
                # check if version is understandable
                version = self.info.version
                try:
                    if version.version[:2] >= (5, 2):
                        _r_charge = _charges
                        charge_keys = [
                            "Mulliken Atomic Populations",
                            "Mulliken Net Atomic Populations",
                        ]
                except Exception:
                    pass

        else:
            raise ValueError(
                f"{self.__class__.__name__}.read_charge name argument should be one of [voronoi, hirshfeld, mulliken], got {name}?"
            )

        # Ensure the key_scf matches exactly (prepend a space)
        key_scf = f" {key_scf.strip()}:"

        # Reading charges may be quite time consuming for large MD simulations.

        # to see if we finished a MD read, we check for these keys
        search_keys = [
            # two keys can signal ending SCF
            "SCF Convergence",
            "SCF_NOT_CONV",
            "siesta: Final energy",
            key_scf,
            *charge_keys,
        ]

        # adjust the below while loop to take into account any additional
        # segments of search_keys
        IDX_SCF_END = [0, 1]
        IDX_FINAL = [2]
        IDX_SCF = [3]
        # the rest are charge keys
        IDX_CHARGE = list(range(len(search_keys) - len(charge_keys), len(search_keys)))

        # state to figure out where we are
        state = PropertyDict()
        state.INITIAL = 0
        state.MD = 1
        state.SCF = 2
        state.CHARGE = 3
        state.FINAL = 4

        # a list of scf_charge
        md_charge = []
        md_scf_charge = []
        scf_charge = []
        final_charge = None

        # signal that any first reads are INITIAL charges
        current_state = state.INITIAL
        charge = _empty_charge()
        FOUND_SCF = False
        FOUND_MD = False
        FOUND_FINAL = False

        while (
            ret := self.step_to(
                search_keys, case=True, ret_index=True, allow_reread=False
            )
        )[0]:
            if ret[2] in IDX_SCF_END:
                # we finished all SCF iterations
                current_state = state.MD
                md_scf_charge.append(scf_charge)
                scf_charge = []

            elif ret[2] in IDX_SCF:
                current_state = state.SCF
                # collect scf-charges (possibly none)
                scf_charge.append(charge)

            elif ret[2] in IDX_FINAL:
                current_state = state.FINAL
                # don't do anything, this is the final charge construct
                # regardless of where it comes from.

            elif ret[2] in IDX_CHARGE:
                FOUND_CHARGE = True
                # also read charge
                charge = _r_charge()

                if state.INITIAL == current_state or state.CHARGE == current_state:
                    # this signals scf charges
                    FOUND_SCF = True
                    # There *could* be 2 steps if we are mixing H,
                    # this is because it first does
                    # compute H -> compute DM -> compute H
                    # in the first iteration, subsequently we only do
                    # compute compute DM -> compute H
                    # once we hit ret[2] in IDX_SCF we will append
                    scf_charge = []

                elif state.MD == current_state:
                    FOUND_MD = True
                    # we just finished an SCF cycle.
                    # So any output between SCF ending and
                    # a new one beginning *must* be that geometries
                    # charge

                    # Here `charge` may be NONE signalling
                    # we don't have charge in MD steps.
                    md_charge.append(charge)

                    # reset charge
                    charge = _empty_charge()

                elif state.SCF == current_state:
                    FOUND_SCF = True

                elif state.FINAL == current_state:
                    FOUND_FINAL = True
                    # a special state writing out the charges after everything
                    final_charge = charge
                    charge = _empty_charge()
                    scf_charge = []
                    # we should be done and no other charge reads should be found!
                    # should we just break?

                current_state = state.CHARGE

        if not any((FOUND_SCF, FOUND_MD, FOUND_FINAL)):
            raise SileError(f"{self!s} does not contain any charges ({name})")

        # if the scf-charges are not stored, it means that the MD step finalization
        # has not been read. So correct
        if len(scf_charge) > 0:
            assert False, "this test shouldn't reach here"
            # we must not have read through the entire MD step
            # so this has to be a running simulation
            if charge is not None:
                scf_charge.append(charge)
                charge = _empty_charge()
            md_scf_charge.append(scf_charge)

        # otherwise there is some *parsing* error, so for now we use assert
        assert len(scf_charge) == 0

        if as_dataframe:
            # convert data to proper data structures
            # regardless of user requests. This is an overhead... But probably not that big of a problem.
            if FOUND_SCF:
                md_scf_charge = pd.concat(
                    [
                        pd.concat(
                            iscf_, keys=pd.RangeIndex(1, len(iscf_) + 1, name="iscf")
                        )
                        for iscf_ in md_scf_charge
                    ],
                    keys=pd.RangeIndex(1, len(md_scf_charge) + 1, name="imd"),
                )
            if FOUND_MD:
                md_charge = pd.concat(
                    md_charge, keys=pd.RangeIndex(1, len(md_charge) + 1, name="imd")
                )
        else:
            if FOUND_SCF:
                nan_array = _a.emptyf(md_scf_charge[0][0].shape)
                nan_array.fill(np.nan)

                def get_md_scf_charge(scf_charge, iscf):
                    try:
                        return scf_charge[iscf]
                    except Exception:
                        return nan_array

            if FOUND_MD:
                md_charge = np.stack(md_charge)

        # option parsing is a bit *difficult* with flag enums
        # So first figure out what is there, and handle this based
        # on arguments
        def _p(flag, found):
            """Helper routine to do the following:

            Returns
            -------
            is_opt : bool
                whether the flag is an `Opt`
            flag :
                corrected flag
            """
            if flag is Ellipsis:
                flag = Opt.ALL

            if isinstance(flag, Opt):
                # correct flag depending on what `found` is
                # If the values have been found we
                # change flag to None only if flag == NONE
                # If the case has not been found, we
                # change flag to None if ANY or NONE is in flags

                if found:
                    # flag is only NONE, then pass none
                    if not (Opt.NONE ^ flag):
                        flag = None
                else:  # not found
                    # we convert flag to none
                    # if ANY or NONE in flag
                    if (Opt.NONE | Opt.ANY) & flag:
                        flag = None

            return isinstance(flag, Opt), flag

        opt_imd, imd = _p(imd, FOUND_MD)
        opt_iscf, iscf = _p(iscf, FOUND_SCF)

        if not (FOUND_SCF or FOUND_MD):
            # none of these are found
            # we request that user does not request any input
            if (opt_iscf or (not iscf is None)) or (opt_imd or (not imd is None)):
                raise SileError(f"{self!s} does not contain MD/SCF charges")

        elif not FOUND_SCF:
            if opt_iscf or (not iscf is None):
                raise SileError(f"{self!s} does not contain SCF charges")

        elif not FOUND_MD:
            if opt_imd or (not imd is None):
                raise SileError(f"{self!s} does not contain MD charges")

        # if either are options they may hold
        if opt_imd and opt_iscf:
            if FOUND_SCF:
                return md_scf_charge
            elif FOUND_MD:
                return md_charge
            elif FOUND_FINAL:
                # I think this will never be reached
                # If neither are found they will be converted to
                # None
                return final_charge

            raise SileError(f"{str(self)} unknown argument for 'imd' and 'iscf'")

        elif opt_imd:
            # flag requested imd
            if not (imd & (Opt.ANY | Opt.ALL)):
                # wrong flag
                raise SileError(f"{str(self)} unknown argument for 'imd'")

            if FOUND_SCF and iscf is not None:
                # this should be handled, i.e. the scf should be taken out
                if as_dataframe:
                    return md_scf_charge.groupby(level=[0, 2]).nth(iscf)
                return np.stack(
                    tuple(get_md_scf_charge(x, iscf) for x in md_scf_charge)
                )

            elif FOUND_MD and iscf is None:
                return md_charge
            raise SileError(
                f"{str(self)} unknown argument for 'imd' and 'iscf', could not find SCF charges"
            )

        elif opt_iscf:
            # flag requested imd
            if not (iscf & (Opt.ANY | Opt.ALL)):
                # wrong flag
                raise SileError(f"{str(self)} unknown argument for 'iscf'")
            if imd is None:
                # correct imd
                imd = -1
            if as_dataframe:
                md_scf_charge = md_scf_charge.groupby(level=0)
                group = list(md_scf_charge.groups.keys())[imd]
                return md_scf_charge.get_group(group).droplevel(0)
            return np.stack(md_scf_charge[imd])

        elif imd is None and iscf is None:
            if FOUND_FINAL:
                return final_charge
            raise SileError(f"{str(self)} does not contain final charges")

        elif imd is None:
            # iscf is not None, so pass through as though explicitly passed
            imd = -1

        elif iscf is None:
            # we return the last MD step and the requested scf iteration
            if as_dataframe:
                return md_charge.groupby(level=1).nth(imd)
            return md_charge[imd]

        if as_dataframe:
            # first select imd
            md_scf_charge = md_scf_charge.groupby(level=0)
            group = list(md_scf_charge.groups.keys())[imd]
            md_scf_charge = md_scf_charge.get_group(group).droplevel(0)
            return md_scf_charge.groupby(level=1).nth(iscf)
        return md_scf_charge[imd][iscf]



outSileSiesta = deprecation(
    "outSileSiesta has been deprecated in favor of stdoutSileSiesta.", "0.15", "0.17"
)(stdoutSileSiesta)

add_sile("siesta.out", stdoutSileSiesta, case=False, gzip=True)
add_sile("out", stdoutSileSiesta, case=False, gzip=True)