# Source code for pyfstat.utils.io

import logging
import os

import numpy as np

from .converting import get_dictionary_from_lines

logger = logging.getLogger(__name__)


def read_par(
    filename=None,
    label=None,
    outdir=None,
    suffix="par",
    comments=["%", "#"],
    raise_error=False,
):
    """Read in a .par or .loudest file, returns a dictionary of the key=val pairs.

    Notes
    -----
    This can also be used to read in `.loudest` files
    produced by the `ComputeFstatistic_v2` executable,
    or any file which has rows of `key=val` data
    (in which the val can be understood using `eval(val)`).

    Parameters
    ----------
    filename : str
        Filename (path) containing rows of `key=val` data to read in.
    label, outdir, suffix : str, optional
        If filename is `None`, form the file to read as `outdir/label.suffix`.
        In that case, both `label` and `outdir` must be given.
    comments : str or list of strings, optional
        Characters denoting that a row is a comment.
        (The list default is kept for interface compatibility;
        this function itself only passes it on and does not modify it.)
    raise_error : bool, optional
        If True, raise an error for lines which are not comments, but cannot
        be read.

    Returns
    -------
    d: dict
        The `key=val` pairs as a dictionary.

    Raises
    ------
    ValueError
        If neither `filename` nor both of `label` and `outdir` are given,
        or if the resulting file does not exist.
    """
    if filename is None:
        # Without this guard, os.path.join(None, ...) fails with an opaque
        # TypeError, or we would silently look for a file called 'None.par'.
        if outdir is None or label is None:
            raise ValueError(
                "Need either a filename, or both label and outdir,"
                " to determine the file to read."
            )
        filename = os.path.join(outdir, "{}.{}".format(label, suffix))
    if not os.path.isfile(filename):
        raise ValueError("No file {} found".format(filename))
    with open(filename, "r") as f:
        # The actual parsing of the key=val rows is delegated to the helper.
        return get_dictionary_from_lines(f, comments, raise_error)


def read_txt_file_with_header(f, names=True, comments="#"):
    """Wrapper to np.genfromtxt with smarter handling of variable-length commented headers.

    The header is identified as an uninterrupted block of lines
    from the beginning of the file,
    each starting with the given `comments` character.

    After identifying a header of length `Nhead`,
    this function then tells `np.genfromtxt()` to skip `Nhead-1` lines
    (to allow for reading field names from the last commented line
    before the actual data starts).
    If the file has no commented header at all, no lines are skipped.

    Parameters
    ----------
    f: str
        Name of the file to read.
    names: bool
        Passed on to `np.genfromtxt()`:
        If True, the field names are read from the last header line.
    comments: str
        The character used to indicate the start of a comment.
        Also passed on to `np.genfromtxt()`.

    Returns
    -------
    data: np.ndarray
        The data array read from the file after skipping the header.
        Always at least 1-dimensional, even for a single data row.
    """
    # First pass: count the uninterrupted run of commented lines at the top.
    Nhead = 0
    with open(f, "r") as f_opened:
        for line in f_opened:
            if not line.startswith(comments):
                break
            Nhead += 1
    # Keep the last header line for np.genfromtxt (it may hold field names),
    # but never pass a negative skip count when there is no header at all.
    skip_header = max(Nhead - 1, 0)
    data = np.genfromtxt(f, skip_header=skip_header, names=names, comments=comments)
    return np.atleast_1d(data)


def read_parameters_dict_lines_from_file_header(
    outfile, comments="#", strip_spaces=True
):
    """Load a list of pretty-printed parameters dictionary lines from a commented file header.

    Returns a list of lines from a commented file header
    that match the pretty-printed parameters dictionary format
    as generated by `BaseSearchClass.get_output_file_header()`.
    The opening/closing bracket lines (`{`,`}`) are not included.
    Newline characters at the end of each line are stripped.

    Parameters
    ----------
    outfile: str
        Name of a PyFstat-produced output file.
    comments: str
        Comment character used to start header lines.
    strip_spaces: bool
        Whether to strip leading/trailing spaces.

    Returns
    -------
    dict_lines: list
        A list of unparsed pprinted dictionary entries.

    Raises
    ------
    IOError
        If a non-commented line or the end of the file is reached
        before the closing bracket line of the dictionary,
        or if no dictionary entries could be found.
    """
    dict_lines = []
    in_dict = False
    closed = False
    opening = comments + " {"
    closing = comments + " }"
    with open(outfile, "r") as f_opened:
        for line in f_opened:
            if not line.startswith(comments):
                raise IOError(
                    "Encountered end of {:s}-commented header before finding closing '}}' of parameters dictionary in file '{:s}'.".format(
                        comments, outfile
                    )
                )
            if line.startswith(opening):
                in_dict = True
            elif line.startswith(closing):
                closed = True
                break
            elif in_dict:
                # Remove exactly the comment prefix (str.lstrip would instead
                # strip any run of the characters contained in `comments`).
                line = line[len(comments) :].rstrip("\n")
                if strip_spaces:
                    line = line.strip(" ")
                dict_lines.append(line)
    if in_dict and not closed:
        # The file ended inside the dictionary: do not return truncated data.
        raise IOError(
            "Encountered end of file before finding closing '}}' of parameters dictionary in file '{:s}'.".format(
                outfile
            )
        )
    if not dict_lines:
        raise IOError(
            "Could not parse non-empty parameters dictionary from file '{:s}'.".format(
                outfile
            )
        )
    return dict_lines


def get_parameters_dict_from_file_header(outfile, comments="#", eval_values=False):
    """Load a parameters dict from a commented file header.

    Returns a parameters dictionary,
    as generated by `BaseSearchClass.get_output_file_header()`,
    from an output file header.
    Always returns a proper python dictionary,
    but the values will be unparsed strings if not requested otherwise.

    Parameters
    ----------
    outfile: str
        Name of a PyFstat-produced output file.
    comments: str
        Comment character used to start header lines.
    eval_values: bool
        If False, return dictionary values as unparsed strings.
        If True, evaluate each of them.
        DANGER! Only do this if you trust the source of the file!

    Returns
    -------
    params_dict: dictionary
        A dictionary of parameters
        (with values either as unparsed strings, or evaluated).

    Raises
    ------
    IOError
        If the header lines cannot be read,
        or if a dictionary line is not of the form `'key': val,`.
    """
    if eval_values:
        logger.warning(
            "Will evaluate dictionary values read from file '{:s}'.".format(outfile)
        )
    params_dict = {}
    # Forward the caller's comment character instead of hard-coding "#".
    dict_lines = read_parameters_dict_lines_from_file_header(
        outfile, comments=comments, strip_spaces=True
    )
    for line in dict_lines:
        line_split = line.rstrip(",").split(":")
        # check for a few possible corrupt formats,
        # though we can't be exhaustive here...
        # (The length check comes first so that the later indexing is safe.)
        malformed = (
            len(line_split) != 2
            or not all(line_split)
            or not line_split[0].startswith("'")
            or not line_split[0].endswith("'")
            or not line_split[1].strip(" ")  # nothing left after the colon
        )
        if malformed:
            raise IOError(
                "Line '{:s}' is not of the expected format (# 'key': val,').".format(
                    line
                )
            )
        key = line_split[0].strip("'")
        val = line_split[1].strip(" ")
        if eval_values:
            # SECURITY: eval() executes arbitrary code contained in the file.
            # Only reachable on explicit request (eval_values=True);
            # never enable this for files from untrusted sources.
            val = eval(val)  # DANGER
        params_dict[key] = val
    return params_dict