# Source code for dnn_reco.modules.data.labels.default_labels

"""
All label functions must have the following parameters and return values:

    Parameters
    ----------
    input_data : str
            Path to input data file.
    config : dict
        Dictionary containing all settings as read in from config file.
        Label function specific settings can be passed via the config file.
    label_names : None, optional
        The names of the labels. This defines which labels to include as well
        as the ordering.
        If label_names is None (e.g. first call to initiate name list), then
        a list of label names needs to be created and returned.
    *args
        Variable length argument list.
    **kwargs
        Arbitrary keyword arguments.

    Returns
    -------
    np.ndarray
        The numpy array containing the labels.
        Shape: [batch_size] + label_shape
    list of str
        The names of the labels
"""

import pandas as pd
import numpy as np


def simple_label_loader(input_data, config, label_names=None, *args, **kwargs):
    """Simple Label Loader.

    Will load variables contained in the hdf5 field specified via
    config['data_handler_label_key'].

    Parameters
    ----------
    input_data : str
        Path to input data file.
    config : dict
        Dictionary containing all settings as read in from config file.
        Must contain:
            'data_handler_label_key': str
                The hdf5 key from which the labels will be loaded.
            'label_add_dir_vec': bool
                If true, the direction vector components will be calculated
                on the fly and added to the labels. For this, the keys
                'label_azimuth_key' and 'label_zenith_key' have to be
                provided.
    label_names : None, optional
        The names of the labels. This defines which labels to include as
        well as the ordering.
        If label_names is None, then all keys except event specifiers will
        be used.
    *args
        Variable length argument list.
    **kwargs
        Arbitrary keyword arguments.

    Returns
    -------
    np.ndarray
        The numpy array containing the labels.
        Shape: [batch_size] + label_shape
    list of str
        The names of the labels
    """
    with pd.HDFStore(input_data, mode="r") as f:
        _labels = f[config["data_handler_label_key"]]
        time_offset = f[config["data_handler_time_offset_name"]]["value"]

    # Event-identifier columns that must never be exported as labels.
    ignore_columns = ["Run", "Event", "SubEvent", "SubEventStream", "exists"]

    if config["label_add_dir_vec"]:
        # These are recomputed on the fly below, so drop stored duplicates.
        ignore_columns.extend(["direction_x", "direction_y", "direction_z"])

    if (
        "label_position_at_rel_time" in config
        and config["label_position_at_rel_time"] is not None
    ):
        # Also recomputed below from vertex, direction and time offset.
        ignore_columns.extend(["rel_pos_x", "rel_pos_y", "rel_pos_z"])

    if label_names is None:
        if "label_keys_to_load" in config:
            label_names = config["label_keys_to_load"]
        else:
            label_names = _labels.keys().tolist()

    # remove any ignore columns and load labels
    label_names = [n for n in label_names if n not in ignore_columns]
    labels = [_labels[name] for name in label_names]

    # calculate direction vector components on the fly
    if config["label_add_dir_vec"]:
        # get azimuth and zenith
        azimuth = _labels[config["label_azimuth_key"]]
        zenith = _labels[config["label_zenith_key"]]

        # calculate direction vector components
        # We need the negative values here, since (azimuth, zenith) points
        # towards the source, whereas the direction vector points in the
        # direction of the moving particle
        dir_x = -np.sin(zenith) * np.cos(azimuth)
        dir_y = -np.sin(zenith) * np.sin(azimuth)
        dir_z = -np.cos(zenith)

        # add direction vector components to labels
        label_names.extend(["direction_x", "direction_y", "direction_z"])
        labels.extend([dir_x, dir_y, dir_z])

    # calculate position at relative time t (only makes sense for tracks)
    if (
        "label_position_at_rel_time" in config
        and config["label_position_at_rel_time"] is not None
    ):
        dir_x = _labels[config["label_dir_x_key"]]
        dir_y = _labels[config["label_dir_y_key"]]
        dir_z = _labels[config["label_dir_z_key"]]

        delta_t = (
            time_offset + config["label_position_at_rel_time"]
        ) - _labels["VertexTime"]
        c = 0.299792458  # m / ns
        length = c * delta_t
        x_at_t = _labels["VertexX"] + length * dir_x
        y_at_t = _labels["VertexY"] + length * dir_y
        z_at_t = _labels["VertexZ"] + length * dir_z

        # add position at relative time to labels
        label_names.extend(["rel_pos_x", "rel_pos_y", "rel_pos_z"])
        labels.extend([x_at_t, y_at_t, z_at_t])

    labels = np.array(labels, dtype=config["np_float_precision"]).T

    # Replace non-finite entries with the configured per-label fill value;
    # labels without a fill value are only reported, not modified.
    mask = ~np.isfinite(labels)
    if np.any(mask):
        for i, name in enumerate(label_names):
            if np.any(mask[:, i]):
                if name in config["label_nan_fill_value"]:
                    labels[mask[:, i], i] = config["label_nan_fill_value"][
                        name
                    ]
                else:
                    print(f"Found {np.sum(mask[:, i])} NaNs in {name}")

    return labels, label_names