# Source code for dnn_reco.modules.data.labels.default_labels

"""
All label functions must have the following parameters and return values:

    Parameters
    ----------
    input_data : str
            Path to input data file.
    config : dict
        Dictionary containing all settings as read in from config file.
        Label function specific settings can be passed via the config file.
    label_names : None, optional
        The names of the labels. This defines which labels to include as well
        as the ordering.
        If label_names is None (e.g. first call to initiate name list), then
        a list of label names needs to be created and returned.
    *args
        Variable length argument list.
    **kwargs
        Arbitrary keyword arguments.

    Returns
    -------
    np.ndarray
        The numpy array containing the labels.
        Shape: [batch_size] + label_shape
    list of str
        The names of the labels
"""

import pandas as pd
import numpy as np


def simple_label_loader(input_data, config, label_names=None, *args, **kwargs):
    """Simple Label Loader.

    Will load variables contained in the hdf5 field specified via
    config['data_handler_label_key'].

    Parameters
    ----------
    input_data : str
        Path to input data file.
    config : dict
        Dictionary containing all settings as read in from config file.
        Must contain:
            'data_handler_label_key': str
                The hdf5 key from which the labels will be loaded.
            'label_add_dir_vec': bool
                If true, the direction vector components will be calculated
                on the fly and added to the labels. For this, the keys
                'label_azimuth_key' and 'label_zenith_key' have to be
                provided.
    label_names : None, optional
        The names of the labels. This defines which labels to include as
        well as the ordering.
        If label_names is None, then all keys except event specifiers will
        be used.
    *args
        Variable length argument list.
    **kwargs
        Arbitrary keyword arguments.

    Returns
    -------
    np.ndarray
        The numpy array containing the labels.
        Shape: [batch_size] + label_shape
    list of str
        The names of the labels
    """
    with pd.HDFStore(input_data, mode="r") as f:
        _labels = f[config["data_handler_label_key"]]
        time_offset = f[config["data_handler_time_offset_name"]]["value"]

    # Event-identifier columns that must never be exported as labels.
    ignore_columns = ["Run", "Event", "SubEvent", "SubEventStream", "exists"]

    if config["label_add_dir_vec"]:
        # These are recomputed on the fly below, so drop stored duplicates.
        ignore_columns.extend(["direction_x", "direction_y", "direction_z"])

    if (
        "label_position_at_rel_time" in config
        and config["label_position_at_rel_time"] is not None
    ):
        # Also recomputed below from vertex, direction and time offset.
        ignore_columns.extend(["rel_pos_x", "rel_pos_y", "rel_pos_z"])

    if label_names is None:
        if "label_keys_to_load" in config:
            label_names = config["label_keys_to_load"]
        else:
            label_names = _labels.keys().tolist()

    # remove any ignore columns and load labels
    label_names = [n for n in label_names if n not in ignore_columns]
    labels = [_labels[name] for name in label_names]

    # calculate direction vector components on the fly
    if config["label_add_dir_vec"]:
        # get azimuth and zenith
        azimuth = _labels[config["label_azimuth_key"]]
        zenith = _labels[config["label_zenith_key"]]

        # calculate direction vector components
        # We need the negative values here, since (azimuth, zenith) points
        # towards the source, whereas the direction vector points in the
        # direction of the moving particle
        dir_x = -np.sin(zenith) * np.cos(azimuth)
        dir_y = -np.sin(zenith) * np.sin(azimuth)
        dir_z = -np.cos(zenith)

        # add direction vector components to labels
        label_names.extend(["direction_x", "direction_y", "direction_z"])
        labels.extend([dir_x, dir_y, dir_z])

    # calculate position at relative time t (only makes sense for tracks)
    if (
        "label_position_at_rel_time" in config
        and config["label_position_at_rel_time"] is not None
    ):
        dir_x = _labels[config["label_dir_x_key"]]
        dir_y = _labels[config["label_dir_y_key"]]
        dir_z = _labels[config["label_dir_z_key"]]

        delta_t = (
            time_offset + config["label_position_at_rel_time"]
        ) - _labels["VertexTime"]
        c = 0.299792458  # m / ns
        length = c * delta_t
        x_at_t = _labels["VertexX"] + length * dir_x
        y_at_t = _labels["VertexY"] + length * dir_y
        z_at_t = _labels["VertexZ"] + length * dir_z

        # add position at relative time to labels
        label_names.extend(["rel_pos_x", "rel_pos_y", "rel_pos_z"])
        labels.extend([x_at_t, y_at_t, z_at_t])

    labels = np.array(labels, dtype=config["np_float_precision"]).T

    # Replace non-finite entries with the configured per-label fill value;
    # labels without a fill value are only reported, not modified.
    mask = ~np.isfinite(labels)
    if np.any(mask):
        for i, name in enumerate(label_names):
            if np.any(mask[:, i]):
                if name in config["label_nan_fill_value"]:
                    labels[mask[:, i], i] = config["label_nan_fill_value"][
                        name
                    ]
                else:
                    print(f"Found {np.sum(mask[:, i])} NaNs in {name}")

    return labels, label_names