Source code for pgsui.utils.misc

from __future__ import annotations

from dataclasses import dataclass
from typing import Any, FrozenSet, Literal, Mapping

import numpy as np
import pandas as pd
import torch


[docs] @dataclass(frozen=True, slots=True) class OptunaParamSpec: """Specification and validation for Optuna objective parameter keys. Attributes: keys: Canonical keys used in the Optuna objective `params` dict. """ keys: FrozenSet[str] def __post_init__(self) -> None: if not isinstance(self.keys, frozenset): raise TypeError( f"`keys` must be a frozenset, got: {type(self.keys).__name__}" ) if not self.keys: raise ValueError("`keys` must be non-empty.") if not all(isinstance(k, str) and k for k in self.keys): raise TypeError("`keys` must contain only non-empty strings.")
[docs] def count(self) -> int: """Return the integer count of tuned parameters.""" return len(self.keys)
[docs] def validate(self, params: Mapping[str, Any], *, allow_extra: bool = False) -> None: """Validate that a params mapping matches this spec's keys. Args: params: Mapping of parameter names -> values (typically the objective `params` dict). allow_extra: If True, extra keys are allowed; missing keys still error. Raises: TypeError: If `params` is not a Mapping. KeyError: If required keys are missing (or extras exist when allow_extra=False). """ if not isinstance(params, Mapping): raise TypeError(f"`params` must be a Mapping, got: {type(params).__name__}") got = frozenset(params.keys()) missing = self.keys - got extra = got - self.keys if missing or (extra and not allow_extra): parts: list[str] = [] if missing: parts.append(f"missing={sorted(missing)}") if extra and not allow_extra: parts.append(f"extra={sorted(extra)}") raise KeyError("Objective params keys mismatch: " + "; ".join(parts))
# Keys common to all four objective specs defined below.
_SHARED_OBJECTIVE_KEYS = frozenset(
    {
        "latent_dim",
        "learning_rate",
        "dropout_rate",
        "num_hidden_layers",
        "activation",
        "l1_penalty",
        "layer_scaling_factor",
        "layer_schedule",
        "power",
        "normalize",
        "inverse",
        "gamma",
        "gamma_schedule",
    }
)

# The VAE spec is the shared key set plus its two KL-related keys.
OBJECTIVE_SPEC_VAE = OptunaParamSpec(
    keys=_SHARED_OBJECTIVE_KEYS | {"kl_beta", "kl_beta_schedule"}
)

# AE, UBP and NLPCA use exactly the shared key set; each gets its own
# spec instance.
OBJECTIVE_SPEC_AE = OptunaParamSpec(keys=_SHARED_OBJECTIVE_KEYS)

OBJECTIVE_SPEC_UBP = OptunaParamSpec(keys=_SHARED_OBJECTIVE_KEYS)

OBJECTIVE_SPEC_NLPCA = OptunaParamSpec(keys=_SHARED_OBJECTIVE_KEYS)
[docs] def validate_input_type( X: pd.DataFrame | np.ndarray | list | torch.Tensor, return_type: Literal["array", "df", "list", "tensor"] = "array", ) -> pd.DataFrame | np.ndarray | list | torch.Tensor: """Validate input type and return as numpy array. This method validates the input type and returns the input data as a numpy array, pandas DataFrame, 2D list, or torch.Tensor. Args: X (pandas.DataFrame | numpy.ndarray | list | torch.Tensor): Input data. Supported types include: pandas.DataFrame, numpy.ndarray, list, and torch.Tensor. return_type (Literal["array", "df", "list", "tensor"]): Type of returned object. Supported options include: "df", "array", "list", and "tensor". "df" corresponds to a pandas DataFrame. "array" corresponds to a numpy array. "list" corresponds to a 2D list. "tensor" corresponds to a torch.Tensor. Defaults to "array". Returns: pandas.DataFrame | numpy.ndarray | list | torch.Tensor: Input data as the desired return_type. Raises: TypeError: X must be of type pandas.DataFrame, numpy.ndarray, list, or torch.Tensor. ValueError: Unsupported return_type provided. Supported types are "df", "array", "list", and "tensor". """ if not isinstance(X, (pd.DataFrame, np.ndarray, list, torch.Tensor)): msg = f"X must be of type pandas.DataFrame, numpy.ndarray, list, or torch.Tensor, but got {type(X)}" raise TypeError(msg) if return_type not in {"df", "array", "list", "tensor"}: msg = f"Unsupported return type provided: {return_type}. 
Supported types are 'df', 'array', 'list', and 'tensor'" raise ValueError(msg) if return_type == "array": if isinstance(X, pd.DataFrame): return X.to_numpy() elif isinstance(X, list): return np.array(X) elif isinstance(X, np.ndarray): return X.copy() elif isinstance(X, torch.Tensor): return X.cpu().detach().numpy() elif return_type == "df": if isinstance(X, pd.DataFrame): return X.copy() elif isinstance(X, (np.ndarray, list)): return pd.DataFrame(X) elif isinstance(X, torch.Tensor): return pd.DataFrame(X.cpu().detach().numpy()) elif return_type == "list": if isinstance(X, list): return X elif isinstance(X, np.ndarray): return X.tolist() elif isinstance(X, pd.DataFrame): return X.to_numpy().tolist() elif isinstance(X, torch.Tensor): return X.detach().cpu().numpy().tolist() elif return_type == "tensor": if isinstance(X, torch.Tensor): return X elif isinstance(X, np.ndarray): return torch.from_numpy(X) elif isinstance(X, pd.DataFrame): return torch.from_numpy(X.to_numpy()) elif isinstance(X, list): return torch.tensor(X)
def detect_computing_device(
    *, force_cpu: bool = False, verbose: bool = False
) -> torch.device:
    """Pick the PyTorch device to run on.

    Preference order is CUDA, then MPS, then CPU. When ``force_cpu`` is set,
    neither availability check is consulted.

    Args:
        force_cpu (bool): If True, always select the CPU. Defaults to False.
        verbose (bool): If True, prints the selected device to stdout.
            Defaults to False.

    Returns:
        torch.device: The selected computing device.
    """
    # Short-circuiting on `force_cpu` keeps the availability probes from
    # running at all when the caller has already decided.
    if not force_cpu and torch.cuda.is_available():
        backend = "cuda"
    elif not force_cpu and torch.backends.mps.is_available():
        backend = "mps"
    else:
        backend = "cpu"

    selected = torch.device(backend)
    if verbose:
        print(f"Selected compute device: {selected}")
    return selected
[docs] def get_missing_mask( X: pd.DataFrame | pd.Series | np.ndarray | list | torch.Tensor, ) -> pd.DataFrame | pd.Series | np.ndarray | torch.Tensor: """Returns a boolean mask indicating missing values (NaN, None). Notes: Lists are converted to numpy arrays to compute the mask. Args: X: Input data. Returns: pd.DataFrame | pd.Series | np.ndarray | torch.Tensor: Boolean mask of the same shape as X (returned as DF, Array, or Tensor). Raises: TypeError: If input type is not supported. """ if isinstance(X, pd.DataFrame): return X.isna() elif isinstance(X, pd.Series): return pd.isna(X) elif isinstance(X, np.ndarray): # np.isnan fails on object arrays (e.g. strings) # so we check generically first if X.dtype.kind in {"U", "S", "O"}: # String/Object return pd.isnull(X) return np.isnan(X) elif isinstance(X, torch.Tensor): return torch.isnan(X) elif isinstance(X, list): arr = np.array(X) # Handle mixed types in lists if arr.dtype.kind in {"U", "S", "O"}: return pd.isnull(arr) return np.isnan(arr) else: raise TypeError( f"Unsupported type for missing value detection. Expected pandas.DataFrame, pandas.Series, numpy.ndarray, list, or torch.Tensor but got {type(X)}" )
[docs] def ensure_2d( X: pd.DataFrame | pd.Series | np.ndarray | list | torch.Tensor, ) -> pd.DataFrame | np.ndarray | list | torch.Tensor: """Ensures the input is at least 2-dimensional. If input is 1D (e.g., shape (N,)), it is reshaped to (N, 1). Already 2D+ inputs are returned unchanged. Args: X (pd.DataFrame | pd.Series | np.ndarray | list | torch.Tensor): Input data. Returns: pd.DataFrame | np.ndarray | list | torch.Tensor: Input data transformed to be at least 2D. Raises: TypeError: If input type is not supported. """ if isinstance(X, pd.DataFrame): return X # DataFrames are always 2D elif isinstance(X, pd.Series): return X.to_frame() # Convert Series to DataFrame (2D) elif isinstance(X, np.ndarray): if X.ndim == 1: return X.reshape(-1, 1) return X elif isinstance(X, torch.Tensor): if X.dim() == 1: return X.unsqueeze(1) return X elif isinstance(X, list): # Check depth of list if not X: return X if not isinstance(X[0], list): return [[x] for x in X] return X else: msg = f"X must be of type pandas.DataFrame, pd.Series, numpy.ndarray, list, or torch.Tensor, but got {type(X)}" raise TypeError(msg)
[docs] def flatten_1d( y: pd.DataFrame | pd.Series | np.ndarray | list | torch.Tensor, ) -> pd.Series | np.ndarray | list | torch.Tensor: """ Flattens input to a 1D structure. Args: y (pd.DataFrame | pd.Series | np.ndarray | list | torch.Tensor): Input data. Returns: pd.Series | np.ndarray | list | torch.Tensor: 1D representation of the input. Notes: Inputs with multiple columns (e.g., DataFrame with >1 column) are flattened into a single 1D structure. Raises: TypeError: If input type is not supported. """ if isinstance(y, pd.DataFrame): if y.shape[1] == 1: return y.iloc[:, 0] else: return pd.Series(y.to_numpy().flatten()) elif isinstance(y, np.ndarray): return y.flatten() elif isinstance(y, torch.Tensor): return y.view(-1) elif isinstance(y, list): # Recursively flatten list if needed, or simple comprehension if just 2D if not y: return y if isinstance(y[0], list): return [item for sublist in y for item in sublist] return y else: msg = f"Input must be of type pandas.DataFrame, pandas.Series, numpy.ndarray, list, or torch.Tensor, but got {type(y)}" raise TypeError(msg)
[docs] def safe_shape( X: pd.DataFrame | pd.Series | np.ndarray | list | torch.Tensor, ) -> tuple[int, ...]: """Returns the shape of the input container as a tuple. Args: X (pd.DataFrame | pd.Series | np.ndarray | list | torch.Tensor): Input data. Returns: tuple[int, ...]: Dimensions of the data (rows, cols, etc.). """ if isinstance(X, (pd.DataFrame, np.ndarray)): return X.shape elif isinstance(X, pd.Series): return (X.shape[0],) elif isinstance(X, torch.Tensor): return tuple(X.shape) elif isinstance(X, list): if not X: return (0,) rows = len(X) # Check if 2D list if isinstance(X[0], list): return (rows, len(X[0])) return (rows,) else: msg = f"X must be of type pandas.DataFrame, pd.Series, numpy.ndarray, list, or torch.Tensor, but got {type(X)}" raise TypeError(msg)