Source code for regressioninc.testing.complex

"""
Functions for generating and visualising complex-valued testing data
"""
from loguru import logger
from typing import Optional
from pydantic import BaseModel
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

from regressioninc.base import Regressor


[docs]class ComplexGrid(BaseModel):
    """A class to generate a grid for complex data"""

    r1: float
    """Starting real pt"""
    r2: float
    """End real pt"""
    nr: int
    """Number of real pts"""
    i1: float
    """Start imaginary pt"""
    i2: float
    """End imaginary pt"""
    ni: int
    """Number of imaginary pts"""

    @property
    def n_pts(self) -> int:
        """
        Get the number of points grid

        Returns
        -------
        int
            Number of points in grid

        Examples
        --------
        >>> from regressioninc.testing.complex import ComplexGrid
        >>> grid = ComplexGrid(r1=0, r2=5, nr=6, i1=0, i2=5, ni=6)
        >>> grid.n_pts
        36
        """
        return self.nr * self.ni

[docs]    def grid(self) -> np.ndarray:
        """
        Get the grid points as an 2-D array

        Returns
        -------
        np.ndarray
            The grid points as a 2-D array

        Examples
        --------
        >>> from regressioninc.testing.complex import ComplexGrid
        >>> grid = ComplexGrid(r1=-1, r2=1, nr=3, i1=-1, i2=1, ni=3)
        >>> grid.grid()
        array([[-1.-1.j, -1.+0.j, -1.+1.j],
               [ 0.-1.j,  0.+0.j,  0.+1.j],
               [ 1.-1.j,  1.+0.j,  1.+1.j]])
        """
        r_pts = np.linspace(self.r1, self.r2, num=self.nr)
        i_pts = np.linspace(self.i1, self.i2, num=self.ni)
        reals, imags = np.meshgrid(r_pts, i_pts, indexing="ij")
        return reals + 1j * imags

[docs]    def flat_grid(self) -> np.ndarray:
        """
        Get the grid as a flat array

        Returns
        -------
        np.ndarray
            The grid of points as a 1-D flattened array

        Examples
        --------
        >>> from regressioninc.testing.complex import ComplexGrid
        >>> grid = ComplexGrid(r1=-1, r2=1, nr=3, i1=-1, i2=1, ni=3)
        >>> grid.grid()
        array([[-1.-1.j, -1.+0.j, -1.+1.j],
               [ 0.-1.j,  0.+0.j,  0.+1.j],
               [ 1.-1.j,  1.+0.j,  1.+1.j]])
        """
        return self.grid().flatten().reshape(self.n_pts, 1)


[docs]def generate_linear_grid(
    coef: np.ndarray, grids: list[ComplexGrid], intercept: complex = 0
) -> tuple[np.ndarray, np.ndarray]:
    """
    Generate complex regression data from coefficients and grids of regressors

    Parameters
    ----------
    coef : np.ndarray
        The coefficients for the regressors
    grids : list[ComplexGrid]
        The grid of points for each regressor
    intercept : complex, optional
        The intercept, by default 0

    Returns
    -------
    tuple[np.ndarray, np.ndarray]
        X, y for the regression problem

    Raises
    ------
    ValueError
        If the number of grids provided does not equal the number of
        coefficients
    ValueError
        If the grids have different numbers of points
    """
    n_coef = coef.size

    if (n_grids := len(grids)) != n_coef:
        raise ValueError(f"{n_grids=} != {n_coef=}")
    pts = [g.n_pts for g in grids]
    if len(set(pts)) != 1:
        raise ValueError(f"The regressor grids have different numbers of points {pts}")
    n_pts = pts[0]

    X = np.empty(shape=(n_pts, n_coef), dtype=complex)
    for icoef in range(n_coef):
        X[:, icoef] = np.squeeze(grids[icoef].flat_grid())
    y = np.matmul(X, coef) + intercept
    return X, y


[docs]def generate_linear_random(
    coef: np.ndarray, n_samples: int, intercept: complex = 0, min_rand=-10, max_rand=10
):
    """Produce complex data for testing without any noise"""
    n_regressors = coef.size
    if n_samples is None:
        n_samples = coef.size * 2
    if n_samples < n_regressors:
        raise ValueError(f"{n_samples=} must be >= {n_regressors=}")
    shape = (n_samples, n_regressors)
    # generate the data
    X = np.random.uniform(min_rand, max_rand, size=shape)
    X = X.astype(complex) + 1.0j * np.random.uniform(-20, 20, size=shape)
    X = X.reshape(n_samples, n_regressors)
    y = np.matmul(X, coef) + intercept
    return X, y


[docs]def add_gaussian_noise(
    data: np.ndarray,
    loc: Optional[tuple[float]] = None,
    scale: Optional[tuple[float]] = None,
) -> np.ndarray:
    """Add Gaussian noise to data"""
    n_samples = data.shape[0]
    scale = 0.5 * np.array([[scale[0], 0], [0, scale[1]]])
    z = np.random.multivariate_normal(loc, scale, size=n_samples).view(complex)
    return data + np.squeeze(z)


[docs]def add_outliers(
    y: np.ndarray,
    outlier_percent: float = 5,
    mult_min=3,
    mult_max=5,
    random_signs_real: bool = False,
    random_signs_imag: bool = False,
) -> np.ndarray:
    """Add outliers to a complex-valued 1-D observations array"""
    if mult_min >= mult_max:
        raise ValueError(f"{mult_min=} must be less than {mult_max=}")

    if outlier_percent == 0:
        logger.debug("No outliers being added, function call is redundant")
        return y

    n_samples = y.size
    n_outliers = int((outlier_percent / 100) * n_samples)
    # create outliers
    max_r = np.max(np.abs(y.real))
    max_i = np.max(np.abs(y.imag))
    outliers_r = np.random.uniform(max_r * mult_min, max_r * mult_max, size=n_outliers)
    outliers_i = np.random.uniform(max_i * mult_min, max_i * mult_max, size=n_outliers)
    if random_signs_real:
        signs = np.random.randint(0, 2, size=n_outliers) * 2 - 1
        outliers_r = outliers_r * signs
    if random_signs_imag:
        signs = np.random.randint(0, 2, size=n_outliers) * 2 - 1
        outliers_i = outliers_i * signs
    outliers = outliers_r + 1j * outliers_i
    # add to observations
    logger.debug(f"Adding {n_outliers=} to observations")
    outlier_indices = np.random.randint(0, n_samples, size=n_outliers)
    y_new = np.array(y)
    y_new[outlier_indices] = y_new[outlier_indices] + outliers
    return y_new


[docs]def plot_observations(y: np.ndarray, size: int = 10, alpha: float = 1.0) -> None:
    """Plot observation data"""
    plt.scatter(
        y.real,
        y.imag,
        s=size,
        color="red",
        edgecolor="firebrick",
        marker="d",
        alpha=alpha,
        label="y",
    )
    plt.title("Observations")


[docs]def plot_observations_original(
    y_orig: np.ndarray, size: int = 10, alpha: float = 1.0
) -> None:
    """Plot original observations, meant for data without noise"""
    plt.scatter(
        y_orig.real,
        y_orig.imag,
        s=size,
        color="teal",
        edgecolor="darkslategrey",
        marker="*",
        alpha=alpha,
        label="y original",
    )
    plt.title("Observations original")


[docs]def plot_regressor(
    reg: np.ndarray, color: np.ndarray, size: int = 10, alpha: float = 1.0
) -> None:
    """Plot regression data"""
    plt.scatter(
        reg.real,
        reg.imag,
        s=size,
        color=color,
        edgecolor="k",
        alpha=alpha,
    )


[docs]def plot_estimate(
    est: np.ndarray,
    color: np.ndarray,
    size: int = 10,
    alpha: float = 1.0,
    label: str = "estimate",
) -> None:
    """Plot model estimates"""
    plt.scatter(est.real, est.imag, s=size, color=color, alpha=alpha, label=label)


[docs]def plot_complex(
    X,
    y,
    models: dict[str, Regressor],
    y_orig: Optional[np.ndarray] = None,
    size_obs: int = 10,
    size_reg: int = 10,
    size_est: int = 10,
):
    """Plot the complex data"""
    n_observations = 1 if y_orig is None else 2
    n_regressors = X.shape[1]
    n_models = len(models)
    n_rows = 2 if n_models == 0 else 3
    n_cols = max([n_observations, n_regressors, n_models])
    logger.info(f"{n_rows=}")
    logger.info(f"{n_cols=}")

    fig = plt.figure()
    # plot the observations
    plt.subplot(n_rows, n_cols, 1)
    plot_observations(y, size=size_obs)
    if y_orig is not None:
        plt.subplot(n_rows, n_cols, 2)
        plot_observations(y, size=size_obs, alpha=0.1)
        plot_observations_original(y_orig, size=size_obs)
        plt.legend()

    # plot the regressors
    colors = matplotlib.cm.get_cmap("Pastel1", n_regressors).colors
    for ireg, color in enumerate(colors):
        plt.subplot(n_rows, n_cols, n_cols + 1 + ireg)
        plot_regressor(X[:, ireg], color=color, size=size_reg)
        plt.title(f"Regressor {ireg + 1} of {n_regressors}")

    # plot the model predictions
    model_names = list(models.keys())
    colors = matplotlib.cm.get_cmap("Dark2", n_models).colors
    for imodel in range(n_models):
        plt.subplot(n_rows, n_cols, 2 * n_cols + 1 + imodel)
        name = model_names[imodel]
        color = colors[imodel]
        model = models[name]
        y_est = model.predict(X)
        plot_estimate(y_est, color=color, size=size_est, label=name)
        plot_observations(y, size=size_obs, alpha=0.1)
        if y_orig is not None:
            plot_observations_original(y_orig, size=size_obs, alpha=0.4)
        plt.title(name)

    plt.tight_layout()
    return fig