Source code for regressioninc.testing.complex

"""
Functions for generating and visualising complex-valued testing data
"""
from loguru import logger
from typing import Optional
from pydantic import BaseModel
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

from regressioninc.base import Estimator


[docs]class ComplexGrid(BaseModel): """A class to generate a grid for complex data""" r1: float """Starting real pt""" r2: float """End real pt""" nr: int """Number of real pts""" i1: float """Start imaginary pt""" i2: float """End imaginary pt""" ni: int """Number of imaginary pts""" @property def n_pts(self) -> int: """ Get the number of points grid Returns ------- int Number of points in grid Examples -------- >>> from regressioninc.testing.complex import ComplexGrid >>> grid = ComplexGrid(r1=0, r2=5, nr=6, i1=0, i2=5, ni=6) >>> grid.n_pts 36 """ return self.nr * self.ni
[docs] def grid(self) -> np.ndarray: """ Get the grid points as an 2-D array Returns ------- np.ndarray The grid points as a 2-D array Examples -------- >>> from regressioninc.testing.complex import ComplexGrid >>> grid = ComplexGrid(r1=-1, r2=1, nr=3, i1=-1, i2=1, ni=3) >>> grid.grid() array([[-1.-1.j, -1.+0.j, -1.+1.j], [ 0.-1.j, 0.+0.j, 0.+1.j], [ 1.-1.j, 1.+0.j, 1.+1.j]]) """ r_pts = np.linspace(self.r1, self.r2, num=self.nr) i_pts = np.linspace(self.i1, self.i2, num=self.ni) reals, imags = np.meshgrid(r_pts, i_pts, indexing="ij") return reals + 1j * imags
[docs] def flat_grid(self) -> np.ndarray: """ Get the grid as a flat array Returns ------- np.ndarray The grid of points as a 1-D flattened array Examples -------- >>> from regressioninc.testing.complex import ComplexGrid >>> grid = ComplexGrid(r1=-1, r2=1, nr=3, i1=-1, i2=1, ni=3) >>> grid.grid() array([[-1.-1.j, -1.+0.j, -1.+1.j], [ 0.-1.j, 0.+0.j, 0.+1.j], [ 1.-1.j, 1.+0.j, 1.+1.j]]) """ return self.grid().flatten().reshape(self.n_pts, 1)
[docs]def generate_linear_grid( params: np.ndarray, grids: list[ComplexGrid], intercept: complex = 0 ) -> tuple[np.ndarray, np.ndarray]: """ Generate complex-valued regression data from parameters and grids of regressors Parameters ---------- params : np.ndarray The parameters of the linear function grids : list[ComplexGrid] The grid of points for each regressor intercept : complex, optional The intercept, by default 0 Returns ------- tuple[np.ndarray, np.ndarray] X, y for the regression problem Raises ------ ValueError If the number of grids provided does not equal the number of parameters ValueError If the grids have different numbers of points """ n_params = params.size if (n_grids := len(grids)) != n_params: raise ValueError(f"{n_grids=} != {n_params=}") pts = [g.n_pts for g in grids] if len(set(pts)) != 1: raise ValueError(f"The regressor grids have different numbers of points {pts}") n_pts = pts[0] X = np.empty(shape=(n_pts, n_params), dtype=complex) for iparam in range(n_params): X[:, iparam] = np.squeeze(grids[iparam].flat_grid()) y = np.matmul(X, params) + intercept return X, y
[docs]def generate_random_regressors( n_regressors: int, n_samples: int, min_rand=-10, max_rand=10, ): """Produce random complex-values for the regressors""" if n_samples < n_regressors: raise ValueError(f"{n_samples=} must be >= {n_regressors=}") shape = (n_samples, n_regressors) # generate the data X = np.random.uniform(min_rand, max_rand, size=shape) X = X.astype(complex) + 1.0j * np.random.uniform(-20, 20, size=shape) return X.reshape(n_samples, n_regressors)
[docs]def generate_linear_random( params: np.ndarray, n_samples: int, intercept: complex = 0, min_rand=-10, max_rand=10, ): """Produce complex data for testing without any noise""" X = generate_random_regressors(params.size, n_samples, min_rand, max_rand) y = np.matmul(X, params) + intercept return X, y
[docs]def add_gaussian_noise( data: np.ndarray, loc: Optional[tuple[float]] = None, scale: Optional[tuple[float]] = None, ) -> np.ndarray: """Add Gaussian noise to data""" n_samples = data.shape[0] scale = 0.5 * np.array([[scale[0], 0], [0, scale[1]]]) z = np.random.multivariate_normal(loc, scale, size=n_samples).view(complex) return data + np.squeeze(z)
[docs]def add_outliers( y: np.ndarray, outlier_percent: float = 5, mult_min=3, mult_max=5, random_signs_real: bool = False, random_signs_imag: bool = False, ) -> np.ndarray: """Add outliers to a complex-valued 1-D regressand array""" if mult_min >= mult_max: raise ValueError(f"{mult_min=} must be less than {mult_max=}") if outlier_percent == 0: logger.debug("No outliers being added, function call is redundant") return y n_samples = y.size n_outliers = int((outlier_percent / 100) * n_samples) # create outliers max_r = np.max(np.abs(y.real)) max_i = np.max(np.abs(y.imag)) outliers_r = np.random.uniform(max_r * mult_min, max_r * mult_max, size=n_outliers) outliers_i = np.random.uniform(max_i * mult_min, max_i * mult_max, size=n_outliers) if random_signs_real: signs = np.random.randint(0, 2, size=n_outliers) * 2 - 1 outliers_r = outliers_r * signs if random_signs_imag: signs = np.random.randint(0, 2, size=n_outliers) * 2 - 1 outliers_i = outliers_i * signs outliers = outliers_r + 1j * outliers_i # add to regressand logger.debug(f"Adding {n_outliers=} to regressand") outlier_indices = np.random.randint(0, n_samples, size=n_outliers) y_new = np.array(y) y_new[outlier_indices] = y_new[outlier_indices] + outliers return y_new
[docs]def plot_regressand(y: np.ndarray, size: int = 10, alpha: float = 1.0) -> None: """Plot regressand data""" plt.scatter( y.real, y.imag, s=size, color="red", edgecolor="firebrick", marker="d", alpha=alpha, label="y", ) plt.title("Regressand")
[docs]def plot_regressand_original( y_orig: np.ndarray, size: int = 10, alpha: float = 1.0 ) -> None: """Plot original regressand, meant for data without noise""" plt.scatter( y_orig.real, y_orig.imag, s=size, color="teal", edgecolor="darkslategrey", marker="*", alpha=alpha, label="y original", ) plt.title("Regressand original")
[docs]def plot_regressor( reg: np.ndarray, color: np.ndarray, size: int = 10, alpha: float = 1.0 ) -> None: """Plot regression data""" plt.scatter( reg.real, reg.imag, s=size, color=color, edgecolor="k", alpha=alpha, )
[docs]def plot_estimate( est: np.ndarray, color: np.ndarray, size: int = 10, alpha: float = 1.0, label: str = "estimate", ) -> None: """Plot model estimates""" plt.scatter(est.real, est.imag, s=size, color=color, alpha=alpha, label=label)
[docs]def plot_complex( X, y, models: dict[str, Estimator], y_orig: Optional[np.ndarray] = None, size_obs: int = 10, size_reg: int = 10, size_est: int = 10, ): """Plot the complex data""" n_regressand = 1 if y_orig is None else 2 n_regressors = X.shape[1] n_models = len(models) n_rows = 2 if n_models == 0 else 3 n_cols = max([n_regressand, n_regressors, n_models]) logger.debug(f"{n_rows=}") logger.debug(f"{n_cols=}") fig = plt.figure() # plot the regressand plt.subplot(n_rows, n_cols, 1) plot_regressand(y, size=size_obs) if y_orig is not None: plt.subplot(n_rows, n_cols, 2) plot_regressand(y, size=size_obs, alpha=0.1) plot_regressand_original(y_orig, size=size_obs) plt.legend() # plot the regressors colors = matplotlib.cm.get_cmap("Pastel1", n_regressors).colors for ireg, color in enumerate(colors): plt.subplot(n_rows, n_cols, n_cols + 1 + ireg) plot_regressor(X[:, ireg], color=color, size=size_reg) plt.title(f"Regressor {ireg + 1} of {n_regressors}") # plot the model predictions model_names = list(models.keys()) colors = matplotlib.cm.get_cmap("Dark2", n_models).colors for imodel in range(n_models): plt.subplot(n_rows, n_cols, 2 * n_cols + 1 + imodel) name = model_names[imodel] color = colors[imodel] model = models[name] y_est = model.predict(X) plot_estimate(y_est, color=color, size=size_est, label=name) plot_regressand(y, size=size_obs, alpha=0.1) if y_orig is not None: plot_regressand_original(y_orig, size=size_obs, alpha=0.4) plt.title(name) plt.tight_layout() return fig