Source code for mlcolvar.core.stats.pca

"""Principal component analysis"""

import torch

from mlcolvar.core.stats import Stats

__all__ = ["PCA"]


[docs] class PCA(Stats): """ Principal Component Analysis class. Attributes ---------- in_features : int Number of features out_features : int Number of output features evals : torch.Tensor PCA eigenvalues evecs : torch.Tensor PCA eigenvectors """
[docs] def __init__(self, in_features: int, out_features: int = None): """ Initialize a PCA object. If out_features (<in_features) is given a low rank approximation will be performed. Parameters ---------- in_features : int number of input features out_features : int, optional number of principal components, by default None (equal to in_features) """ super().__init__() # Save attributes self.in_features = in_features self.out_features = in_features if out_features is None else out_features # create eigenvector buffer self.register_buffer("evecs", torch.eye(in_features, self.out_features)) # initialize other attributes self.evals = None
[docs] def extra_repr(self) -> str: repr = f"in_features={self.in_features}, out_features={self.out_features}" return repr
[docs] def compute(self, X, save_params=True, **kwargs): """ Compute PCA eigenvalues and eigenvectors via torch.pca_lowrank method. Parameters ---------- X : array-like of shape (n_samples, in_features) Training data. save_params: bool, optional Whether to store parameters in model kwargs: optional Keyword arguments passed to torch.pca_lowrank (e.g. center) Returns ------- eigvals : torch.Tensor PCA eigenvalues eigvecs : torch.Tensor PCA eigenvectors Notes ----- The eigenvecs object which is returned is a matrix whose column eigvecs[:,i] is the eigenvector associated to eigvals[i] """ n, d = X.shape U, S, V = torch.pca_lowrank(X, q=self.out_features, **kwargs) evals = torch.square(S) / (n - 1) evecs = V if save_params: self.evals = evals self.evecs = evecs return evals, evecs
@property def explained_variance(self): """Explained variance ratio of the principal components selected. Returns ------- exp_var, torch.Tensor explained variance ratio """ return self.evals[: self.out_features] / self.evals.sum() @property def cumulative_explained_variance(self): """Cumulative explained variance. Returns ------- cum_exp_var, torch.Tensor cumulative variance ratio """ return torch.cumsum(self.explained_variance[: self.out_features], 0)
[docs] def forward(self, x: torch.Tensor) -> torch.Tensor: """ Compute linear combination with saved eigenvectors. If self.out_features is < then the number of eigenvectors only the first out_features components will be used. Parameters ---------- x: torch.Tensor input Returns ------- out : torch.Tensor output """ return torch.matmul(x, self.evecs[:, : self.out_features])
def test_pca(): in_features = 10 torch.manual_seed(42) X = torch.rand(100, in_features) * 100 # find all components (default) pca = PCA(in_features) _ = pca.compute(X) s = pca(X) assert s.shape[1] == in_features assert len(pca.explained_variance) == in_features assert len(pca.cumulative_explained_variance) == in_features # select first n_components after calculation n_components = 5 pca.out_features = n_components assert len(pca.explained_variance) == n_components assert len(pca.cumulative_explained_variance) == n_components # select n_components in init pca = PCA(in_features, n_components) _ = pca.compute(X) s = pca(X) assert s.shape[1] == n_components assert len(pca.explained_variance) == n_components assert len(pca.cumulative_explained_variance) == n_components if __name__ == "__main__": test_pca()