Source code for mlcolvar.core.transform.tools.normalization

import torch
from mlcolvar.core.transform.utils import Statistics
from mlcolvar.core.transform.tools.utils import batch_reshape
from mlcolvar.core.transform import Transform

__all__ = ["Normalization"]


def sanitize_range(range: torch.Tensor):
    """Sanitize

    Parameters
    ----------
    range : torch.Tensor
        range to be used for standardization

    """

    if (range < 1e-6).nonzero().sum() > 0:
        print(
            "[Warning] Normalization: the following features have a range of values < 1e-6:",
            (range < 1e-6).nonzero(),
        )
    range[range < 1e-6] = 1.0

    return range


[docs] class Normalization(Transform): """ Normalizing block, used for computing standardized inputs/outputs. """
[docs] def __init__( self, in_features: int, mean: torch.Tensor = None, range: torch.Tensor = None, stats: dict = None, mode: str = "mean_std", ): """Initialize a normalization object. Values will be subtracted by self.mean and then divided by self.range. The parameters for the standardization can be either given from the user (via mean/range keywords), or they can be calculated from a datamodule. In the former, the mode will be overriden as 'custom'. 'In the latter, the standardization mode can be either 'mean_std' (remove by the mean and divide by the standard deviation) or 'min_max' (scale and shift the range of values such that all inputs are between -1 and 1). Parameters ---------- in_features : int number of inputs mean: torch.Tensor, optional values to be subtracted range: torch.Tensor, optional values to be scaled by mode : str, optional normalization mode (mean_std, min_max), by default 'mean_std' """ super().__init__(in_features=in_features, out_features=in_features) # buffers containing mean and range for standardization self.register_buffer("mean", torch.zeros(in_features)) self.register_buffer("range", torch.ones(in_features)) self.mode = mode self.is_initialized = False # set values based on args if provided self.set_custom(mean, range) if stats is not None: self.set_from_stats(stats, mode=mode) # save params self.in_features = in_features self.out_features = in_features
[docs] def extra_repr(self) -> str: return f"in_features={self.in_features}, out_features={self.out_features}, mode={self.mode}"
[docs] def set_custom(self, mean: torch.Tensor = None, range: torch.Tensor = None): """Set parameter of the normalization layer. Parameters ---------- mean : torch.Tensor Value that will be removed. range : torch.Tensor, optional Value that will be divided for. """ if mean is not None: self.mean = mean if range is not None: self.range = sanitize_range(range) if mean is not None or range is not None: self.is_initialized = True self.mode = "custom"
[docs] def set_from_stats(self, stats: dict or Statistics, mode: str = None): """Set parameters of the normalization layer based on a dictionary with statistics Parameters ---------- stats : dict or Statistics dictionary with statistics mode : str, optional standardization mode ('mean_std' or 'min_max'), by default None (will use self.mode) """ if mode is None: mode = self.mode if isinstance(stats, Statistics): stats = stats.to_dict() if mode == "mean_std": self.mean = stats["mean"] range = stats["std"] self.range = sanitize_range(range) elif mode == "min_max": min = stats["min"] max = stats["max"] self.mean = (max + min) / 2.0 range = (max - min) / 2.0 self.range = sanitize_range(range) elif mode == "custom": raise AttributeError( "If mode is custom the parameters should be supplied via mean and range values when creating the Normalization object or with the set_custom, not with set_from_stats." ) else: raise ValueError( f'Mode {self.mode} unknonwn. Available modes: "mean_std", "min_max","custom"' ) self.is_initialized = True if mode != self.mode: self.mode = mode
[docs] def setup_from_datamodule(self, datamodule): if not self.is_initialized: # obtain statistics from the dataloader try: stats = datamodule.train_dataloader().get_stats()["data"] except KeyError: raise ValueError( f"Impossible to initialize {self.__class__.__name__} " 'because the training dataloader does not have a "data" key ' "(are you using multiple datasets?). A manual initialization " 'of "mean" and "range" is necessary.' ) self.set_from_stats(stats, self.mode)
[docs] def forward(self, x: torch.Tensor) -> torch.Tensor: """ Compute standardized inputs. Parameters ---------- x: torch.Tensor input/output Returns ------- out : torch.Tensor standardized inputs """ # get mean and range mean = batch_reshape(self.mean, x.size()) range = batch_reshape(self.range, x.size()) return x.sub(mean).div(range)
[docs] def inverse(self, x: torch.Tensor) -> torch.Tensor: """ Remove standardization. Parameters ---------- x: torch.Tensor input Returns ------- out : torch.Tensor un-normalized inputs """ # get mean and range mean = batch_reshape(self.mean, x.size()) range = batch_reshape(self.range, x.size()) return x.mul(range).add(mean)
def test_normalization(): from mlcolvar.core.transform.utils import Inverse # create data torch.manual_seed(42) in_features = 2 X = torch.randn((100, in_features)) * 10 # get stats from mlcolvar.core.transform.utils import Statistics stats = Statistics(X).to_dict() norm = Normalization(in_features, mean=stats["mean"], range=stats["std"]) y = norm(X) # test inverse z = norm.inverse(y) assert(torch.allclose(X.mean(0), z.mean(0))) assert(torch.allclose(X.std(0) , z.std(0))) # test inverse class inverse = Inverse(norm) q = inverse(y) assert(torch.allclose(X.mean(0), q.mean(0))) assert(torch.allclose(X.std(0), q.std(0))) norm = Normalization( in_features, mean=stats["mean"], range=stats["std"], mode="min_max" ) if __name__ == "__main__": test_normalization()