Source code for mlcolvar.cvs.supervised.regression

import torch
import lightning
from mlcolvar.cvs import BaseCV
from mlcolvar.core import FeedForward, Normalization
from mlcolvar.core.loss import MSELoss

# Names exported by `from <this module> import *`.
__all__ = ["RegressionCV"]


class RegressionCV(BaseCV, lightning.LightningModule):
    """
    Collective variable learned through a supervised regression task.

    The inputs are passed through a feed-forward neural network whose
    parameters are optimized so that the output reproduces a target function.

    **Data**: training requires a DictDataset with the keys 'data' and
    'target', and optionally 'weights'.

    **Loss**: least squares (MSELoss).

    See also
    --------
    mlcolvar.core.loss.MSELoss
        (weighted) Mean Squared Error (MSE) loss function.
    """

    BLOCKS = ["norm_in", "nn"]

    def __init__(self, layers: list, options: dict = None, **kwargs):
        """Build the regression CV from the layer sizes and block options.

        By default a module standardizing the inputs is used.

        Parameters
        ----------
        layers : list
            Number of neurons per layer; the first entry is used as the number
            of input features and the last one as the number of outputs.
        options : dict[str, Any], optional
            Options for the building blocks of the model, by default None.
            Available blocks: ['norm_in', 'nn'].
            Set 'block_name' = None or False to turn off that block
        **kwargs
            Forwarded unchanged to the parent class constructor.
        """
        super().__init__(in_features=layers[0], out_features=layers[-1], **kwargs)

        # ----- loss -----
        self.loss_fn = MSELoss()

        # ----- options -----
        # parse and sanitize the user-provided options
        block_options = self.parse_options(options)

        # input normalization: skipped when the entry is None or False
        norm_options = block_options["norm_in"]
        norm_disabled = (norm_options is False) or (norm_options is None)
        if not norm_disabled:
            self.norm_in = Normalization(self.in_features, **norm_options)

        # feed-forward network (always created)
        self.nn = FeedForward(layers, **block_options["nn"])

    def training_step(self, train_batch, batch_idx):
        """Compute and return the training loss and record metrics.

        Parameters
        ----------
        train_batch : mapping
            Batch providing the 'data' and 'target' entries and, optionally,
            'weights' (passed to the loss as a keyword argument when present).
        batch_idx : int
            Index of the batch (not used here).

        Returns
        -------
        loss
            Value returned by ``self.loss_fn`` for this batch.
        """
        # ----- unpack the batch -----
        inputs = train_batch["data"]
        targets = train_batch["target"]
        if "weights" in train_batch:
            extra_loss_args = {"weights": train_batch["weights"]}
        else:
            extra_loss_args = {}

        # ----- forward pass -----
        prediction = self.forward_cv(inputs)

        # ----- loss -----
        loss = self.loss_fn(prediction, targets, **extra_loss_args)

        # ----- logging -----
        # the metric prefix follows the module's current train/eval mode
        if self.training:
            stage = "train"
        else:
            stage = "valid"
        self.log(f"{stage}_loss", loss, on_epoch=True)
        return loss
def test_regression_cv():
    """
    Create a synthetic dataset and test functionality of the RegressionCV class.

    The test covers, in order: construction from an options dictionary,
    training with the default loss, tracing to TorchScript, training with a
    weighted dataset, and training with a user-supplied loss function.
    """
    from mlcolvar.data import DictDataset, DictModule

    def make_trainer():
        # One short CPU-only run per call. A new Trainer is built for every
        # fit so that each run gets its own `max_epochs` budget.
        # NOTE(review): a reused Trainer appears to keep its epoch progress,
        # which would turn a second fit into a no-op -- confirm against the
        # installed lightning version.
        return lightning.Trainer(
            accelerator="cpu", max_epochs=1, logger=None, enable_checkpointing=False
        )

    in_features, out_features = 2, 1
    layers = [in_features, 5, 10, out_features]

    # initialize via dictionary
    options = {"nn": {"activation": "relu"}}

    model = RegressionCV(layers=layers, options=options)
    print("----------")
    print(model)

    # create dataset: the target is the squared norm of each sample (1-D tensor)
    X = torch.randn((100, 2))
    y = X.square().sum(1)
    dataset = DictDataset({"data": X, "target": y})
    datamodule = DictModule(dataset, lengths=[0.75, 0.2, 0.05], batch_size=25)

    # train model
    model.optimizer_name = "SGD"
    model.optimizer_kwargs.update(dict(lr=1e-2))
    trainer = make_trainer()
    trainer.fit(model, datamodule)
    model.eval()

    # trace model and check the traced module reproduces the eager outputs
    traced_model = model.to_torchscript(
        file_path=None, method="trace", example_inputs=X[0]
    )
    assert torch.allclose(model(X), traced_model(X))

    # weighted loss
    print("weighted loss")
    w = torch.randn((100))
    dataset_weights = DictDataset({"data": X, "target": y, "weights": w})
    datamodule_weights = DictModule(
        dataset_weights, lengths=[0.75, 0.2, 0.05], batch_size=25
    )
    trainer = make_trainer()
    trainer.fit(model, datamodule_weights)

    # use custom loss
    print("custom loss")
    trainer = make_trainer()

    model = RegressionCV(layers=[2, 10, 10, 1])
    # The targets are 1-D while the network output is expected to carry a
    # trailing feature dimension; reshape the reference to the prediction's
    # shape so the difference is element-wise instead of being broadcast to a
    # (batch, batch) matrix.
    model.loss_fn = lambda y, y_ref: (y - y_ref.reshape(y.shape)).abs().mean()
    trainer.fit(model, datamodule)


if __name__ == "__main__":
    test_regression_cv()