import torch
import lightning
from mlcolvar.cvs import BaseCV
from mlcolvar.core import FeedForward, Normalization
from mlcolvar.core.loss import MSELoss
__all__ = ["RegressionCV"]
[docs]
class RegressionCV(BaseCV, lightning.LightningModule):
    """
    Collective variable learned through a regression task.

    The inputs are combined by a neural network whose parameters are
    optimized so that the output matches a target function.

    **Data**: training requires a DictDataset with the keys 'data' and
    'target', and optionally 'weights'.

    **Loss**: least squares (MSELoss).

    See also
    --------
    mlcolvar.core.loss.MSELoss
        (weighted) Mean Squared Error (MSE) loss function.
    """

    # Names of the building blocks that can be configured via `options`.
    BLOCKS = ["norm_in", "nn"]

    def __init__(self, layers: list, options: dict = None, **kwargs):
        """Build the regression CV: optional input normalization plus a network.

        By default a module standardizing the inputs is used.

        Parameters
        ----------
        layers : list
            Number of neurons per layer. The first entry is used as the
            number of input features and the last one as the number of
            output features.
        options : dict[str, Any], optional
            Options for the building blocks of the model, by default None.
            Available blocks: ['norm_in', 'nn'].
            Set 'block_name' = None or False to turn off that block
        **kwargs
            Forwarded unchanged to the parent class constructor.
        """
        super().__init__(in_features=layers[0], out_features=layers[-1], **kwargs)

        # ======= LOSS =======
        self.loss_fn = MSELoss()

        # ======= OPTIONS =======
        # parse and sanitize the user-provided options
        block_options = self.parse_options(options)

        # ======= BLOCKS =======
        # input normalization: skipped when explicitly disabled (None / False)
        norm_in_options = block_options["norm_in"]
        if norm_in_options is not False and norm_in_options is not None:
            self.norm_in = Normalization(self.in_features, **norm_in_options)

        # feed-forward network producing the CV
        self.nn = FeedForward(layers, **block_options["nn"])

    def training_step(self, train_batch, batch_idx):
        """Compute the regression loss on a batch, log it and return it.

        Parameters
        ----------
        train_batch : dict
            Batch with the keys 'data' and 'target' and, optionally,
            'weights' (passed to the loss function when present).
        batch_idx : int
            Index of the batch (not used here).

        Returns
        -------
        loss
            Value returned by ``self.loss_fn`` for this batch.
        """
        # ================= get data ===================
        inputs = train_batch["data"]
        targets = train_batch["target"]
        # weights are passed to the loss only when the batch provides them
        extra_loss_args = (
            {"weights": train_batch["weights"]} if "weights" in train_batch else {}
        )

        # ================= forward ====================
        predictions = self.forward_cv(inputs)

        # =================== loss =====================
        loss = self.loss_fn(predictions, targets, **extra_loss_args)

        # ==================== log =====================
        # the metric prefix follows the module mode: "train" while training,
        # "valid" otherwise
        if self.training:
            stage = "train"
        else:
            stage = "valid"
        self.log(f"{stage}_loss", loss, on_epoch=True)
        return loss
def test_regression_cv():
    """
    Create a synthetic dataset and test functionality of the RegressionCV class.

    The following paths are exercised in sequence:

    1. construction from an options dictionary and a short training run;
    2. TorchScript tracing, checking that the traced model reproduces the
       outputs of the original one;
    3. training with per-sample weights;
    4. training with a user-supplied loss function.
    """
    from mlcolvar.data import DictDataset, DictModule

    in_features, out_features = 2, 1
    layers = [in_features, 5, 10, out_features]

    # initialize via dictionary
    options = {"nn": {"activation": "relu"}}
    model = RegressionCV(layers=layers, options=options)
    print("----------")
    print(model)

    # create dataset: the target is the squared norm of the input
    X = torch.randn((100, in_features))
    y = X.square().sum(1)
    dataset = DictDataset({"data": X, "target": y})
    datamodule = DictModule(dataset, lengths=[0.75, 0.2, 0.05], batch_size=25)

    # train model
    model.optimizer_name = "SGD"
    model.optimizer_kwargs.update(dict(lr=1e-2))
    trainer = lightning.Trainer(
        accelerator="cpu", max_epochs=1, logger=None, enable_checkpointing=False
    )
    trainer.fit(model, datamodule)
    model.eval()

    # trace model
    traced_model = model.to_torchscript(
        file_path=None, method="trace", example_inputs=X[0]
    )
    assert torch.allclose(model(X), traced_model(X))

    # weighted loss
    print("weighted loss")
    # sample weights are drawn in [0, 1): negative weights (as produced by
    # randn) would make the weighted squared error unbounded from below
    w = torch.rand(100)
    dataset_weights = DictDataset({"data": X, "target": y, "weights": w})
    datamodule_weights = DictModule(
        dataset_weights, lengths=[0.75, 0.2, 0.05], batch_size=25
    )
    # NOTE(review): the previous trainer has already completed its single
    # epoch, so reusing it would presumably skip the training loop; a fresh
    # trainer is created, as done below for the custom loss.
    trainer = lightning.Trainer(
        accelerator="cpu", max_epochs=1, logger=None, enable_checkpointing=False
    )
    trainer.fit(model, datamodule_weights)

    # use custom loss
    print("custom loss")
    trainer = lightning.Trainer(
        accelerator="cpu", max_epochs=1, logger=None, enable_checkpointing=False
    )
    model = RegressionCV(layers=[2, 10, 10, 1])
    # The targets are 1D (batch,) while the predictions are (batch, 1):
    # align the shapes explicitly, otherwise the subtraction broadcasts to a
    # (batch, batch) matrix and the loss averages over all sample pairs.
    model.loss_fn = lambda y, y_ref: (y - y_ref.reshape(y.shape)).abs().mean()
    trainer.fit(model, datamodule)