Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def forward(
"visibility": visibility_loss.detach(),
}

return loss * batch_size, sub_losses
return loss, sub_losses

def _preprocess_kpts_target(
self, kpts_target: Tensor, batch_size: int, scale_tensor: Tensor
Expand Down
7 changes: 7 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,9 @@ def pytest_collection_modifyitems(items: list[Function]):
elif "test_combinations.py" in path:
item.add_marker(pytest.mark.combinations)
item.add_marker(pytest.mark.order(3))
elif "test_overfit_convergence.py" in path:
item.add_marker(pytest.mark.overfit_convergence)
item.add_marker(pytest.mark.order(4))
else:
item.add_marker(pytest.mark.misc)
item.add_marker(pytest.mark.order(1))
Expand All @@ -416,3 +419,7 @@ def pytest_configure(config: Config):
config.addinivalue_line(
"markers", "misc: mark test as a miscellaneous test"
)
config.addinivalue_line(
"markers",
"overfit_convergence: mark test as an overfit convergence test",
)
104 changes: 104 additions & 0 deletions tests/integration/test_overfit_convergence.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
from collections.abc import Generator
from pathlib import Path

import pytest
import torch
from luxonis_ml.typing import Params

from luxonis_train.config import Config
from luxonis_train.core import LuxonisModel
from tests.conftest import LuxonisTestDatasets
from tests.integration.backbone_model_utils import (
prepare_predefined_model_config,
)

# Base (un-scaled) loss weights for the heavier predefined heads. The shipped
# configs multiply these weights by accumulate_grad_batches=8, which would
# inflate the absolute loss values this test thresholds against, so the raw
# base weights are pinned explicitly here. Keys are dotted config paths
# consumed as CLI-style overrides.
_DETECTION_LOSS_PARAMS = {
    "model.predefined_model.params.loss_params.iou_loss_weight": 2.5,
    "model.predefined_model.params.loss_params.class_loss_weight": 1.0,
}
_INSTANCE_SEG_LOSS_PARAMS = {
    "model.predefined_model.params.loss_params.bbox_loss_weight": 7.5,
    "model.predefined_model.params.loss_params.class_loss_weight": 0.5,
    "model.predefined_model.params.loss_params.dfl_loss_weight": 1.5,
}
_KEYPOINT_LOSS_PARAMS = {
    "model.predefined_model.params.loss_params.iou_loss_weight": 7.5,
    "model.predefined_model.params.loss_params.class_loss_weight": 0.5,
    "model.predefined_model.params.loss_params.regr_kpts_loss_weight": 12.0,
    "model.predefined_model.params.loss_params.vis_kpts_loss_weight": 1.0,
}

# Test matrix: (config_name, extra_opts, loss_threshold). Thresholds are the
# maximum acceptable total train loss after 200 single-batch epochs.
OVERFIT_MODELS = [
    ("classification_light_model", None, 1.0),
    ("detection_light_model", _DETECTION_LOSS_PARAMS, 5.0),
    ("segmentation_light_model", None, 1.0),
    # Instance seg sums several weighted losses (bbox 7.5 + dfl 1.5 + cls 0.5
    # plus a seg term; total weight ~17.0 presumably includes a default seg
    # weight of 7.5 from the config -- not visible here, verify). With 200
    # epochs at lr=1e-3 the total converges to ~6.4, not near zero.
    ("instance_segmentation_light_model", _INSTANCE_SEG_LOSS_PARAMS, 8.0),
    # Keypoint model sums 4 weighted losses (iou 7.5 + kpt_regr 12.0 + vis 1.0 + cls 0.5 = 21.0 total weight).
    # With 200 epochs at lr=1e-3 the total converges to ~9.1, not near zero.
    ("keypoint_bbox_light_model", _KEYPOINT_LOSS_PARAMS, 12.0),
    ("fomo_light_model", None, 1.0),
    ("anomaly_detection_model", None, 1.0),
    ("ocr_recognition_light_model", None, 1.0),
]


@pytest.fixture(autouse=True)
def reset_deterministic_state() -> Generator[None]:
    """Reset PyTorch deterministic state after each test.

    The cleanup is wrapped in ``try``/``finally`` so the global flag is
    cleared even if an exception (e.g. ``GeneratorExit``) is thrown into
    the fixture generator; a bare post-``yield`` statement would be
    skipped in that case, leaking deterministic mode into later tests.
    """
    try:
        yield
    finally:
        torch.use_deterministic_algorithms(False)


@pytest.mark.parametrize(
    ("config_name", "extra_opts", "loss_threshold"),
    OVERFIT_MODELS,
    ids=[name for name, *_ in OVERFIT_MODELS],
)
def test_overfit_convergence(
    config_name: str,
    extra_opts: dict | None,
    loss_threshold: float,
    opts: Params,
    test_datasets: LuxonisTestDatasets,
    save_dir: Path,
):
    """Train on a single batch and assert the final loss converges."""
    # Artifact-producing callbacks are switched off: only the training
    # loss trajectory matters for this test.
    disabled_callbacks = [
        {"name": "TestOnTrainEnd", "active": False},
        {"name": "ExportOnTrainEnd", "active": False},
        {"name": "ArchiveOnTrainEnd", "active": False},
        {"name": "ConvertOnTrainEnd", "active": False},
        {"name": "UploadCheckpoint", "active": False},
    ]
    # Overfit a single fixed batch with a constant LR and no smart
    # auto-population so the run is as deterministic as possible.
    opts |= {
        "trainer.overfit_batches": 1,
        "trainer.seed": 42,
        "trainer.epochs": 200,
        "trainer.batch_size": 4,
        "trainer.validation_interval": 200,
        "trainer.smart_cfg_auto_populate": False,
        "trainer.training_strategy": None,
        "trainer.optimizer": {"name": "Adam", "params": {"lr": 0.001}},
        "trainer.scheduler": {"name": "ConstantLR"},
        "trainer.callbacks": disabled_callbacks,
        "tracker.save_directory": str(save_dir),
    }
    if extra_opts:
        opts |= extra_opts

    config_file, opts, _ = prepare_predefined_model_config(
        config_name, opts, test_datasets
    )

    config = Config.get_config(config_file, opts)
    trained = LuxonisModel(config)
    trained.train()

    final_loss = trained.pl_trainer.callback_metrics["train/loss"].item()
    assert final_loss < loss_threshold, (
        f"{config_name} loss did not converge: {final_loss:.4f} >= {loss_threshold}"
    )
Loading