From 2f9c10ed4ab1554077ab119fee8f01c71c8233f4 Mon Sep 17 00:00:00 2001
From: heyufan1995 <heyufan1995@gmail.com>
Date: Tue, 27 Aug 2024 16:32:31 -0400
Subject: [PATCH 1/8] Fix point bugs and finetuning issue

Signed-off-by: heyufan1995 <heyufan1995@gmail.com>
---
 models/vista3d/configs/train.json           |  7 +++---
 models/vista3d/configs/train_continual.json | 20 +++++++---------
 models/vista3d/docs/README.md               | 26 +++++++++++++++------
 models/vista3d/scripts/evaluator.py         | 15 ++++++------
 models/vista3d/scripts/inferer.py           |  4 +++-
 5 files changed, 43 insertions(+), 29 deletions(-)

diff --git a/models/vista3d/configs/train.json b/models/vista3d/configs/train.json
index f6fc6bbb..9b80f843 100644
--- a/models/vista3d/configs/train.json
+++ b/models/vista3d/configs/train.json
@@ -16,7 +16,7 @@
     "early_stop": false,
     "fold": 0,
     "device": "$torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')",
-    "epochs": 100,
+    "epochs": 5,
     "val_interval": 1,
     "val_at_start": false,
     "sw_overlap": 0.625,
@@ -28,8 +28,8 @@
     "max_prompt": null,
     "max_backprompt": null,
     "max_foreprompt": null,
-    "drop_label_prob": 0.5,
-    "drop_point_prob": 0.5,
+    "drop_label_prob": 0.25,
+    "drop_point_prob": 0.25,
     "exclude_background": true,
     "use_cfp": true,
     "label_set": null,
@@ -379,6 +379,7 @@
                 "exclude_background": "@exclude_background",
                 "use_cfp": "@use_cfp",
                 "label_set": "@label_set",
+                "val_head": "auto",
                 "user_prompt": false
             }
         }
diff --git a/models/vista3d/configs/train_continual.json b/models/vista3d/configs/train_continual.json
index 3d9651ff..ce8a85b3 100644
--- a/models/vista3d/configs/train_continual.json
+++ b/models/vista3d/configs/train_continual.json
@@ -6,8 +6,8 @@
     "finetune_model_path": "$@bundle_root + '/models/model.pt'",
     "n_train_samples": 10,
     "n_val_samples": 10,
-    "val_interval": 40,
-    "learning_rate": 0.0001,
+    "val_interval": 1,
+    "learning_rate": 0.00005,
     "lr_schedule#activate": false,
     "loss#smooth_dr": 0.01,
     "loss#smooth_nr": 0.0001,
@@ -18,18 +18,14 @@
         "default": [
             [
                 1,
-                2
-            ],
-            [
-                2,
-                254
+                3
             ]
         ]
     },
     "patch_size": [
-        160,
-        160,
-        160
+        128,
+        128,
+        128
     ],
     "label_set": "$[0] + list(x[1] for x in @label_mappings#default)",
     "val_label_set": "$[0] + list(x[0] for x in @label_mappings#default)",
@@ -99,11 +95,13 @@
         "num_workers": "@num_cache_workers",
         "progress": "@show_cache_progress"
     },
+    "validate#evaluator#hyper_kwargs#val_label_set": "$list(range(len(@val_label_set)))", 
     "validate#preprocessing#transforms": "$@train#deterministic_transforms + [@valid_remap]",
     "valid_remap": {
         "_target_": "monai.apps.vista3d.transforms.Relabeld",
         "keys": "label",
         "label_mappings": "${'default': [[c, i] for i, c in enumerate(@val_label_set)]}",
         "dtype": "$torch.uint8"
-    }
+    },
+    "validate#handlers#3#key_metric_filename": "model_finetune.pt" 
 }
diff --git a/models/vista3d/docs/README.md b/models/vista3d/docs/README.md
index 966dba99..fd600ae2 100644
--- a/models/vista3d/docs/README.md
+++ b/models/vista3d/docs/README.md
@@ -90,10 +90,9 @@ torchrun --standalone --nnodes=1 --nproc_per_node=2 -m monai.bundle run --config
 #### Execute continual learning
 When finetuning with new class names, please update `configs/train_continual.json`'s `label_mappings` accordingly.
 
-The current label mapping `[[1, 2], [2, 254]]` indicates that training labels' class indices `1` and `2`, are mapped
-to the VISTA model's class `2` and `254` respectively (format `[[src_class_0, dst_class_0], [src_class_1, dst_class_1], ...]`).
-Since `254` is not used by VISTA, it is therefore indicating
-training with a new class (the training label's class `2` will be trained as VISTA class `254`).
+The current label mapping `[[1, 3]]` indicates that the training labels' class index `1` is mapped
+to the VISTA model's class `3` (format `[[src_class_0, dst_class_0], [src_class_1, dst_class_1], ...]`). For new classes, users
+can map to any value larger than 132.
 
 `label_set` is used to identify the VISTA model classes for providing training prompts.
 `val_label_set` is used to identify the original training label classes for computing foreground/background mask during validation.
@@ -103,7 +102,10 @@ The default configs for both variables are derived from the `label_mappings` con
 "label_set": "$[0] + list(x[1] for x in @label_mappings#default)"
 "val_label_set": "$[0] + list(x[0] for x in @label_mappings#default)"
 ```
-
+`drop_label_prob` and `drop_point_prob` means percentage to remove class prompts and point prompts respectively. If `drop_point_prob`=1, the
+model is only finetuning for automatic segmentation, while `drop_label_prob`=1 means only finetuning for interactive segmentation. The VISTA3D foundation
+model is trained with interactive only (drop_label_prob=1) and then froze the point branch and trained with fully automatic segmentation (`drop_point_prob=1`).
+In this bundle, the training is simplified by jointly training with class prompts and point prompts. 
 
 Single-GPU:
 ```
@@ -117,11 +119,21 @@ torchrun --nnodes=1 --nproc_per_node=8 -m monai.bundle run \
 	--config_file="['configs/train.json','configs/train_continual.json','configs/multi_gpu_train.json']" --epochs=320 --learning_rate=0.005
 ```
 
-The patch size parameter is defined in `configs/train_continual.json`: `"patch_size": [160, 160, 160]`, and this works for the use cases
+The patch size parameter is defined in `configs/train_continual.json`: `"patch_size": [128, 128, 128]`, and this works for the use cases
 of extending the current model to segment a few novel classes. Finetuning all supported classes may require large GPU memory and carefully designed
 multi-stage training processes.
 
-Changing `patch_size` to a smaller value such as `"patch_size": [128, 128, 128]` used in `configs/train.json` would reduce the training memory footprint.
+Changing `patch_size` to a smaller value such as `"patch_size": [96, 96, 96]` used in `configs/train.json` would reduce the training memory footprint.
+
+In `train_continual.json`, only subset of training and validation data are used, change `n_train_samples` and `n_val_samples` to use full dataset. 
+
+In `train.json`, `validate[evaluator][val_head]` can be `auto` and `point`. If `auto`, the validation results will be automatic segmentation. If `point`,
+the validation results will be sampling one positive point per object per patch. The validation scheme of combining auto and point is deprecated due to 
+speed issue. 
+
+Note: `valid_remap` is a transform that maps the groundtruth label indexes, e.g. [0,2,3,5,6] to sequential and continuous labels [0,1,2,3,4]. This is
+required by monai dice calculation. It is not related to mapping label index to VISTA3D defined global class index. The validation data is not mapped
+to the VISTA3D global class index. 
 
 #### Execute evaluation
 `n_train_samples` and `n_val_samples` are used to specify the number of samples to use for training and validation respectively.
diff --git a/models/vista3d/scripts/evaluator.py b/models/vista3d/scripts/evaluator.py
index 6b1d4a89..59e34298 100644
--- a/models/vista3d/scripts/evaluator.py
+++ b/models/vista3d/scripts/evaluator.py
@@ -207,6 +207,8 @@ def _iteration(self, engine: SupervisedEvaluator, batchdata: dict[str, torch.Ten
         if batchdata is None:
             raise ValueError("Must provide batch data for current iteration.")
         label_set = engine.hyper_kwargs.get("label_set", None)
+        # this validation label set should be consistent with 'labels.unique()', used to generate fg/bg points
+        val_label_set = engine.hyper_kwargs.get("val_label_set", label_set)
         # If user provide prompts in the inference, input image must contain original affine.
         # the point coordinates are from the original_affine space, while image here is after preprocess transforms.
         if engine.hyper_kwargs["user_prompt"]:
@@ -242,18 +244,17 @@ def _iteration(self, engine: SupervisedEvaluator, batchdata: dict[str, torch.Ten
                 output_classes = engine.hyper_kwargs["output_classes"]
                 label_set = np.arange(output_classes).tolist()
             label_prompt = torch.tensor(label_set).to(engine.state.device).unsqueeze(-1)
-            # point prompt is generated withing vista3d,provide empty points
+            # point prompt is generated within vista3d, provide empty points
             points = torch.zeros(label_prompt.shape[0], 1, 3).to(inputs.device)
             point_labels = -1 + torch.zeros(label_prompt.shape[0], 1).to(inputs.device)
-            if engine.hyper_kwargs["drop_point_prob"] > 0.99:
+            # validation for either auto or point.
+            if engine.hyper_kwargs.get("val_head", "auto") == 'auto':
                 # automatic only validation
-                points = None
-                point_labels = None
-            if engine.hyper_kwargs["drop_label_prob"] > 0.99:
+                # remove val_label_set, vista3d will not sample points from gt labels.
+                val_label_set = None
+            else:
                 # point only validation
                 label_prompt = None
-        # this validation label set should be consistent with 'labels.unique()', used to generate fg/bg points
-        val_label_set = engine.hyper_kwargs.get("val_label_set", label_set)
 
         # put iteration outputs into engine.state
         engine.state.output = {Keys.IMAGE: inputs, Keys.LABEL: labels}
diff --git a/models/vista3d/scripts/inferer.py b/models/vista3d/scripts/inferer.py
index c30ba386..b7c9bc43 100644
--- a/models/vista3d/scripts/inferer.py
+++ b/models/vista3d/scripts/inferer.py
@@ -25,7 +25,7 @@ class Vista3dInferer(Inferer):
     Args:
         roi_size: the sliding window patch size.
         overlap: sliding window overlap ratio.
-        use_cfp: use class prompt for point head.
+        use_cfp: use class prompt for point head. Deprecated.
     """
 
     def __init__(self, roi_size, overlap, use_cfp, use_point_window=False, sw_batch_size=1) -> None:
@@ -91,6 +91,7 @@ def __call__(
                 roi_size=self.roi_size,
                 sw_batch_size=self.sw_batch_size,
                 transpose=True,
+                with_coord=True,
                 predictor=network,
                 mode="gaussian",
                 sw_device=device,
@@ -113,6 +114,7 @@ def __call__(
                 roi_size=self.roi_size,
                 sw_batch_size=self.sw_batch_size,
                 transpose=True,
+                with_coord=True,
                 predictor=network,
                 mode="gaussian",
                 sw_device=device,

From d214d0642c613466dc5290aa88e93996128f607a Mon Sep 17 00:00:00 2001
From: heyufan1995 <heyufan1995@gmail.com>
Date: Tue, 27 Aug 2024 16:38:53 -0400
Subject: [PATCH 2/8] fixes race condition when InvertD is used along with
 ThreadDataLoader

Signed-off-by: heyufan1995 <heyufan1995@gmail.com>
---
 models/vista3d/configs/inference.json | 3 ++-
 models/vista3d/scripts/evaluator.py   | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/models/vista3d/configs/inference.json b/models/vista3d/configs/inference.json
index fd158695..6e654a78 100644
--- a/models/vista3d/configs/inference.json
+++ b/models/vista3d/configs/inference.json
@@ -4,6 +4,7 @@
         "$import os",
         "$import scripts",
         "$import numpy as np",
+        "$import copy",
         "$import json"
     ],
     "bundle_root": "./",
@@ -146,7 +147,7 @@
             {
                 "_target_": "Invertd",
                 "keys": "pred",
-                "transform": "@preprocessing",
+                "transform": "$copy.deepcopy(@preprocessing)",
                 "orig_keys": "@image_key",
                 "nearest_interp": true,
                 "to_tensor": true
diff --git a/models/vista3d/scripts/evaluator.py b/models/vista3d/scripts/evaluator.py
index 59e34298..21aec86a 100644
--- a/models/vista3d/scripts/evaluator.py
+++ b/models/vista3d/scripts/evaluator.py
@@ -19,7 +19,7 @@
 from monai.engines.evaluator import SupervisedEvaluator
 from monai.engines.utils import IterationEvents, default_metric_cmp_fn, default_prepare_batch
 from monai.inferers import Inferer, SimpleInferer
-from monai.transforms import Transform
+from monai.transforms import Transform, reset_ops_id
 from monai.utils import ForwardMode, RankFilter, min_version, optional_import
 from monai.utils.enums import CommonKeys as Keys
 from torch.utils.data import DataLoader
@@ -281,6 +281,7 @@ def _iteration(self, engine: SupervisedEvaluator, batchdata: dict[str, torch.Ten
                     labels=labels,
                     label_set=val_label_set,
                 )
+        inputs = reset_ops_id(inputs)
         # Add dim 0 for decollate batch
         engine.state.output["label_prompt"] = label_prompt.unsqueeze(0) if label_prompt is not None else None
         engine.state.output["points"] = points.unsqueeze(0) if points is not None else None

From 14e238592b5c3097d9de2f70ac75cee82d5f125d Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 27 Aug 2024 20:44:39 +0000
Subject: [PATCH 3/8] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 models/vista3d/configs/train_continual.json |  6 +++---
 models/vista3d/docs/README.md               | 10 +++++-----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/models/vista3d/configs/train_continual.json b/models/vista3d/configs/train_continual.json
index ce8a85b3..b700a178 100644
--- a/models/vista3d/configs/train_continual.json
+++ b/models/vista3d/configs/train_continual.json
@@ -7,7 +7,7 @@
     "n_train_samples": 10,
     "n_val_samples": 10,
     "val_interval": 1,
-    "learning_rate": 0.00005,
+    "learning_rate": 5e-05,
     "lr_schedule#activate": false,
     "loss#smooth_dr": 0.01,
     "loss#smooth_nr": 0.0001,
@@ -95,7 +95,7 @@
         "num_workers": "@num_cache_workers",
         "progress": "@show_cache_progress"
     },
-    "validate#evaluator#hyper_kwargs#val_label_set": "$list(range(len(@val_label_set)))", 
+    "validate#evaluator#hyper_kwargs#val_label_set": "$list(range(len(@val_label_set)))",
     "validate#preprocessing#transforms": "$@train#deterministic_transforms + [@valid_remap]",
     "valid_remap": {
         "_target_": "monai.apps.vista3d.transforms.Relabeld",
@@ -103,5 +103,5 @@
         "label_mappings": "${'default': [[c, i] for i, c in enumerate(@val_label_set)]}",
         "dtype": "$torch.uint8"
     },
-    "validate#handlers#3#key_metric_filename": "model_finetune.pt" 
+    "validate#handlers#3#key_metric_filename": "model_finetune.pt"
 }
diff --git a/models/vista3d/docs/README.md b/models/vista3d/docs/README.md
index fd600ae2..39268acb 100644
--- a/models/vista3d/docs/README.md
+++ b/models/vista3d/docs/README.md
@@ -105,7 +105,7 @@ The default configs for both variables are derived from the `label_mappings` con
 `drop_label_prob` and `drop_point_prob` means percentage to remove class prompts and point prompts respectively. If `drop_point_prob`=1, the
 model is only finetuning for automatic segmentation, while `drop_label_prob`=1 means only finetuning for interactive segmentation. The VISTA3D foundation
 model is trained with interactive only (drop_label_prob=1) and then froze the point branch and trained with fully automatic segmentation (`drop_point_prob=1`).
-In this bundle, the training is simplified by jointly training with class prompts and point prompts. 
+In this bundle, the training is simplified by jointly training with class prompts and point prompts.
 
 Single-GPU:
 ```
@@ -125,15 +125,15 @@ multi-stage training processes.
 
 Changing `patch_size` to a smaller value such as `"patch_size": [96, 96, 96]` used in `configs/train.json` would reduce the training memory footprint.
 
-In `train_continual.json`, only subset of training and validation data are used, change `n_train_samples` and `n_val_samples` to use full dataset. 
+In `train_continual.json`, only a subset of the training and validation data is used; change `n_train_samples` and `n_val_samples` to use the full dataset.
 
 In `train.json`, `validate[evaluator][val_head]` can be `auto` and `point`. If `auto`, the validation results will be automatic segmentation. If `point`,
-the validation results will be sampling one positive point per object per patch. The validation scheme of combining auto and point is deprecated due to 
-speed issue. 
+the validation results will be obtained by sampling one positive point per object per patch. The validation scheme of combining auto and point is deprecated due to
+speed issues.
 
 Note: `valid_remap` is a transform that maps the groundtruth label indexes, e.g. [0,2,3,5,6] to sequential and continuous labels [0,1,2,3,4]. This is
 required by monai dice calculation. It is not related to mapping label index to VISTA3D defined global class index. The validation data is not mapped
-to the VISTA3D global class index. 
+to the VISTA3D global class index.
 
 #### Execute evaluation
 `n_train_samples` and `n_val_samples` are used to specify the number of samples to use for training and validation respectively.

From a9e030c94a740a115828302d5142b02c4669537b Mon Sep 17 00:00:00 2001
From: heyufan1995 <heyufan1995@gmail.com>
Date: Thu, 29 Aug 2024 10:38:03 -0400
Subject: [PATCH 4/8] Fix comments

Signed-off-by: heyufan1995 <heyufan1995@gmail.com>
---
 models/vista3d/configs/inference.json | 3 ---
 models/vista3d/configs/train.json     | 4 ----
 models/vista3d/docs/README.md         | 4 ++--
 models/vista3d/scripts/inferer.py     | 6 +-----
 models/vista3d/scripts/trainer.py     | 3 +--
 5 files changed, 4 insertions(+), 16 deletions(-)

diff --git a/models/vista3d/configs/inference.json b/models/vista3d/configs/inference.json
index 6e654a78..ada91042 100644
--- a/models/vista3d/configs/inference.json
+++ b/models/vista3d/configs/inference.json
@@ -48,7 +48,6 @@
         128
     ],
     "device": "$torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')",
-    "use_cfp": false,
     "use_point_window": true,
     "network_def": "$monai.networks.nets.vista3d132(in_channels=@input_channels)",
     "network": "$@network_def.to(@device)",
@@ -128,7 +127,6 @@
         "roi_size": "@patch_size",
         "overlap": 0.5,
         "sw_batch_size": "@sw_batch_size",
-        "use_cfp": "@use_cfp",
         "use_point_window": "@use_point_window"
     },
     "postprocessing": {
@@ -193,7 +191,6 @@
         "val_handlers": "@handlers",
         "amp": true,
         "hyper_kwargs": {
-            "use_cfp": "@use_cfp",
             "user_prompt": true,
             "everything_labels": "@everything_labels"
         }
diff --git a/models/vista3d/configs/train.json b/models/vista3d/configs/train.json
index 9b80f843..aca8cb9a 100644
--- a/models/vista3d/configs/train.json
+++ b/models/vista3d/configs/train.json
@@ -31,7 +31,6 @@
     "drop_label_prob": 0.25,
     "drop_point_prob": 0.25,
     "exclude_background": true,
-    "use_cfp": true,
     "label_set": null,
     "val_label_set": "@label_set",
     "amp": true,
@@ -277,7 +276,6 @@
                 "drop_label_prob": "@drop_label_prob",
                 "drop_point_prob": "@drop_point_prob",
                 "exclude_background": "@exclude_background",
-                "use_cfp": "@use_cfp",
                 "label_set": "@label_set",
                 "patch_size": "@patch_size",
                 "user_prompt": false
@@ -315,7 +313,6 @@
             "_target_": "scripts.inferer.Vista3dInferer",
             "roi_size": "@patch_size_valid",
             "overlap": "@sw_overlap",
-            "use_cfp": "@use_cfp"
         },
         "handlers": [
             {
@@ -377,7 +374,6 @@
                 "drop_label_prob": "@drop_label_prob",
                 "drop_point_prob": "@drop_point_prob",
                 "exclude_background": "@exclude_background",
-                "use_cfp": "@use_cfp",
                 "label_set": "@label_set",
                 "val_head": "auto",
                 "user_prompt": false
diff --git a/models/vista3d/docs/README.md b/models/vista3d/docs/README.md
index fd600ae2..14f2976b 100644
--- a/models/vista3d/docs/README.md
+++ b/models/vista3d/docs/README.md
@@ -102,8 +102,8 @@ The default configs for both variables are derived from the `label_mappings` con
 "label_set": "$[0] + list(x[1] for x in @label_mappings#default)"
 "val_label_set": "$[0] + list(x[0] for x in @label_mappings#default)"
 ```
-`drop_label_prob` and `drop_point_prob` means percentage to remove class prompts and point prompts respectively. If `drop_point_prob`=1, the
-model is only finetuning for automatic segmentation, while `drop_label_prob`=1 means only finetuning for interactive segmentation. The VISTA3D foundation
+`drop_label_prob` and `drop_point_prob` are the probabilities of dropping class prompts and point prompts, respectively. If `drop_point_prob=1`, the
+model is finetuned only for automatic segmentation, while `drop_label_prob=1` means it is finetuned only for interactive segmentation. The VISTA3D foundation
 model is trained with interactive only (drop_label_prob=1) and then froze the point branch and trained with fully automatic segmentation (`drop_point_prob=1`).
 In this bundle, the training is simplified by jointly training with class prompts and point prompts. 
 
diff --git a/models/vista3d/scripts/inferer.py b/models/vista3d/scripts/inferer.py
index b7c9bc43..25f48525 100644
--- a/models/vista3d/scripts/inferer.py
+++ b/models/vista3d/scripts/inferer.py
@@ -25,14 +25,12 @@ class Vista3dInferer(Inferer):
     Args:
         roi_size: the sliding window patch size.
         overlap: sliding window overlap ratio.
-        use_cfp: use class prompt for point head. Deprecated.
     """
 
-    def __init__(self, roi_size, overlap, use_cfp, use_point_window=False, sw_batch_size=1) -> None:
+    def __init__(self, roi_size, overlap, use_point_window=False, sw_batch_size=1) -> None:
         Inferer.__init__(self)
         self.roi_size = roi_size
         self.overlap = overlap
-        self.use_cfp = use_cfp
         self.sw_batch_size = sw_batch_size
         self.use_point_window = use_point_window
         self.sliding_window_inferer = point_based_window_inferer if use_point_window else sliding_window_inference
@@ -104,7 +102,6 @@ def __call__(
                 prev_mask=prev_mask,
                 labels=labels,
                 label_set=label_set,
-                use_cfp=self.use_cfp,
             )
         except Exception:
             val_outputs = None
@@ -127,6 +124,5 @@ def __call__(
                 prev_mask=prev_mask,
                 labels=labels,
                 label_set=label_set,
-                use_cfp=self.use_cfp,
             )
         return val_outputs
diff --git a/models/vista3d/scripts/trainer.py b/models/vista3d/scripts/trainer.py
index 7a559afc..e96daf9c 100644
--- a/models/vista3d/scripts/trainer.py
+++ b/models/vista3d/scripts/trainer.py
@@ -182,8 +182,7 @@ def _compute_pred_loss():
                 input_images=inputs,
                 point_coords=point,
                 point_labels=point_label,
-                class_vector=label_prompt,
-                use_cfp=engine.hyper_kwargs["use_cfp"],
+                class_vector=label_prompt
             )
             # engine.state.output[Keys.PRED] = outputs
             engine.fire_event(IterationEvents.FORWARD_COMPLETED)

From 4989040454ad15e65e1d9b87a31c8d5741659c80 Mon Sep 17 00:00:00 2001
From: Yiheng Wang <vennw@nvidia.com>
Date: Fri, 30 Aug 2024 04:21:36 +0000
Subject: [PATCH 5/8] fix type error

Signed-off-by: Yiheng Wang <vennw@nvidia.com>
---
 models/vista3d/configs/metadata.json | 3 ++-
 models/vista3d/scripts/evaluator.py  | 2 +-
 models/vista3d/scripts/trainer.py    | 5 +----
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/models/vista3d/configs/metadata.json b/models/vista3d/configs/metadata.json
index 0be351c5..5a1b31c1 100644
--- a/models/vista3d/configs/metadata.json
+++ b/models/vista3d/configs/metadata.json
@@ -1,7 +1,8 @@
 {
     "schema": "https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/meta_schema_20240725.json",
-    "version": "0.4.2",
+    "version": "0.4.3",
     "changelog": {
+        "0.4.3": "fix CL and batch infer issues",
         "0.4.2": "use MONAI components for network and utils",
         "0.4.1": "initial OSS version"
     },
diff --git a/models/vista3d/scripts/evaluator.py b/models/vista3d/scripts/evaluator.py
index 21aec86a..74959a36 100644
--- a/models/vista3d/scripts/evaluator.py
+++ b/models/vista3d/scripts/evaluator.py
@@ -248,7 +248,7 @@ def _iteration(self, engine: SupervisedEvaluator, batchdata: dict[str, torch.Ten
             points = torch.zeros(label_prompt.shape[0], 1, 3).to(inputs.device)
             point_labels = -1 + torch.zeros(label_prompt.shape[0], 1).to(inputs.device)
             # validation for either auto or point.
-            if engine.hyper_kwargs.get("val_head", "auto") == 'auto':
+            if engine.hyper_kwargs.get("val_head", "auto") == "auto":
                 # automatic only validation
                 # remove val_label_set, vista3d will not sample points from gt labels.
                 val_label_set = None
diff --git a/models/vista3d/scripts/trainer.py b/models/vista3d/scripts/trainer.py
index e96daf9c..b1b522ef 100644
--- a/models/vista3d/scripts/trainer.py
+++ b/models/vista3d/scripts/trainer.py
@@ -179,10 +179,7 @@ def _iteration(self, engine, batchdata: dict[str, torch.Tensor]):
 
         def _compute_pred_loss():
             outputs = engine.network(
-                input_images=inputs,
-                point_coords=point,
-                point_labels=point_label,
-                class_vector=label_prompt
+                input_images=inputs, point_coords=point, point_labels=point_label, class_vector=label_prompt
             )
             # engine.state.output[Keys.PRED] = outputs
             engine.fire_event(IterationEvents.FORWARD_COMPLETED)

From 048072161c3973e4382e20c7c91775ab1acd4319 Mon Sep 17 00:00:00 2001
From: Yiheng Wang <vennw@nvidia.com>
Date: Fri, 30 Aug 2024 12:53:22 +0800
Subject: [PATCH 6/8] fix extra comma

Signed-off-by: Yiheng Wang <vennw@nvidia.com>
---
 models/vista3d/configs/train.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/models/vista3d/configs/train.json b/models/vista3d/configs/train.json
index aca8cb9a..614618e1 100644
--- a/models/vista3d/configs/train.json
+++ b/models/vista3d/configs/train.json
@@ -312,7 +312,7 @@
         "inferer": {
             "_target_": "scripts.inferer.Vista3dInferer",
             "roi_size": "@patch_size_valid",
-            "overlap": "@sw_overlap",
+            "overlap": "@sw_overlap"
         },
         "handlers": [
             {

From aa6965e3b6e64d4ca57d99524b9de957e10467de Mon Sep 17 00:00:00 2001
From: Yiheng Wang <vennw@nvidia.com>
Date: Fri, 30 Aug 2024 13:23:19 +0800
Subject: [PATCH 7/8] update vista2d weights

Signed-off-by: Yiheng Wang <vennw@nvidia.com>
---
 models/vista2d/configs/metadata.json | 3 ++-
 models/vista2d/large_files.yml       | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/models/vista2d/configs/metadata.json b/models/vista2d/configs/metadata.json
index 91b8fbcd..05ad27a9 100644
--- a/models/vista2d/configs/metadata.json
+++ b/models/vista2d/configs/metadata.json
@@ -1,7 +1,8 @@
 {
     "schema": "https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/meta_schema_20240725.json",
-    "version": "0.2.2",
+    "version": "0.2.3",
     "changelog": {
+        "0.2.3": "update weights link",
         "0.2.2": "update to use monai components",
         "0.2.1": "initial OSS version"
     },
diff --git a/models/vista2d/large_files.yml b/models/vista2d/large_files.yml
index 75fe41d5..82a7c411 100644
--- a/models/vista2d/large_files.yml
+++ b/models/vista2d/large_files.yml
@@ -2,6 +2,6 @@ large_files:
 - path: "models/sam_vit_b_01ec64.pth"
   url: "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth"
 - path: "models/model.pt"
-  url: "https://drive.google.com/file/d/1odLhoOtlxxbEyRq-gvenP8bC0-mw63ng/view?usp=drive_link"
+  url: "https://github.com/Project-MONAI/model-zoo/releases/download/model_zoo_bundle_data/vista2d_model.pt"
 - path: "datalists.zip"
   url: "https://github.com/Project-MONAI/model-zoo/releases/download/model_zoo_bundle_data/vista2d_datalists.zip"

From 381844c414edef3b95f52bc739b180f23d595b65 Mon Sep 17 00:00:00 2001
From: Yiheng Wang <vennw@nvidia.com>
Date: Mon, 2 Sep 2024 16:45:59 +0800
Subject: [PATCH 8/8] fix unclosed warning

Signed-off-by: Yiheng Wang <vennw@nvidia.com>
---
 models/vista3d/configs/inference.json | 9 ++++-----
 models/vista3d/configs/train.json     | 2 +-
 models/vista3d/docs/README.md         | 2 +-
 models/vista3d/scripts/evaluator.py   | 6 +++---
 models/vista3d/scripts/trainer.py     | 6 +++---
 5 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/models/vista3d/configs/inference.json b/models/vista3d/configs/inference.json
index ada91042..e374ff57 100644
--- a/models/vista3d/configs/inference.json
+++ b/models/vista3d/configs/inference.json
@@ -5,7 +5,8 @@
         "$import scripts",
         "$import numpy as np",
         "$import copy",
-        "$import json"
+        "$import json",
+        "$import pathlib"
     ],
     "bundle_root": "./",
     "image_key": "image",
@@ -14,12 +15,10 @@
     "output_dtype": "$np.float32",
     "output_postfix": "trans",
     "separate_folder": true,
-    "input_dict": "${'image': '/data/Task09_Spleen/imagesTr/spleen_10.nii.gz', 'label_prompt': [3]}",
+    "input_dict": "${'image': '/data/Task09_Spleen/imagesTr/spleen_10.nii.gz', 'label_prompt': [3]}",
     "everything_labels": "$list(set([i+1 for i in range(132)]) - set([2,16,18,20,21,23,24,25,26,27,128,129,130,131,132]))",
     "metadata_path": "$@bundle_root + '/configs/metadata.json'",
-    "metadata_file": "$open(@metadata_path,'r', encoding='utf8')",
-    "metadata": "$json.load(@metadata_file)",
-    "close_metadata_file": "$metadata_file.close()",
+    "metadata": "$json.loads(pathlib.Path(@metadata_path).read_text())",
     "labels_dict": "$@metadata['network_data_format']['outputs']['pred']['channel_def']",
     "subclass": {
         "2": [
diff --git a/models/vista3d/configs/train.json b/models/vista3d/configs/train.json
index 614618e1..b5d79b6e 100644
--- a/models/vista3d/configs/train.json
+++ b/models/vista3d/configs/train.json
@@ -16,7 +16,7 @@
     "early_stop": false,
     "fold": 0,
     "device": "$torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')",
-    "epochs": 5,
+    "epochs": 100,
     "val_interval": 1,
     "val_at_start": false,
     "sw_overlap": 0.625,
diff --git a/models/vista3d/docs/README.md b/models/vista3d/docs/README.md
index c91ade01..386796cf 100644
--- a/models/vista3d/docs/README.md
+++ b/models/vista3d/docs/README.md
@@ -65,7 +65,7 @@ python -m monai.bundle run --config_file configs/train.json --dataset_dir <actua
 #### Execute finetune:
 
 ```
-python -m monai.bundle run --config_file configs/train.json --finetune True
+python -m monai.bundle run --config_file configs/train.json --finetune True --epochs 5
 ```
 
 Please note that the path of model weights is "/models/model.pt", you can also override it by using `--finetune_model_path`:
diff --git a/models/vista3d/scripts/evaluator.py b/models/vista3d/scripts/evaluator.py
index 74959a36..ed4e715d 100644
--- a/models/vista3d/scripts/evaluator.py
+++ b/models/vista3d/scripts/evaluator.py
@@ -79,8 +79,8 @@ class Vista3dEvaluator(SupervisedEvaluator):
             default to `True`.
         to_kwargs: dict of other args for `prepare_batch` API when converting the input data, except for
             `device`, `non_blocking`.
-        amp_kwargs: dict of the args for `torch.cuda.amp.autocast()` API, for more details:
-            https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.autocast.
+        amp_kwargs: dict of the args for `torch.amp.autocast()` API, for more details:
+            https://pytorch.org/docs/stable/amp.html#torch.amp.autocast.
     """
 
     def __init__(
@@ -261,7 +261,7 @@ def _iteration(self, engine: SupervisedEvaluator, batchdata: dict[str, torch.Ten
         # execute forward computation
         with engine.mode(engine.network):
             if engine.amp:
-                with torch.cuda.amp.autocast(**engine.amp_kwargs):
+                with torch.amp.autocast("cuda", **engine.amp_kwargs):
                     engine.state.output[Keys.PRED] = engine.inferer(
                         inputs=inputs,
                         network=engine.network,
diff --git a/models/vista3d/scripts/trainer.py b/models/vista3d/scripts/trainer.py
index b1b522ef..701c6894 100644
--- a/models/vista3d/scripts/trainer.py
+++ b/models/vista3d/scripts/trainer.py
@@ -82,8 +82,8 @@ class Vista3dTrainer(Trainer):
             more details: https://pytorch.org/docs/stable/generated/torch.optim.Optimizer.zero_grad.html.
         to_kwargs: dict of other args for `prepare_batch` API when converting the input data, except for
             `device`, `non_blocking`.
-        amp_kwargs: dict of the args for `torch.cuda.amp.autocast()` API, for more details:
-            https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.autocast.
+        amp_kwargs: dict of the args for `torch.amp.autocast()` API, for more details:
+            https://pytorch.org/docs/stable/amp.html#torch.amp.autocast.
     """
 
     def __init__(
@@ -197,7 +197,7 @@ def _compute_pred_loss():
         engine.optimizer.zero_grad(set_to_none=engine.optim_set_to_none)
 
         if engine.amp and engine.scaler is not None:
-            with torch.cuda.amp.autocast(**engine.amp_kwargs):
+            with torch.amp.autocast("cuda", **engine.amp_kwargs):
                 _compute_pred_loss()
             engine.scaler.scale(engine.state.output[Keys.LOSS]).backward()
             engine.fire_event(IterationEvents.BACKWARD_COMPLETED)