Project-MONAI
diff --git a/‎ci/unit_tests/test_maisi_ct_generative.py‎
Lines changed: 1 addition & 12 deletions b/‎ci/unit_tests/test_maisi_ct_generative.py‎
Lines changed: 1 addition & 12 deletions
diff --git a/‎models/maisi_ct_generative/configs/inference.json‎
Lines changed: 27 additions & 21 deletions b/‎models/maisi_ct_generative/configs/inference.json‎
Lines changed: 27 additions & 21 deletions
diff --git a/‎models/maisi_ct_generative/configs/metadata.json‎
Lines changed: 2 additions & 1 deletion b/‎models/maisi_ct_generative/configs/metadata.json‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎models/maisi_ct_generative/docs/README.md‎
Lines changed: 5 additions & 3 deletions b/‎models/maisi_ct_generative/docs/README.md‎
Lines changed: 5 additions & 3 deletions
diff --git a/‎models/maisi_ct_generative/large_files.yml‎
Lines changed: 9 additions & 1 deletion b/‎models/maisi_ct_generative/large_files.yml‎
Lines changed: 9 additions & 1 deletion
diff --git a/‎models/maisi_ct_generative/scripts/augmentation.py‎
Lines changed: 20 additions & 13 deletions b/‎models/maisi_ct_generative/scripts/augmentation.py‎
Lines changed: 20 additions & 13 deletions
@@ -85,17 +85,6 @@
     }
 ]
 
-TEST_CASE_INFER_ERROR = [
-    {
-        "bundle_root": "models/maisi_ct_generative",
-        "num_output_samples": 1,
-        "output_size": [256, 256, 256],
-        "body_region": ["head"],
-        "anatomy_list": ["colon cancer primaries"],
-    },
-    "Cannot find body region with given anatomy list.",
-]
-
 TEST_CASE_INFER_ERROR_2 = [
     {
         "bundle_root": "models/maisi_ct_generative",
@@ -277,7 +266,7 @@ def test_infer_config(self, override):
             else:
                 self.assertTrue(output_file.endswith(".nii.gz"))
 
-    @parameterized.expand([TEST_CASE_INFER_ERROR, TEST_CASE_INFER_ERROR_7])
+    @parameterized.expand([TEST_CASE_INFER_ERROR_7])
     def test_infer_config_error_input(self, override, expected_error):
         # update override
         override["output_dir"] = self.output_dir
 
@@ -9,9 +9,9 @@
     "output_dir": "$@bundle_root + '/output'",
     "create_output_dir": "$Path(@output_dir).mkdir(exist_ok=True)",
     "device": "$torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')",
-    "trained_autoencoder_path": "$@model_dir + '/autoencoder_epoch273.pt'",
-    "trained_diffusion_path": "$@model_dir + '/input_unet3d_data-all_steps1000size512ddpm_random_current_inputx_v1.pt'",
-    "trained_controlnet_path": "$@model_dir + '/controlnet-20datasets-e20wl100fold0bc_noi_dia_fsize_current.pt'",
+    "trained_autoencoder_path": "$@model_dir + '/autoencoder.pt'",
+    "trained_diffusion_path": "$@model_dir + '/diffusion_unet.pt'",
+    "trained_controlnet_path": "$@model_dir + '/controlnet.pt'",
     "trained_mask_generation_autoencoder_path": "$@model_dir + '/mask_generation_autoencoder.pt'",
     "trained_mask_generation_diffusion_path": "$@model_dir + '/mask_generation_diffusion_unet.pt'",
     "all_mask_files_base_dir": "$@bundle_root + '/datasets/all_masks_flexible_size_and_spacing_3000'",
@@ -21,14 +21,13 @@
     "label_dict_remap_json": "$@bundle_root + '/configs/label_dict_124_to_132.json'",
     "real_img_median_statistics_file": "$@bundle_root + '/configs/image_median_statistics.json'",
     "num_output_samples": 1,
-    "body_region": [
-        "abdomen"
-    ],
+    "body_region": [],
     "anatomy_list": [
         "liver"
     ],
+    "modality": "ct",
     "controllable_anatomy_size": [],
-    "num_inference_steps": 1000,
+    "num_inference_steps": 30,
     "mask_generation_num_inference_steps": 1000,
     "random_seed": null,
     "spatial_dims": 3,
@@ -63,11 +62,11 @@
         64
     ],
     "autoencoder_sliding_window_infer_size": [
-        96,
-        96,
-        96
+        80,
+        80,
+        80
     ],
-    "autoencoder_sliding_window_infer_overlap": 0.6667,
+    "autoencoder_sliding_window_infer_overlap": 0.4,
     "autoencoder_def": {
         "_target_": "monai.apps.generation.maisi.networks.autoencoderkl_maisi.AutoencoderKlMaisi",
         "spatial_dims": "@spatial_dims",
@@ -96,7 +95,7 @@
         "use_checkpointing": false,
         "use_convtranspose": false,
         "norm_float16": true,
-        "num_splits": 8,
+        "num_splits": 2,
         "dim_split": 1
     },
     "diffusion_unet_def": {
@@ -124,9 +123,12 @@
         ],
         "num_res_blocks": 2,
         "use_flash_attention": true,
-        "include_top_region_index_input": true,
-        "include_bottom_region_index_input": true,
-        "include_spacing_input": true
+        "include_top_region_index_input": false,
+        "include_bottom_region_index_input": false,
+        "include_spacing_input": true,
+        "num_class_embeds": 128,
+        "resblock_updown": true,
+        "include_fc": true
     },
     "controlnet_def": {
         "_target_": "monai.apps.generation.maisi.networks.controlnet_maisi.ControlNetMaisi",
@@ -157,7 +159,10 @@
             8,
             32,
             64
-        ]
+        ],
+        "num_class_embeds": 128,
+        "resblock_updown": true,
+        "include_fc": true
     },
     "mask_generation_autoencoder_def": {
         "_target_": "monai.apps.generation.maisi.networks.autoencoderkl_maisi.AutoencoderKlMaisi",
@@ -239,12 +244,11 @@
     "load_mask_generation_diffusion": "$@mask_generation_diffusion_unet.load_state_dict(@checkpoint_mask_generation_diffusion_unet['unet_state_dict'], strict=True)",
     "mask_generation_scale_factor": "$@checkpoint_mask_generation_diffusion_unet['scale_factor']",
     "noise_scheduler": {
-        "_target_": "monai.networks.schedulers.ddpm.DDPMScheduler",
+        "_target_": "scripts.rectified_flow.RFlowScheduler",
         "num_train_timesteps": 1000,
-        "beta_start": 0.0015,
-        "beta_end": 0.0195,
-        "schedule": "scaled_linear_beta",
-        "clip_sample": false
+        "use_discrete_timesteps": false,
+        "use_timestep_transform": true,
+        "sample_method": "uniform"
     },
     "mask_generation_noise_scheduler": {
         "_target_": "monai.networks.schedulers.ddpm.DDPMScheduler",
@@ -269,6 +273,7 @@
         ],
         "body_region": "@body_region",
         "anatomy_list": "@anatomy_list",
+        "modality": "@modality",
         "all_mask_files_json": "@all_mask_files_json",
         "all_anatomy_size_condtions_json": "@all_anatomy_size_condtions_json",
         "all_mask_files_base_dir": "@all_mask_files_base_dir",
@@ -300,6 +305,7 @@
         "autoencoder_sliding_window_infer_overlap": "@autoencoder_sliding_window_infer_overlap"
     },
     "run": [
+        "$monai.utils.set_determinism(seed=@random_seed)",
         "$@ldm_sampler.sample_multiple_images(@num_output_samples)"
     ],
     "evaluator": null
 
@@ -1,7 +1,8 @@
 {
     "schema": "https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/meta_schema_generator_ldm_20240318.json",
-    "version": "0.4.6",
+    "version": "1.0.0",
     "changelog": {
+        "1.0.0": "accelerated maisi, inference only, is not compatible with previous maisi diffusion model weights",
         "0.4.6": "add TensorRT support",
         "0.4.5": "update README",
         "0.4.4": "update issue for IgniteInfo",
 
@@ -4,7 +4,7 @@ This bundle is for Nvidia MAISI (Medical AI for Synthetic Imaging), a 3D Latent
 The inference workflow of MAISI is depicted in the figure below. It first generates latent features from random noise by applying multiple denoising steps using the trained diffusion model. Then it decodes the denoised latent features into images using the trained autoencoder.
 
 <p align="center">
-  <img src="https://developer.download.nvidia.com/assets/Clara/Images/monai_maisi_ct_generative_workflow.png" alt="MAISI inference scheme">
+  <img src="https://developer.download.nvidia.com/assets/Clara/Images/maisi_workflow_1.0.1.png" alt="MAISI inference scheme">
 </p>
 
 MAISI is based on the following papers:
@@ -13,6 +13,8 @@ MAISI is based on the following papers:
 
 [**ControlNet:**  Lvmin Zhang, Anyi Rao, Maneesh Agrawala; “Adding Conditional Control to Text-to-Image Diffusion Models.” ICCV 2023.](https://openaccess.thecvf.com/content/ICCV2023/papers/Zhang_Adding_Conditional_Control_to_Text-to-Image_Diffusion_Models_ICCV_2023_paper.pdf)
 
+[**Rectified Flow:** Xingchao Liu, Chengyue Gong, Qiang Liu; "Flow Straight and Fast: Learning to Generate and Transfer Data with Rectified Flow." ICLR 2023.](https://arxiv.org/pdf/2209.03003)
+
 #### Example synthetic image
 An example result from inference is shown below:
 ![Example synthetic image](https://developer.download.nvidia.com/assets/Clara/Images/monai_maisi_ct_generative_example_synthetic_data.png)
@@ -27,11 +29,11 @@ The information for the inference input, like body region and anatomy to generat
 
 - `"num_output_samples"`: int, the number of output image/mask pairs it will generate.
 - `"spacing"`: voxel size of generated images. E.g., if set to `[1.5, 1.5, 2.0]`, it will generate images with a resolution of 1.5&times;1.5&times;2.0 mm. The spacing for x and y axes has to be between 0.5 and 3.0 mm and the spacing for the z axis has to be between 0.5 and 5.0 mm.
-- `"output_size"`: volume size of generated images. E.g., if set to `[512, 512, 256]`, it will generate images with size of 512&times;512&times;256. They need to be divisible by 16. If you have a small GPU memory size, you should adjust it to small numbers. Note that `"spacing"` and `"output_size"` together decide the output field of view (FOV). For eample, if set them to `[1.5, 1.5, 2.0]`mm and `[512, 512, 256]`, the FOV is 768&times;768&times;512 mm. We recommend output_size is the FOV in x and y axis are same and to be at least 256mm for head, and at least 384mm for other body regions like abdomen. The output size for the x and y axes can be selected from [256, 384, 512], while for the z axis, it can be chosen from [128, 256, 384, 512, 640, 768].
+- `"output_size"`: volume size of generated images. E.g., if set to `[512, 512, 256]`, it will generate images with a size of 512&times;512&times;256. Each value needs to be divisible by 16. If your GPU memory is limited, reduce these values. Note that `"spacing"` and `"output_size"` together decide the output field of view (FOV). For example, if they are set to `[1.5, 1.5, 2.0]` mm and `[512, 512, 256]`, the FOV is 768&times;768&times;512 mm. We recommend choosing them so that the FOV is the same along the x and y axes, at least 256 mm for head, at least 384 mm for other body regions like abdomen, and no larger than 640 mm. The output size for the x and y axes can be selected from [256, 384, 512], while for the z axis, it can be chosen from [128, 256, 384, 512, 640, 768].
 - `"controllable_anatomy_size"`: a list of controllable anatomies and their size scales (0--1). E.g., if set to `[["liver", 0.5],["hepatic tumor", 0.3]]`, the generated image will contain a liver of median size, around the 50th percentile, and a hepatic tumor that is relatively small, around the 30th percentile. In addition, if the size scale is set to -1, it indicates that the organ does not exist or should be removed. The output will contain a paired image and segmentation mask for the controllable anatomy.
 The following organs support generation with a controllable size: ``["liver", "gallbladder", "stomach", "pancreas", "colon", "lung tumor", "bone lesion", "hepatic tumor", "colon cancer primaries", "pancreatic tumor"]``.
 The raw output of the current mask generation model has a fixed size of $256^3$ voxels with a spacing of $1.5^3$ mm. If the "output_size" differs from this default, the generated masks will be resampled to the desired `"output_size"` and `"spacing"`. Note that resampling may degrade the quality of the generated masks and could trigger multiple inference attempts if the images fail to pass the [image quality check](../scripts/quality_check.py).
-- `"body_region"`: If "controllable_anatomy_size" is not specified, "body_region" will be used to constrain the region of generated images. It needs to be chosen from "head", "chest", "thorax", "abdomen", "pelvis", "lower".
+- `"body_region"`: Deprecated; please leave it as an empty list `[]`.
 - `"anatomy_list"`: If "controllable_anatomy_size" is not specified, the output will contain paired image and segmentation mask for the anatomy in "./configs/label_dict.json".
 - `"autoencoder_sliding_window_infer_size"`: in order to save GPU memory, we use sliding window inference when decoding latents to image when `"output_size"` is large. This is the patch size of the sliding window. Small value will reduce GPU memory but increase time cost. They need to be divisible by 16.
 - `"autoencoder_sliding_window_infer_overlap"`: float between 0 and 1. Large value will reduce the stitching artifacts when stitching patches during sliding window inference, but increase time cost. If you do not observe seam lines in the generated image result, you can use a smaller value to save inference time.
 
@@ -1,5 +1,5 @@
 large_files:
-- path: "models/autoencoder_epoch273.pt"
+- path: "models/autoencoder.pt"
   url: "https://developer.download.nvidia.com/assets/Clara/monai/tutorials/model_zoo/model_maisi_autoencoder_epoch273_alternative.pt"
   hash_val: "917cfb1e49631c8a713e3bb7c758fbca"
   hash_type: "md5"
@@ -11,6 +11,14 @@ large_files:
   url: "https://developer.download.nvidia.com/assets/Clara/monai/tutorials/model_zoo/model_maisi_controlnet-20datasets-e20wl100fold0bc_noi_dia_fsize_current_alternative.pt"
   hash_val: "6c36572335372f405a0e85c760fa6dee"
   hash_type: "md5"
+- path: "models/diffusion_unet.pt"
+  url: "https://developer.download.nvidia.com/assets/Clara/monai/tutorials/diff_unet_ckpt_rflow_epoch19350.pt"
+  hash_val: "10501d59a3066802087c82ebd7a71719"
+  hash_type: "md5"
+- path: "models/controlnet.pt"
+  url: "https://developer.download.nvidia.com/assets/Clara/monai/tutorials/controlnet_rflow_epoch208.pt"
+  hash_val: "49933da32826c0f7ca17016ccd13e23b"
+  hash_type: "md5"
 - path: "models/mask_generation_autoencoder.pt"
   url: "https://developer.download.nvidia.com/assets/Clara/monai/tutorials/mask_generation_autoencoder.pt"
   hash_val: "b177778820f412abc9218cdb7ce3b653"
 
@@ -60,7 +60,7 @@ def dilate3d(input_tensor, erosion=3):
     return output.squeeze(0).squeeze(0)
 
 
-def augmentation_tumor_bone(pt_nda, output_size):
+def augmentation_tumor_bone(pt_nda, output_size, random_seed):
     volume = pt_nda.squeeze(0)
     real_l_volume_ = torch.zeros_like(volume)
     real_l_volume_[volume == 128] = 1
@@ -74,6 +74,7 @@ def augmentation_tumor_bone(pt_nda, output_size):
         scale_range=(0.15, 0.15, 0),
         padding_mode="zeros",
     )
+    elastic.set_random_state(seed=random_seed)
 
     tumor_szie = torch.sum((real_l_volume_ > 0).float())
     ###########################
@@ -112,7 +113,7 @@ def augmentation_tumor_bone(pt_nda, output_size):
     return pt_nda
 
 
-def augmentation_tumor_liver(pt_nda, output_size):
+def augmentation_tumor_liver(pt_nda, output_size, random_seed):
     volume = pt_nda.squeeze(0)
     real_l_volume_ = torch.zeros_like(volume)
     real_l_volume_[volume == 1] = 1
@@ -129,6 +130,7 @@ def augmentation_tumor_liver(pt_nda, output_size):
         scale_range=(0.2, 0.2, 0.2),
         padding_mode="zeros",
     )
+    elastic.set_random_state(seed=random_seed)
 
     tumor_szie = torch.sum(real_l_volume_ == 2)
     ###########################
@@ -161,7 +163,7 @@ def augmentation_tumor_liver(pt_nda, output_size):
     return pt_nda
 
 
-def augmentation_tumor_lung(pt_nda, output_size):
+def augmentation_tumor_lung(pt_nda, output_size, random_seed):
     volume = pt_nda.squeeze(0)
     real_l_volume_ = torch.zeros_like(volume)
     real_l_volume_[volume == 23] = 1
@@ -177,6 +179,7 @@ def augmentation_tumor_lung(pt_nda, output_size):
         scale_range=(0.15, 0.15, 0.15),
         padding_mode="zeros",
     )
+    elastic.set_random_state(seed=random_seed)
 
     tumor_szie = torch.sum(real_l_volume_)
     # before moving the lung tumor mask, fill its original location with lung labels
@@ -224,7 +227,7 @@ def augmentation_tumor_lung(pt_nda, output_size):
     return pt_nda
 
 
-def augmentation_tumor_pancreas(pt_nda, output_size):
+def augmentation_tumor_pancreas(pt_nda, output_size, random_seed):
     volume = pt_nda.squeeze(0)
     real_l_volume_ = torch.zeros_like(volume)
     real_l_volume_[volume == 4] = 1
@@ -241,6 +244,7 @@ def augmentation_tumor_pancreas(pt_nda, output_size):
         scale_range=(0.1, 0.1, 0.1),
         padding_mode="zeros",
     )
+    elastic.set_random_state(seed=random_seed)
 
     tumor_szie = torch.sum(real_l_volume_ == 2)
     ###########################
@@ -273,7 +277,7 @@ def augmentation_tumor_pancreas(pt_nda, output_size):
     return pt_nda
 
 
-def augmentation_tumor_colon(pt_nda, output_size):
+def augmentation_tumor_colon(pt_nda, output_size, random_seed):
     volume = pt_nda.squeeze(0)
     real_l_volume_ = torch.zeros_like(volume)
     real_l_volume_[volume == 27] = 1
@@ -289,6 +293,7 @@ def augmentation_tumor_colon(pt_nda, output_size):
         scale_range=(0.1, 0.1, 0.1),
         padding_mode="zeros",
     )
+    elastic.set_random_state(seed=random_seed)
 
     tumor_szie = torch.sum(real_l_volume_)
     ###########################
@@ -330,37 +335,39 @@ def augmentation_tumor_colon(pt_nda, output_size):
     return pt_nda
 
 
-def augmentation_body(pt_nda):
+def augmentation_body(pt_nda, random_seed):
     volume = pt_nda.squeeze(0)
 
     zoom = RandZoom(min_zoom=0.99, max_zoom=1.01, mode="nearest", align_corners=None, prob=1.0)
+    zoom.set_random_state(seed=random_seed)
+
     volume = zoom(volume)
 
     pt_nda = volume.unsqueeze(0)
     return pt_nda
 
 
-def augmentation(pt_nda, output_size):
+def augmentation(pt_nda, output_size, random_seed):
     label_list = torch.unique(pt_nda)
     label_list = list(label_list.cpu().numpy())
 
     if 128 in label_list:
         print("augmenting bone lesion/tumor")
-        pt_nda = augmentation_tumor_bone(pt_nda, output_size)
+        pt_nda = augmentation_tumor_bone(pt_nda, output_size, random_seed)
     elif 26 in label_list:
         print("augmenting liver tumor")
-        pt_nda = augmentation_tumor_liver(pt_nda, output_size)
+        pt_nda = augmentation_tumor_liver(pt_nda, output_size, random_seed)
     elif 23 in label_list:
         print("augmenting lung tumor")
-        pt_nda = augmentation_tumor_lung(pt_nda, output_size)
+        pt_nda = augmentation_tumor_lung(pt_nda, output_size, random_seed)
     elif 24 in label_list:
         print("augmenting pancreas tumor")
-        pt_nda = augmentation_tumor_pancreas(pt_nda, output_size)
+        pt_nda = augmentation_tumor_pancreas(pt_nda, output_size, random_seed)
     elif 27 in label_list:
         print("augmenting colon tumor")
-        pt_nda = augmentation_tumor_colon(pt_nda, output_size)
+        pt_nda = augmentation_tumor_colon(pt_nda, output_size, random_seed)
     else:
         print("augmenting body")
-        pt_nda = augmentation_body(pt_nda)
+        pt_nda = augmentation_body(pt_nda, random_seed)
 
     return pt_nda