
Commit d92f069

Merge branch 'refs/heads/master' into submission-v4.1
2 parents: 513dcf1 + b5c4cc3

10 files changed: +33, -250 lines changed


flutter/assets/tasks.pbtxt

Lines changed: 2 additions & 76 deletions
@@ -216,86 +216,12 @@ task {
   }
 }
 
-task {
-  id: "image_classification"
-  name: "Image Classification"
-  min_query_count: 1024
-  min_duration: 60
-  max_duration: 600
-  max_throughput: 1000
-  max_accuracy: 1.0
-  scenario: "SingleStream"
-  datasets {
-    type: IMAGENET
-    full {
-      name: "Imagenet classification validation set"
-      input_path: "local:///mlperf_datasets/imagenet/img"
-      groundtruth_path: "local:///mlperf_datasets/imagenet/imagenet_val_full.txt"
-    }
-    lite {
-      name: "Open images subset for classification"
-      input_path: "https://github.com/mlcommons/mobile_models/raw/main/v0_7/datasets/imagenet.zip"
-      groundtruth_path: ""
-    }
-    tiny {
-      name: "Imagenet dataset for integration test"
-      input_path: "https://github.com/mlcommons/mobile_models/raw/main/v0_7/datasets/imagenet_tiny.zip"
-      groundtruth_path: "https://github.com/mlcommons/mobile_models/raw/main/v3_0/assets/imagenet_tiny-groundtruth.txt"
-    }
-  }
-  model {
-    id: "mobilenetedgetpu_224_v1_0"
-    name: "MobileNetEdgeTPU 224 1.0"
-    offset: 1
-    image_width: 224
-    image_height: 224
-    num_classes: 1000
-  }
-}
-
-task {
-  id: "image_classification_offline"
-  name: "Image Classification (Offline)"
-  min_query_count: 24576
-  min_duration: 0
-  max_duration: 0
-  max_throughput: 2000
-  max_accuracy: 1.0
-  scenario: "Offline"
-  datasets {
-    type: IMAGENET
-    full {
-      name: "Imagenet classification validation set"
-      input_path: "local:///mlperf_datasets/imagenet/img"
-      groundtruth_path: "local:///mlperf_datasets/imagenet/imagenet_val_full.txt"
-    }
-    lite {
-      name: "Open images subset for classification"
-      input_path: "https://github.com/mlcommons/mobile_models/raw/main/v0_7/datasets/imagenet.zip"
-      groundtruth_path: ""
-    }
-    tiny {
-      name: "Imagenet dataset for integration test"
-      input_path: "https://github.com/mlcommons/mobile_models/raw/main/v0_7/datasets/imagenet_tiny.zip"
-      groundtruth_path: "https://github.com/mlcommons/mobile_models/raw/main/v3_0/assets/imagenet_tiny-groundtruth.txt"
-    }
-  }
-  model {
-    id: "mobilenetedgetpu_224_v1_0"
-    name: "MobileNetEdgeTPU 224 1.0"
-    offset: 1
-    image_width: 224
-    image_height: 224
-    num_classes: 1000
-  }
-}
-
 task {
   id: "stable_diffusion"
   name: "Stable Diffusion"
-  min_query_count: 5
+  min_query_count: 1024
   min_duration: 60
-  max_duration: 600
+  max_duration: 300
   max_throughput: 2000
   max_accuracy: 1.0
   scenario: "SingleStream"
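
For context on the tuned fields: min_query_count, min_duration, and max_duration jointly govern when a benchmark run may stop. A rough C++ sketch of how such bounds typically interact (an illustration only, not MLCommons LoadGen's actual termination logic; all names here are hypothetical):

// Illustration of run-termination bounds like those in tasks.pbtxt.
// A run must satisfy BOTH minimums before it may stop; max_duration
// (when non-zero) acts as a hard cap, e.g. 300 s for stable_diffusion.
bool ShouldStopRun(long queries_issued, double elapsed_seconds,
                   long min_query_count, double min_duration,
                   double max_duration) {
  if (max_duration > 0 && elapsed_seconds >= max_duration) return true;
  return queries_issued >= min_query_count &&
         elapsed_seconds >= min_duration;
}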

flutter/cpp/datasets/coco_gen.cc

Lines changed: 1 addition & 1 deletion
@@ -139,7 +139,7 @@ float CocoGen::ComputeAccuracy() {
     total_score += score;
   }
   float avg_score = total_score / total_samples;
-  return avg_score;
+  return avg_score / 100;
 }
 
 std::string CocoGen::ComputeAccuracyString() {
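
The rescaling above suggests the per-sample scores summed into total_score lie on a 0-100 scale (as CLIP scores commonly are), so dividing the average by 100 maps the final accuracy into [0, 1], consistent with the max_accuracy: 1.0 bound declared for stable_diffusion in tasks.pbtxt. A minimal standalone sketch of the resulting computation (the function name and the 0-100 scale are assumptions, not from this commit):

#include <numeric>
#include <vector>

// Average per-sample scores assumed to lie on a 0-100 scale, then
// normalize into [0, 1] to match a max_accuracy bound of 1.0.
float NormalizedAverageScore(const std::vector<float>& scores) {
  if (scores.empty()) return 0.0f;
  float total = std::accumulate(scores.begin(), scores.end(), 0.0f);
  float avg = total / static_cast<float>(scores.size());
  return avg / 100.0f;  // e.g. an average score of 31.5 becomes 0.315
}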

flutter/integration_test/expected_accuracy.dart

Lines changed: 0 additions & 13 deletions
@@ -16,17 +16,6 @@ key: <accelerator> OR <accelerator>|<backendName>
 - neuron > Android MediaTek
 */
 
-const Map<String, Interval> _imageClassification = {
-  'cpu': Interval(min: 1.00, max: 1.00),
-  'npu': Interval(min: 0.89, max: 0.91),
-  'tpu': Interval(min: 0.89, max: 0.91),
-  'ane': Interval(min: 1.00, max: 1.00),
-  'cpu&gpu&ane': Interval(min: 1.00, max: 1.00),
-  'snpe_dsp': Interval(min: 0.78, max: 0.82),
-  'psnpe_dsp': Interval(min: 0.78, max: 0.82),
-  'neuron-mdla': Interval(min: 0.88, max: 0.91),
-};
-
 const Map<String, Interval> _imageClassificationV2 = {
   'cpu': Interval(min: 0.88, max: 0.91),
   'npu': Interval(min: 0.88, max: 0.91),
@@ -100,13 +89,11 @@ const Map<String, Interval> _stableDiffusion = {
 };
 
 const benchmarkExpectedAccuracy = {
-  BenchmarkId.imageClassification: _imageClassification,
   BenchmarkId.imageClassificationV2: _imageClassificationV2,
   BenchmarkId.objectDetection: _objectDetection,
   BenchmarkId.imageSegmentationV2: _imageSegmentation,
   BenchmarkId.naturalLanguageProcessing: _naturalLanguageProcessing,
   BenchmarkId.superResolution: _superResolution,
   BenchmarkId.stableDiffusion: _stableDiffusion,
-  BenchmarkId.imageClassificationOffline: _imageClassification,
   BenchmarkId.imageClassificationOfflineV2: _imageClassificationV2,
 };

flutter/integration_test/expected_throughput.dart

Lines changed: 0 additions & 50 deletions
@@ -29,30 +29,6 @@ const _kDN2103 = 'DN2103'; // OnePlus DN2103
 const _kIphoneOnGitHubAction = 'iPhone15,3';
 const _kIphoneOnMacbookM1 = 'iPhone14,7';
 
-const Map<String, Map<String, Interval>> _imageClassification = {
-  _kTFLiteBackend: {
-    _kCloudBuildX23: Interval(min: 4, max: 12),
-    _kCloudBuildX28: Interval(min: 4, max: 13),
-    _kRyzen5600: Interval(min: 31, max: 37),
-    _kPixel5: Interval(min: 80, max: 130),
-    _kPixel6: Interval(min: 600, max: 1100),
-    _kIphoneOnGitHubAction: Interval(min: 1, max: 8),
-    _kIphoneOnMacbookM1: Interval(min: 19, max: 27),
-  },
-  _kCoreMLBackend: {
-    _kIphoneOnGitHubAction: Interval(min: 1, max: 12),
-  },
-  _kPixelBackend: {
-    _kPixel6: Interval(min: 700, max: 1200),
-  },
-  _kQtiBackend: {
-    _kS22Ultra: Interval(min: 1600, max: 2400),
-  },
-  _kMediatekBackend: {
-    _kDN2103: Interval(min: 30, max: 55),
-  },
-};
-
 const Map<String, Map<String, Interval>> _imageClassificationV2 = {
   _kTFLiteBackend: {
     _kCloudBuildX23: Interval(min: 1, max: 9),
@@ -198,30 +174,6 @@ const Map<String, Map<String, Interval>> _stableDiffusion = {
   },
 };
 
-const Map<String, Map<String, Interval>> _imageClassificationOffline = {
-  _kTFLiteBackend: {
-    _kCloudBuildX23: Interval(min: 8, max: 14),
-    _kCloudBuildX28: Interval(min: 7, max: 16),
-    _kRyzen5600: Interval(min: 45, max: 60),
-    _kPixel5: Interval(min: 120, max: 190),
-    _kPixel6: Interval(min: 800, max: 1700),
-    _kIphoneOnGitHubAction: Interval(min: 2, max: 15),
-    _kIphoneOnMacbookM1: Interval(min: 30, max: 45),
-  },
-  _kCoreMLBackend: {
-    _kIphoneOnGitHubAction: Interval(min: 2, max: 20),
-  },
-  _kPixelBackend: {
-    _kPixel6: Interval(min: 900, max: 1800),
-  },
-  _kQtiBackend: {
-    _kS22Ultra: Interval(min: 2400, max: 3500),
-  },
-  _kMediatekBackend: {
-    _kDN2103: Interval(min: 75, max: 140),
-  },
-};
-
 const Map<String, Map<String, Interval>> _imageClassificationOfflineV2 = {
   _kTFLiteBackend: {
     _kCloudBuildX23: Interval(min: 1, max: 9),
@@ -247,13 +199,11 @@ const Map<String, Map<String, Interval>> _imageClassificationOfflineV2 = {
 };
 
 const benchmarkExpectedThroughput = {
-  BenchmarkId.imageClassification: _imageClassification,
   BenchmarkId.imageClassificationV2: _imageClassificationV2,
   BenchmarkId.objectDetection: _objectDetection,
   BenchmarkId.imageSegmentationV2: _imageSegmentationV2,
   BenchmarkId.naturalLanguageProcessing: _naturalLanguageProcessing,
   BenchmarkId.superResolution: _superResolution,
   BenchmarkId.stableDiffusion: _stableDiffusion,
-  BenchmarkId.imageClassificationOffline: _imageClassificationOffline,
   BenchmarkId.imageClassificationOfflineV2: _imageClassificationOfflineV2,
 };

flutter/lib/app_constants.dart

Lines changed: 0 additions & 4 deletions
@@ -16,23 +16,19 @@ class WidgetKeys {
 }
 
 class BenchmarkId {
-  static const imageClassification = 'image_classification';
   static const objectDetection = 'object_detection';
   static const imageSegmentationV2 = 'image_segmentation_v2';
   static const naturalLanguageProcessing = 'natural_language_processing';
   static const superResolution = 'super_resolution';
-  static const imageClassificationOffline = 'image_classification_offline';
   static const imageClassificationV2 = 'image_classification_v2';
   static const imageClassificationOfflineV2 = 'image_classification_offline_v2';
   static const stableDiffusion = 'stable_diffusion';
 
   static const allIds = [
-    imageClassification,
     objectDetection,
     imageSegmentationV2,
     naturalLanguageProcessing,
     superResolution,
-    imageClassificationOffline,
     imageClassificationV2,
     imageClassificationOfflineV2,
     stableDiffusion,

flutter/lib/benchmark/info.dart

Lines changed: 0 additions & 12 deletions
@@ -27,18 +27,6 @@ class BenchmarkInfo {
 
   BenchmarkLocalizationInfo getLocalizedInfo(AppLocalizations stringResources) {
     switch (task.id) {
-      case (BenchmarkId.imageClassification):
-        return BenchmarkLocalizationInfo(
-          name: stringResources.benchNameImageClassification,
-          detailsTitle: stringResources.benchInfoImageClassification,
-          detailsContent: stringResources.benchInfoImageClassificationDesc,
-        );
-      case (BenchmarkId.imageClassificationOffline):
-        return BenchmarkLocalizationInfo(
-          name: stringResources.benchNameImageClassificationOffline,
-          detailsTitle: stringResources.benchInfoImageClassification,
-          detailsContent: stringResources.benchInfoImageClassificationDesc,
-        );
       case (BenchmarkId.imageClassificationV2):
         return BenchmarkLocalizationInfo(
           name: stringResources.benchNameImageClassification,

flutter/lib/ui/icons.dart

Lines changed: 0 additions & 5 deletions
@@ -66,26 +66,21 @@ class AppIcons {
 
 class BenchmarkIcons {
   static final darkSet = {
-    BenchmarkId.imageClassification: AppIcons.imageClassification,
     BenchmarkId.imageClassificationV2: AppIcons.imageClassification,
     BenchmarkId.objectDetection: AppIcons.objectDetection,
     BenchmarkId.imageSegmentationV2: AppIcons.imageSegmentation,
     BenchmarkId.naturalLanguageProcessing: AppIcons.languageProcessing,
     BenchmarkId.superResolution: AppIcons.superResolution,
-    BenchmarkId.imageClassificationOffline: AppIcons.imageClassificationOffline,
     BenchmarkId.imageClassificationOfflineV2:
         AppIcons.imageClassificationOffline,
   };
 
   static final lightSet = {
-    BenchmarkId.imageClassification: AppIcons.imageClassificationWhite,
     BenchmarkId.imageClassificationV2: AppIcons.imageClassificationWhite,
     BenchmarkId.objectDetection: AppIcons.objectDetectionWhite,
     BenchmarkId.imageSegmentationV2: AppIcons.imageSegmentationWhite,
     BenchmarkId.naturalLanguageProcessing: AppIcons.languageProcessingWhite,
     BenchmarkId.superResolution: AppIcons.superResolutionWhite,
-    BenchmarkId.imageClassificationOffline:
-        AppIcons.imageClassificationOfflineWhite,
     BenchmarkId.imageClassificationOfflineV2:
         AppIcons.imageClassificationOfflineWhite,
   };

mobile_back_tflite/cpp/backend_tflite/stable_diffusion_invoker.cc

Lines changed: 16 additions & 64 deletions
@@ -44,79 +44,31 @@ std::vector<float> StableDiffusionInvoker::encode_prompt(
 std::vector<float> StableDiffusionInvoker::diffusion_step(
     const std::vector<float>& latent, const std::vector<float>& t_emb,
     const std::vector<float>& context) {
-  // Prepare the first model's inputs
-
-  auto first_input_details =
-      TfLiteInterpreterGetInputTensor(backend_data_->first_interpreter, 0);
-  auto second_input_details =
-      TfLiteInterpreterGetInputTensor(backend_data_->first_interpreter, 1);
-  auto third_input_details =
-      TfLiteInterpreterGetInputTensor(backend_data_->first_interpreter, 2);
+  auto latent_input_details =
+      TfLiteInterpreterGetInputTensor(backend_data_->sd_interpreter, 0);
+  auto context_input_details =
+      TfLiteInterpreterGetInputTensor(backend_data_->sd_interpreter, 1);
+  auto time_stamp_embedding_input_details =
+      TfLiteInterpreterGetInputTensor(backend_data_->sd_interpreter, 2);
 
   std::copy(context.begin(), context.end(),
-            reinterpret_cast<float*>(TfLiteTensorData(first_input_details)));
+            reinterpret_cast<float*>(TfLiteTensorData(context_input_details)));
   std::copy(t_emb.begin(), t_emb.end(),
-            reinterpret_cast<float*>(TfLiteTensorData(second_input_details)));
+            reinterpret_cast<float*>(
+                TfLiteTensorData(time_stamp_embedding_input_details)));
   std::copy(latent.begin(), latent.end(),
-            reinterpret_cast<float*>(TfLiteTensorData(third_input_details)));
+            reinterpret_cast<float*>(TfLiteTensorData(latent_input_details)));
 
-  // Invoke the first model
-  if (TfLiteInterpreterInvoke(backend_data_->first_interpreter) != kTfLiteOk) {
+  // Invoke the model
+  if (TfLiteInterpreterInvoke(backend_data_->sd_interpreter) != kTfLiteOk) {
     std::cerr << "Failed to invoke the first diffusion model!" << std::endl;
     exit(-1);
   }
 
-  // Output names from the first model and corresponding input names for the
-  // second model
-  std::vector<std::string> output_names = {
-      "Identity_6", "Identity_4", "Identity", "input_1", "Identity_12",
-      "Identity_11", "Identity_3", "Identity_10", "Identity_9", "Identity_5",
-      "Identity_8", "Identity_7", "Identity_2"};
-
-  std::vector<std::string> input_names = {
-      "args_0", "args_0_1", "args_0_2", "args_0_4", "args_0_3",
-      "args_0_5", "args_0_6", "args_0_7", "args_0_8", "args_0_9",
-      "args_0_10", "args_0_11", "args_0_12"};
-
-  // Copy outputs of the first model to the inputs of the second model based on
-  // names
-  for (size_t i = 0; i < input_names.size(); ++i) {
-    int input_index = get_tensor_index_by_name(
-        backend_data_->second_interpreter, input_names[i], true);
-    int output_index = get_tensor_index_by_name(
-        backend_data_->first_interpreter, output_names[i], false);
-
-    if (input_index == -1 || output_index == -1) {
-      std::cerr << "Failed to find matching input or output tensor by name!"
-                << std::endl;
-      exit(-1);
-    }
-
-    auto first_model_output_details = TfLiteInterpreterGetOutputTensor(
-        backend_data_->first_interpreter, output_index);
-
-    float* output_data =
-        reinterpret_cast<float*>(TfLiteTensorData(first_model_output_details));
-    int output_size =
-        TfLiteTensorByteSize(first_model_output_details) / sizeof(float);
-
-    float* input_data = reinterpret_cast<float*>(
-        TfLiteTensorData(TfLiteInterpreterGetInputTensor(
-            backend_data_->second_interpreter, input_index)));
-
-    std::copy(output_data, output_data + output_size, input_data);
-  }
-
-  // Invoke the second model
-  if (TfLiteInterpreterInvoke(backend_data_->second_interpreter) != kTfLiteOk) {
-    std::cerr << "Failed to invoke the second diffusion model!" << std::endl;
-    exit(-1);
-  }
-
   float* output = reinterpret_cast<float*>(TfLiteTensorData(
-      TfLiteInterpreterGetOutputTensor(backend_data_->second_interpreter, 0)));
+      TfLiteInterpreterGetOutputTensor(backend_data_->sd_interpreter, 0)));
   int output_size = TfLiteTensorByteSize(TfLiteInterpreterGetOutputTensor(
-                        backend_data_->second_interpreter, 0)) /
+                        backend_data_->sd_interpreter, 0)) /
                     sizeof(float);
   return std::vector<float>(output, output + output_size);
 }
@@ -201,9 +153,9 @@ std::vector<float> StableDiffusionInvoker::run_inference(
 
   // Access the input tensors
   void* pos_ids_input_data =
-      TfLiteTensorData(TfLiteInterpreterGetInputTensor(interpreter, 0));
-  void* encoded_input_data =
       TfLiteTensorData(TfLiteInterpreterGetInputTensor(interpreter, 1));
+  void* encoded_input_data =
+      TfLiteTensorData(TfLiteInterpreterGetInputTensor(interpreter, 0));
 
   // Copy data to input tensors (type cast required for correct copy operation)
   std::memcpy(pos_ids_input_data, pos_ids.data(), pos_ids.size() * sizeof(int));
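
Net effect of this refactor: the two chained UNet interpreters (first_interpreter / second_interpreter), previously stitched together by copying thirteen named outputs into thirteen named inputs, are replaced by a single sd_interpreter addressed by fixed tensor index. A self-contained sketch of that access pattern against the TensorFlow Lite C API (the input ordering latent=0, context=1, t_emb=2 follows the diff; error handling is simplified and the helper name is ours):

#include <cstring>
#include <vector>

#include "tensorflow/lite/c/c_api.h"

// Single-interpreter diffusion step: copy each input by fixed index,
// invoke once, read output 0. The index-to-tensor mapping is a property
// of the converted model, not of the TFLite C API.
std::vector<float> DiffusionStep(TfLiteInterpreter* sd_interpreter,
                                 const std::vector<float>& latent,
                                 const std::vector<float>& context,
                                 const std::vector<float>& t_emb) {
  std::memcpy(
      TfLiteTensorData(TfLiteInterpreterGetInputTensor(sd_interpreter, 0)),
      latent.data(), latent.size() * sizeof(float));
  std::memcpy(
      TfLiteTensorData(TfLiteInterpreterGetInputTensor(sd_interpreter, 1)),
      context.data(), context.size() * sizeof(float));
  std::memcpy(
      TfLiteTensorData(TfLiteInterpreterGetInputTensor(sd_interpreter, 2)),
      t_emb.data(), t_emb.size() * sizeof(float));

  if (TfLiteInterpreterInvoke(sd_interpreter) != kTfLiteOk) return {};

  const TfLiteTensor* out = TfLiteInterpreterGetOutputTensor(sd_interpreter, 0);
  const float* data = reinterpret_cast<const float*>(TfLiteTensorData(out));
  return std::vector<float>(data,
                            data + TfLiteTensorByteSize(out) / sizeof(float));
}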
