
Commit aab2697

feat: add icon and description for Stable Diffusion benchmark (#917)
* Add icon for stable_diffusion task
* Add description for stable_diffusion task
* Sort the order of task based on BenchmarkId.allIds
* Fix ios-build-test.yml
* Fix ios-build-test.yml
1 parent f74b27c commit aab2697

9 files changed (+47, -11 lines)
.github/workflows/ios-build-test.yml

Lines changed: 1 addition & 1 deletion
```diff
@@ -10,7 +10,7 @@ jobs:
   build:
     name: Build and test iOS app
     # https://github.com/actions/runner-images/blob/main/images/macos/macos-12-Readme.md
-    runs-on: macos-12
+    runs-on: macos-13
     timeout-minutes: 120
     env:
       PERF_TEST: true
```
2 files: 12 additions & 0 deletions each (diff not loaded; presumably the new ic_task_stable_diffusion SVG assets referenced in flutter/lib/ui/icons.dart)

flutter/integration_test/expected_throughput.dart

Lines changed: 1 addition & 1 deletion
```diff
@@ -26,7 +26,7 @@ const _kS22Ultra = 'SM-S908U1'; // Galaxy S22 Ultra
 const _kDN2103 = 'DN2103'; // OnePlus DN2103

 // iOS
-const _kIphoneOnGitHubAction = 'iPhone15,3';
+const _kIphoneOnGitHubAction = 'iPhone16,2';
 const _kIphoneOnMacbookM1 = 'iPhone14,7';

 const Map<String, Map<String, Interval>> _imageClassificationV2 = {
```
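The new model id keys the per-device expected-throughput tables used by the integration test. A self-contained sketch of checking a measured value against such a table; the `Interval` class here is a stand-in for the app's own, and the bounds and measured value are illustrative, not the app's actual thresholds:

```dart
// Stand-in for the app's Interval class; bounds below are made up.
class Interval {
  final double min, max;
  const Interval({required this.min, required this.max});
  bool contains(double v) => v >= min && v <= max;
}

const kIphoneOnGitHubAction = 'iPhone16,2';

// Hypothetical expected-throughput table keyed by device model id.
const expected = {
  kIphoneOnGitHubAction: Interval(min: 1.0, max: 100.0),
};

void main() {
  const measured = 42.0; // hypothetical measured throughput
  final ok = expected[kIphoneOnGitHubAction]?.contains(measured) ?? false;
  print('within expected range: $ok');
}
```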

flutter/lib/app_constants.dart

Lines changed: 3 additions & 2 deletions
```diff
@@ -24,14 +24,15 @@ class BenchmarkId {
   static const imageClassificationOfflineV2 = 'image_classification_offline_v2';
   static const stableDiffusion = 'stable_diffusion';

+  // The sort order of this list will be used in the UI
   static const allIds = [
+    imageClassificationV2,
     objectDetection,
     imageSegmentationV2,
     naturalLanguageProcessing,
     superResolution,
-    imageClassificationV2,
-    imageClassificationOfflineV2,
     stableDiffusion,
+    imageClassificationOfflineV2,
   ];
 }
```

flutter/lib/benchmark/benchmark.dart

Lines changed: 5 additions & 1 deletion
```diff
@@ -130,7 +130,11 @@ class BenchmarkStore {
     required List<pb.BenchmarkSetting> backendConfig,
     required Map<String, bool> taskSelection,
   }) {
-    for (final task in appConfig.task) {
+    // sort the order of task based on BenchmarkId.allIds
+    final List<pb.TaskConfig> sortedTasks = List.from(appConfig.task)
+      ..sort((a, b) =>
+          BenchmarkId.allIds.indexOf(a.id) - BenchmarkId.allIds.indexOf(b.id));
+    for (final task in sortedTasks) {
       final backendSettings = backendConfig
           .singleWhereOrNull((setting) => setting.benchmarkId == task.id);
       if (backendSettings == null) {
```
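Since `List.indexOf` returns -1 for ids missing from `BenchmarkId.allIds`, any task not listed there would sort to the front. A minimal runnable sketch of the comparator's behavior, with plain id strings standing in for `pb.TaskConfig`:

```dart
// Ordering used above, demonstrated on plain id strings.
const allIds = ['image_classification_v2', 'object_detection', 'stable_diffusion'];

void main() {
  final tasks = ['stable_diffusion', 'object_detection', 'image_classification_v2'];
  tasks.sort((a, b) => allIds.indexOf(a) - allIds.indexOf(b));
  print(tasks); // [image_classification_v2, object_detection, stable_diffusion]
}
```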

flutter/lib/benchmark/info.dart

Lines changed: 6 additions & 0 deletions
```diff
@@ -63,6 +63,12 @@ class BenchmarkInfo {
         detailsTitle: stringResources.benchInfoSuperResolution,
         detailsContent: stringResources.benchInfoSuperResolutionDesc,
       );
+    case (BenchmarkId.stableDiffusion):
+      return BenchmarkLocalizationInfo(
+        name: stringResources.benchNameStableDiffusion,
+        detailsTitle: stringResources.benchInfoStableDiffusion,
+        detailsContent: stringResources.benchInfoStableDiffusionDesc,
+      );
     default:
       throw 'unhandled task id: ${task.id}';
   }
```

flutter/lib/l10n/app_en.arb

Lines changed: 3 additions & 0 deletions
```diff
@@ -102,17 +102,20 @@
   "benchNameLanguageProcessing": "Language Processing",
   "benchNameImageClassificationOffline": "Image Classification (offline)",
   "benchNameSuperResolution": "Super Resolution",
+  "benchNameStableDiffusion": "Stable Diffusion",
   "benchInfoImageClassification": "Image Classification",
   "benchInfoObjectDetection": "Object detection",
   "benchInfoImageSegmentation": "Image Segmentation",
   "benchInfoLanguageProcessing": "Language Processing",
   "benchInfoSuperResolution": "Super Resolution",
+  "benchInfoStableDiffusion": "Stable Diffusion",
   "benchInfoImageClassificationDesc": "Image classification picks the best label to describe an input image and is commonly used for photo search and text extraction. The MobileNetEdgeTPU reference model is evaluated on the ImageNet 2012 validation dataset and requires a minimum accuracy of 74.66% (98% of FP32 accuracy of 76.19%) Top-1 accuracy (For Performance measurements, App uses a different dataset).\n\nThe MobileNetEdgeTPU network is a descendent of the MobileNet-v2 family that is optimized for low-latency and mobile accelerators. The MobileNetEdgeTPU model architecture is based on convolutional layers with inverted residuals and linear bottlenecks, similar to MobileNet v2, but is optimized by introducing fused inverted bottleneck convolutions to improve hardware utilization, and removing hard-swish and squeeze-and-excite blocks.\n\nThe offline variant of image classification has no latency constraints and typically uses batched inference and has higher throughput.",
   "benchInfoImageClassificationV2Desc": "Image classification picks the best label to describe an input image and is commonly used for photo search and text extraction.\n\nThe MobileNetV4-Conv-L model boasts an impressive 83% accuracy with the ImageNet dataset, versus 76% accuracy for the prior standard, MobileNetEdgeTPU. MobileNetV4-Conv-L is designed to perform well across a range of mobile processor types, from CPUs and GPUs to neural accelerators. The MLPerf Mobile working group worked closely with the MobileNetV4 team in order to ensure optimized performance. This combination of an improved model architecture and collaborative optimization has proven quite potent. Although MobileNetV4-Conv-L executes six times the number of mathematical operations of its predecessor, MobileNetEdgeTPU, benchmark execution times have only increased by a factor of roughly 4.6.\n\nThe offline variant of image classification has no latency constraints and typically uses batched inference and has higher throughput.",
   "benchInfoObjectDetectionDesc": "Object detection draws bounding boxes around recognized objects in an input image, assigning each one a label. This is a common approach for identifying objects in photos, and automotive safety. Since v1.0, our reference model has been updated to MobileDets (from v0.7 model, Single Shot Detector with a MobileNet-v2 feature extractor operating). MobileDets are trained on the COCO 2017 validation dataset. The MobileDets Object Detection task is evaluated on the COCO 2017 dataset with an input image resolution of 320x320. It requires a minimum mean Average Precision (mAP) of 27.075 (95% of FP32 mAP of 28.5%), which is significantly higher than that of the previous model.\n\nMobileDets are searched for object detection. A key feature of MobileDets is that the search space includes both inverted bottleneck blocks and regular convolution operations to help improve the accuracy-latency trade-off on several hardware accelerators.",
   "benchInfoImageSegmentationDesc": "Semantic image segmentation partitions an input image into labeled objects at pixel granularity, and is used for complex image manipulation such as red-eye reduction as well as automotive and medical applications. The reference model is the MOSAIC network paired with a tailored feature extraction backbone. It operates on 512x512 resolution input images from the ADE20K validation set and requires a minimum mean Intersection Over Union (mIoU) value of 57.36% (96% of FP32 mIoU of 59.75%), significantly higher than the previous segmentation model (MobileNetv2-Deeplabv3+).\n\nMOSAIC employs a simple asymmetric encoder-decoder structure which consists of an efficient multi-scale context encoder and a light-weight hybrid decoder to recover spatial details from aggregated information with multiple lateral connections between the two. The feature extractor is a variant of MobileNet Multi-Hardware, which is a network built and optimized with neural architecture search. It is further enhanced for image segmentation by reducing the output stride, adding dilated convolutions at the end stage, and halving the feature channels.",
   "benchInfoLanguageProcessingDesc": "Question Answering finds the best answer to an input question based on a body of text, and is commonly employed in applications such as virtual assistants and chatbots. The reference model, MobileBERT, is evaluated on the Stanford Question Answering Dataset (SQUAD) v1.1 Dev-mini. The task requires a minimum F1-score of 87.4% (93% of FP32 F1-score of 93.08%).\n\nMobileBERT is a streamlined, mobile-optimized version of the larger BERT_LARGE network. It features bottleneck structures and a carefully designed balance between self-attention and feed-forward networks. While BERT is task-agnostic and can be applied to various downstream natural language processing tasks, the MobileBERT variant used in MLPerf is specifically fine-tuned for question answering.",
   "benchInfoSuperResolutionDesc": "Image Super Resolution (SR) upscales a lower resolution input into a higher resolution output image, enhancing the quality and detail. It is a common task in many mobile applications such as digital zoom. The reference model, EDSR F32B5, is a lightweight member of the Enhanced Deep Super Resolution (EDSR) family that is trained for 2X super resolution on the DIV2K dataset with bicubic downsampling and tested on the OpenSR test-set which comprises 25 selected 1920x1080 HDR images. The benchmark requires a minimum accuracy of 33 dB Peak Signal to Noise Ratio (PSNR) relative to a 33.58 dB accuracy with FP32.\n\nThe EDSR family of models demonstrated excellent performance by winning a super resolution challenge at CVPR 2017. The EDSR F32B5 reference model features five EDSR blocks, each with 32 feature maps. The EDSR block is a simple residual block consisting of a residual connection on one branch and a convolution-ReLU-convolution on the other branch. The final upsampling layer is a depth-to-space operator, which facilitates the x2 super resolution process.",
+  "benchInfoStableDiffusionDesc": "The Text to Image Gen AI benchmark adopts Stable Diffusion v1.5 for generating images from text prompts. It is a latent diffusion model. The benchmarked Stable Diffusion v1.5 refers to a specific configuration of the model architecture that uses a downsampling-factor 8 autoencoder with an 860M UNet,123M CLIP ViT-L/14 text encoder for the diffusion model, and VAE Decoder of 49.5M parameters. The model was trained on 595k steps at resolution of 512x512, which enables it to generate high quality images. We refer you to https://huggingface.co/benjamin-paine/stable-diffusion-v1-5 for more information. The benchmark runs 20 denoising steps for inference, and uses a precalculated time embedding of size 1x1280. Reference models can be found here https://github.com/mlcommons/mobile_open/releases.\n\nFor latency benchmarking, we benchmark end to end, excluding the time embedding calculation and the tokenizer. For accuracy calculations, the app adopts the CLIP metric for text-to-image consistency, and further evaluation of the generated images using this Image Quality Aesthetic Assessment metric https://github.com/idealo/image-quality-assessment/tree/master?tab=readme-ov-file",

   "resourceErrorMessage": "Some resources failed to load.\nIf you didn't change config from default you can try clearing the cache.\nIf you use a custom configuration file ensure that it has correct structure or switch back to default config.",
   "resourceErrorSelectTaskFile": "Update task configuration",
```

flutter/lib/ui/icons.dart

Lines changed: 4 additions & 6 deletions
```diff
@@ -26,10 +26,8 @@ class AppIcons {
       _pSvg('ic_task_image_classification_offline.svg');
   static final SvgPicture superResolution =
       _pSvg('ic_task_super_resolution.svg');
-
-  // TODO (anhappdev): update icon
   static final SvgPicture stableDiffusion =
-      _pSvg('ic_task_super_resolution.svg');
+      _pSvg('ic_task_stable_diffusion.svg');

   static final SvgPicture imageClassificationWhite =
       _pSvg('ic_task_image_classification_white.svg');
@@ -43,10 +41,8 @@ class AppIcons {
       _pSvg('ic_task_image_classification_offline_white.svg');
   static final SvgPicture superResolutionWhite =
       _pSvg('ic_task_super_resolution_white.svg');
-
-  // TODO (anhappdev): update icon
   static final SvgPicture stableDiffusionWhite =
-      _pSvg('ic_task_super_resolution_white.svg');
+      _pSvg('ic_task_stable_diffusion_white.svg');

   static final SvgPicture arrow = _pSvg('ic_arrow.svg');

@@ -71,6 +67,7 @@ class BenchmarkIcons {
     BenchmarkId.imageSegmentationV2: AppIcons.imageSegmentation,
     BenchmarkId.naturalLanguageProcessing: AppIcons.languageProcessing,
     BenchmarkId.superResolution: AppIcons.superResolution,
+    BenchmarkId.stableDiffusion: AppIcons.stableDiffusion,
     BenchmarkId.imageClassificationOfflineV2:
         AppIcons.imageClassificationOffline,
   };
@@ -81,6 +78,7 @@ class BenchmarkIcons {
     BenchmarkId.imageSegmentationV2: AppIcons.imageSegmentationWhite,
     BenchmarkId.naturalLanguageProcessing: AppIcons.languageProcessingWhite,
     BenchmarkId.superResolution: AppIcons.superResolutionWhite,
+    BenchmarkId.stableDiffusion: AppIcons.stableDiffusionWhite,
     BenchmarkId.imageClassificationOfflineV2:
         AppIcons.imageClassificationOfflineWhite,
   };
```
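A minimal sketch of how such an id-to-icon map resolves at a call site; the asset names mirror the diff above, but `iconFor` and the fallback asset are illustrative, and Dart map lookup yields null for ids without an entry:

```dart
// Illustrative id -> asset-name map with a null-aware fallback.
const iconSet = {
  'stable_diffusion': 'ic_task_stable_diffusion.svg',
  'super_resolution': 'ic_task_super_resolution.svg',
};

String iconFor(String benchmarkId) =>
    iconSet[benchmarkId] ?? 'ic_task_unknown.svg'; // hypothetical fallback

void main() {
  print(iconFor('stable_diffusion')); // ic_task_stable_diffusion.svg
  print(iconFor('no_such_task')); // ic_task_unknown.svg
}
```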
