quic · quic-khrahul · Oct 5, 2025 · Oct 4, 2025 · Oct 4, 2025 · Oct 4, 2025
diff --git a/GenAI-Solutions/GenAI-Studio/Image-Generation/Dockerfile b/GenAI-Solutions/GenAI-Studio/Image-Generation/Dockerfile
@@ -10,6 +10,9 @@ RUN apt-get update && \
     apt-get clean && \
     apt-get install -y \
         cmake make gcc g++ wget unzip git && \
+    apt-get install -y software-properties-common && \
+    apt-add-repository -s ppa:ubuntu-qcom-iot/qcom-ppa && \
+    apt install -y qcom-fastrpc1 qcom-libdmabufheap-dev qcom-fastrpc-dev qcom-dspservices-headers-dev && \
     rm -rf /var/lib/apt/lists/*
 
 RUN mkdir -p /app/Image-Generation/
@@ -21,7 +24,7 @@ RUN git clone https://github.com/quic/ai-engine-direct-helper.git --recursive &&
 COPY stable_diffusion_v1_5.py /app/Image-Generation/ai-engine-direct-helper/samples/python/stable_diffusion_v1_5/stable_diffusion_v1_5.py
 COPY utils.patch requirements.txt /app/Image-Generation/ai-engine-direct-helper/
 WORKDIR /app/Image-Generation/ai-engine-direct-helper
-RUN git apply utils.patch
+RUN git checkout 3fd2c54 && git apply utils.patch
 RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-aarch64.sh && \
     bash Miniconda3-latest-Linux-aarch64.sh -b && \
     rm Miniconda3-latest-Linux-aarch64.sh
@@ -30,17 +33,17 @@ RUN . ~/miniconda3/bin/activate && \
     conda create -n py312 python=3.12 && \
     conda activate py312 && \
     pip install -r requirements.txt
-RUN wget https://softwarecenter.qualcomm.com/api/download/software/sdks/Qualcomm_AI_Runtime_Community/All/2.34.0.250424/v2.34.0.250424.zip && \
-    unzip v2.34.0.250424.zip && chmod +x qairt/2.34.0.250424/bin/aarch64-oe-linux-gcc11.2/* && \
-    export  QNN_SDK_ROOT=/app/Image-Generation/ai-engine-direct-helper/qairt/2.34.0.250424/ && \
+RUN wget https://softwarecenter.qualcomm.com/api/download/software/sdks/Qualcomm_AI_Runtime_Community/All/2.38.0.250901/v2.38.0.250901.zip && \
+    unzip v2.38.0.250901.zip && chmod +x qairt/2.38.0.250901/bin/aarch64-oe-linux-gcc11.2/* && \
+    export  QNN_SDK_ROOT=/app/Image-Generation/ai-engine-direct-helper/qairt/2.38.0.250901/ && \
     . ~/miniconda3/bin/activate && \
     conda activate py312 && \
     python setup.py bdist_wheel && \
-    pip install dist/qai_appbuilder-2.34.0-cp312-cp312-linux_aarch64.whl && \
+    pip install dist/qai_appbuilder-2.38.0-cp312-cp312-linux_aarch64.whl && \
     mkdir -p /app/Image-Generation/ai-engine-direct-helper/samples/python/qai_libs/ && \
-    cp qairt/2.34.0.250424/lib/hexagon-v73/unsigned/* /app/Image-Generation/ai-engine-direct-helper/samples/python/qai_libs/ && \
-    cp qairt/2.34.0.250424/lib/aarch64-oe-linux-gcc11.2/* /app/Image-Generation/ai-engine-direct-helper/samples/python/qai_libs/ && \
-    rm -rf v2.34.0.250424.zip qairt
+    cp qairt/2.38.0.250901/lib/hexagon-v73/unsigned/* /app/Image-Generation/ai-engine-direct-helper/samples/python/qai_libs/ && \
+    cp qairt/2.38.0.250901/lib/aarch64-oe-linux-gcc11.2/* /app/Image-Generation/ai-engine-direct-helper/samples/python/qai_libs/ && \
+    rm -rf v2.38.0.250901.zip qairt
 RUN mv /root/miniconda3/envs/py312/lib/libstdc++.so.6 /root/miniconda3/envs/py312/lib/libstdc++.so.6.bak
 WORKDIR /app/Image-Generation/ai-engine-direct-helper/samples/python
 COPY run.sh /app/Image-Generation/ai-engine-direct-helper/samples/python/

diff --git a/GenAI-Solutions/GenAI-Studio/Image-Generation/stable_diffusion_v1_5.py b/GenAI-Solutions/GenAI-Studio/Image-Generation/stable_diffusion_v1_5.py
@@ -35,7 +35,7 @@
 UNET_MODEL_NAME             = MODEL_NAME + "_w8a16_quantized-unetquantizable-qualcomm_snapdragon_x_elite.bin"
 VAE_DECODER_MODEL_NAME      = MODEL_NAME + "_w8a16_quantized-vaedecoderquantizable-qualcomm_snapdragon_x_elite.bin"
 
-HUB_ID_H="185c2df6375b8219c30b5d6205387d2fee753f63"
+HUB_ID_H="ox06ibpbkxb4pr0mcyfe7wqgx5pf5r0cm3rf3dzi"
 
 TIMESTEP_EMBEDDING_MODEL_ID = "m7mrzdgxn"
 TOKENIZER_MODEL_NAME        = "openai/clip-vit-large-patch14"

diff --git a/GenAI-Solutions/GenAI-Studio/README.md b/GenAI-Solutions/GenAI-Studio/README.md
@@ -14,7 +14,7 @@ sudo add-apt-repository ppa:ubuntu-qcom-iot/qcom-ppa
 sudo apt update
 ```
 ```
-sudo apt install -y qcom-fastrpc1 qcom-libdmabufheap-dev qcom-fastrpc-dev qcom-dspservices-headers-dev libqnn1 qnn-tools libsnpe1 snpe-tools
+sudo apt install -y qcom-fastrpc1 qcom-libdmabufheap-dev qcom-fastrpc-dev qcom-dspservices-headers-dev libqnn1 qnn-tools libsnpe1 snpe-tools qcom-property-vault gstreamer1.0-plugins-qcom-mlmetaparser gstreamer1.0-plugins-qcom-mlvpose
 ```
 ### CDI setup
 
@@ -28,7 +28,7 @@ bash generate_cdi_json.sh
 ```
 ls /etc/cdi/docker-run-cdi-hw-acc.json
 ```
-
+##### Replace "/etc/cdi/docker-run-cdi-hw-acc.json" with [docker-run-cdi-hw-acc.json](docker-run-cdi-hw-acc.json) from this folder
 ```
 sudo chown -R ubuntu:ubuntu /opt/
 ```
@@ -39,7 +39,7 @@ snpe-platform-validator --runtime dsp
 ```
 #### Expected output
 SNPE is supported for runtime DSP on the device
-![image](https://github.qualcomm.com/aicatalog/genai-studio/assets/30177/a24ab16d-bec7-402e-aba1-05f7bc72e022)
+![DSP_Runtime](./assets/dsp_runtime.png)
 
 ## Docker Installation
 #### Update package index
@@ -77,6 +77,9 @@ sudo apt-get install -y docker-ce docker-ce-cli containerd.io  docker-compose
 ```
 sudo usermod -aG docker $USER
 ```
+```
+newgrp docker
+```
 
 ### Update /etc/docker/daemon.json
 ```
@@ -91,32 +94,87 @@ mkdir -p /etc/docker/
 }
 ```
 ```
-systemctl restart docker
+sudo systemctl restart docker
 ```
 ## Docker containers
-## Build container images (Linux x86)
-#### NOTE: Run below commands on x86 machine
+## Set up arm64 (QEMU) emulation and a buildx builder on an x86 machine
+```
+docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
+docker buildx rm builder
+docker buildx create --name builder --driver docker-container --use
+docker buildx inspect --bootstrap
+```
+## Build container images
+#### NOTE: When building on an x86 machine, pass **--platform=linux/arm64/v8** to each docker build command
+
+#### On Target
+```
+cd Speech-To-Text
+docker  build --progress=plain -t asr .
+docker save -o asr asr
+```
+```
+cd Text-Generation
+docker  build --progress=plain -t text2text . 
+docker save text2text -o text2text
+```
+
+```
+cd Text-To-Speech
+docker  build --progress=plain -t text2speech . 
+docker save text2speech -o text2speech
+```
+
+```
+cd Image-Generation
+docker  build --progress=plain -t text2image . 
+docker save text2image -o text2image
+```
+
+```
+cd web-ui
+docker  build --progress=plain -t web-ui . 
+docker save web-ui -o web-ui
+```
+#### On x86
+
 ```
 cd Speech-To-Text
-docker  build --progress=plain --platform=linux/arm64/v8  -t asr .
+docker  build --progress=plain --platform=linux/arm64/v8 -t asr .
 docker save -o asr asr
 ```
+
 ```
 cd Text-Generation
 docker  build --progress=plain --platform=linux/arm64/v8  -t text2text . 
+docker save text2text -o text2text
+```
+
+```
+cd Text-To-Speech
+docker  build --progress=plain --platform=linux/arm64/v8  -t text2speech . 
+docker save text2speech -o text2speech
+```
+
+```
+cd Image-Generation
+docker  build --progress=plain --platform=linux/arm64/v8  -t text2image . 
 docker save text2image -o text2image
 ```
+
 ```
 cd web-ui
 docker  build --progress=plain --platform=linux/arm64/v8  -t web-ui . 
-docker save text2image -o web-ui
+docker save web-ui -o web-ui
 ```
 
 ## Pre-built container images (aarch64)
 #### NOTE: Push container images to device following above and run below commands on target aarch64 device
 ```
 docker load -i asr
 docker load -i text2text
+docker load -i text2speech
+docker load -i text2image
 docker load -i web-ui
 ```
 ## LLM steps (Linux X86)
@@ -126,13 +184,19 @@ Follow https://github.com/quic/ai-hub-apps/tree/main/tutorials/llm_on_genie
 ```
 python -m qai_hub_models.models.llama_v3_8b_instruct.export --chipset qualcomm-snapdragon-x-elite --skip-inferencing --skip-profiling --output-dir genie_bundle
 ```
+##### NOTE: Push models folder genie_bundle to "/opt/" on target device
+## TTS steps (Linux X86)
+Follow https://qpm.qualcomm.com/#/main/tools/details/VoiceAI_TTS to generate models
+##### NOTE: Push models to "/opt/TTS_models/" on target device
+
 
 ## Start GenAI Studio (Target Device aarch64)
+### Push docker-compose.yml file to device
 ```
 docker-compose -f docker-compose.yml up -d
 ```
 #### Expected output
-![image](https://github.qualcomm.com/aicatalog/genai-studio/assets/30177/4b0e35aa-1fb6-4b7f-a8db-40b3562f40a8)
+![start_genai_studio](./assets/start_genai_studio.png)
 
 **NOTE:** If you face this error "CDI device injection failed: failed to inject devices: failed to stat CDI host device "/dev/kgsl-3d0": no such file or directory"
 
@@ -142,6 +206,9 @@ docker-compose -f docker-compose.yml up -d
    "path": "/dev/kgsl-3d0"
 },
 ```
+
+If there are any other errors, use "**[docker-run-cdi-hw-acc.json](docker-run-cdi-hw-acc.json)**" from this folder
+
 ### Network URL
 ```
 docker logs -f web-ui
@@ -163,4 +230,10 @@ Click on http://192.168.0.4:8501 to open webpage
 docker-compose -f docker-compose.yml down
 ```
 #### Expected output
-![image](https://github.qualcomm.com/aicatalog/genai-studio/assets/30177/6db82450-22fe-4c4b-8990-ab2caac5894e)
+![stop_genai_studio](./assets/stop_genai_studio.png)
+
+
+
+
+
+
diff --git a/GenAI-Solutions/GenAI-Studio/Speech-To-Text/Dockerfile b/GenAI-Solutions/GenAI-Studio/Speech-To-Text/Dockerfile
@@ -23,7 +23,7 @@ RUN git clone https://github.com/quic/ai-engine-direct-helper.git --recursive &&
 COPY whisper_base_en.py /app/Speech-To-Text/ai-engine-direct-helper/samples/python/whisper_base_en/whisper_base_en.py
 COPY utils.patch requirements.txt /app/Speech-To-Text/ai-engine-direct-helper/
 WORKDIR /app/Speech-To-Text/ai-engine-direct-helper
-RUN git apply utils.patch
+RUN git checkout 3fd2c54 && git apply utils.patch
 RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-aarch64.sh && \
     bash Miniconda3-latest-Linux-aarch64.sh -b && \
     rm Miniconda3-latest-Linux-aarch64.sh

diff --git a/GenAI-Solutions/GenAI-Studio/Text-Generation/README.md b/GenAI-Solutions/GenAI-Studio/Text-Generation/README.md
diff --git a/GenAI-Solutions/GenAI-Studio/Text-To-Speech/meloTTS/Dockerfile b/GenAI-Solutions/GenAI-Studio/Text-To-Speech/meloTTS/Dockerfile
@@ -0,0 +1,77 @@
+# ---------------------------------------------------------------------
+# Copyright (c) Qualcomm Innovation Center, Inc. All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+# ---------------------------------------------------------------------
+
+# Image for the Text-To-Speech (meloTTS) application. It installs the Qualcomm
+# FastRPC/DSP packages from the qcom PPA, the QAIRT 2.38.0.250901 runtime, and
+# an onnxruntime wheel built with QNN support inside a Python 3.10 conda env.
+
+# The base image platform is fixed to arm64 regardless of the build host.
+FROM --platform=arm64 ubuntu:24.04
+
+# Build tools plus the Qualcomm user-space packages from the qcom PPA.
+# DEBIAN_FRONTEND is exported only inside this RUN so package configuration
+# cannot stall the build on an interactive prompt and the setting does not
+# leak into the runtime environment. apt-get replaces apt because the apt CLI
+# is not meant for scripts, and -y keeps apt-add-repository from asking for
+# confirmation.
+RUN export DEBIAN_FRONTEND=noninteractive && \
+    apt-get update && \
+    apt-get install -y cmake make gcc g++ wget unzip git && \
+    apt-get install -y software-properties-common && \
+    apt-add-repository -y -s ppa:ubuntu-qcom-iot/qcom-ppa && \
+    apt-get install -y qcom-fastrpc1 qcom-libdmabufheap-dev qcom-fastrpc-dev qcom-dspservices-headers-dev && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# WORKDIR creates the directory when it is missing, so no mkdir is needed.
+WORKDIR /app/Text-To-Speech/
+
+COPY meloTTS_app.py OnnxRunnerHelper.py requirements.txt /app/Text-To-Speech/
+
+# Install Miniconda in batch mode; later steps activate it from ~/miniconda3.
+RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-aarch64.sh && \
+    bash Miniconda3-latest-Linux-aarch64.sh -b && \
+    rm Miniconda3-latest-Linux-aarch64.sh
+
+# One layer on purpose, so the SDK archive and the onnxruntime sources removed
+# at the end never persist in the image:
+#   1. unpack QAIRT and copy its binaries and libraries into system paths,
+#   2. create the py310 conda environment and install the Python requirements,
+#   3. build onnxruntime at a pinned commit with --use_qnn and install the wheel.
+# After the checkout, submodules are re-synced because the recursive clone
+# fetched them for the default branch and build.py runs with
+# --skip_submodule_sync. pip runs with --no-cache-dir to keep its download
+# cache out of the layer; conda create gets -y so it never waits for input.
+RUN wget https://softwarecenter.qualcomm.com/api/download/software/sdks/Qualcomm_AI_Runtime_Community/All/2.38.0.250901/v2.38.0.250901.zip && \
+    unzip v2.38.0.250901.zip && \
+    chmod +x qairt/2.38.0.250901/bin/aarch64-oe-linux-gcc11.2/* && \
+    mkdir -p /usr/lib/rfsa/adsp/ && \
+    cp qairt/2.38.0.250901/bin/aarch64-oe-linux-gcc11.2/* /usr/bin && \
+    cp qairt/2.38.0.250901/lib/hexagon-v73/unsigned/* /usr/lib/rfsa/adsp/ && \
+    cp qairt/2.38.0.250901/lib/aarch64-oe-linux-gcc11.2/* /usr/lib/ && \
+    . ~/miniconda3/bin/activate && \
+    conda tos accept && \
+    conda create -y -n py310 python=3.10.9 && \
+    conda activate py310 && \
+    pip install --no-cache-dir -r requirements.txt && \
+    pip install --no-cache-dir flask && \
+    python -m unidic download && \
+    git clone --recursive https://github.com/microsoft/onnxruntime && \
+    cd onnxruntime && \
+    git checkout e5678a133f121ed3ea514960ac53a6dd060ac4c3 && \
+    git submodule update --init --recursive && \
+    cd /app/Text-To-Speech/onnxruntime/tools/ci_build/ && \
+    python build.py --use_qnn --qnn_home=/app/Text-To-Speech/qairt/2.38.0.250901/ --build_wheel --skip_submodule_sync --config Release --build_dir /app/Text-To-Speech/onnxruntime/build/ --allow_running_as_root --parallel 8 --skip_tests && \
+    pip install --no-cache-dir /app/Text-To-Speech/onnxruntime/build/Release/dist/onnxruntime_qnn-1.23.0-cp310-cp310-linux_aarch64.whl && \
+    cd /app/Text-To-Speech/ && \
+    rm -rf v2.38.0.250901.zip qairt onnxruntime
+
+# Rename the conda environment's libstdc++.so.6 so it can no longer be found
+# under that name (presumably so the system libstdc++ is loaded instead --
+# confirm).
+RUN mv /root/miniconda3/envs/py310/lib/libstdc++.so.6 /root/miniconda3/envs/py310/lib/libstdc++.so.6.bak
+WORKDIR /app/Text-To-Speech/
+COPY run.sh /app/Text-To-Speech/run.sh
+RUN chmod +x run.sh