diff --git a/GenAI-Solutions/GenAI-Studio/Image-Generation/Dockerfile b/GenAI-Solutions/GenAI-Studio/Image-Generation/Dockerfile index 727893b..872f9eb 100644 --- a/GenAI-Solutions/GenAI-Studio/Image-Generation/Dockerfile +++ b/GenAI-Solutions/GenAI-Studio/Image-Generation/Dockerfile @@ -10,6 +10,9 @@ RUN apt-get update && \ apt-get clean && \ apt-get install -y \ cmake make gcc g++ wget unzip git && \ + apt-get install -y software-properties-common && \ + apt-add-repository -s ppa:ubuntu-qcom-iot/qcom-ppa && \ + apt install -y qcom-fastrpc1 qcom-libdmabufheap-dev qcom-fastrpc-dev qcom-dspservices-headers-dev && \ rm -rf /var/lib/apt/lists/* RUN mkdir -p /app/Image-Generation/ @@ -21,7 +24,7 @@ RUN git clone https://github.com/quic/ai-engine-direct-helper.git --recursive && COPY stable_diffusion_v1_5.py /app/Image-Generation/ai-engine-direct-helper/samples/python/stable_diffusion_v1_5/stable_diffusion_v1_5.py COPY utils.patch requirements.txt /app/Image-Generation/ai-engine-direct-helper/ WORKDIR /app/Image-Generation/ai-engine-direct-helper -RUN git apply utils.patch +RUN git checkout 3fd2c54 && git apply utils.patch RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-aarch64.sh && \ bash Miniconda3-latest-Linux-aarch64.sh -b && \ rm Miniconda3-latest-Linux-aarch64.sh @@ -30,17 +33,17 @@ RUN . ~/miniconda3/bin/activate && \ conda create -n py312 python=3.12 && \ conda activate py312 && \ pip install -r requirements.txt -RUN wget https://softwarecenter.qualcomm.com/api/download/software/sdks/Qualcomm_AI_Runtime_Community/All/2.34.0.250424/v2.34.0.250424.zip && \ - unzip v2.34.0.250424.zip && chmod +x qairt/2.34.0.250424/bin/aarch64-oe-linux-gcc11.2/* && \ - export QNN_SDK_ROOT=/app/Image-Generation/ai-engine-direct-helper/qairt/2.34.0.250424/ && \ +RUN wget https://softwarecenter.qualcomm.com/api/download/software/sdks/Qualcomm_AI_Runtime_Community/All/2.38.0.250901/v2.38.0.250901.zip && \ + unzip v2.38.0.250901.zip && chmod +x qairt/2.38.0.250901/bin/aarch64-oe-linux-gcc11.2/* && \ + export QNN_SDK_ROOT=/app/Image-Generation/ai-engine-direct-helper/qairt/2.38.0.250901/ && \ . 
~/miniconda3/bin/activate && \ conda activate py312 && \ python setup.py bdist_wheel && \ - pip install dist/qai_appbuilder-2.34.0-cp312-cp312-linux_aarch64.whl && \ + pip install dist/qai_appbuilder-2.38.0-cp312-cp312-linux_aarch64.whl && \ mkdir -p /app/Image-Generation/ai-engine-direct-helper/samples/python/qai_libs/ && \ - cp qairt/2.34.0.250424/lib/hexagon-v73/unsigned/* /app/Image-Generation/ai-engine-direct-helper/samples/python/qai_libs/ && \ - cp qairt/2.34.0.250424/lib/aarch64-oe-linux-gcc11.2/* /app/Image-Generation/ai-engine-direct-helper/samples/python/qai_libs/ && \ - rm -rf v2.34.0.250424.zip qairt + cp qairt/2.38.0.250901/lib/hexagon-v73/unsigned/* /app/Image-Generation/ai-engine-direct-helper/samples/python/qai_libs/ && \ + cp qairt/2.38.0.250901/lib/aarch64-oe-linux-gcc11.2/* /app/Image-Generation/ai-engine-direct-helper/samples/python/qai_libs/ && \ + rm -rf v2.38.0.250901.zip qairt RUN mv /root/miniconda3/envs/py312/lib/libstdc++.so.6 /root/miniconda3/envs/py312/lib/libstdc++.so.6.bak WORKDIR /app/Image-Generation/ai-engine-direct-helper/samples/python COPY run.sh /app/Image-Generation/ai-engine-direct-helper/samples/python/ diff --git a/GenAI-Solutions/GenAI-Studio/Image-Generation/stable_diffusion_v1_5.py b/GenAI-Solutions/GenAI-Studio/Image-Generation/stable_diffusion_v1_5.py index 1456bbf..6e530db 100644 --- a/GenAI-Solutions/GenAI-Studio/Image-Generation/stable_diffusion_v1_5.py +++ b/GenAI-Solutions/GenAI-Studio/Image-Generation/stable_diffusion_v1_5.py @@ -35,7 +35,7 @@ UNET_MODEL_NAME = MODEL_NAME + "_w8a16_quantized-unetquantizable-qualcomm_snapdragon_x_elite.bin" VAE_DECODER_MODEL_NAME = MODEL_NAME + "_w8a16_quantized-vaedecoderquantizable-qualcomm_snapdragon_x_elite.bin" -HUB_ID_H="185c2df6375b8219c30b5d6205387d2fee753f63" +HUB_ID_H="ox06ibpbkxb4pr0mcyfe7wqgx5pf5r0cm3rf3dzi" TIMESTEP_EMBEDDING_MODEL_ID = "m7mrzdgxn" TOKENIZER_MODEL_NAME = "openai/clip-vit-large-patch14" diff --git a/GenAI-Solutions/GenAI-Studio/README.md b/GenAI-Solutions/GenAI-Studio/README.md index 1c15fef..babdc81 100644 --- a/GenAI-Solutions/GenAI-Studio/README.md +++ b/GenAI-Solutions/GenAI-Studio/README.md @@ -14,7 +14,7 @@ sudo add-apt-repository ppa:ubuntu-qcom-iot/qcom-ppa sudo apt update ``` ``` -sudo apt install -y qcom-fastrpc1 qcom-libdmabufheap-dev qcom-fastrpc-dev qcom-dspservices-headers-dev libqnn1 qnn-tools libsnpe1 snpe-tools +sudo apt install -y qcom-fastrpc1 qcom-libdmabufheap-dev qcom-fastrpc-dev qcom-dspservices-headers-dev libqnn1 qnn-tools libsnpe1 snpe-tools qcom-property-vault gstreamer1.0-plugins-qcom-mlmetaparser gstreamer1.0-plugins-qcom-mlvpose ``` ### CDI setup @@ -28,7 +28,7 @@ bash generate_cdi_json.sh ``` ls /etc/cdi/docker-run-cdi-hw-acc.json ``` - +##### Replace "/etc/cdi/docker-run-cdi-hw-acc.json" with [docker-run-cdi-hw-acc.json](docker-run-cdi-hw-acc.json) from this folder ``` sudo chown -R ubuntu:ubuntu /opt/ ``` @@ -39,7 +39,7 @@ snpe-platform-validator --runtime dsp ``` #### Expected output SNPE is supported for runtime DSP on the device -![image](https://github.qualcomm.com/aicatalog/genai-studio/assets/30177/a24ab16d-bec7-402e-aba1-05f7bc72e022) +![DSP_Runtime](./assets/dsp_runtime.png) ## Docker Installation #### Update package index @@ -77,6 +77,9 @@ sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-compose ``` sudo usermod -aG docker $USER ``` +``` +newgrp docker +``` ### Update /etc/docker/daemon.json ``` @@ -91,25 +94,78 @@ mkdir -p /etc/docker/ } ``` ``` -systemctl restart docker +sudo systemctl restart docker ``` ## 
Docker containers
-## Build container images (Linux x86)
-#### NOTE: Run below commands on x86 machine
+## Steps to install the arm64 qemu Docker driver on an x86 machine
+```
+docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
+docker buildx rm builder
+docker buildx create --name builder --driver docker-container --use
+docker buildx inspect --bootstrap
+```
+## Build container images
+#### NOTE: When building on an x86 machine, add **--platform=linux/arm64/v8** to each `docker build` command, as shown in the "On x86" section below
+
+#### On Target
+```
+cd Speech-To-Text
+docker build --progress=plain -t asr .
+docker save -o asr asr
+```
+```
+cd Text-Generation
+docker build --progress=plain -t text2text .
+docker save text2text -o text2text
+```
+
+```
+cd Text-To-Speech
+docker build --progress=plain -t text2speech .
+docker save text2speech -o text2speech
+```
+
+```
+cd Image-Generation
+docker build --progress=plain -t text2image .
+docker save text2image -o text2image
+```
+
+```
+cd web-ui
+docker build --progress=plain -t web-ui .
+docker save web-ui -o web-ui
+```
+#### On x86
+
 ```
 cd Speech-To-Text
-docker build --progress=plain --platform=linux/arm64/v8 -t asr .
+docker build --progress=plain --platform=linux/arm64/v8 -t asr .
 docker save -o asr asr
 ```
+
 ```
 cd Text-Generation
 docker build --progress=plain --platform=linux/arm64/v8 -t text2text .
+docker save text2text -o text2text
+```
+
+```
+cd Text-To-Speech
+docker build --progress=plain --platform=linux/arm64/v8 -t text2speech .
+docker save text2speech -o text2speech
+```
+
+```
+cd Image-Generation
+docker build --progress=plain --platform=linux/arm64/v8 -t text2image .
 docker save text2image -o text2image
 ```
+
 ```
 cd web-ui
 docker build --progress=plain --platform=linux/arm64/v8 -t web-ui .
-docker save text2image -o web-ui
+docker save web-ui -o web-ui
 ```

 ## Pre-built container images (aarch64)
@@ -117,6 +173,8 @@ docker save text2image -o web-ui
 ```
 docker load -i asr
 docker load -i text2text
+docker load -i text2speech
+docker load -i text2image
 docker load -i web-ui
 ```
 ## LLM steps (Linux X86)
@@ -126,13 +184,19 @@ Follow https://github.com/quic/ai-hub-apps/tree/main/tutorials/llm_on_genie
 ```
 python -m qai_hub_models.models.llama_v3_8b_instruct.export --chipset qualcomm-snapdragon-x-elite --skip-inferencing --skip-profiling --output-dir genie_bundle
 ```
+##### NOTE: Push the generated genie_bundle models folder to "/opt/" on the target device
+## TTS steps (Linux x86)
+Follow https://qpm.qualcomm.com/#/main/tools/details/VoiceAI_TTS to generate the models
+##### NOTE: Push the models to "/opt/TTS_models/" on the target device
+
 ## Start GenAI Studio (Target Device aarch64)
+### Push the docker-compose.yml file to the device
 ```
 docker-compose -f docker-compose.yml up -d
 ```
 #### Expected output
-![image](https://github.qualcomm.com/aicatalog/genai-studio/assets/30177/4b0e35aa-1fb6-4b7f-a8db-40b3562f40a8)
+![start_genai_studio](./assets/start_genai_studio.png)

 **NOTE:** If you face this error "CDI device injection failed: failed to inject devices: failed to stat CDI host device "/dev/kgsl-3d0": no such file or directory"
@@ -142,6 +206,9 @@ docker-compose -f docker-compose.yml up -d
 "path": "/dev/kgsl-3d0"
 },
 ```
+
+If you hit any other CDI errors, use the "**[docker-run-cdi-hw-acc.json](docker-run-cdi-hw-acc.json)**" provided in this folder
+
 ### Network URL
 ```
 docker logs -f web-ui
 ```
 Click on http://192.168.0.4:8501 to open webpage
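+To sanity-check a backend without the web UI, you can post to its service port directly. Below is a minimal Python sketch for the TTS service (`/generate` on port 8083 matches the Flask app and docker-compose in this repo); the device IP shown is an example and the `requests` package is assumed to be installed:
+```
+import requests
+
+# Example device IP; use the address reported by ifconfig on the target.
+resp = requests.post(
+    "http://192.168.0.4:8083/generate",
+    json={"text": "Hello from GenAI Studio"},
+)
+resp.raise_for_status()
+
+# The service responds with a WAV payload.
+with open("tts_output.wav", "wb") as f:
+    f.write(resp.content)
+```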
 ```
 docker-compose -f docker-compose.yml down
 ```
 #### Expected output
-![image](https://github.qualcomm.com/aicatalog/genai-studio/assets/30177/6db82450-22fe-4c4b-8990-ab2caac5894e)
+![stop_genai_studio](./assets/stop_genai_studio.png)
+
+
+
+
+
+
diff --git a/GenAI-Solutions/GenAI-Studio/Speech-To-Text/Dockerfile b/GenAI-Solutions/GenAI-Studio/Speech-To-Text/Dockerfile
index c10d1b5..13fb9fe 100644
--- a/GenAI-Solutions/GenAI-Studio/Speech-To-Text/Dockerfile
+++ b/GenAI-Solutions/GenAI-Studio/Speech-To-Text/Dockerfile
@@ -23,7 +23,7 @@ RUN git clone https://github.com/quic/ai-engine-direct-helper.git --recursive &&
 COPY whisper_base_en.py /app/Speech-To-Text/ai-engine-direct-helper/samples/python/whisper_base_en/whisper_base_en.py
 COPY utils.patch requirements.txt /app/Speech-To-Text/ai-engine-direct-helper/
 WORKDIR /app/Speech-To-Text/ai-engine-direct-helper
-RUN git apply utils.patch
+RUN git checkout 3fd2c54 && git apply utils.patch
 RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-aarch64.sh && \
     bash Miniconda3-latest-Linux-aarch64.sh -b && \
     rm Miniconda3-latest-Linux-aarch64.sh
diff --git a/GenAI-Solutions/GenAI-Studio/Text-Generation/README.md b/GenAI-Solutions/GenAI-Studio/Text-Generation/README.md
deleted file mode 100644
index 05f3b54..0000000
--- a/GenAI-Solutions/GenAI-Studio/Text-Generation/README.md
+++ /dev/null
@@ -1,79 +0,0 @@
-# CLI Chat application
-
-Chat application for Windows on Snapdragon® demonstrating a large language model (LLM, e.g., [Llama 3.2 3B](https://aihub.qualcomm.com/compute/models/llama_v3_2_3b_instruct)) using Genie SDK.
-
-The app demonstrates how to use the Genie APIs from [QAIRT SDK](https://qpm.qualcomm.com/#/main/tools/details/Qualcomm_AI_Runtime_SDK) to run and accelerate LLMs using the Snapdragon® Neural Processing Unit (NPU).
-
-## Requirements
-
-### Platform
-
-- Lemans
-
-## Lllama models
-1. Follow https://github.com/quic/ai-hub-apps/tree/main/tutorials/llm_on_genie to generate models
-2. Push models to /opt/ folder onm device
-## Build App
-1. Build docker image
-```
-docker build --progress=plain --platform=linux/arm64/v8 -t llama-server . --no-cache
-```
-2.Save docker image
-```
-docker save llama-server -o llama-server
-```
-3. Push docker to device
-
-4. Load image
-```
-docker load -i llama-server
-```
-5. 
start docker -``` -docker run -d --net host --device /dev/dri/card0 --device /dev/dri/renderD128 --device /dev/kgsl-3d0 --device /dev/video32 --device /dev/video33 --device /dev/dma_heap/system --device /dev/dma_heap/qcom,system \ ---device /dev/fastrpc-cdsp --device /dev/fastrpc-cdsp1 -v /usr/lib/libatomic.so.1:/usr/lib/libatomic.so.1 -v /usr/lib/libatomic.so.1.2.0:/usr/lib/libatomic.so.1.2.0 -v /dev/socket/weston:/dev/socket/weston \ --v /usr/lib/libgbm.so.1:/usr/lib/libgbm.so.1 -v /usr/lib/libgsl.so:/usr/lib/libgsl.so -v /usr/lib/libdmabufheap.so.0:/usr/lib/libdmabufheap.so.0 \ --v /usr/lib/libcalculator.so:/usr/lib/libcalculator.so -v /usr/lib/libGenie.so:/usr/lib/libGenie.so -v /usr/lib/libQnnGenAiTransformer.so:/usr/lib/libQnnGenAiTransformer.so \ --v /usr/lib/libQnnGenAiTransformerCpuOpPkg.so:/usr/lib/libQnnGenAiTransformerCpuOpPkg.so -v /usr/lib/libQnnGenAiTransformerModel.so:/usr/lib/libQnnGenAiTransformerModel.so \ --v /usr/lib/libQnnHtpV73CalculatorStub.so:/usr/lib/libQnnHtpV73CalculatorStub.so -v /usr/lib/libSnpeHtpV73CalculatorStub.so:/usr/lib/libSnpeHtpV73CalculatorStub.so \ --v /usr/lib/libhta_hexagon_runtime_snpe.so:/usr/lib/libhta_hexagon_runtime_snpe.so -v /usr/lib/libPlatformValidatorShared.so:/usr/lib/libPlatformValidatorShared.so -v /usr/lib/libSNPE.so:/usr/lib/libSNPE.so \ --v /usr/lib/libSnpeDspV66Stub.so:/usr/lib/libSnpeDspV66Stub.so -v /usr/lib/libSnpeHta.so:/usr/lib/libSnpeHta.so -v /usr/lib/libSnpeHtpPrepare.so:/usr/lib/libSnpeHtpPrepare.so \ --v /usr/lib/libSnpeHtpV73Stub.so:/usr/lib/libSnpeHtpV73Stub.so -v /usr/lib/libQnnChrometraceProfilingReader.so:/usr/lib/libQnnChrometraceProfilingReader.so -v /usr/lib/libQnnGpu.so:/usr/lib/libQnnGpu.so \ --v /usr/lib/libQnnHtpProfilingReader.so:/usr/lib/libQnnHtpProfilingReader.so -v /usr/lib/libQnnCpu.so:/usr/lib/libQnnCpu.so -v /usr/lib/libQnnDspV66Stub.so:/usr/lib/libQnnDspV66Stub.so \ --v /usr/lib/libQnnHtpNetRunExtensions.so:/usr/lib/libQnnHtpNetRunExtensions.so -v /usr/lib/libQnnHtp.so:/usr/lib/libQnnHtp.so -v /usr/lib/rfsa/adsp/libCalculator_skel.so:/usr/lib/rfsa/adsp/libCalculator_skel.so \ --v /usr/lib/libQnnJsonProfilingReader.so:/usr/lib/libQnnJsonProfilingReader.so -v /usr/lib/libQnnDspNetRunExtensions.so:/usr/lib/libQnnDspNetRunExtensions.so \ --v /usr/lib/libQnnGpuNetRunExtensions.so:/usr/lib/libQnnGpuNetRunExtensions.so -v /usr/lib/libQnnHtpOptraceProfilingReader.so:/usr/lib/libQnnHtpOptraceProfilingReader.so -v /usr/lib/libQnnSaver.so:/usr/lib/libQnnSaver.so \ --v /usr/lib/libQnnDsp.so:/usr/lib/libQnnDsp.so -v /usr/lib/libQnnGpuProfilingReader.so:/usr/lib/libQnnGpuProfilingReader.so -v /usr/lib/libQnnHtpPrepare.so:/usr/lib/libQnnHtpPrepare.so \ --v /usr/lib/libQnnSystem.so:/usr/lib/libQnnSystem.so -v /usr/lib/libQnnHtpV73Stub.so:/usr/lib/libQnnHtpV73Stub.so -v /usr/lib/rfsa/adsp/libSnpeHtpV73Skel.so:/usr/lib/rfsa/adsp/libSnpeHtpV73Skel.so \ --v /usr/lib/rfsa/adsp/libQnnHtpV73Skel.so:/usr/lib/rfsa/adsp/libQnnHtpV73Skel.so -v /usr/lib/rfsa/adsp/libQnnHtpV73.so:/usr/lib/rfsa/adsp/libQnnHtpV73.so \ --v /usr/lib/rfsa/adsp/libQnnHtpV73QemuDriver.so:/usr/lib/rfsa/adsp/libQnnHtpV73QemuDriver.so -v /usr/lib/rfsa/adsp/libQnnSaver.so:/usr/lib/rfsa/adsp/libQnnSaver.so \ --v /usr/lib/rfsa/adsp/libQnnSystem.so:/usr/lib/rfsa/adsp/libQnnSystem.so -v /usr/lib/libenv_time.so:/usr/lib/libenv_time.so -v /usr/lib/libevaluation_proto.so:/usr/lib/libevaluation_proto.so \ --v /usr/lib/libimage_metrics.so:/usr/lib/libimage_metrics.so -v /usr/lib/libjpeg_internal.so:/usr/lib/libjpeg_internal.so -v 
/usr/lib/libtensorflowlite_c.so:/usr/lib/libtensorflowlite_c.so \ --v /usr/lib/libtf_logging.so:/usr/lib/libtf_logging.so -v /usr/lib/libIB2C.so:/usr/lib/libIB2C.so -v /usr/lib/libEGL_adreno.so:/usr/lib/libEGL_adreno.so -v /usr/lib/libGLESv2_adreno.so:/usr/lib/libGLESv2_adreno.so \ --v /usr/lib/libpropertyvault.so.0:/usr/lib/libpropertyvault.so.0 -v /usr/lib/libpropertyvault.so.0.0.0:/usr/lib/libpropertyvault.so.0.0.0 -v /usr/lib/libwayland-client.so.0:/usr/lib/libwayland-client.so.0 \ --v /usr/lib/libwayland-egl.so.1:/usr/lib/libwayland-egl.so.1 -v /usr/lib/libadreno_utils.so:/usr/lib/libadreno_utils.so -v /usr/lib/libCB.so:/usr/lib/libCB.so -v /usr/lib/libEGL.so:/usr/lib/libEGL.so \ --v /usr/lib/libEGL.so.1:/usr/lib/libEGL.so.1 -v /usr/lib/libEGL.so.1.0:/usr/lib/libEGL.so.1.0 -v /usr/lib/libEGL.so.1.0.0:/usr/lib/libEGL.so.1.0.0 -v /usr/lib/libeglSubDriverWayland.so:/usr/lib/libeglSubDriverWayland.so \ --v /usr/lib/libGLESv1_CM.so:/usr/lib/libGLESv1_CM.so -v /usr/lib/libGLESv1_CM.so.1:/usr/lib/libGLESv1_CM.so.1 -v /usr/lib/libGLESv1_CM.so.1.0:/usr/lib/libGLESv1_CM.so.1.0 \ --v /usr/lib/libGLESv1_CM.so.1.0.0:/usr/lib/libGLESv1_CM.so.1.0.0 -v /usr/lib/libGLESv1_CM_adreno.so:/usr/lib/libGLESv1_CM_adreno.so -v /usr/lib/libGLESv2.so:/usr/lib/libGLESv2.so \ --v /usr/lib/libGLESv2.so.2:/usr/lib/libGLESv2.so.2 -v /usr/lib/libGLESv2.so.2.0:/usr/lib/libGLESv2.so.2.0 -v /usr/lib/libGLESv2.so.2.0.0:/usr/lib/libGLESv2.so.2.0.0 -v /usr/lib/libllvm-glnext.so:/usr/lib/libllvm-glnext.so \ --v /usr/lib/libllvm-qcom.so:/usr/lib/libllvm-qcom.so -v /usr/lib/libllvm-qgl.so:/usr/lib/libllvm-qgl.so -v /usr/lib/libOpenCL.so:/usr/lib/libOpenCL.so -v /usr/lib/libOpenCL_adreno.so:/usr/lib/libOpenCL_adreno.so \ --v /usr/lib/libq3dtools_adreno.so:/usr/lib/libq3dtools_adreno.so -v /usr/lib/libq3dtools_esx.so:/usr/lib/libq3dtools_esx.so -v /usr/lib/libvulkan_adreno.so:/usr/lib/libvulkan_adreno.so \ --v /usr/lib/libQnnTFLiteDelegate.so:/usr/lib/libQnnTFLiteDelegate.so -v /usr/lib/libadsprpc.so:/usr/lib/libadsprpc.so -v /usr/lib/libcdsprpc.so:/usr/lib/libcdsprpc.so -v /usr/lib/libfastcvopt.so:/usr/lib/libfastcvopt.so \ --v /usr/lib/libfastcvdsp_stub.so:/usr/lib/libfastcvdsp_stub.so -v /usr/lib/libdmabufheap.so.0.0.0:/usr/lib/libdmabufheap.so.0.0.0 -v /usr/lib/dsp/cdsp/libc++.so.1:/usr/lib/dsp/cdsp/libc++.so.1 \ --v /usr/lib/dsp/cdsp/libc++abi.so.1:/usr/lib/dsp/cdsp/libc++abi.so.1 -v /usr/lib/dsp/cdsp1/libc++.so.1:/usr/lib/dsp/cdsp1/libc++.so.1 -v /usr/lib/dsp/cdsp1/libc++abi.so.1:/usr/lib/dsp/cdsp1/libc++abi.so.1 \ --v /usr/lib/dsp/cdsp1/fastrpc_shell_unsigned_4:/usr/lib/dsp/cdsp1/fastrpc_shell_unsigned_4 \ --v /opt/:/opt/ \ --h llama-server --name llama-server -it -d llama-server -``` - - - - - - - - - - - - diff --git a/GenAI-Solutions/GenAI-Studio/Text-To-Speech/meloTTS/Dockerfile b/GenAI-Solutions/GenAI-Studio/Text-To-Speech/meloTTS/Dockerfile new file mode 100644 index 0000000..52f767d --- /dev/null +++ b/GenAI-Solutions/GenAI-Studio/Text-To-Speech/meloTTS/Dockerfile @@ -0,0 +1,50 @@ +# --------------------------------------------------------------------- +# Copyright (c) Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# --------------------------------------------------------------------- + +FROM --platform=arm64 ubuntu:24.04 + +# Update the package list +RUN apt-get update && \ + apt-get clean && \ + apt-get install -y cmake make gcc g++ wget unzip git && \ + apt-get install -y software-properties-common && \ + apt-add-repository -s ppa:ubuntu-qcom-iot/qcom-ppa && \ + apt install -y qcom-fastrpc1 qcom-libdmabufheap-dev qcom-fastrpc-dev qcom-dspservices-headers-dev && \ + rm -rf /var/lib/apt/lists/* + +RUN mkdir -p /app/Text-To-Speech/ +WORKDIR /app/Text-To-Speech/ + +COPY meloTTS_app.py OnnxRunnerHelper.py requirements.txt /app/Text-To-Speech/ +RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-aarch64.sh && \ + bash Miniconda3-latest-Linux-aarch64.sh -b && \ + rm Miniconda3-latest-Linux-aarch64.sh +RUN wget https://softwarecenter.qualcomm.com/api/download/software/sdks/Qualcomm_AI_Runtime_Community/All/2.38.0.250901/v2.38.0.250901.zip && \ + unzip v2.38.0.250901.zip && \ + chmod +x qairt/2.38.0.250901/bin/aarch64-oe-linux-gcc11.2/* && \ + mkdir -p /usr/lib/rfsa/adsp/ && \ + cp qairt/2.38.0.250901/bin/aarch64-oe-linux-gcc11.2/* /usr/bin && \ + cp qairt/2.38.0.250901/lib/hexagon-v73/unsigned/* /usr/lib/rfsa/adsp/ && \ + cp qairt/2.38.0.250901/lib/aarch64-oe-linux-gcc11.2/* /usr/lib/ && \ + . ~/miniconda3/bin/activate && \ + conda tos accept && \ + conda create -n py310 python=3.10.9 && \ + conda activate py310 && \ + pip install -r requirements.txt && \ + pip install flask && \ + python -m unidic download && \ + git clone --recursive https://github.com/microsoft/onnxruntime && \ + cd onnxruntime && \ + git checkout e5678a133f121ed3ea514960ac53a6dd060ac4c3 && \ + cd /app/Text-To-Speech/onnxruntime/tools/ci_build/ && \ + python build.py --use_qnn --qnn_home=/app/Text-To-Speech/qairt/2.38.0.250901/ --build_wheel --skip_submodule_sync --config Release --build_dir /app/Text-To-Speech/onnxruntime/build/ --allow_running_as_root --parallel 8 --skip_tests && \ + pip install /app/Text-To-Speech/onnxruntime/build/Release/dist/onnxruntime_qnn-1.23.0-cp310-cp310-linux_aarch64.whl && \ + cd /app/Text-To-Speech/ && \ + rm -rf v2.38.0.250901.zip qairt onnxruntime + +RUN mv /root/miniconda3/envs/py310/lib/libstdc++.so.6 /root/miniconda3/envs/py310/lib/libstdc++.so.6.bak +WORKDIR /app/Text-To-Speech/ +COPY run.sh /app/Text-To-Speech/run.sh +RUN chmod +x run.sh diff --git a/GenAI-Solutions/GenAI-Studio/Text-To-Speech/meloTTS/OnnxRunnerHelper.py b/GenAI-Solutions/GenAI-Studio/Text-To-Speech/meloTTS/OnnxRunnerHelper.py new file mode 100644 index 0000000..d45fc4c --- /dev/null +++ b/GenAI-Solutions/GenAI-Studio/Text-To-Speech/meloTTS/OnnxRunnerHelper.py @@ -0,0 +1,131 @@ +# --------------------------------------------------------------------- +# Copyright (c) Qualcomm Innovation Center, Inc. All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause
+# ---------------------------------------------------------------------
+
+import onnxruntime as ort
+import numpy as np
+import time
+import logging
+
+logger = logging.getLogger("OnnxRuntimeHelper")
+
+if not logger.handlers:
+    handler = logging.StreamHandler()
+    formatter = logging.Formatter('%(asctime)s - %(module)s:%(lineno)d - %(levelname)s - %(message)s')
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+
+class ONNXRunner:
+    def __init__(self):
+        pass
+
+    def create_ort_session(self, model_path, model_name, ep, backend_path, debug_log):
+        session_opts = ort.SessionOptions()
+        if debug_log:
+            session_opts.log_severity_level = 0
+        if ep == 'cpu':
+            start_time = time.time()
+            session = ort.InferenceSession(model_path, sess_options=session_opts)
+            end_time = time.time()
+        elif ep == 'npu':
+            # Default to the QNN HTP backend when none is supplied.
+            backend_path = "/usr/lib/libQnnHtp.so" if backend_path is None else backend_path
+            if hasattr(self, 'generate_context') and self.generate_context:
+                session_opts.add_session_config_entry("ep.context_enable", "1")
+            PROVIDER_OPTIONS = [{
+                "backend_path": backend_path,
+                "htp_performance_mode": "burst",
+            }]
+            EXECUTION_PROVIDER = ["QNNExecutionProvider"]
+            start_time = time.time()
+            session = ort.InferenceSession(
+                model_path,
+                sess_options=session_opts,
+                providers=EXECUTION_PROVIDER,
+                provider_options=PROVIDER_OPTIONS
+            )
+            end_time = time.time()
+        else:
+            logger.error(f"Unsupported execution provider: {ep}")
+            return None
+
+        session_creation_time = (end_time - start_time) * 1000
+        if logger.isEnabledFor(logging.DEBUG):
+            self.display_model_info(session, framework=ep)
+        logger.info(f"Session Creation Time {model_name} : {session_creation_time:.4f} ms")
+        logger.debug("************************************************************************************")
+        return session
+
+    def get_input_and_output_names(self, session):
+        input_names = [node.name for node in session.get_inputs()]
+        output_names = [node.name for node in session.get_outputs()]
+        return input_names, output_names
+
+    def get_random_input_data(self, session):
+        model_input_list = []
+        input_data_list = [[]]
+        for inp_lists in input_data_list:
+            input_data = {}
+            for i, node in enumerate(session.get_inputs()):
+                input_shape = node.shape
+                np_dtype = self.get_input_data(node.type)
+                input_name = node.name
+                img = np.random.uniform(low=0.01, high=1.0, size=(input_shape))
+                if np_dtype == np.int8:
+                    img *= 255
+                elif np_dtype == np.int16:
+                    img *= 65535
+                img = img.astype(np_dtype)
+                input_data[input_name] = img
+            model_input_list.append(input_data)
+        return model_input_list[0] if len(model_input_list) == 1 else model_input_list
+
+    def get_onnx_model_input(self, session, inp_name_list, data_list):
+        result = {}
+        for sess in session.get_inputs():
+            new_name = ''
+            input_name = sess.name
+            un_quant_name = input_name.replace("_dq", '') if "_dq" in input_name else input_name
+            if input_name in inp_name_list:
+                new_name = input_name
+            elif un_quant_name in inp_name_list:
+                new_name = un_quant_name
+            else:
+                logger.error(f"Session name {input_name} not in passed name list: {inp_name_list}")
+                return None
+            dtype = self.get_input_data(str(sess.type))
+            result[input_name] = data_list[inp_name_list.index(new_name)].astype(dtype)
+            logger.debug(f"name : {input_name} {result[input_name].shape}, {type(result[input_name])} {result[input_name].dtype}")
+        return result
+
+    def get_input_data(self, onnx_dtype):
+        # Maps an ONNX type string to the matching numpy dtype.
+        if onnx_dtype == "tensor(float)":
+            return np.float32
+        elif onnx_dtype == "tensor(float16)":
+            return np.float16
+        elif onnx_dtype == "tensor(uint64)":
+            return np.uint64
+        elif onnx_dtype == "tensor(int64)":
+            return np.int64
+        elif onnx_dtype == "tensor(uint32)":
+            return np.uint32
+        elif onnx_dtype == "tensor(int32)":
+            return np.int32
+        else:
+            logger.error(f"Datatype not implemented: {onnx_dtype}")
+            return None
+
+    def display_model_info(self, session, framework='CPU'):
+        logger.debug("Model Input:")
+        for node in session.get_inputs():
+            logger.debug(f"name: {node.name}, shape: {node.shape}, dtype: {self.get_input_data(node.type)}")
+        logger.debug("Model Output:")
+        for node in session.get_outputs():
+            logger.debug(f"name: {node.name}, shape: {node.shape}, dtype: {self.get_input_data(node.type)}")
+
+    def execute(self, session, input_data, output_name=None):
+        start_time = time.time()
+        output = session.run(output_name, input_data)
+        end_time = time.time()
+        enc_time = (end_time - start_time) * 1000
+        return output, enc_time
diff --git a/GenAI-Solutions/GenAI-Studio/Text-To-Speech/meloTTS/meloTTS_app.py b/GenAI-Solutions/GenAI-Studio/Text-To-Speech/meloTTS/meloTTS_app.py
new file mode 100644
index 0000000..bc11b70
--- /dev/null
+++ b/GenAI-Solutions/GenAI-Studio/Text-To-Speech/meloTTS/meloTTS_app.py
@@ -0,0 +1,422 @@
+# ---------------------------------------------------------------------
+# Copyright (c) Qualcomm Innovation Center, Inc. All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+# ---------------------------------------------------------------------
+import argparse
+import numpy as np
+import torch
+import soundfile as sf
+from melo.api import TTS
+import os
+import time
+import nltk
+try:
+    nltk.data.find('taggers/averaged_perceptron_tagger_eng')
+except LookupError:
+    nltk.download('averaged_perceptron_tagger_eng')
+
+
+torch.manual_seed(0)
+np.random.seed(0)
+from OnnxRunnerHelper import ONNXRunner
+MAX_SEQ_LEN = 512
+MAX_NUM_INPUT_IDS = 50
+NUM_BLOCKS = 4
+# The decoder consumes fixed windows of MAX_DEC_SEQ_LEN frames with
+# DEC_SEQ_OVERLAP frames of context on each side; every frame expands to
+# UPSAMPLE_FACTOR audio samples.
+MAX_DEC_SEQ_LEN = 40
+DEC_SEQ_OVERLAP = 12
+UPSAMPLE_FACTOR = 512
+BUF_DEBUG = 1
+model = None
+language = None
+
+import logging
+from flask import Flask, request, send_file, jsonify
+
+logger = logging.getLogger("meloTTS")
+app = Flask(__name__)
+
+if not logger.handlers:
+    handler = logging.StreamHandler()
+    formatter = logging.Formatter('%(asctime)s - %(module)s:%(lineno)d - %(levelname)s - %(message)s')
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+
+logger.setLevel(logging.INFO)
+
+
+
+# generate_path turns per-phoneme durations into a hard alignment path.
+# Illustration: durations [[[2, 1]]] with t_y = 3 give cumulative sums
+# [2, 3]; frames 0-1 fall under the first phoneme and frame 2 under the
+# second, so the (t_y x t_x) path is [[1, 0], [1, 0], [0, 1]] before masking.
+def generate_path(duration, mask):
+    b, _, t_y, t_x = mask.shape
+    cum_duration = np.cumsum(duration, axis=-1)
+    time_indices = np.arange(t_y)[None, None, None, :]
+    cum_duration_broadcast = cum_duration[:, :, :, None]
+    path = (time_indices < cum_duration_broadcast).astype(np.float32)
+    path = np.diff(path, axis=2, prepend=0)
+    path = path.transpose(0, 1, 3, 2)
+    return path * mask
+
+class OnnxTTS:
+
+    def __init__(self, encoder_model_path, flow_model_path, decoder_model_path,
+                 charsiu_encoder_path=None, charsiu_decoder_path=None, bert_path=None,
+                 language="ENGLISH", ep='cpu', backend_path=None, debug=False,
+                 gen_ctx=None):
+        self.ep = ep
+        self.backend_path = backend_path
+        self.debug = debug
+
+        self.generate_context = gen_ctx
+        # self.enable_g2p = enable_g2p
+        self.ort_runer = ONNXRunner()
+
+        self.encoder = self.ort_runer.create_ort_session(encoder_model_path, "Encoder", self.ep, self.backend_path, self.debug)
+        self.flow = self.ort_runer.create_ort_session(flow_model_path, "Flow", self.ep, self.backend_path, self.debug)
+        self.decoder = self.ort_runer.create_ort_session(decoder_model_path, "Decoder", self.ep, self.backend_path, self.debug)
+
+        self.encoder_input_names, self.encoder_output_names = self.ort_runer.get_input_and_output_names(self.encoder)
+        self.decoder_input_names, self.decoder_output_names = self.ort_runer.get_input_and_output_names(self.decoder)
+        self.flow_input_names, self.flow_output_names = self.ort_runer.get_input_and_output_names(self.flow)
+
+        self.enable_bert = False
+        if language == "ENGLISH":
+            self.TTS_language = "EN_NEWEST"
+            self.enable_bert = True
+        elif language == "SPANISH":
+            self.TTS_language = "ES"
+        # elif language == "CHINESE":
+        #     self.TTS_language = "ZH"
+
+        self.tts = TTS(language=self.TTS_language, device="cpu")
+        self.language = language
+        self.max_seq_len = MAX_SEQ_LEN
+        self.get_text_for_tts_infer = None
+        if self.enable_bert:
+            from melo.text import cleaned_text_to_sequence
+            from melo.text.cleaner import clean_text
+            from melo import commons
+            from transformers import AutoTokenizer
+            self.text_normalize = None
+            if language == "ENGLISH":
+                from melo.text.english import text_normalize
+                self.text_normalize = text_normalize
+            # elif language == "CHINESE":
+            #     from melo.text.chinese import text_normalize
+            #     self.text_normalize = text_normalize
+            self.commons = commons
+            self.model_id = 'bert-base-uncased'
+            self.sent_tokenizer = AutoTokenizer.from_pretrained(self.model_id)
+            self.clean_text = clean_text
+            self.cleaned_text_to_sequence = cleaned_text_to_sequence
+            self.bert = self.ort_runer.create_ort_session(bert_path, "BERT", self.ep, self.backend_path, self.debug)
+            self.bert_input_names, self.bert_output_names = self.ort_runer.get_input_and_output_names(self.bert)
+        else:
+            from melo.utils import get_text_for_tts_infer
+            self.get_text_for_tts_infer = get_text_for_tts_infer
+
+
+
+    def get_bert_feature(self, text, word2ph):
+        logger.debug(f"text : {text}")
+        inputs = self.sent_tokenizer(text, padding='max_length', max_length=200, return_tensors="pt")
+        inputs = {k: v.numpy() for k, v in inputs.items()}
+        bert_input = self.ort_runer.get_onnx_model_input(self.bert, list(inputs.keys()), list(inputs.values()))
+        res, self.bert_exe_time = self.ort_runer.execute(self.bert, bert_input)
+        res = torch.tensor(res[0].squeeze())
+        logger.debug(f"res : {res.shape}")
+        logger.debug(f"word2ph len : {len(word2ph)}")
+        logger.debug(f"word2ph : {word2ph}")
+        word2phone = word2ph
+        phone_level_feature = []
+        for i in range(len(word2phone)):
+            repeat_feature = res[i].repeat(word2phone[i], 1)
+            phone_level_feature.append(repeat_feature)
+        phone_level_feature = torch.cat(phone_level_feature, dim=0)
+        return phone_level_feature.T
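+    # Note on get_bert_feature above: the ONNX BERT runs once per sentence;
+    # each token's hidden vector is then repeated word2ph[i] times so the
+    # sequence is expanded from token resolution to phoneme resolution, and
+    # the transpose yields a (hidden_dim, n_phones) feature matrix.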
+
+    # def clean_text_modify(self, text):
+    #     norm_text = self.text_normalize(text)
+    #     phones, tones, word2ph = self.g2p_modify(norm_text)
+    #     return norm_text, phones, tones, word2ph
+
+    def get_text_for_tts_infer_modify(self, text, language_str, hps, device, symbol_to_id=None):
+        # if self.text_normalize is None:
+        #     norm_text, phone, tone, word2ph = self.clean_text(text, language_str)
+        # else:
+        #     norm_text, phone, tone, word2ph = self.clean_text_modify(text)
+        norm_text, phone, tone, word2ph = self.clean_text(text, language_str)
+        phone, tone, language = self.cleaned_text_to_sequence(phone, tone, language_str, symbol_to_id)
+
+        if hps.data.add_blank:
+            phone = self.commons.intersperse(phone, 0)
+            tone = self.commons.intersperse(tone, 0)
+            language = self.commons.intersperse(language, 0)
+            for i in range(len(word2ph)):
+                word2ph[i] = word2ph[i] * 2
+            word2ph[0] += 1
+
+        logger.debug(f"hps.data : {hps.data}")
+        if getattr(hps.data, "disable_bert", False):
+            bert = torch.zeros(1024, len(phone))
+            ja_bert = torch.zeros(768, len(phone))
+        else:
+            bert = self.get_bert_feature(norm_text, word2ph)
+            del word2ph
+            assert bert.shape[-1] == len(phone), phone
+
+        if language_str == "ZH":
+            bert = bert
+            ja_bert = torch.zeros(768, len(phone))
+        elif language_str in ["JP", "EN", "ZH_MIX_EN", 'KR', 'SP', 'ES', 'FR', 'DE', 'RU']:
+            ja_bert = bert
+            bert = torch.zeros(1024, len(phone))
+        else:
+            raise NotImplementedError()
+
+        assert bert.shape[-1] == len(
+            phone
+        ), f"Bert seq len {bert.shape[-1]} != {len(phone)}"
+
+        phone = torch.LongTensor(phone)
+        tone = torch.LongTensor(tone)
+        language = torch.LongTensor(language)
+        return bert, ja_bert, phone, tone, language
+
+    def preprocess_text(self, text):
+        if self.get_text_for_tts_infer is None:
+            logger.debug(f"self.tts.language : {self.tts.language}")
+            bert, ja_bert, phones, tones, lang_ids = self.get_text_for_tts_infer_modify(text, self.tts.language, self.tts.hps, "cpu", self.tts.symbol_to_id)
+        else:
+            bert, ja_bert, phones, tones, lang_ids = self.get_text_for_tts_infer(text, self.tts.language, self.tts.hps, "cpu", self.tts.symbol_to_id)
+
+        logger.debug(f"phones : {phones}, len = {len(phones)}")
+        logger.debug(f"tones : {tones}, len = {len(tones)}")
+        logger.debug(f"language : {lang_ids}, len = {len(lang_ids)}")
+
+        phone_len = phones.size(0)
+        # Pad everything to the fixed MAX_SEQ_LEN expected by the ONNX graphs.
+        phones = torch.nn.functional.pad(phones, (0, self.max_seq_len - phones.size(0)))[:self.max_seq_len]
+        tones = torch.nn.functional.pad(tones, (0, self.max_seq_len - tones.size(0)))[:self.max_seq_len]
+        lang_ids = torch.nn.functional.pad(lang_ids, (0, self.max_seq_len - lang_ids.size(0)))[:self.max_seq_len]
+        bert = torch.nn.functional.pad(bert, (0, self.max_seq_len - bert.size(1), 0, 0))[:, :self.max_seq_len]
+        ja_bert = torch.nn.functional.pad(ja_bert, (0, self.max_seq_len - ja_bert.size(1), 0, 0))[:, :self.max_seq_len]
+
+        return phones, tones, lang_ids, bert, ja_bert, phone_len
+
+    def tts_to_file(self, text, speaker_id, output_path, noise_scale=0.667, length_scale=1.0, noise_scale_w=0.8, sdp_ratio=0.2):
+        pipe_start_time = time.time()
+        pre_start_time = time.time()
+        phones, tones, lang_ids, bert, ja_bert, phone_len = self.preprocess_text(text)
+        pre_exe_time = (time.time() - pre_start_time) * 1000
+
+        inputs = {
+            'x': phones.unsqueeze(0).numpy(),
+            'x_lengths': np.array([phone_len], dtype=np.int64),
+            'sid': np.array([speaker_id], dtype=np.int64),
+            'tone': tones.unsqueeze(0).numpy(),
+            'language': lang_ids.unsqueeze(0).numpy(),
+            'bert': bert.unsqueeze(0).numpy(),
+            'ja_bert': ja_bert.unsqueeze(0).numpy(),
+            'sdp_ratio': np.array([sdp_ratio], dtype=np.float32),
+            'length_scale': np.array([length_scale], dtype=np.float32),
+            'noise_scale_w': np.array([noise_scale_w], dtype=np.float32),
+        }
+
+
+        logger.info("Executing Encoder : ...")
+        encoder_input = self.ort_runer.get_onnx_model_input(self.encoder, list(inputs.keys()), list(inputs.values()))
+        encoder_output, enc_exe_time = self.ort_runer.execute(self.encoder, encoder_input, ['y_lengths', 'x_mask', 'm_p', 'logs_p', 'g', 'w_ceil'])
+        y_lengths, x_mask, m_p, logs_p, g, w_ceil = encoder_output
+        y_mask = np.expand_dims(np.arange(MAX_SEQ_LEN * 3) < y_lengths[:, None], axis=1).astype(np.float32)
+        attn_mask = np.expand_dims(x_mask, axis=2) * np.expand_dims(y_mask, axis=-1)
+        # attn_squeezed has shape [batch, T_dec, T_text]: a hard alignment that
+        # maps every decoder frame to exactly one phoneme.
+        attn = generate_path(w_ceil, attn_mask)
+        attn_squeezed = attn.squeeze(1)
+
+        flow_inputs = {
+            "m_p": m_p.astype(np.float32),
+            "logs_p": logs_p.astype(np.float32),
+            "y_mask": y_mask,
+            "g": g,
+            "attn_squeezed": attn_squeezed.astype(np.float32),
+            'noise_scale': np.array([noise_scale], dtype=np.float32),
+        }
+
+        logger.info("Executing Flow : ...")
+        flow_input = self.ort_runer.get_onnx_model_input(self.flow, list(flow_inputs.keys()), list(flow_inputs.values()))
+        flow_output, flow_exe_time = self.ort_runer.execute(self.flow, flow_input)
+        z = flow_output[0]
+
+        dec_seq_len = MAX_DEC_SEQ_LEN
+        dec_seq_overlap = DEC_SEQ_OVERLAP
+
+        # First decoder window: frames [0, dec_seq_len + dec_seq_overlap),
+        # zero-padded to the fixed window size of MAX_DEC_SEQ_LEN + 2 * DEC_SEQ_OVERLAP.
+        z_buf = np.zeros([z.shape[0], z.shape[1], MAX_DEC_SEQ_LEN + 2 * DEC_SEQ_OVERLAP]).astype(np.float32)
+        z_buf[:, :, :(dec_seq_len + dec_seq_overlap)] = z[:, :, :(dec_seq_len + dec_seq_overlap)]
+        #print(z_buf.shape)
+        decoder_inputs = {
+            "z": z_buf,
+            "g": g
+        }
+
+        logger.info("Executing Decoder : ...")
+        decoder_input = self.ort_runer.get_onnx_model_input(self.decoder, list(decoder_inputs.keys()), list(decoder_inputs.values()))
+        decoder_output, first_dec_exe_time = self.ort_runer.execute(self.decoder, decoder_input)
+        audio_chunk = decoder_output[0]
+        # Time to first audio: encoder + flow + first decoder window.
+        first_decoder_time = first_dec_exe_time + enc_exe_time + flow_exe_time
+
+        audio = audio_chunk.squeeze()[:dec_seq_len * UPSAMPLE_FACTOR]
+        total_dec_seq_len = dec_seq_len
+        total_dec_execution_time = first_dec_exe_time
+        total_token_count = 1  # count the first decoder window as well
+        while total_dec_seq_len < int(y_lengths[0]):
+            total_token_count += 1
+            # Slide the window forward with DEC_SEQ_OVERLAP frames of context on
+            # each side; only the middle MAX_DEC_SEQ_LEN frames are kept.
+            z_buf = z[:, :, total_dec_seq_len - DEC_SEQ_OVERLAP:total_dec_seq_len + MAX_DEC_SEQ_LEN + DEC_SEQ_OVERLAP]
+            #print(z_buf.shape)
+            decoder_inputs = {
+                "z": z_buf,
+                "g": g
+            }
+
+            decoder_input = self.ort_runer.get_onnx_model_input(self.decoder, list(decoder_inputs.keys()), list(decoder_inputs.values()))
+            decoder_output, dec_exe_time = self.ort_runer.execute(self.decoder, decoder_input)
+            audio_chunk = decoder_output[0]
+            total_dec_execution_time += dec_exe_time
+            #print(audio_chunk.shape)
+            audio_chunk = audio_chunk.squeeze()[DEC_SEQ_OVERLAP * UPSAMPLE_FACTOR:(MAX_DEC_SEQ_LEN + DEC_SEQ_OVERLAP) * UPSAMPLE_FACTOR]
+            audio = np.concatenate([audio, audio_chunk])
+            #print(audio.shape)
+            total_dec_seq_len += dec_seq_len
+
+        length = int(y_lengths[0]) * UPSAMPLE_FACTOR
+
+
+        audio = audio.squeeze()[:length]
+        pipe_end_time = time.time()
+        sf.write(output_path, audio.squeeze(), samplerate=self.tts.hps.data.sampling_rate)
+        # BERT runs only for languages that use it (e.g. English).
+        bert_exe_time = getattr(self, "bert_exe_time", 0.0)
+        print(f"\n**********Summary {self.ep.upper()}***********\n")
+        print(f"Bert Execution Time : {bert_exe_time:.4f} ms")
+        print(f"Encoder Execution Time : {enc_exe_time:.4f} ms")
+        print(f"Flow Execution Time : {flow_exe_time:.4f} ms")
+        print(f"First Decoder Execution Time : {first_decoder_time:.4f} ms")
+        print(f"Total Decoder Execution Time : {total_dec_execution_time:.4f} ms")
+        print(f"Total Model Execution Time : {(bert_exe_time + enc_exe_time + flow_exe_time + total_dec_execution_time):.4f} ms")
+        # print(f"Total Pipeline Execution Time : {(pipe_end_time - pipe_start_time):.4f} sec")
+        print(f"Total Tokens : {total_token_count}")
+        decoder_speed_tokens_per_sec = total_token_count / (total_dec_execution_time / 1000)
+        print(f"Decoder speed: {decoder_speed_tokens_per_sec:.4f} token/sec")
+        # print(f"Preprocessing Execution Time : {pre_exe_time:.4f} sec")
+
+        print(f"\n*****************************************************\n")
+
+
+# inference() implements the TTS service consumed by the web UI: POST
+# /generate with JSON {"text": "..."} (a string or a list of strings)
+# returns the generated WAV; for a list, only the last entry's audio is sent.
+@app.route('/generate', methods=['POST'])
+def inference():
+    global language, model
+    data = request.get_json()
+    text = data.get("text", "")
+    if not text:
+        return jsonify({"error": "No text provided"}), 400
+
+
+    result = os.path.join("result", language)
+    os.makedirs(result, exist_ok=True)
+    if not isinstance(text, list):
+        text = [text]
+
+    for input_text in text:
+        logger.info(f"text : {input_text}")
+        output_path = os.path.join(result, f"test-onnx-output_{language}.wav")
+        model.tts_to_file(input_text, speaker_id=0, output_path=output_path)
+        logger.info(f"Audio generated and saved to {output_path}")
+
+    # Send the .wav file
+    return send_file(
+        output_path,
+        mimetype="audio/wav",
+        as_attachment=False
+    )
+
+
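+# Startup flow (for reference): main() below resolves the model paths and
+# builds the OnnxTTS pipeline once, and app.run() then serves requests on
+# port 8083 until the container stops.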
+def main(args):
+    global language, model
+    language = args.language.upper()
+    ep = args.ep
+    backend_path = args.backend_path
+    debug = args.debug
+    working_dir = args.working_dir
+
+    if language == "ENGLISH":
+        text = "This is an example of text to speech using Melo for English. How does it sound?"
+        if ep == "cpu":
+            # for testing exported model
+            charsiu_decoder_path = os.path.join(working_dir, 'models', "charsiu_decoder.onnx")
+            charsiu_encoder_path = os.path.join(working_dir, 'models', "charsiu_encoder.onnx")
+            decoder_model_path = os.path.join(working_dir, 'models', "decoder_ENGLISH.onnx")
+            encoder_model_path = os.path.join(working_dir, 'models', "encoder_ENGLISH.onnx")
+            flow_model_path = os.path.join(working_dir, 'models', "flow_ENGLISH.onnx")
+            bert_model_path = os.path.join(working_dir, 'models', "bert_ENGLISH.onnx")
+        else:
+            charsiu_decoder_path = os.path.join(working_dir, 'models', "charsiu_decoder_net_qnn_ctx.onnx")
+            charsiu_encoder_path = os.path.join(working_dir, 'models', "charsiu_encoder_net_qnn_ctx.onnx")
+            decoder_model_path = os.path.join(working_dir, 'models', "decoder_net_qnn_ctx.onnx")
+            encoder_model_path = os.path.join(working_dir, 'models', "encoder_net_qnn_ctx.onnx")
+            flow_model_path = os.path.join(working_dir, 'models', "flow_net_qnn_ctx.onnx")
+            bert_model_path = os.path.join(working_dir, 'models', "bert_net_qnn_ctx.onnx")
+    elif language == "SPANISH":
+        text = "Este es un ejemplo de conversión de texto a voz con Melo para español. ¿Cómo suena?"
+        if ep == "cpu":
+            charsiu_decoder_path = os.path.join(working_dir, 'models', "charsiu_decoder.onnx")
+            charsiu_encoder_path = os.path.join(working_dir, 'models', "charsiu_encoder.onnx")
+            decoder_model_path = os.path.join(working_dir, 'models', "decoder_SPANISH.onnx")
+            encoder_model_path = os.path.join(working_dir, 'models', "encoder_SPANISH.onnx")
+            flow_model_path = os.path.join(working_dir, 'models', "flow_SPANISH.onnx")
+            bert_model_path = None
+        else:
+            charsiu_decoder_path = os.path.join(working_dir, 'models', "charsiu_decoder.onnx")
+            charsiu_encoder_path = os.path.join(working_dir, 'models', "charsiu_encoder.onnx")
+            bert_model_path = None
+            decoder_model_path = os.path.join(working_dir, 'models', "decoder_net_qnn_ctx.onnx")
+            encoder_model_path = os.path.join(working_dir, 'models', "encoder_net_qnn_ctx.onnx")
+            flow_model_path = os.path.join(working_dir, 'models', "flow_net_qnn_ctx.onnx")
+
+
+    model_list = [charsiu_decoder_path, charsiu_encoder_path, decoder_model_path, encoder_model_path, flow_model_path, bert_model_path]
+    for model_file in model_list:
+        if model_file is not None:
+            if not os.path.exists(model_file):
+                raise FileNotFoundError(f"The specified file was not found at : {model_file}")
+
+
+    model = OnnxTTS(encoder_model_path, flow_model_path, decoder_model_path, charsiu_encoder_path, charsiu_decoder_path, bert_model_path,
+                    language,  # language_map[language],
+                    ep=ep, backend_path=backend_path, debug=debug)
+
+    logger.info(f"Language : {language}")
+    logger.info(f"ep : {ep}")
+
+    logger.info(f"backend_path : {backend_path}")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Run MeloTTS Text-to-Speech")
+    parser.add_argument("-ep", choices=["cpu", "npu"], default="npu", help="Execution provider")
+    parser.add_argument("-t", "--text", default=None, help="text to generate speech from")
+    parser.add_argument("-l", "--language", default="english", help="speech language")
+    # parser.add_argument('-g2p', "--enable_g2p", action="store_true", help="Enable G2P processing")
+    parser.add_argument("-b", "--backend_path", default=None, help="Path to backend, if applicable")
+    parser.add_argument("-d", "--debug", action="store_true", help="Enable debug mode")
+    parser.add_argument("-wd", "--working_dir", default=os.getcwd(), help="Working directory containing models and output")
+
+    args = parser.parse_args()
+    main(args)
+    app.run(host='0.0.0.0', port=8083)
diff --git a/GenAI-Solutions/GenAI-Studio/Text-To-Speech/meloTTS/requirements.txt b/GenAI-Solutions/GenAI-Studio/Text-To-Speech/meloTTS/requirements.txt
new file mode 100644
index 0000000..a5f9b7f
Binary files /dev/null and b/GenAI-Solutions/GenAI-Studio/Text-To-Speech/meloTTS/requirements.txt differ
diff --git a/GenAI-Solutions/GenAI-Studio/Text-To-Speech/meloTTS/run.sh b/GenAI-Solutions/GenAI-Studio/Text-To-Speech/meloTTS/run.sh
new file mode 100644
index 0000000..ec70f40
--- /dev/null
+++ b/GenAI-Solutions/GenAI-Studio/Text-To-Speech/meloTTS/run.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# ---------------------------------------------------------------------
+# Copyright (c) Qualcomm Innovation Center, Inc. All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+# ---------------------------------------------------------------------
+
+. 
~/miniconda3/bin/activate +conda activate py310 +python3 meloTTS_app.py \ No newline at end of file diff --git a/GenAI-Solutions/GenAI-Studio/assets/dsp_runtime.png b/GenAI-Solutions/GenAI-Studio/assets/dsp_runtime.png new file mode 100644 index 0000000..a170450 Binary files /dev/null and b/GenAI-Solutions/GenAI-Studio/assets/dsp_runtime.png differ diff --git a/GenAI-Solutions/GenAI-Studio/assets/start_genai_studio.png b/GenAI-Solutions/GenAI-Studio/assets/start_genai_studio.png new file mode 100644 index 0000000..213ca86 Binary files /dev/null and b/GenAI-Solutions/GenAI-Studio/assets/start_genai_studio.png differ diff --git a/GenAI-Solutions/GenAI-Studio/assets/stop_genai_studio.png b/GenAI-Solutions/GenAI-Studio/assets/stop_genai_studio.png new file mode 100644 index 0000000..821faa1 Binary files /dev/null and b/GenAI-Solutions/GenAI-Studio/assets/stop_genai_studio.png differ diff --git a/GenAI-Solutions/GenAI-Studio/docker-compose.yml b/GenAI-Solutions/GenAI-Studio/docker-compose.yml index 4b18412..35e3f4e 100644 --- a/GenAI-Solutions/GenAI-Studio/docker-compose.yml +++ b/GenAI-Solutions/GenAI-Studio/docker-compose.yml @@ -18,6 +18,50 @@ services: device_ids: - qualcomm.com/device=cdi-hw-acc network_mode: host + asr: + image: asr:latest + platform: arm64 + container_name: ASR + hostname: ASR + user: root + restart: always + command: bash run.sh + volumes: + - /opt/asr/:/app/Speech-To-Text/ai-engine-direct-helper/samples/python/whisper_base_en/models/ + - /usr/lib/dsp/cdsp1/libc++.so.1:/usr/lib/dsp/cdsp1/libc++.so.1 + - /usr/lib/dsp/cdsp1/libc++abi.so.1:/usr/lib/dsp/cdsp1/libc++abi.so.1 + deploy: + resources: + reservations: + devices: + - driver: cdi + device_ids: + - qualcomm.com/device=cdi-hw-acc + expose: + - 8081 + network_mode: host + text2image: + image: text2image:latest + platform: arm64 + container_name: Text2Image + hostname: Text2Image + user: root + restart: always + command: bash run.sh + volumes: + - /opt/text2img/:/app/Image-Generation/ai-engine-direct-helper/samples/python/stable_diffusion_v1_5/models/ + - /usr/lib/dsp/cdsp1/libc++.so.1:/usr/lib/dsp/cdsp1/libc++.so.1 + - /usr/lib/dsp/cdsp1/libc++abi.so.1:/usr/lib/dsp/cdsp1/libc++abi.so.1 + deploy: + resources: + reservations: + devices: + - driver: cdi + device_ids: + - qualcomm.com/device=cdi-hw-acc + expose: + - 8082 + network_mode: host text2text: image: text2text:latest @@ -39,18 +83,20 @@ services: device_ids: - qualcomm.com/device=cdi-hw-acc expose: - - 8088 + - 8088 network_mode: host - asr: - image: asr:latest + + text2speech: + image: text2speech:latest platform: arm64 - container_name: ASR - hostname: ASR + container_name: TTS + hostname: TTS user: root - restart: always command: bash run.sh + restart: always volumes: - - /opt/:/opt/ + - /opt/TTS_models/:/app/Text-To-Speech/models/ + - /opt/:/app/Text-To-Speech/result/ENGLISH/ - /usr/lib/dsp/cdsp1/libc++.so.1:/usr/lib/dsp/cdsp1/libc++.so.1 - /usr/lib/dsp/cdsp1/libc++abi.so.1:/usr/lib/dsp/cdsp1/libc++abi.so.1 deploy: @@ -61,5 +107,5 @@ services: device_ids: - qualcomm.com/device=cdi-hw-acc expose: - - 8081 + - 8083 network_mode: host diff --git a/GenAI-Solutions/GenAI-Studio/docker-run-cdi-hw-acc.json b/GenAI-Solutions/GenAI-Studio/docker-run-cdi-hw-acc.json new file mode 100644 index 0000000..95e03fd --- /dev/null +++ b/GenAI-Solutions/GenAI-Studio/docker-run-cdi-hw-acc.json @@ -0,0 +1,645 @@ +{ + "cdiVersion": "0.6.0", + "kind": "qualcomm.com/device", + "devices": [ + { + "name": "cdi-hw-acc", + "containerEdits": { + "env": [ + 
"XDG_RUNTIME_DIR=/run/user/1000", + "WAYLAND_DISPLAY=wayland-0", + "GST_DEBUG_NO_COLOR=1", + "GST_DEBUG=2", + "GST_PLUGIN_SCANNER=\"/usr/lib/aarch64-linux-gnu/gstreamer1.0/gstreamer-1.0/gst-plugin-scanner\"" + ], + "deviceNodes": [ + { + "path": "/dev/dri/card0" + }, + { + "path": "/dev/dri/renderD128" + }, + { + "path": "/dev/video32" + }, + { + "path": "/dev/video33" + }, + { + "path": "/dev/dma_heap/system" + }, + { + "path": "/dev/dma_heap/qcom,system" + }, + { + "path": "/dev/fastrpc-cdsp" + } + ], + "mounts": [ + { + "hostPath": "/run/user/1000", + "containerPath": "/run/user/1000", + "options": [ + "bind" + ] + }, + { + "hostPath": "/tmp/property-vault.socket", + "containerPath": "/tmp/property-vault.socket", + "options": [ + "bind" + ] + }, + { + "hostPath": "/etc/labels", + "containerPath": "/etc/labels", + "options": [ + "bind" + ] + }, + { + "hostPath": "/etc/media", + "containerPath": "/etc/media", + "options": [ + "bind" + ] + }, + { + "hostPath": "/etc/models", + "containerPath": "/etc/models", + "options": [ + "bind" + ] + }, + { + "hostPath": "/etc/configs", + "containerPath": "/etc/configs", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/aarch64-linux-gnu/gstreamer-1.0/ml", + "containerPath": "/usr/lib/aarch64-linux-gnu/gstreamer-1.0/ml", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/aarch64-linux-gnu/gstreamer-1.0/mlmetaparser", + "containerPath": "/usr/lib/aarch64-linux-gnu/gstreamer-1.0/mlmetaparser", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/aarch64-linux-gnu/pulseaudio", + "containerPath": "/usr/lib/aarch64-linux-gnu/pulseaudio", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/bin/gst-device-monitor-1.0", + "containerPath": "/usr/bin/gst-device-monitor-1.0", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/bin/gst-discoverer-1.0", + "containerPath": "/usr/bin/gst-discoverer-1.0", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/bin/gst-inspect-1.0", + "containerPath": "/usr/bin/gst-inspect-1.0", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/bin/gst-launch-1.0", + "containerPath": "/usr/bin/gst-launch-1.0", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/bin/gst-play-1.0", + "containerPath": "/usr/bin/gst-play-1.0", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/bin/gst-stats-1.0", + "containerPath": "/usr/bin/gst-stats-1.0", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/bin/gst-tester-1.0", + "containerPath": "/usr/bin/gst-tester-1.0", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/bin/gst-typefind-1.0", + "containerPath": "/usr/bin/gst-typefind-1.0", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/aarch64-linux-gnu/ao/plugins-4/libpulse.so", + "containerPath": "/usr/lib/aarch64-linux-gnu/ao/plugins-4/libpulse.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/aarch64-linux-gnu/gbm/dri_gbm.so", + "containerPath": "/usr/lib/aarch64-linux-gnu/gbm/dri_gbm.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/aarch64-linux-gnu/gstreamer-1.0/libgstcoreelements.so", + "containerPath": "/usr/lib/aarch64-linux-gnu/gstreamer-1.0/libgstcoreelements.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/aarch64-linux-gnu/gstreamer-1.0/libgstpulseaudio.so", + "containerPath": "/usr/lib/aarch64-linux-gnu/gstreamer-1.0/libgstpulseaudio.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/aarch64-linux-gnu/gstreamer-1.0/libgstvideo4linux2.so", + "containerPath": 
"/usr/lib/aarch64-linux-gnu/gstreamer-1.0/libgstvideo4linux2.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/aarch64-linux-gnu/libEGL.so.1", + "containerPath": "/usr/lib/aarch64-linux-gnu/libEGL.so.1", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/aarch64-linux-gnu/libGLESv2.so.2", + "containerPath": "/usr/lib/aarch64-linux-gnu/libGLESv2.so.2", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/aarch64-linux-gnu/libadsprpc.so.1", + "containerPath": "/usr/lib/aarch64-linux-gnu/libadsprpc.so.1", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/aarch64-linux-gnu/libatomic.so.1", + "containerPath": "/usr/lib/aarch64-linux-gnu/libatomic.so.1", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/aarch64-linux-gnu/libcdsprpc.so.1", + "containerPath": "/usr/lib/aarch64-linux-gnu/libcdsprpc.so.1", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/aarch64-linux-gnu/libdmabufheap.so.0", + "containerPath": "/usr/lib/aarch64-linux-gnu/libdmabufheap.so.0", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/aarch64-linux-gnu/libgallium-25.0.7-0ubuntu0.24.04.2.so", + "containerPath": "/usr/lib/aarch64-linux-gnu/libgallium-25.0.7-0ubuntu0.24.04.2.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/aarch64-linux-gnu/libgbm.so.1", + "containerPath": "/usr/lib/aarch64-linux-gnu/libgbm.so.1", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/aarch64-linux-gnu/libgstbase-1.0.so.0", + "containerPath": "/usr/lib/aarch64-linux-gnu/libgstbase-1.0.so.0", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/aarch64-linux-gnu/libgstreamer-1.0.so.0", + "containerPath": "/usr/lib/aarch64-linux-gnu/libgstreamer-1.0.so.0", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/aarch64-linux-gnu/libgstvideo-1.0.so.0", + "containerPath": "/usr/lib/aarch64-linux-gnu/libgstvideo-1.0.so.0", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/aarch64-linux-gnu/libpulse-simple.so.0", + "containerPath": "/usr/lib/aarch64-linux-gnu/libpulse-simple.so.0", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/aarch64-linux-gnu/libpulse.so.0", + "containerPath": "/usr/lib/aarch64-linux-gnu/libpulse.so.0", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/aarch64-linux-gnu/libwayland-client.so.0", + "containerPath": "/usr/lib/aarch64-linux-gnu/libwayland-client.so.0", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/aarch64-linux-gnu/libwayland-egl.so.1", + "containerPath": "/usr/lib/aarch64-linux-gnu/libwayland-egl.so.1", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libPlatformValidatorShared.so", + "containerPath": "/usr/lib/libPlatformValidatorShared.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libQnnChrometraceProfilingReader.so", + "containerPath": "/usr/lib/libQnnChrometraceProfilingReader.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libQnnCpu.so", + "containerPath": "/usr/lib/libQnnCpu.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libQnnCpuNetRunExtensions.so", + "containerPath": "/usr/lib/libQnnCpuNetRunExtensions.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libQnnDsp.so", + "containerPath": "/usr/lib/libQnnDsp.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libQnnDspNetRunExtensions.so", + "containerPath": "/usr/lib/libQnnDspNetRunExtensions.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libQnnGenAiTransformer.so", + 
"containerPath": "/usr/lib/libQnnGenAiTransformer.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libQnnGenAiTransformerCpuOpPkg.so", + "containerPath": "/usr/lib/libQnnGenAiTransformerCpuOpPkg.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libQnnGenAiTransformerModel.so", + "containerPath": "/usr/lib/libQnnGenAiTransformerModel.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libQnnGpu.so", + "containerPath": "/usr/lib/libQnnGpu.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libQnnGpuNetRunExtensions.so", + "containerPath": "/usr/lib/libQnnGpuNetRunExtensions.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libQnnGpuProfilingReader.so", + "containerPath": "/usr/lib/libQnnGpuProfilingReader.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libQnnHta.so", + "containerPath": "/usr/lib/libQnnHta.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libQnnHtaNetRunExtensions.so", + "containerPath": "/usr/lib/libQnnHtaNetRunExtensions.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libQnnHtp.so", + "containerPath": "/usr/lib/libQnnHtp.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libQnnHtpNetRunExtensions.so", + "containerPath": "/usr/lib/libQnnHtpNetRunExtensions.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libQnnHtpOptraceProfilingReader.so", + "containerPath": "/usr/lib/libQnnHtpOptraceProfilingReader.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libQnnHtpPrepare.so", + "containerPath": "/usr/lib/libQnnHtpPrepare.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libQnnHtpProfilingReader.so", + "containerPath": "/usr/lib/libQnnHtpProfilingReader.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libQnnHtpV73CalculatorStub.so", + "containerPath": "/usr/lib/libQnnHtpV73CalculatorStub.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libQnnHtpV73Stub.so", + "containerPath": "/usr/lib/libQnnHtpV73Stub.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libQnnIr.so", + "containerPath": "/usr/lib/libQnnIr.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libQnnJsonProfilingReader.so", + "containerPath": "/usr/lib/libQnnJsonProfilingReader.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libQnnModelDlc.so", + "containerPath": "/usr/lib/libQnnModelDlc.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libQnnSaver.so", + "containerPath": "/usr/lib/libQnnSaver.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libQnnSystem.so", + "containerPath": "/usr/lib/libQnnSystem.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libQnnTFLiteDelegate.so", + "containerPath": "/usr/lib/libQnnTFLiteDelegate.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libSNPE.so", + "containerPath": "/usr/lib/libSNPE.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libSnpeDspStub.so", + "containerPath": "/usr/lib/libSnpeDspStub.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libSnpeHta.so", + "containerPath": "/usr/lib/libSnpeHta.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libSnpeHtpPrepare.so", + "containerPath": "/usr/lib/libSnpeHtpPrepare.so", + "options": [ + "bind" + ] + }, + { + "hostPath": "/usr/lib/libSnpeHtpV73CalculatorStub.so", + "containerPath": "/usr/lib/libSnpeHtpV73CalculatorStub.so", + "options": [ + "bind" + ] 
+          },
+          {
+            "hostPath": "/usr/lib/libSnpeHtpV73Stub.so",
+            "containerPath": "/usr/lib/libSnpeHtpV73Stub.so",
+            "options": [
+              "bind"
+            ]
+          },
+          {
+            "hostPath": "/usr/lib/libhta_hexagon_runtime.so",
+            "containerPath": "/usr/lib/libhta_hexagon_runtime.so",
+            "options": [
+              "bind"
+            ]
+          },
+          {
+            "hostPath": "/usr/lib/libhta_hexagon_runtime_snpe.so",
+            "containerPath": "/usr/lib/libhta_hexagon_runtime_snpe.so",
+            "options": [
+              "bind"
+            ]
+          },
+          {
+            "hostPath": "/usr/lib/rfsa/adsp/hexagon-v73/libCalculator_skel.so",
+            "containerPath": "/usr/lib/rfsa/adsp/hexagon-v73/libCalculator_skel.so",
+            "options": [
+              "bind"
+            ]
+          },
+          {
+            "hostPath": "/usr/lib/rfsa/adsp/hexagon-v73/libQnnSaver.so",
+            "containerPath": "/usr/lib/rfsa/adsp/hexagon-v73/libQnnSaver.so",
+            "options": [
+              "bind"
+            ]
+          },
+          {
+            "hostPath": "/usr/lib/rfsa/adsp/hexagon-v73/libQnnSystem.so",
+            "containerPath": "/usr/lib/rfsa/adsp/hexagon-v73/libQnnSystem.so",
+            "options": [
+              "bind"
+            ]
+          },
+          {
+            "hostPath": "/usr/lib/rfsa/adsp/libCalculator_skel.so",
+            "containerPath": "/usr/lib/rfsa/adsp/libCalculator_skel.so",
+            "options": [
+              "bind"
+            ]
+          },
+          {
+            "hostPath": "/usr/lib/rfsa/adsp/libQnnHtpV73.so",
+            "containerPath": "/usr/lib/rfsa/adsp/libQnnHtpV73.so",
+            "options": [
+              "bind"
+            ]
+          },
+          {
+            "hostPath": "/usr/lib/rfsa/adsp/libQnnHtpV73QemuDriver.so",
+            "containerPath": "/usr/lib/rfsa/adsp/libQnnHtpV73QemuDriver.so",
+            "options": [
+              "bind"
+            ]
+          },
+          {
+            "hostPath": "/usr/lib/rfsa/adsp/libQnnHtpV73Skel.so",
+            "containerPath": "/usr/lib/rfsa/adsp/libQnnHtpV73Skel.so",
+            "options": [
+              "bind"
+            ]
+          },
+          {
+            "hostPath": "/usr/lib/rfsa/adsp/libQnnSaver.so",
+            "containerPath": "/usr/lib/rfsa/adsp/libQnnSaver.so",
+            "options": [
+              "bind"
+            ]
+          },
+          {
+            "hostPath": "/usr/lib/rfsa/adsp/libQnnSystem.so",
+            "containerPath": "/usr/lib/rfsa/adsp/libQnnSystem.so",
+            "options": [
+              "bind"
+            ]
+          },
+          {
+            "hostPath": "/usr/lib/rfsa/adsp/libSnpeHtpV73Skel.so",
+            "containerPath": "/usr/lib/rfsa/adsp/libSnpeHtpV73Skel.so",
+            "options": [
+              "bind"
+            ]
+          },
+          {
+            "hostPath": "/usr/lib/rfsa/adsp/libqnnhtpv73.cat",
+            "containerPath": "/usr/lib/rfsa/adsp/libqnnhtpv73.cat",
+            "options": [
+              "bind"
+            ]
+          },
+          {
+            "hostPath": "/usr/lib/rfsa/adsp/libsnpehtpv73.cat",
+            "containerPath": "/usr/lib/rfsa/adsp/libsnpehtpv73.cat",
+            "options": [
+              "bind"
+            ]
+          },
+          {
+            "hostPath": "/usr/lib/aarch64-linux-gnu/libcdsprpc.so",
+            "containerPath": "/usr/lib/aarch64-linux-gnu/libcdsprpc.so",
+            "options": [
+              "bind"
+            ]
+          },
+          {
+            "hostPath": "/usr/lib/aarch64-linux-gnu/libadsprpc.so",
+            "containerPath": "/usr/lib/aarch64-linux-gnu/libadsprpc.so",
+            "options": [
+              "bind"
+            ]
+          }
+        ]
+      }
+    }
+  ]
+}
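Before restarting Docker with this spec, it is worth confirming that every `hostPath` listed above actually exists on the device; a missing library only surfaces later as an opaque failure inside the container. Below is a minimal sketch of such a check, not part of the patch, assuming the spec has been installed at `/etc/cdi/docker-run-cdi-hw-acc.json`:

```
# Sanity check (illustrative, not part of the patch): verify that every
# hostPath bound by the CDI spec exists on the host before restarting Docker.
# Assumes the spec was installed at the path below; adjust if yours differs.
import json
from pathlib import Path

SPEC = Path("/etc/cdi/docker-run-cdi-hw-acc.json")

spec = json.loads(SPEC.read_text())
missing = []
# CDI places per-device bind mounts under devices[].containerEdits.mounts
for device in spec.get("devices", []):
    for mount in device.get("containerEdits", {}).get("mounts", []):
        if not Path(mount["hostPath"]).exists():
            missing.append(mount["hostPath"])

if missing:
    print("Missing host libraries (check the qcom PPA packages):")
    print("\n".join(f"  {p}" for p in missing))
else:
    print("All CDI bind mounts resolve on this host.")
```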
diff --git a/GenAI-Solutions/GenAI-Studio/web-ui/README.md b/GenAI-Solutions/GenAI-Studio/web-ui/README.md
deleted file mode 100644
index dc5b982..0000000
--- a/GenAI-Solutions/GenAI-Studio/web-ui/README.md
+++ /dev/null
@@ -1,32 +0,0 @@
-# Client
-
-1. Create docker image
-   ```
-   docker buildx build --platform=linux/arm64/v8 --load --output type=docker -t web-ui . --no-cache
-   ```
-2. Save docker image
-   ```
-   docker save web-ui -o web-ui
-   ```
-3. Push docker image to device
-4. Load image
-   ```
-   docker load -i web-ui
-   ```
-5. ```
-   docker run --name web-ui --net host -d web-ui
-   ```
-6. Check logs
-   ```
-   docker logs -f web-ui
-   ```
-7. Check device IP
-   ```
-   adb shell "ifconfig"
-   ```
-8. Open "" to access webpage
-   ```
-   http://10.92.211.143:8501/
-   ```
-
-9. You can check more about [streamlit](https://streamlit.io/)
diff --git a/GenAI-Solutions/GenAI-Studio/web-ui/app.py b/GenAI-Solutions/GenAI-Studio/web-ui/app.py
index 5afec8d..b56d44b 100644
--- a/GenAI-Solutions/GenAI-Studio/web-ui/app.py
+++ b/GenAI-Solutions/GenAI-Studio/web-ui/app.py
@@ -8,7 +8,7 @@
 st.set_page_config(page_title="Main Page", layout="wide")
 st.title("GenAI Studio")
 
-col1, col2, col3 = st.columns(3)
+col1, col2, col3, col4 = st.columns(4)
 with col1:
     st.image("static/assets/ASR.png", use_container_width=True)
     if st.button("\U0001F399 Automatic speech recognition (ASR)"):
@@ -20,4 +20,8 @@
 with col3:
     st.image("static/assets/Text2Text.png", use_container_width=True)
     if st.button("\U0001F4DD Text2Text"):
-        st.switch_page("pages/Text2Text.py")
\ No newline at end of file
+        st.switch_page("pages/Text2Text.py")
+with col4:
+    st.image("static/assets/Text2Speech.png", use_container_width=True)
+    if st.button("\U0001F4DD Text2Speech"):
+        st.switch_page("pages/Text2Speech.py")
\ No newline at end of file
diff --git a/GenAI-Solutions/GenAI-Studio/web-ui/pages/Text2Speech.py b/GenAI-Solutions/GenAI-Studio/web-ui/pages/Text2Speech.py
new file mode 100644
index 0000000..0243831
--- /dev/null
+++ b/GenAI-Solutions/GenAI-Studio/web-ui/pages/Text2Speech.py
@@ -0,0 +1,66 @@
+# ---------------------------------------------------------------------
+# Copyright (c) Qualcomm Innovation Center, Inc. All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+# ---------------------------------------------------------------------
+
+import streamlit as st
+import requests
+import io
+
+# Set page config
+st.set_page_config(page_title="GenAI Studio")
+
+# Page-specific key prefix
+PAGE_KEY = "tts"
+
+# Sidebar layout
+with st.sidebar:
+    st.title('GenAI Studio')
+    st.subheader('Text to Audio')
+    st.markdown('More info [doc](https://docs.qualcomm.com/bundle/publicresource/topics/80-70018-115/qualcomm-linux-docs-home.html?vproduct=1601111740013072&version=1.4)!')
+
+# Server URL for TTS
+server_url = "http://0.0.0.0:8083/generate"
+
+# Initialize chat history
+if f"{PAGE_KEY}_messages" not in st.session_state:
+    st.session_state[f"{PAGE_KEY}_messages"] = []
+
+# Display history
+st.subheader("Text-to-Speech")
+for message in st.session_state[f"{PAGE_KEY}_messages"]:
+    with st.chat_message("user"):
+        st.write(message["text"])
+    with st.chat_message("assistant"):
+        st.audio(message["audio"], format="audio/mp3")
+    st.markdown("---")
+
+# Text input
+text_input = st.text_area("Enter text to convert to speech")
+
+# Button to trigger TTS
+if st.button("🔊 Generate Audio"):
+    if text_input.strip():
+        with st.spinner("Generating audio..."):
+            try:
+                # Send POST request with text
+                response = requests.post(server_url, json={"text": text_input})
+
+                if response.status_code == 200:
+                    # Convert response content to BytesIO for playback
+                    audio_bytes = io.BytesIO(response.content)
+
+                    # Save to history
+                    st.session_state[f"{PAGE_KEY}_messages"].append({
+                        "text": text_input,
+                        "audio": audio_bytes
+                    })
+
+                    st.success("Audio generated successfully!")
+                    st.audio(audio_bytes, format="audio/mp3")  # or "audio/wav" depending on backend
+                else:
+                    st.error(f"Error: {response.status_code} - {response.text}")
+            except Exception as e:
+                st.error(f"Request failed: {e}")
+    else:
+        st.warning("Please enter some text.")
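The new page fixes the TTS service contract implicitly: a POST with a JSON body of the form `{"text": ...}` to the `/generate` route on port 8083 returns raw audio bytes. A standalone client can mirror the same request to smoke-test the backend without the web UI; the sketch below is not part of the patch, and the host, timeout, and output filename are illustrative assumptions:

```
# Smoke test (illustrative, not part of the patch): exercise the TTS endpoint
# directly, mirroring the request that pages/Text2Speech.py sends.
# Host, timeout, and output filename are assumptions.
import requests

resp = requests.post(
    "http://localhost:8083/generate",
    json={"text": "Hello from GenAI Studio"},
    timeout=120,  # first inference can be slow while models load
)
resp.raise_for_status()

# The UI plays the response body as mp3; save the raw bytes the same way.
with open("tts_output.mp3", "wb") as f:
    f.write(resp.content)
print(f"Wrote {len(resp.content)} bytes to tts_output.mp3")
```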
diff --git a/GenAI-Solutions/GenAI-Studio/web-ui/static/assets/ASR.png b/GenAI-Solutions/GenAI-Studio/web-ui/static/assets/ASR.png
index 4d08e35..4825749 100644
Binary files a/GenAI-Solutions/GenAI-Studio/web-ui/static/assets/ASR.png and b/GenAI-Solutions/GenAI-Studio/web-ui/static/assets/ASR.png differ
diff --git a/GenAI-Solutions/GenAI-Studio/web-ui/static/assets/Text2Image.png b/GenAI-Solutions/GenAI-Studio/web-ui/static/assets/Text2Image.png
index 606f77e..4d2d4fd 100644
Binary files a/GenAI-Solutions/GenAI-Studio/web-ui/static/assets/Text2Image.png and b/GenAI-Solutions/GenAI-Studio/web-ui/static/assets/Text2Image.png differ
diff --git a/GenAI-Solutions/GenAI-Studio/web-ui/static/assets/Text2Speech.png b/GenAI-Solutions/GenAI-Studio/web-ui/static/assets/Text2Speech.png
new file mode 100644
index 0000000..1174fe9
Binary files /dev/null and b/GenAI-Solutions/GenAI-Studio/web-ui/static/assets/Text2Speech.png differ
diff --git a/GenAI-Solutions/GenAI-Studio/web-ui/static/assets/Text2Text.png b/GenAI-Solutions/GenAI-Studio/web-ui/static/assets/Text2Text.png
index 09b5be8..05bc2a6 100644
Binary files a/GenAI-Solutions/GenAI-Studio/web-ui/static/assets/Text2Text.png and b/GenAI-Solutions/GenAI-Studio/web-ui/static/assets/Text2Text.png differ