Skip to content

Commit c1b03cf

Browse files
add vibevoice snippet
1 parent e841a53 commit c1b03cf

File tree

2 files changed

+31
-0
lines changed

2 files changed

+31
-0
lines changed

packages/tasks/src/model-libraries-snippets.ts

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1684,6 +1684,29 @@ image = sana(
16841684
) `,
16851685
];
16861686

1687+
/**
 * Build the Python usage snippet displayed for VibeVoice models.
 *
 * The generated snippet reads a reference voice sample, down-mixes it to mono
 * and resamples it to 24 kHz when needed, loads the VibeVoice processor and
 * inference model for the given repository, generates speech for a short
 * two-speaker script, and writes the result to `output.wav`.
 *
 * @param model - Model whose `id` is interpolated into both `from_pretrained` calls.
 * @returns A one-element array holding the Python snippet.
 */
export const vibevoice = (model: ModelData): string[] => [
	`import torch, soundfile as sf, librosa, numpy as np
from vibevoice.processor.vibevoice_processor import VibeVoiceProcessor
from vibevoice.modular.modeling_vibevoice_inference import VibeVoiceForConditionalGenerationInference

# Load voice sample (should be 24kHz mono)
voice, sr = sf.read("path/to/voice_sample.wav")
if voice.ndim > 1: voice = voice.mean(axis=1)
if sr != 24000: voice = librosa.resample(voice, orig_sr=sr, target_sr=24000)

processor = VibeVoiceProcessor.from_pretrained("${model.id}")
model = VibeVoiceForConditionalGenerationInference.from_pretrained(
    "${model.id}", torch_dtype=torch.bfloat16
).to("cuda").eval()
model.set_ddpm_inference_steps(5)

inputs = processor(text=["Speaker 0: Hello!\\nSpeaker 1: Hi there!"],
                   voice_samples=[[voice]], return_tensors="pt")
audio = model.generate(**inputs, cfg_scale=1.3,
                       tokenizer=processor.tokenizer).speech_outputs[0]
# Cast to float32 first: NumPy cannot represent bfloat16 tensors
sf.write("output.wav", audio.float().cpu().numpy().squeeze(), 24000)`,
];
1709+
16871710
export const videoprism = (model: ModelData): string[] => [
16881711
`# Install from https://github.com/google-deepmind/videoprism
16891712
import jax

packages/tasks/src/model-libraries.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1170,6 +1170,14 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
11701170
countDownloads: `path_extension:"pt"`,
11711171
snippets: snippets.vui,
11721172
},
1173+
vibevoice: {
1174+
prettyLabel: "VibeVoice",
1175+
repoName: "VibeVoice",
1176+
repoUrl: "https://github.com/microsoft/VibeVoice",
1177+
snippets: snippets.vibevoice,
1178+
filter: false,
1179+
countDownloads: `path:"pytorch_model.bin" OR path:"model.safetensors"`,
1180+
},
11731181
"wan2.2": {
11741182
prettyLabel: "Wan2.2",
11751183
repoName: "Wan2.2",

0 commit comments

Comments
 (0)