From e51105c3afd0020a1518462d5f8c6e63135beba7 Mon Sep 17 00:00:00 2001 From: Moaz0009 Date: Sat, 1 Mar 2025 17:46:33 +0200 Subject: [PATCH] Added support for NumPy array input alongside .wav files --- klaam/utils/utils.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/klaam/utils/utils.py b/klaam/utils/utils.py index 526e62c..c172160 100644 --- a/klaam/utils/utils.py +++ b/klaam/utils/utils.py @@ -1,5 +1,6 @@ import librosa import torch +import numpy as np from klaam.external.FastSpeech2.buckwalter import bw2ar @@ -8,7 +9,15 @@ def load_file_to_data(file, srate=16_000): batch = {} - speech, sampling_rate = librosa.load(file, sr=srate) + + if isinstance(file, str): # If it's a file path + speech, sampling_rate = librosa.load(file, sr=srate) + elif isinstance(file, np.ndarray): # If it's a NumPy array + speech = file + sampling_rate = srate + else: + raise TypeError(f"Unsupported input type: {type(file)}. Expected str (file path) or np.ndarray.") + batch["speech"] = speech batch["sampling_rate"] = sampling_rate return batch