Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 29 additions & 15 deletions src/Providers/Gemini/Gemini.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,33 @@
namespace Prism\Prism\Providers\Gemini;

use Generator;
use Illuminate\Http\Client\PendingRequest;
use Illuminate\Http\Client\RequestException;
use Prism\Prism\Concerns\InitializesClient;
use Prism\Prism\Contracts\Message;
use Prism\Prism\Embeddings\Request as EmbeddingRequest;
use Prism\Prism\Embeddings\Response as EmbeddingResponse;
use Prism\Prism\Providers\Provider;
use Prism\Prism\Audio\AudioResponse as TextToSpeechResponse;
use Illuminate\Http\Client\PendingRequest;
use Prism\Prism\Audio\TextToSpeechRequest;
use Prism\Prism\Exceptions\PrismException;
use Prism\Prism\Exceptions\PrismProviderOverloadedException;
use Prism\Prism\Exceptions\PrismRateLimitedException;
use Prism\Prism\Concerns\InitializesClient;
use Illuminate\Http\Client\RequestException;
use Prism\Prism\Text\Request as TextRequest;
use Prism\Prism\Text\Response as TextResponse;
use Prism\Prism\Providers\Gemini\Handlers\Text;
use Prism\Prism\Images\Request as ImagesRequest;
use Prism\Prism\Images\Response as ImagesResponse;
use Prism\Prism\Providers\Gemini\Handlers\Audio;
use Prism\Prism\Providers\Gemini\Handlers\Cache;
use Prism\Prism\Providers\Gemini\Handlers\Embeddings;
use Prism\Prism\Providers\Gemini\Handlers\Images;
use Prism\Prism\Providers\Gemini\Handlers\Stream;
use Prism\Prism\Images\Response as ImagesResponse;
use Prism\Prism\ValueObjects\Messages\SystemMessage;
use Prism\Prism\Exceptions\PrismRateLimitedException;
use Prism\Prism\Providers\Gemini\Handlers\Embeddings;
use Prism\Prism\Providers\Gemini\Handlers\Structured;
use Prism\Prism\Providers\Gemini\Handlers\Text;
use Prism\Prism\Providers\Gemini\ValueObjects\GeminiCachedObject;
use Prism\Prism\Providers\Provider;
use Prism\Prism\Embeddings\Request as EmbeddingRequest;
use Prism\Prism\Structured\Request as StructuredRequest;
use Prism\Prism\Embeddings\Response as EmbeddingResponse;
use Prism\Prism\Structured\Response as StructuredResponse;
use Prism\Prism\Text\Request as TextRequest;
use Prism\Prism\Text\Response as TextResponse;
use Prism\Prism\ValueObjects\Messages\SystemMessage;
use Prism\Prism\Exceptions\PrismProviderOverloadedException;
use Prism\Prism\Providers\Gemini\ValueObjects\GeminiCachedObject;

class Gemini extends Provider
{
Expand Down Expand Up @@ -83,6 +86,17 @@ public function images(ImagesRequest $request): ImagesResponse
return $handler->handle($request);
}

/**
 * Convert the request's text into speech using the Gemini audio handler.
 *
 * The HTTP client is configured from the request's own client options and
 * retry settings before being handed to the handler.
 */
#[\Override]
public function textToSpeech(TextToSpeechRequest $request): TextToSpeechResponse
{
    $client = $this->client($request->clientOptions(), $request->clientRetry());

    return (new Audio($client))->handleTextToSpeech($request);
}

#[\Override]
public function stream(TextRequest $request): Generator
{
Expand Down
39 changes: 39 additions & 0 deletions src/Providers/Gemini/Handlers/Audio.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
<?php

namespace Prism\Prism\Providers\Gemini\Handlers;

use Exception;
use Prism\Prism\Audio\AudioResponse;
use Illuminate\Http\Client\PendingRequest;
use Prism\Prism\Audio\TextToSpeechRequest;
use Prism\Prism\ValueObjects\GeneratedAudio;
use Prism\Prism\Providers\Gemini\Concerns\ValidatesResponse;

/**
 * Handler for Gemini audio generation (text-to-speech) requests.
 */
class Audio
{
    use ValidatesResponse;

    public function __construct(protected PendingRequest $client) {}

    /**
     * Post the mapped text-to-speech payload to `{model}:generateContent` and
     * wrap the returned base64 audio in an AudioResponse.
     *
     * @throws \Prism\Prism\Exceptions\PrismException when the HTTP call is not successful
     *                                                or the response carries no inline audio data
     */
    public function handleTextToSpeech(TextToSpeechRequest $request): AudioResponse
    {
        $mapper = new TextToSpeechRequestMapper($request);

        $response = $this->client->post("{$request->model()}:generateContent", $mapper->toPayload());

        if (! $response->successful()) {
            throw new \Prism\Prism\Exceptions\PrismException('Failed to generate audio: '.$response->body());
        }

        $data = $response->json();

        $base64Audio = $this->firstInlineAudio($data['candidates'][0]['content']['parts'] ?? null)
            ?? throw new \Prism\Prism\Exceptions\PrismException('No audio data returned from TTS API');

        return new AudioResponse(
            audio: new GeneratedAudio(
                base64: $base64Audio,
            ),
        );
    }

    /**
     * Return the first non-empty `inlineData.data` string found among the
     * candidate's parts, or null when there is none.
     *
     * Scanning every part (rather than only index 0) keeps the handler working
     * when the audio blob is not the first part of the candidate.
     */
    private function firstInlineAudio(mixed $parts): ?string
    {
        if (! is_array($parts)) {
            return null;
        }

        foreach ($parts as $part) {
            if (! is_array($part)) {
                continue;
            }

            $encoded = $part['inlineData']['data'] ?? null;

            if (is_string($encoded) && $encoded !== '') {
                return $encoded;
            }
        }

        return null;
    }
}
122 changes: 122 additions & 0 deletions src/Providers/Gemini/Handlers/TextToSpeechRequestMapper.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
<?php

namespace Prism\Prism\Providers\Gemini\Handlers;

use Illuminate\Support\Arr;
use Prism\Prism\Enums\Provider;
use Prism\Prism\Audio\TextToSpeechRequest;

/**
 * Translates a TextToSpeechRequest into the array payload posted to Gemini.
 */
class TextToSpeechRequestMapper
{
    public function __construct(
        public readonly TextToSpeechRequest $request
    ) {}

    /**
     * Build the request payload: model, a single text part, and the generation config.
     *
     * `responseModalities` falls back to `['AUDIO']` when the provider options do not set it.
     *
     * @return array<string, mixed>
     */
    public function toPayload(): array
    {
        $options = $this->request->providerOptions();

        $payload = [
            'model' => $this->request->model(),
            'contents' => [
                [
                    'parts' => [
                        ['text' => $this->request->input()],
                    ],
                ],
            ],
        ];

        $speech = $this->buildSpeechConfig($options);

        $generation = Arr::whereNotNull([
            'responseModalities' => $options['responseModalities'] ?? ['AUDIO'],
            'speechConfig' => $speech === [] ? null : $speech,
        ]);

        // Only attach the generation config when something survived the null filter.
        if ($generation !== []) {
            $payload['generationConfig'] = $generation;
        }

        return $payload;
    }

    /**
     * Resolve the speech config: a usable multi-speaker setup wins, otherwise
     * the request's single voice is used, otherwise nothing is configured.
     *
     * @param array<string, mixed> $providerOptions
     * @return array<string, mixed>
     */
    protected function buildSpeechConfig(array $providerOptions): array
    {
        $speakers = $providerOptions['multiSpeaker'] ?? null;

        if (is_array($speakers)) {
            $multi = $this->buildMultiSpeakerConfig($speakers);

            if ($multi !== []) {
                return $multi;
            }
        }

        $voice = $this->request->voice();

        return $voice ? $this->buildSingleVoiceConfig($voice) : [];
    }

    /**
     * Wrap a voice name in the prebuilt-voice structure.
     *
     * @return array<string, array<string, array<string, string>>>
     */
    protected function buildSingleVoiceConfig(string $voiceName): array
    {
        $prebuilt = ['voiceName' => $voiceName];

        return [
            'voiceConfig' => [
                'prebuiltVoiceConfig' => $prebuilt,
            ],
        ];
    }

    /**
     * Build the multi-speaker voice config; entries lacking either `speaker`
     * or `voiceName` are ignored, and an empty array is returned when no
     * entry qualifies.
     *
     * @param array<int, array{speaker?: string, voiceName?: string}> $speakers
     * @return array<string, mixed>
     */
    protected function buildMultiSpeakerConfig(array $speakers): array
    {
        $configs = [];

        foreach ($speakers as $entry) {
            if (isset($entry['speaker'], $entry['voiceName'])) {
                $configs[] = [
                    'speaker' => $entry['speaker'],
                    'voiceConfig' => [
                        'prebuiltVoiceConfig' => [
                            'voiceName' => $entry['voiceName'],
                        ],
                    ],
                ];
            }
        }

        if ($configs === []) {
            return [];
        }

        return [
            'multiSpeakerVoiceConfig' => [
                'speakerVoiceConfigs' => $configs,
            ],
        ];
    }

    /**
     * The provider this mapper belongs to.
     */
    protected function provider(): string|Provider
    {
        return Provider::Gemini;
    }
}
16 changes: 16 additions & 0 deletions tests/Fixtures/gemini/tts-flash-1-1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"candidates": [
{
"content": {
"parts": [
{
"inlineData": {
"mimeType": "audio/wav",
"data": "UklGRiQAAABXQVZFZm10IBAAAAABAAEAQB8AAEAfAAABAAgAZGF0YQAAAAA="
}
}
]
}
}
]
}
16 changes: 16 additions & 0 deletions tests/Fixtures/gemini/tts-flash-multi-speaker-1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"candidates": [
{
"content": {
"parts": [
{
"inlineData": {
"mimeType": "audio/wav",
"data": "UklGRiQAAABXQVZFZm10IBAAAAABAAEAQB8AAEAfAAABAAgAZGF0YQAAAAA="
}
}
]
}
}
]
}
16 changes: 16 additions & 0 deletions tests/Fixtures/gemini/tts-flash-voice-option-1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"candidates": [
{
"content": {
"parts": [
{
"inlineData": {
"mimeType": "audio/wav",
"data": "UklGRiQAAABXQVZFZm10IBAAAAABAAEAQB8AAEAfAAABAAgAZGF0YQAAAAA="
}
}
]
}
}
]
}
16 changes: 16 additions & 0 deletions tests/Fixtures/gemini/tts-pro-1-1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"candidates": [
{
"content": {
"parts": [
{
"inlineData": {
"mimeType": "audio/wav",
"data": "UklGRiQAAABXQVZFZm10IBAAAAABAAEAQB8AAEAfAAABAAgAZGF0YQAAAAA="
}
}
]
}
}
]
}
16 changes: 16 additions & 0 deletions tests/Fixtures/gemini/tts-pro-multi-speaker-1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"candidates": [
{
"content": {
"parts": [
{
"inlineData": {
"mimeType": "audio/wav",
"data": "UklGRiQAAABXQVZFZm10IBAAAAABAAEAQB8AAEAfAAABAAgAZGF0YQAAAAA="
}
}
]
}
}
]
}
16 changes: 16 additions & 0 deletions tests/Fixtures/gemini/tts-pro-voice-option-1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"candidates": [
{
"content": {
"parts": [
{
"inlineData": {
"mimeType": "audio/wav",
"data": "UklGRiQAAABXQVZFZm10IBAAAAABAAEAQB8AAEAfAAABAAgAZGF0YQAAAAA="
}
}
]
}
}
]
}
Loading