diff --git a/README.md b/README.md
index 17b8b42..59110ac 100644
--- a/README.md
+++ b/README.md
@@ -78,6 +78,32 @@ const App = () => {
## Language Model
+### Model Options
+
+Choose the model quantization and enable NPU acceleration with Pro models.
+
+```typescript
+import { CactusLM } from 'cactus-react-native';
+
+// Use int4 for faster performance and smaller file size
+const cactusLM = new CactusLM({
+ model: 'lfm2-vl-450m',
+ options: {
+ quantization: 'int4', // 'int4' or 'int8'
+ pro: false
+ }
+});
+
+// Use pro models for NPU acceleration
+const cactusPro = new CactusLM({
+ model: 'lfm2-vl-450m',
+ options: {
+ quantization: 'int4',
+ pro: true
+ }
+});
+```
+
### Completion
Generate text responses from the model by providing a conversation history.
@@ -559,6 +585,60 @@ const App = () => {
};
```
+### Streaming Transcription
+
+Transcribe audio in real-time with incremental results.
+
+#### Class
+
+```typescript
+import { CactusSTT } from 'cactus-react-native';
+
+const cactusSTT = new CactusSTT({ model: 'whisper-small' });
+
+await cactusSTT.streamTranscribeInit();
+
+const audioChunk: number[] = [/* PCM samples */];
+await cactusSTT.streamTranscribeInsert({ audio: audioChunk });
+
+const result = await cactusSTT.streamTranscribeProcess({
+ options: { confirmationThreshold: 0.95 }
+});
+
+console.log('Confirmed:', result.confirmed);
+console.log('Pending:', result.pending);
+
+const final = await cactusSTT.streamTranscribeFinalize();
+await cactusSTT.streamTranscribeDestroy();
+```
+
+#### Hook
+
+```tsx
+import { useCactusSTT } from 'cactus-react-native';
+
+const App = () => {
+ const cactusSTT = useCactusSTT({ model: 'whisper-small' });
+
+ const handleStream = async () => {
+ await cactusSTT.streamTranscribeInit();
+
+ const audioChunk: number[] = [/* PCM samples */];
+ await cactusSTT.streamTranscribeInsert({ audio: audioChunk });
+
+ await cactusSTT.streamTranscribeProcess();
+ };
+
+ return (
+ <>
+
+ {cactusSTT.streamTranscribeConfirmed}
+ {cactusSTT.streamTranscribePending}
+ >
+ );
+};
+```
+
### Audio Embedding
Generate embeddings from audio files for audio understanding.
@@ -854,9 +934,12 @@ const App = () => {
**`new CactusLM(params?: CactusLMParams)`**
**Parameters:**
-- `model` - Model slug or absolute path to Cactus model (default: `'qwen3-0.6'`).
+- `model` - Model slug or absolute path to Cactus model (default: `'qwen3-0.6b'`).
- `contextSize` - Context window size (default: `2048`).
- `corpusDir` - Directory containing text files for RAG (default: `undefined`).
+- `options` - Model options for quantization and NPU acceleration:
+ - `quantization` - Quantization type: `'int4'` | `'int8'` (default: `'int4'`).
+ - `pro` - Enable NPU-accelerated models (default: `false`).
#### Methods
@@ -932,13 +1015,13 @@ Resets the model's internal state, clearing any cached context. Automatically ca
Releases all resources associated with the model. Automatically calls `stop()` first. Safe to call even if the model is not initialized.
-**`getModels(): Promise`**
+**`getModels(): CactusModel[]`**
-Fetches available models from the database and checks their download status.
+Returns available models.
### useCactusLM Hook
-The `useCactusLM` hook manages a `CactusLM` instance with reactive state. When model parameters (`model`, `contextSize`, or `corpusDir`) change, the hook creates a new instance and resets all state. The hook automatically cleans up resources when the component unmounts.
+The `useCactusLM` hook manages a `CactusLM` instance with reactive state. When model parameters (`model`, `contextSize`, `corpusDir`, `options`) change, the hook creates a new instance and resets all state. The hook automatically cleans up resources when the component unmounts.
#### State
@@ -962,7 +1045,7 @@ The `useCactusLM` hook manages a `CactusLM` instance with reactive state. When m
- `stop(): Promise` - Stops ongoing generation. Clears any errors.
- `reset(): Promise` - Resets the model's internal state, clearing cached context. Also clears the `completion` state.
- `destroy(): Promise` - Releases all resources associated with the model. Clears the `completion` state. Automatically called when the component unmounts.
-- `getModels(): Promise` - Fetches available models from the database and checks their download status.
+- `getModels(): CactusModel[]` - Returns available models.
### CactusSTT Class
@@ -971,8 +1054,11 @@ The `useCactusLM` hook manages a `CactusLM` instance with reactive state. When m
**`new CactusSTT(params?: CactusSTTParams)`**
**Parameters:**
-- `model` - Model slug or absolute path to Cactus model (default: `'qwen3-0.6'`).
+- `model` - Model slug or absolute path to Cactus model (default: `'whisper-small'`).
- `contextSize` - Context window size (default: `2048`).
+- `options` - Model options for quantization and NPU acceleration:
+ - `quantization` - Quantization type: `'int4'` | `'int8'` (default: `'int4'`).
+ - `pro` - Enable NPU-accelerated models (default: `false`).
#### Methods
@@ -1009,6 +1095,33 @@ Generates embeddings for the given audio file. Automatically calls `init()` if n
**Parameters:**
- `audioPath` - Path to the audio file.
+**`streamTranscribeInit(): Promise<void>`**
+
+Initializes a streaming transcription session. Automatically calls `init()` if not already initialized.
+
+**`streamTranscribeInsert(params: CactusSTTStreamTranscribeInsertParams): Promise<void>`**
+
+Inserts PCM audio samples into the streaming buffer.
+
+**Parameters:**
+- `audio` - Array of PCM audio samples.
+
+**`streamTranscribeProcess(params?: CactusSTTStreamTranscribeProcessParams): Promise<CactusSTTStreamTranscribeProcessResult>`**
+
+Processes accumulated audio and returns incremental transcription results.
+
+**Parameters:**
+- `options` - Processing options:
+ - `confirmationThreshold` - Confidence threshold for confirming text.
+
+**`streamTranscribeFinalize(): Promise<CactusSTTStreamTranscribeFinalizeResult>`**
+
+Finalizes the streaming session and returns remaining transcription text.
+
+**`streamTranscribeDestroy(): Promise<void>`**
+
+Destroys the streaming session and releases resources.
+
**`stop(): Promise`**
Stops ongoing transcription or embedding generation.
@@ -1021,18 +1134,21 @@ Resets the model's internal state. Automatically calls `stop()` first.
Releases all resources associated with the model. Automatically calls `stop()` first. Safe to call even if the model is not initialized.
-**`getModels(): Promise`**
+**`getModels(): CactusModel[]`**
-Fetches available STT models from the database and checks their download status.
+Returns available speech-to-text models.
### useCactusSTT Hook
-The `useCactusSTT` hook manages a `CactusSTT` instance with reactive state. When model parameters (`model`, `contextSize`) change, the hook creates a new instance and resets all state. The hook automatically cleans up resources when the component unmounts.
+The `useCactusSTT` hook manages a `CactusSTT` instance with reactive state. When model parameters (`model`, `contextSize`, `options`) change, the hook creates a new instance and resets all state. The hook automatically cleans up resources when the component unmounts.
#### State
- `transcription: string` - Current transcription text. Automatically accumulated during streaming. Cleared before each new transcription and when calling `reset()` or `destroy()`.
+- `streamTranscribeConfirmed: string` - Accumulated confirmed text from streaming transcription.
+- `streamTranscribePending: string` - Current pending text from streaming transcription.
- `isGenerating: boolean` - Whether the model is currently generating (transcription or embedding). Both operations share this flag.
+- `isStreamTranscribing: boolean` - Whether a streaming transcription session is active.
- `isInitializing: boolean` - Whether the model is initializing.
- `isDownloaded: boolean` - Whether the model is downloaded locally. Automatically checked when the hook mounts or model changes.
- `isDownloading: boolean` - Whether the model is being downloaded.
@@ -1045,10 +1161,15 @@ The `useCactusSTT` hook manages a `CactusSTT` instance with reactive state. When
- `init(): Promise` - Initializes the model for inference. Sets `isInitializing` to `true` during initialization.
- `transcribe(params: CactusSTTTranscribeParams): Promise` - Transcribes audio to text. Automatically accumulates tokens in the `transcription` state during streaming. Sets `isGenerating` to `true` while generating. Clears `transcription` before starting.
- `audioEmbed(params: CactusSTTAudioEmbedParams): Promise` - Generates embeddings for the given audio. Sets `isGenerating` to `true` during operation.
+- `streamTranscribeInit(): Promise<void>` - Initializes a streaming transcription session. Sets `isStreamTranscribing` to `true`.
+- `streamTranscribeInsert(params: CactusSTTStreamTranscribeInsertParams): Promise<void>` - Inserts audio chunks into the streaming buffer.
+- `streamTranscribeProcess(params?: CactusSTTStreamTranscribeProcessParams): Promise<CactusSTTStreamTranscribeProcessResult>` - Processes audio and returns results. Automatically accumulates confirmed text in `streamTranscribeConfirmed` and updates `streamTranscribePending`.
+- `streamTranscribeFinalize(): Promise<CactusSTTStreamTranscribeFinalizeResult>` - Finalizes streaming and returns remaining text.
+- `streamTranscribeDestroy(): Promise<void>` - Destroys the streaming session. Sets `isStreamTranscribing` to `false`.
- `stop(): Promise` - Stops ongoing generation. Clears any errors.
- `reset(): Promise` - Resets the model's internal state. Also clears the `transcription` state.
- `destroy(): Promise` - Releases all resources associated with the model. Clears the `transcription` state. Automatically called when the component unmounts.
-- `getModels(): Promise` - Fetches available STT models from the database and checks their download status.
+- `getModels(): CactusModel[]` - Returns available speech-to-text models.
### CactusIndex Class
@@ -1137,6 +1258,7 @@ interface CactusLMParams {
model?: string;
contextSize?: number;
corpusDir?: string;
+ options?: ModelOptions;
}
```
@@ -1293,28 +1415,36 @@ interface CactusLMImageEmbedResult {
```typescript
interface CactusModel {
- name: string;
- slug: string;
- quantization: number;
- sizeMb: number;
- downloadUrl: string;
- supportsToolCalling: boolean;
- supportsVision: boolean;
- supportsCompletion: boolean;
- createdAt: Date;
- isDownloaded: boolean;
+ completion: boolean;
+ tools: boolean;
+ vision: boolean;
+ embed: boolean;
+ speech: boolean;
+ quantization: {
+ int4: {
+ sizeMb: number;
+ url: string;
+ pro?: {
+ apple: string;
+ };
+ };
+ int8: {
+ sizeMb: number;
+ url: string;
+ pro?: {
+ apple: string;
+ };
+ };
+ };
}
```
-### CactusSTTModel
+### ModelOptions
```typescript
-interface CactusSTTModel {
- slug: string;
- sizeMb: number;
- downloadUrl: string;
- createdAt: Date;
- isDownloaded: boolean;
+interface ModelOptions {
+ quantization: 'int4' | 'int8';
+ pro: boolean;
}
```
@@ -1324,6 +1454,7 @@ interface CactusSTTModel {
interface CactusSTTParams {
model?: string;
contextSize?: number;
+ options?: ModelOptions;
}
```
@@ -1391,6 +1522,49 @@ interface CactusSTTAudioEmbedResult {
}
```
+### CactusSTTStreamTranscribeInsertParams
+
+```typescript
+interface CactusSTTStreamTranscribeInsertParams {
+ audio: number[];
+}
+```
+
+### StreamTranscribeProcessOptions
+
+```typescript
+interface StreamTranscribeProcessOptions {
+ confirmationThreshold?: number;
+}
+```
+
+### CactusSTTStreamTranscribeProcessParams
+
+```typescript
+interface CactusSTTStreamTranscribeProcessParams {
+ options?: StreamTranscribeProcessOptions;
+}
+```
+
+### CactusSTTStreamTranscribeProcessResult
+
+```typescript
+interface CactusSTTStreamTranscribeProcessResult {
+ success: boolean;
+ confirmed: string;
+ pending: string;
+}
+```
+
+### CactusSTTStreamTranscribeFinalizeResult
+
+```typescript
+interface CactusSTTStreamTranscribeFinalizeResult {
+ success: boolean;
+ confirmed: string;
+}
+```
+
### CactusIndexParams
```typescript
@@ -1491,6 +1665,17 @@ import { CactusConfig } from 'cactus-react-native';
CactusConfig.cactusToken = 'your-cactus-token-here';
```
+### Cactus Pro
+
+Enable NPU-accelerated models for enhanced performance.
+
+```typescript
+import { CactusConfig } from 'cactus-react-native';
+
+// Set your Cactus Pro key
+CactusConfig.cactusProKey = 'your-cactus-pro-key-here';
+```
+
## Performance Tips
- **Model Selection** - Choose smaller models for faster inference on mobile devices.
diff --git a/android/src/main/jniLibs/arm64-v8a/libcactus.a b/android/src/main/jniLibs/arm64-v8a/libcactus.a
index 0a72e9d..91ce826 100644
Binary files a/android/src/main/jniLibs/arm64-v8a/libcactus.a and b/android/src/main/jniLibs/arm64-v8a/libcactus.a differ
diff --git a/cpp/HybridCactus.cpp b/cpp/HybridCactus.cpp
index 7cce17a..a6282d3 100644
--- a/cpp/HybridCactus.cpp
+++ b/cpp/HybridCactus.cpp
@@ -325,9 +325,128 @@ std::shared_ptr> HybridCactus::destroy() {
throw std::runtime_error("Cactus model is not initialized");
}
+ if (this->_streamTranscribe) {
+ cactus_stream_transcribe_destroy(this->_streamTranscribe);
+ this->_streamTranscribe = nullptr;
+ }
+
cactus_destroy(this->_model);
this->_model = nullptr;
});
}
+std::shared_ptr<Promise<void>> HybridCactus::streamTranscribeInit() {
+ return Promise::async([this]() -> void {
+ std::lock_guard<std::mutex> lock(this->_modelMutex);
+
+ if (!this->_model) {
+ throw std::runtime_error("Cactus model is not initialized");
+ }
+
+ if (this->_streamTranscribe) {
+ throw std::runtime_error(
+ "Cactus stream transcribe is already initialized");
+ }
+
+ this->_streamTranscribe = cactus_stream_transcribe_init(this->_model);
+ if (!this->_streamTranscribe) {
+ throw std::runtime_error("Cactus stream transcribe init failed: " +
+ std::string(cactus_get_last_error()));
+ }
+ });
+}
+
+std::shared_ptr<Promise<void>>
+HybridCactus::streamTranscribeInsert(const std::vector<double> &audio) {
+ return Promise::async([this, audio]() -> void {
+ std::lock_guard<std::mutex> lock(this->_modelMutex);
+
+ if (!this->_streamTranscribe) {
+ throw std::runtime_error("Cactus stream transcribe is not initialized");
+ }
+
+ std::vector<uint8_t> audioBytes;
+ audioBytes.reserve(audio.size());
+ for (double d : audio) {
+ d = std::clamp(d, 0.0, 255.0);
+ audioBytes.emplace_back(static_cast<uint8_t>(d));
+ }
+
+ int result = cactus_stream_transcribe_insert(
+ this->_streamTranscribe, audioBytes.data(), audioBytes.size());
+
+ if (result < 0) {
+ throw std::runtime_error("Cactus stream transcribe insert failed: " +
+ std::string(cactus_get_last_error()));
+ }
+ });
+}
+
+std::shared_ptr<Promise<std::string>> HybridCactus::streamTranscribeProcess(
+ const std::optional<std::string> &optionsJson) {
+ return Promise::async([this, optionsJson]() -> std::string {
+ std::lock_guard<std::mutex> lock(this->_modelMutex);
+
+ if (!this->_streamTranscribe) {
+ throw std::runtime_error("Cactus stream transcribe is not initialized");
+ }
+
+ std::string responseBuffer;
+ responseBuffer.resize(32768);
+
+ int result = cactus_stream_transcribe_process(
+ this->_streamTranscribe, responseBuffer.data(), responseBuffer.size(),
+ optionsJson ? optionsJson->c_str() : nullptr);
+
+ if (result < 0) {
+ throw std::runtime_error("Cactus stream transcribe process failed: " +
+ std::string(cactus_get_last_error()));
+ }
+
+ // Remove null terminator
+ responseBuffer.resize(strlen(responseBuffer.c_str()));
+
+ return responseBuffer;
+ });
+}
+
+std::shared_ptr<Promise<std::string>> HybridCactus::streamTranscribeFinalize() {
+ return Promise::async([this]() -> std::string {
+ std::lock_guard<std::mutex> lock(this->_modelMutex);
+
+ if (!this->_streamTranscribe) {
+ throw std::runtime_error("Cactus stream transcribe is not initialized");
+ }
+
+ std::string responseBuffer;
+ responseBuffer.resize(32768);
+
+ int result = cactus_stream_transcribe_finalize(
+ this->_streamTranscribe, responseBuffer.data(), responseBuffer.size());
+
+ if (result < 0) {
+ throw std::runtime_error("Cactus stream transcribe finalize failed: " +
+ std::string(cactus_get_last_error()));
+ }
+
+ // Remove null terminator
+ responseBuffer.resize(strlen(responseBuffer.c_str()));
+
+ return responseBuffer;
+ });
+}
+
+std::shared_ptr<Promise<void>> HybridCactus::streamTranscribeDestroy() {
+ return Promise::async([this]() -> void {
+ std::lock_guard<std::mutex> lock(this->_modelMutex);
+
+ if (!this->_streamTranscribe) {
+ throw std::runtime_error("Cactus stream transcribe is not initialized");
+ }
+
+ cactus_stream_transcribe_destroy(this->_streamTranscribe);
+ this->_streamTranscribe = nullptr;
+ });
+}
+
} // namespace margelo::nitro::cactus
diff --git a/cpp/HybridCactus.hpp b/cpp/HybridCactus.hpp
index fd49a1e..27e84e3 100644
--- a/cpp/HybridCactus.hpp
+++ b/cpp/HybridCactus.hpp
@@ -38,6 +38,18 @@ class HybridCactus : public HybridCactusSpec {
double /* tokenId */)>> &callback)
override;
+ std::shared_ptr<Promise<void>> streamTranscribeInit() override;
+
+ std::shared_ptr<Promise<void>>
+ streamTranscribeInsert(const std::vector<double> &audio) override;
+
+ std::shared_ptr<Promise<std::string>> streamTranscribeProcess(
+ const std::optional<std::string> &optionsJson) override;
+
+ std::shared_ptr<Promise<std::string>> streamTranscribeFinalize() override;
+
+ std::shared_ptr<Promise<void>> streamTranscribeDestroy() override;
+
std::shared_ptr>>
embed(const std::string &text, double embeddingBufferSize,
bool normalize) override;
@@ -56,6 +68,7 @@ class HybridCactus : public HybridCactusSpec {
private:
cactus_model_t _model = nullptr;
+ cactus_stream_transcribe_t _streamTranscribe = nullptr;
size_t _contextSize;
std::mutex _modelMutex;
diff --git a/cpp/cactus_ffi.h b/cpp/cactus_ffi.h
index e00b391..bb57657 100644
--- a/cpp/cactus_ffi.h
+++ b/cpp/cactus_ffi.h
@@ -67,6 +67,30 @@ CACTUS_FFI_EXPORT int cactus_transcribe(
size_t pcm_buffer_size
);
+typedef void* cactus_stream_transcribe_t;
+
+CACTUS_FFI_EXPORT cactus_stream_transcribe_t cactus_stream_transcribe_init(cactus_model_t model);
+
+CACTUS_FFI_EXPORT int cactus_stream_transcribe_insert(
+ cactus_stream_transcribe_t stream,
+ const uint8_t* pcm_buffer,
+ size_t pcm_buffer_size
+);
+
+CACTUS_FFI_EXPORT int cactus_stream_transcribe_process(
+ cactus_stream_transcribe_t stream,
+ char* response_buffer,
+ size_t buffer_size,
+ const char* options_json
+);
+
+CACTUS_FFI_EXPORT int cactus_stream_transcribe_finalize(
+ cactus_stream_transcribe_t stream,
+ char* response_buffer,
+ size_t buffer_size
+);
+
+CACTUS_FFI_EXPORT void cactus_stream_transcribe_destroy(cactus_stream_transcribe_t stream);
CACTUS_FFI_EXPORT int cactus_embed(
cactus_model_t model,
diff --git a/example/ios/Podfile.lock b/example/ios/Podfile.lock
index f1bbe42..993c199 100644
--- a/example/ios/Podfile.lock
+++ b/example/ios/Podfile.lock
@@ -1,6 +1,6 @@
PODS:
- boost (1.84.0)
- - Cactus (1.4.0):
+ - Cactus (1.5.0):
- boost
- DoubleConversion
- fast_float
@@ -2643,7 +2643,7 @@ EXTERNAL SOURCES:
SPEC CHECKSUMS:
boost: 7e761d76ca2ce687f7cc98e698152abd03a18f90
- Cactus: 83c36f3d76eb2102a79020b41201a3aae8b71956
+ Cactus: 336fab89b318d196bcc0f87cf4634acd57c83bad
DoubleConversion: cb417026b2400c8f53ae97020b2be961b59470cb
fast_float: b32c788ed9c6a8c584d114d0047beda9664e7cc6
FBLazyVector: b8f1312d48447cca7b4abc21ed155db14742bd03
diff --git a/example/src/App.tsx b/example/src/App.tsx
index 0033107..33e24e1 100644
--- a/example/src/App.tsx
+++ b/example/src/App.tsx
@@ -12,8 +12,14 @@ import VisionScreen from './VisionScreen';
import ToolCallingScreen from './ToolCallingScreen';
import RAGScreen from './RAGScreen';
import STTScreen from './STTScreen';
+import StreamSTTScreen from './StreamSTTScreen';
import ChatScreen from './ChatScreen';
import IndexScreen from './IndexScreen';
+import { CactusConfig } from 'cactus-react-native';
+
+// Set your Cactus Pro API key here
+// To enable NPU acceleration, also set the pro option in model options where applicable
+CactusConfig.cactusProKey = '';
type Screen =
| 'Home'
@@ -22,6 +28,7 @@ type Screen =
| 'ToolCalling'
| 'RAG'
| 'STT'
+ | 'StreamSTT'
| 'Chat'
| 'Index';
@@ -52,6 +59,10 @@ const App = () => {
setSelectedScreen('STT');
};
+ const handleGoToStreamSTT = () => {
+ setSelectedScreen('StreamSTT');
+ };
+
const handleGoToChat = () => {
setSelectedScreen('Chat');
};
@@ -72,6 +83,8 @@ const App = () => {
return ;
case 'STT':
return ;
+ case 'StreamSTT':
+ return ;
case 'Chat':
return ;
case 'Index':
@@ -142,6 +155,16 @@ const App = () => {
+
+ Stream Transcription
+
+ Real-time streaming audio transcription
+
+
+
Chat
diff --git a/example/src/CompletionScreen.tsx b/example/src/CompletionScreen.tsx
index a3a4656..b66d1ce 100644
--- a/example/src/CompletionScreen.tsx
+++ b/example/src/CompletionScreen.tsx
@@ -16,7 +16,7 @@ import {
} from 'cactus-react-native';
const CompletionScreen = () => {
- const cactusLM = useCactusLM({ model: 'qwen3-0.6' });
+ const cactusLM = useCactusLM({ model: 'qwen3-0.6b' });
const [input, setInput] = useState('What is the capital of France?');
const [result, setResult] = useState(null);
const [embedResult, setEmbedResult] = useState(
diff --git a/example/src/StreamSTTScreen.tsx b/example/src/StreamSTTScreen.tsx
new file mode 100644
index 0000000..47b2f0e
--- /dev/null
+++ b/example/src/StreamSTTScreen.tsx
@@ -0,0 +1,284 @@
+import { useEffect, useState } from 'react';
+import {
+ View,
+ Text,
+ TouchableOpacity,
+ ScrollView,
+ StyleSheet,
+ ActivityIndicator,
+} from 'react-native';
+import { useCactusSTT } from 'cactus-react-native';
+import * as DocumentPicker from '@react-native-documents/picker';
+import * as RNFS from '@dr.pogodin/react-native-fs';
+
+// 3 seconds of 16kHz audio (2 bytes per sample)
+const CHUNK_SIZE = 16000 * 2 * 3;
+
+const StreamSTTScreen = () => {
+ const cactusSTT = useCactusSTT({ model: 'whisper-small' });
+ const [audioFile, setAudioFile] = useState<string | null>(null);
+ const [audioFileName, setAudioFileName] = useState('');
+
+ useEffect(() => {
+ if (!cactusSTT.isDownloaded) {
+ cactusSTT.download();
+ }
+ // eslint-disable-next-line react-hooks/exhaustive-deps
+ }, [cactusSTT.isDownloaded]);
+
+ const handleSelectAudio = async () => {
+ try {
+ const res = await DocumentPicker.pick({
+ type: [DocumentPicker.types.audio],
+ });
+ if (res && res.length > 0) {
+ const fileName = `audio_${Date.now()}.wav`;
+ const destPath = `${RNFS.CachesDirectoryPath}/${fileName}`;
+ await RNFS.copyFile(res[0].uri, destPath);
+ setAudioFile(destPath);
+ setAudioFileName(res[0].name || 'Unknown');
+ }
+ } catch (err) {
+ console.error(err);
+ }
+ };
+
+ const readAudioFile = async (filePath: string): Promise<Uint8Array> => {
+ const base64Audio = await RNFS.readFile(filePath, 'base64');
+ const binaryString = atob(base64Audio);
+ const bytes = new Uint8Array(binaryString.length);
+ for (let i = 0; i < binaryString.length; i++) {
+ bytes[i] = binaryString.charCodeAt(i);
+ }
+ // Skip WAV header (44 bytes)
+ return bytes.slice(44);
+ };
+
+ const handleStreamTranscribe = async () => {
+ if (!audioFile) return;
+ try {
+ // Initialize streaming
+ await cactusSTT.streamTranscribeInit();
+
+ // Read audio file
+ const pcmData = await readAudioFile(audioFile);
+
+ // Stream audio in 3-second chunks
+ for (let i = 0; i < pcmData.length; i += CHUNK_SIZE) {
+ const chunk = pcmData.slice(i, i + CHUNK_SIZE);
+ const pcmSamples = Array.from(chunk);
+
+ // Insert chunk
+ await cactusSTT.streamTranscribeInsert({ audio: pcmSamples });
+
+ // Process and get results
+ await cactusSTT.streamTranscribeProcess({
+ options: { confirmationThreshold: 0.95 },
+ });
+ }
+
+ // Finalize to get remaining text
+ await cactusSTT.streamTranscribeFinalize();
+ } catch (err) {
+ console.error('Stream error:', err);
+ }
+ };
+
+ const handleStop = async () => {
+ try {
+ await cactusSTT.streamTranscribeDestroy();
+ } catch (err) {
+ console.error('Stop error:', err);
+ }
+ };
+
+ if (cactusSTT.isDownloading) {
+ return (
+
+
+
+ Downloading: {Math.round(cactusSTT.downloadProgress * 100)}%
+
+
+ );
+ }
+
+ return (
+
+
+
+ {audioFile ? `Selected: ${audioFileName}` : 'Select Audio File'}
+
+
+
+
+
+
+ {cactusSTT.isStreamTranscribing
+ ? 'Streaming...'
+ : 'Stream Transcribe'}
+
+
+
+
+ Stop
+
+
+
+ {cactusSTT.isStreamTranscribing && (
+
+ ● Streaming...
+
+ )}
+
+ {cactusSTT.streamTranscribeConfirmed && (
+
+ Confirmed Text:
+
+
+ {cactusSTT.streamTranscribeConfirmed}
+
+
+
+ )}
+
+ {cactusSTT.streamTranscribePending && (
+
+ Pending Text:
+
+
+ {cactusSTT.streamTranscribePending}
+
+
+
+ )}
+
+ {cactusSTT.error && (
+
+ {cactusSTT.error}
+
+ )}
+
+ );
+};
+
+export default StreamSTTScreen;
+
+const styles = StyleSheet.create({
+ container: {
+ flex: 1,
+ backgroundColor: '#fff',
+ },
+ content: {
+ padding: 20,
+ },
+ centerContainer: {
+ flex: 1,
+ justifyContent: 'center',
+ alignItems: 'center',
+ padding: 20,
+ },
+ progressText: {
+ marginTop: 16,
+ fontSize: 16,
+ color: '#000',
+ },
+ selectButton: {
+ padding: 16,
+ backgroundColor: '#f3f3f3',
+ borderRadius: 8,
+ marginBottom: 16,
+ alignItems: 'center',
+ },
+ selectButtonText: {
+ fontSize: 16,
+ color: '#000',
+ },
+ buttonContainer: {
+ flexDirection: 'row',
+ flexWrap: 'wrap',
+ gap: 8,
+ marginBottom: 16,
+ },
+ button: {
+ backgroundColor: '#000',
+ paddingVertical: 12,
+ paddingHorizontal: 16,
+ borderRadius: 8,
+ alignItems: 'center',
+ },
+ buttonDisabled: {
+ backgroundColor: '#ccc',
+ },
+ buttonText: {
+ color: '#fff',
+ fontSize: 16,
+ fontWeight: '600',
+ },
+ statusContainer: {
+ backgroundColor: '#f3f3f3',
+ padding: 12,
+ borderRadius: 8,
+ marginBottom: 16,
+ },
+ statusText: {
+ fontSize: 14,
+ color: '#2e7d32',
+ fontWeight: '600',
+ },
+ resultContainer: {
+ marginTop: 16,
+ },
+ resultLabel: {
+ fontSize: 16,
+ fontWeight: '600',
+ marginBottom: 8,
+ color: '#000',
+ },
+ resultBox: {
+ backgroundColor: '#f3f3f3',
+ padding: 12,
+ borderRadius: 8,
+ minHeight: 60,
+ },
+ resultText: {
+ fontSize: 14,
+ color: '#000',
+ lineHeight: 20,
+ },
+ pendingBox: {
+ backgroundColor: '#f3f3f3',
+ padding: 12,
+ borderRadius: 8,
+ minHeight: 60,
+ opacity: 0.7,
+ },
+ pendingText: {
+ fontStyle: 'italic',
+ },
+ errorContainer: {
+ backgroundColor: '#000',
+ padding: 12,
+ borderRadius: 8,
+ marginTop: 16,
+ },
+ errorText: {
+ color: '#fff',
+ fontSize: 14,
+ },
+});
diff --git a/example/src/ToolCallingScreen.tsx b/example/src/ToolCallingScreen.tsx
index 60d5282..e66bbd4 100644
--- a/example/src/ToolCallingScreen.tsx
+++ b/example/src/ToolCallingScreen.tsx
@@ -33,7 +33,7 @@ const tools: Tool[] = [
];
const ToolCallingScreen = () => {
- const cactusLM = useCactusLM({ model: 'qwen3-0.6' });
+ const cactusLM = useCactusLM({ model: 'qwen3-0.6b' });
const [input, setInput] = useState("What's the weather in San Francisco?");
const [result, setResult] = useState(null);
diff --git a/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h b/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h
index e00b391..bb57657 100644
--- a/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h
+++ b/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_ffi.h
@@ -67,6 +67,30 @@ CACTUS_FFI_EXPORT int cactus_transcribe(
size_t pcm_buffer_size
);
+typedef void* cactus_stream_transcribe_t;
+
+CACTUS_FFI_EXPORT cactus_stream_transcribe_t cactus_stream_transcribe_init(cactus_model_t model);
+
+CACTUS_FFI_EXPORT int cactus_stream_transcribe_insert(
+ cactus_stream_transcribe_t stream,
+ const uint8_t* pcm_buffer,
+ size_t pcm_buffer_size
+);
+
+CACTUS_FFI_EXPORT int cactus_stream_transcribe_process(
+ cactus_stream_transcribe_t stream,
+ char* response_buffer,
+ size_t buffer_size,
+ const char* options_json
+);
+
+CACTUS_FFI_EXPORT int cactus_stream_transcribe_finalize(
+ cactus_stream_transcribe_t stream,
+ char* response_buffer,
+ size_t buffer_size
+);
+
+CACTUS_FFI_EXPORT void cactus_stream_transcribe_destroy(cactus_stream_transcribe_t stream);
CACTUS_FFI_EXPORT int cactus_embed(
cactus_model_t model,
diff --git a/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_utils.h b/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_utils.h
index 06dfebe..bd03313 100644
--- a/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_utils.h
+++ b/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/cactus_utils.h
@@ -63,6 +63,14 @@ struct ToolFunction {
std::unordered_map parameters;
};
+} // namespace ffi
+} // namespace cactus
+
+#include "gemma_tools.h"
+
+namespace cactus {
+namespace ffi {
+
inline void handle_error_response(const std::string& error_message, char* response_buffer, size_t buffer_size) {
std::string sanitized_msg = error_message;
for (auto& c : sanitized_msg) {
@@ -303,11 +311,43 @@ inline void parse_function_calls_from_response(const std::string& response_text,
regular_response = response_text;
function_calls.clear();
+ gemma::parse_function_calls(regular_response, function_calls);
+
+ // Parse Qwen-style function calls: {"name": "...", "arguments": {...}}
+ const std::string QWEN_TOOL_START = "<tool_call>";
+ const std::string QWEN_TOOL_END = "</tool_call>";
+ size_t qwen_start_pos = 0;
+
+ while ((qwen_start_pos = regular_response.find(QWEN_TOOL_START, qwen_start_pos)) != std::string::npos) {
+ size_t content_start = qwen_start_pos + QWEN_TOOL_START.length();
+ size_t qwen_end_pos = regular_response.find(QWEN_TOOL_END, content_start);
+
+ if (qwen_end_pos != std::string::npos) {
+ std::string json_content = regular_response.substr(content_start, qwen_end_pos - content_start);
+
+ size_t first = json_content.find_first_not_of(" \t\n\r");
+ size_t last = json_content.find_last_not_of(" \t\n\r");
+ if (first != std::string::npos && last != std::string::npos) {
+ json_content = json_content.substr(first, last - first + 1);
+ }
+
+ if (json_content.size() > 2 && json_content[0] == '{' &&
+ json_content.find("\"name\"") != std::string::npos) {
+ function_calls.push_back(json_content);
+ }
+
+ regular_response.erase(qwen_start_pos, qwen_end_pos + QWEN_TOOL_END.length() - qwen_start_pos);
+ } else {
+ break;
+ }
+ }
+
+ // Parse LFM2-style function calls: <|tool_call_start|>[name(args)]<|tool_call_end|>
const std::string TOOL_CALL_START = "<|tool_call_start|>";
const std::string TOOL_CALL_END = "<|tool_call_end|>";
size_t tool_start_pos = 0;
- while ((tool_start_pos = response_text.find(TOOL_CALL_START, tool_start_pos)) != std::string::npos) {
+ while ((tool_start_pos = regular_response.find(TOOL_CALL_START, tool_start_pos)) != std::string::npos) {
size_t content_start = tool_start_pos + TOOL_CALL_START.length();
size_t tool_end_pos = response_text.find(TOOL_CALL_END, content_start);
diff --git a/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/engine.h b/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/engine.h
index 601e818..a7ef002 100644
--- a/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/engine.h
+++ b/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/engine.h
@@ -131,9 +131,12 @@ struct MergeRule {
struct ChatMessage {
std::string role;
std::string content;
+ std::string name;
std::vector images;
};
+
+
class Tokenizer {
public:
virtual ~Tokenizer() = default;
@@ -329,6 +332,8 @@ struct KVCache {
struct LayerCache {
std::vector keys;
std::vector values;
+ std::vector key_scales;
+ std::vector value_scales;
};
std::vector layer_caches;
@@ -354,13 +359,11 @@ struct KVCache {
const std::vector& v_nodes, size_t seq_len,
size_t num_layers, size_t kv_heads, size_t head_dim);
- // Update KV cache from NPU prefill outputs
- // NPU outputs are in shape [num_tokens, num_kv_heads, head_dim]
- // This handles transposition to cache format and sliding window
void update_from_npu(size_t layer_idx, const __fp16* k_data, const __fp16* v_data,
size_t num_tokens, size_t kv_heads, size_t head_dim);
bool is_empty() const { return current_seq_len == 0; }
+ bool is_int8() const { return precision == Precision::INT8; }
void* get_key_ptr(size_t layer);
void* get_value_ptr(size_t layer);
@@ -374,33 +377,44 @@ struct KVCache {
CircularView get_key_view(size_t layer);
CircularView get_value_view(size_t layer);
+
+ const int8_t* get_keys_int8(size_t layer) const;
+ const int8_t* get_values_int8(size_t layer) const;
+ const float* get_key_scales(size_t layer) const;
+ const float* get_value_scales(size_t layer) const;
};
class ToolCallConstrainer {
public:
enum class State {
- START, // -> expect {
- EXPECT_FC_KEY, // -> expect "function_call"
- EXPECT_FC_COLON, // -> expect :
- EXPECT_FC_OPEN_BRACE, // -> expect {
- EXPECT_NAME_KEY, // -> expect "name"
- EXPECT_NAME_COLON, // -> expect :
- EXPECT_NAME_VALUE, // -> expect ""
- EXPECT_COMMA, // -> expect ,
- EXPECT_ARGS_KEY, // -> expect "arguments"
- EXPECT_ARGS_COLON, // -> expect :
- IN_ARGUMENTS, // -> free JSON, track brace depth
- EXPECT_INNER_CLOSE, // -> expect } to close inner object
- EXPECT_OUTER_CLOSE, // -> expect } to close outer object
- DONE, // complete
-
- LFM_START, // -> expect <|tool_call_start|>
- LFM_EXPECT_BRACKET, // -> expect [
- LFM_IN_FUNC_NAME, // -> expect function name
- LFM_EXPECT_PAREN, // -> expect (
- LFM_IN_ARGUMENTS, // -> arguments until )
- LFM_EXPECT_BRACKET_CLOSE, // -> expect ]
- LFM_EXPECT_END // -> expect <|tool_call_end|>
+ DONE,
+
+ QWEN_START,
+ QWEN_EXPECT_OPEN_BRACE,
+ QWEN_EXPECT_NAME_KEY,
+ QWEN_EXPECT_NAME_COLON,
+ QWEN_EXPECT_NAME_VALUE,
+ QWEN_EXPECT_COMMA,
+ QWEN_EXPECT_ARGS_KEY,
+ QWEN_EXPECT_ARGS_COLON,
+ QWEN_IN_ARGUMENTS,
+ QWEN_EXPECT_CLOSE_BRACE,
+ QWEN_EXPECT_END,
+
+ LFM_START,
+ LFM_EXPECT_BRACKET,
+ LFM_IN_FUNC_NAME,
+ LFM_EXPECT_PAREN,
+ LFM_IN_ARGUMENTS,
+ LFM_EXPECT_BRACKET_CLOSE,
+ LFM_EXPECT_END,
+
+ GEMMA_START,
+ GEMMA_EXPECT_CALL,
+ GEMMA_IN_FUNC_NAME,
+ GEMMA_EXPECT_BRACE,
+ GEMMA_IN_ARGUMENTS,
+ GEMMA_EXPECT_END
};
void init(Config::ModelType model_type,
@@ -417,36 +431,40 @@ class ToolCallConstrainer {
private:
bool active_ = false;
- State state_ = State::START;
+ State state_ = State::QWEN_START;
Config::ModelType model_type_ = Config::ModelType::QWEN;
Tokenizer* tokenizer_ = nullptr;
    std::vector<std::string> function_names_;
std::string generated_text_;
- int brace_depth_ = 0; // Track nested braces in arguments
-
- // Pre-tokenized token sets for each grammar element
-    std::unordered_set<uint32_t> open_brace_tokens_;       // {
-    std::unordered_set<uint32_t> close_brace_tokens_;      // }
-    std::unordered_set<uint32_t> colon_tokens_;            // :
-    std::unordered_set<uint32_t> comma_tokens_;            // ,
-    std::unordered_set<uint32_t> fc_key_tokens_;           // "function_call"
-    std::unordered_set<uint32_t> name_key_tokens_;         // "name"
-    std::unordered_set<uint32_t> args_key_tokens_;         // "arguments"
-    std::unordered_set<uint32_t> quote_tokens_;            // "
-    std::unordered_set<uint32_t> backtick_tokens_;         // ` (to block markdown code fences)
-    std::unordered_set<uint32_t> response_starter_tokens_; // Common response starters to block (I, I'm, Sorry, etc.)
-    std::unordered_set<uint32_t> all_func_name_tokens_;    // All function name tokens combined
-    std::unordered_map<std::string, std::vector<uint32_t>> func_name_sequences_; // Full token sequence per function
-
- // LFM2-specific tokens
+ int brace_depth_ = 0;
+
+    std::unordered_set<uint32_t> qwen_tool_call_start_tokens_;
+    std::unordered_set<uint32_t> qwen_tool_call_end_tokens_;
+    std::unordered_set<uint32_t> open_brace_tokens_;
+    std::unordered_set<uint32_t> close_brace_tokens_;
+    std::unordered_set<uint32_t> colon_tokens_;
+    std::unordered_set<uint32_t> comma_tokens_;
+    std::unordered_set<uint32_t> name_key_tokens_;
+    std::unordered_set<uint32_t> args_key_tokens_;
+    std::unordered_set<uint32_t> quote_tokens_;
+    std::unordered_set<uint32_t> backtick_tokens_;
+    std::unordered_set<uint32_t> all_func_name_tokens_;
+    std::unordered_map<std::string, std::vector<uint32_t>> func_name_sequences_;
+
    std::unordered_set<uint32_t> tool_start_tokens_;
    std::unordered_set<uint32_t> tool_end_tokens_;
-    std::unordered_set<uint32_t> bracket_open_tokens_;  // [
-    std::unordered_set<uint32_t> bracket_close_tokens_; // ]
-    std::unordered_set<uint32_t> paren_open_tokens_;    // (
-    std::unordered_set<uint32_t> paren_close_tokens_;   // )
-    std::unordered_set<uint32_t> equals_tokens_;        // =
+    std::unordered_set<uint32_t> bracket_open_tokens_;
+    std::unordered_set<uint32_t> bracket_close_tokens_;
+    std::unordered_set<uint32_t> paren_open_tokens_;
+    std::unordered_set<uint32_t> paren_close_tokens_;
+    std::unordered_set<uint32_t> equals_tokens_;
+
+    std::unordered_set<uint32_t> gemma_call_start_tokens_;
+    std::unordered_set<uint32_t> gemma_call_end_tokens_;
+    std::unordered_set<uint32_t> gemma_response_start_tokens_;
+    std::unordered_set<uint32_t> gemma_call_prefix_tokens_;
+    std::unordered_set<uint32_t> escape_tokens_;
    std::unordered_map<uint32_t, float> current_bias_;
diff --git a/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/gemma_tools.h b/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/gemma_tools.h
new file mode 100644
index 0000000..912de57
--- /dev/null
+++ b/ios/cactus.xcframework/ios-arm64-simulator/cactus.framework/Headers/gemma_tools.h
@@ -0,0 +1,549 @@
+#pragma once
+
+#include <string>
+#include <vector>
+#include <unordered_set>
+#include <unordered_map>
+#include <cstdint>