diff --git a/src/vs/workbench/contrib/void/browser/autocompleteService.ts b/src/vs/workbench/contrib/void/browser/autocompleteService.ts index 22c86eb6afc..366292469be 100644 --- a/src/vs/workbench/contrib/void/browser/autocompleteService.ts +++ b/src/vs/workbench/contrib/void/browser/autocompleteService.ts @@ -795,67 +795,71 @@ export class AutocompleteService extends Disposable implements IAutocompleteServ const modelSelectionOptions = modelSelection ? this._settingsService.state.optionsOfModelSelection[featureName][modelSelection.providerName]?.[modelSelection.modelName] : undefined // set parameters of `newAutocompletion` appropriately - newAutocompletion.llmPromise = new Promise((resolve, reject) => { - - const requestId = this._llmMessageService.sendLLMMessage({ - messagesType: 'FIMMessage', - messages: this._convertToLLMMessageService.prepareFIMMessage({ - messages: { - prefix: llmPrefix, - suffix: llmSuffix, - stopTokens: stopTokens, - } - }), - modelSelection, - modelSelectionOptions, - overridesOfModel, - logging: { loggingName: 'Autocomplete' }, - onText: () => { }, // unused in FIMMessage - // onText: async ({ fullText, newText }) => { - - // newAutocompletion.insertText = fullText - - // // count newlines in newText - // const numNewlines = newText.match(/\n|\r\n/g)?.length || 0 - // newAutocompletion._newlineCount += numNewlines - - // // if too many newlines, resolve up to last newline - // if (newAutocompletion._newlineCount > 10) { - // const lastNewlinePos = fullText.lastIndexOf('\n') - // newAutocompletion.insertText = fullText.substring(0, lastNewlinePos) - // resolve(newAutocompletion.insertText) - // return - // } - - // // if (!getAutocompletionMatchup({ prefix: this._lastPrefix, autocompletion: newAutocompletion })) { - // // reject('LLM response did not match user\'s text.') - // // } - // }, - onFinalMessage: ({ fullText }) => { - - // console.log('____res: ', JSON.stringify(newAutocompletion.insertText)) - - newAutocompletion.endTime = Date.now() - newAutocompletion.status = 'finished' - const [text, _] = extractCodeFromRegular({ text: fullText, recentlyAddedTextLen: 0 }) - newAutocompletion.insertText = processStartAndEndSpaces(text) - - // handle special case for predicting starting on the next line, add a newline character - if (newAutocompletion.type === 'multi-line-start-on-next-line') { - newAutocompletion.insertText = _ln + newAutocompletion.insertText - } - - resolve(newAutocompletion.insertText) - - }, - onError: ({ message }) => { - newAutocompletion.endTime = Date.now() - newAutocompletion.status = 'error' - reject(message) - }, - onAbort: () => { reject('Aborted autocomplete') }, - }) - newAutocompletion.requestId = requestId + newAutocompletion.llmPromise = new Promise(async (resolve, reject) => { + + try { + const requestId = await this._llmMessageService.sendLLMMessage({ + messagesType: 'FIMMessage', + messages: this._convertToLLMMessageService.prepareFIMMessage({ + messages: { + prefix: llmPrefix, + suffix: llmSuffix, + stopTokens: stopTokens, + } + }), + modelSelection, + modelSelectionOptions, + overridesOfModel, + logging: { loggingName: 'Autocomplete' }, + onText: () => { }, // unused in FIMMessage + // onText: async ({ fullText, newText }) => { + + // newAutocompletion.insertText = fullText + + // // count newlines in newText + // const numNewlines = newText.match(/\n|\r\n/g)?.length || 0 + // newAutocompletion._newlineCount += numNewlines + + // // if too many newlines, resolve up to last newline + // if (newAutocompletion._newlineCount > 
10) { + // const lastNewlinePos = fullText.lastIndexOf('\n') + // newAutocompletion.insertText = fullText.substring(0, lastNewlinePos) + // resolve(newAutocompletion.insertText) + // return + // } + + // // if (!getAutocompletionMatchup({ prefix: this._lastPrefix, autocompletion: newAutocompletion })) { + // // reject('LLM response did not match user\'s text.') + // // } + // }, + onFinalMessage: ({ fullText }) => { + + // console.log('____res: ', JSON.stringify(newAutocompletion.insertText)) + + newAutocompletion.endTime = Date.now() + newAutocompletion.status = 'finished' + const [text, _] = extractCodeFromRegular({ text: fullText, recentlyAddedTextLen: 0 }) + newAutocompletion.insertText = processStartAndEndSpaces(text) + + // handle special case for predicting starting on the next line, add a newline character + if (newAutocompletion.type === 'multi-line-start-on-next-line') { + newAutocompletion.insertText = _ln + newAutocompletion.insertText + } + + resolve(newAutocompletion.insertText) + + }, + onError: ({ message }) => { + newAutocompletion.endTime = Date.now() + newAutocompletion.status = 'error' + reject(message) + }, + onAbort: () => { reject('Aborted autocomplete') }, + }) + newAutocompletion.requestId = requestId + } catch (error) { + reject(error) + } // if the request hasnt resolved in TIMEOUT_TIME seconds, reject it setTimeout(() => { diff --git a/src/vs/workbench/contrib/void/browser/chatThreadService.ts b/src/vs/workbench/contrib/void/browser/chatThreadService.ts index 30f38f10ba8..09fb2693195 100644 --- a/src/vs/workbench/contrib/void/browser/chatThreadService.ts +++ b/src/vs/workbench/contrib/void/browser/chatThreadService.ts @@ -799,33 +799,38 @@ class ChatThreadService extends Disposable implements IChatThreadService { | { type: 'llmError', error?: { message: string; fullError: Error | null; } } | { type: 'llmAborted' } - let resMessageIsDonePromise: (res: ResTypes) => void // resolves when user approves this tool use (or if tool doesn't require approval) + let resMessageIsDonePromise: (res: ResTypes) => void = () => { } // resolves when user approves this tool use (or if tool doesn't require approval) const messageIsDonePromise = new Promise((res, rej) => { resMessageIsDonePromise = res }) - const llmCancelToken = this._llmMessageService.sendLLMMessage({ - messagesType: 'chatMessages', - chatMode, - messages: messages, - modelSelection, - modelSelectionOptions, - overridesOfModel, - logging: { loggingName: `Chat - ${chatMode}`, loggingExtras: { threadId, nMessagesSent, chatMode } }, - separateSystemMessage: separateSystemMessage, - onText: ({ fullText, fullReasoning, toolCall }) => { - this._setStreamState(threadId, { isRunning: 'LLM', llmInfo: { displayContentSoFar: fullText, reasoningSoFar: fullReasoning, toolCallSoFar: toolCall ?? 
null }, interrupt: Promise.resolve(() => { if (llmCancelToken) this._llmMessageService.abort(llmCancelToken) }) }) - }, - onFinalMessage: async ({ fullText, fullReasoning, toolCall, anthropicReasoning, }) => { - resMessageIsDonePromise({ type: 'llmDone', toolCall, info: { fullText, fullReasoning, anthropicReasoning } }) // resolve with tool calls - }, - onError: async (error) => { - resMessageIsDonePromise({ type: 'llmError', error: error }) - }, - onAbort: () => { - // stop the loop to free up the promise, but don't modify state (already handled by whatever stopped it) - resMessageIsDonePromise({ type: 'llmAborted' }) - this._metricsService.capture('Agent Loop Done (Aborted)', { nMessagesSent, chatMode }) - }, - }) + let llmCancelToken: string | null = null; + try { + llmCancelToken = await this._llmMessageService.sendLLMMessage({ + messagesType: 'chatMessages', + chatMode, + messages: messages, + modelSelection, + modelSelectionOptions, + overridesOfModel, + logging: { loggingName: `Chat - ${chatMode}`, loggingExtras: { threadId, nMessagesSent, chatMode } }, + separateSystemMessage: separateSystemMessage, + onText: ({ fullText, fullReasoning, toolCall }) => { + this._setStreamState(threadId, { isRunning: 'LLM', llmInfo: { displayContentSoFar: fullText, reasoningSoFar: fullReasoning, toolCallSoFar: toolCall ?? null }, interrupt: Promise.resolve(() => { if (llmCancelToken) this._llmMessageService.abort(llmCancelToken) }) }) + }, + onFinalMessage: async ({ fullText, fullReasoning, toolCall, anthropicReasoning, }) => { + resMessageIsDonePromise({ type: 'llmDone', toolCall, info: { fullText, fullReasoning, anthropicReasoning } }) // resolve with tool calls + }, + onError: async (error) => { + resMessageIsDonePromise({ type: 'llmError', error: error }) + }, + onAbort: () => { + // stop the loop to free up the promise, but don't modify state (already handled by whatever stopped it) + resMessageIsDonePromise({ type: 'llmAborted' }) + this._metricsService.capture('Agent Loop Done (Aborted)', { nMessagesSent, chatMode }) + }, + }) + } catch (error) { + resMessageIsDonePromise({ type: 'llmError', error: { message: `Rate limit check failed: ${error}`, fullError: error } }) + } // mark as streaming if (!llmCancelToken) { diff --git a/src/vs/workbench/contrib/void/browser/editCodeService.ts b/src/vs/workbench/contrib/void/browser/editCodeService.ts index 80ee4bc9925..7cb43c18dd0 100644 --- a/src/vs/workbench/contrib/void/browser/editCodeService.ts +++ b/src/vs/workbench/contrib/void/browser/editCodeService.ts @@ -1503,54 +1503,59 @@ class EditCodeService extends Disposable implements IEditCodeService { let aborted = false let weAreAborting = false + try { + streamRequestIdRef.current = await this._llmMessageService.sendLLMMessage({ + messagesType: 'chatMessages', + logging: { loggingName: `Edit (Writeover) - ${from}` }, + messages, + modelSelection, + modelSelectionOptions, + overridesOfModel, + separateSystemMessage, + chatMode: null, // not chat + onText: (params) => { + const { fullText: fullText_ } = params + const newText_ = fullText_.substring(fullTextSoFar.length, Infinity) + + const newText = prevIgnoredSuffix + newText_ // add the previously ignored suffix because it's no longer the suffix! 
+ fullTextSoFar += newText // full text, including ```, etc + + const [croppedText, deltaCroppedText, croppedSuffix] = extractText(fullTextSoFar, newText.length) + const { endLineInLlmTextSoFar } = this._writeStreamedDiffZoneLLMText(uri, originalCode, croppedText, deltaCroppedText, latestStreamLocationMutable) + diffZone._streamState.line = (diffZone.startLine - 1) + endLineInLlmTextSoFar // change coordinate systems from originalCode to full file + + this._refreshStylesAndDiffsInURI(uri) + + prevIgnoredSuffix = croppedSuffix + }, + onFinalMessage: (params) => { + const { fullText } = params + // console.log('DONE! FULL TEXT\n', extractText(fullText), diffZone.startLine, diffZone.endLine) + // at the end, re-write whole thing to make sure no sync errors + const [croppedText, _1, _2] = extractText(fullText, 0) + this._writeURIText(uri, croppedText, + { startLineNumber: diffZone.startLine, startColumn: 1, endLineNumber: diffZone.endLine, endColumn: Number.MAX_SAFE_INTEGER }, // 1-indexed + { shouldRealignDiffAreas: true } + ) - streamRequestIdRef.current = this._llmMessageService.sendLLMMessage({ - messagesType: 'chatMessages', - logging: { loggingName: `Edit (Writeover) - ${from}` }, - messages, - modelSelection, - modelSelectionOptions, - overridesOfModel, - separateSystemMessage, - chatMode: null, // not chat - onText: (params) => { - const { fullText: fullText_ } = params - const newText_ = fullText_.substring(fullTextSoFar.length, Infinity) - - const newText = prevIgnoredSuffix + newText_ // add the previously ignored suffix because it's no longer the suffix! - fullTextSoFar += newText // full text, including ```, etc - - const [croppedText, deltaCroppedText, croppedSuffix] = extractText(fullTextSoFar, newText.length) - const { endLineInLlmTextSoFar } = this._writeStreamedDiffZoneLLMText(uri, originalCode, croppedText, deltaCroppedText, latestStreamLocationMutable) - diffZone._streamState.line = (diffZone.startLine - 1) + endLineInLlmTextSoFar // change coordinate systems from originalCode to full file - - this._refreshStylesAndDiffsInURI(uri) + onDone() + resMessageDonePromise() + }, + onError: (e) => { + onError(e) + }, + onAbort: () => { + if (weAreAborting) return + // stop the loop to free up the promise, but don't modify state (already handled by whatever stopped it) + aborted = true + resMessageDonePromise() + }, + }) + } catch (error) { + onError({ message: `Rate limit check failed: ${error}`, fullError: error }) + return + } - prevIgnoredSuffix = croppedSuffix - }, - onFinalMessage: (params) => { - const { fullText } = params - // console.log('DONE! 
FULL TEXT\n', extractText(fullText), diffZone.startLine, diffZone.endLine) - // at the end, re-write whole thing to make sure no sync errors - const [croppedText, _1, _2] = extractText(fullText, 0) - this._writeURIText(uri, croppedText, - { startLineNumber: diffZone.startLine, startColumn: 1, endLineNumber: diffZone.endLine, endColumn: Number.MAX_SAFE_INTEGER }, // 1-indexed - { shouldRealignDiffAreas: true } - ) - - onDone() - resMessageDonePromise() - }, - onError: (e) => { - onError(e) - }, - onAbort: () => { - if (weAreAborting) return - // stop the loop to free up the promise, but don't modify state (already handled by whatever stopped it) - aborted = true - resMessageDonePromise() - }, - }) // should never happen, just for safety if (streamRequestIdRef.current === null) { return } @@ -1950,7 +1955,7 @@ class EditCodeService extends Disposable implements IEditCodeService { this._refreshStylesAndDiffsInURI(uri) } - streamRequestIdRef.current = this._llmMessageService.sendLLMMessage({ + streamRequestIdRef.current = await this._llmMessageService.sendLLMMessage({ messagesType: 'chatMessages', logging: { loggingName: `Edit (Search/Replace) - ${from}` }, messages, diff --git a/src/vs/workbench/contrib/void/browser/rateLimiter.contribution.ts b/src/vs/workbench/contrib/void/browser/rateLimiter.contribution.ts new file mode 100644 index 00000000000..8bbaf215047 --- /dev/null +++ b/src/vs/workbench/contrib/void/browser/rateLimiter.contribution.ts @@ -0,0 +1,6 @@ +import { registerSingleton, InstantiationType } from '../../../../platform/instantiation/common/extensions.js'; +import { IRateLimiterService } from '../common/rateLimiterService.js'; +import { RateLimiterService } from '../common/rateLimiterService.js'; + +// Register the rate limiter service +registerSingleton(IRateLimiterService, RateLimiterService, InstantiationType.Delayed); diff --git a/src/vs/workbench/contrib/void/browser/react/src/void-settings-tsx/Settings.tsx b/src/vs/workbench/contrib/void/browser/react/src/void-settings-tsx/Settings.tsx index c8ba4da475c..2169c1b4b8a 100644 --- a/src/vs/workbench/contrib/void/browser/react/src/void-settings-tsx/Settings.tsx +++ b/src/vs/workbench/contrib/void/browser/react/src/void-settings-tsx/Settings.tsx @@ -1387,6 +1387,21 @@ export const Settings = () => { {/* General section */}
+ {/* API Rate Limiting */} +
+

API Rate Limiting

+

+ Control the maximum number of API requests per minute to manage usage and costs. +

+ + +
+ Max Requests per Minute: + +
+
+
+ {/* One-Click Switch section */}
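The call sites above (autocomplete, the chat-thread loop, and both edit-code paths) are all converted to the same shape: `sendLLMMessage` now returns a Promise for the request id, so each caller awaits it inside a try/catch and routes a rejection into its existing `onError` handling. A condensed sketch of that calling pattern, with an illustrative helper name that is not part of the diff:

```typescript
// Illustrative only: mirrors the calling convention the diff applies at each call site.
type OnError = (e: { message: string; fullError: Error | null }) => void;

async function sendWithRateLimit(
	llmMessageService: { sendLLMMessage(params: unknown): Promise<string | null> },
	params: unknown,
	onError: OnError,
): Promise<string | null> {
	try {
		// May suspend inside the rate limiter before the request id is returned.
		return await llmMessageService.sendLLMMessage(params);
	} catch (error) {
		// Same shape the diff uses when the rate-limit check rejects.
		onError({ message: `Rate limit check failed: ${error}`, fullError: error instanceof Error ? error : null });
		return null;
	}
}
```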
@@ -1531,3 +1546,32 @@ Use Model Context Protocol to provide Agent mode with more tools.
 	);
 }
+
+const RateLimitSlider = () => {
+	const accessor = useAccessor()
+	const voidSettingsService = accessor.get('IVoidSettingsService')
+	const settingsState = useSettingsState()
+
+
+	// console.log('RateLimitSlider value:', settingsState.globalSettings.maxRequestsPerMinute)
+
+	const onChangeValue = useCallback((newVal: number) => {
+		voidSettingsService.setGlobalSetting('maxRequestsPerMinute', newVal)
+	}, [voidSettingsService])
+
+	return (
+
+			onChangeValue(parseInt(e.target.value))}
+			className="w-32 bg-void-bg-2"
+		/>
+
+			{settingsState.globalSettings.maxRequestsPerMinute} requests/min
+
+
+	)
+}
diff --git a/src/vs/workbench/contrib/void/browser/void.contribution.ts b/src/vs/workbench/contrib/void/browser/void.contribution.ts
index 35c89184c08..4292aa1ac7f 100644
--- a/src/vs/workbench/contrib/void/browser/void.contribution.ts
+++ b/src/vs/workbench/contrib/void/browser/void.contribution.ts
@@ -14,6 +14,8 @@ import './sidebarPane.js'
 
 // register quick edit (Ctrl+K)
 import './quickEditActions.js'
+// register rate limiter service
+import './rateLimiter.contribution.js'
 
 // register Autocomplete
 import './autocompleteService.js'
diff --git a/src/vs/workbench/contrib/void/browser/voidSCMService.ts b/src/vs/workbench/contrib/void/browser/voidSCMService.ts
index e93c4882935..a026c351cb8 100644
--- a/src/vs/workbench/contrib/void/browser/voidSCMService.ts
+++ b/src/vs/workbench/contrib/void/browser/voidSCMService.ts
@@ -135,32 +135,36 @@ class GenerateCommitMessageService extends Disposable implements IGenerateCommit
 
 	/** LLM Functions */
 
-	private sendLLMMessage(messages: LLMChatMessage[], separateSystemMessage: string, modelOptions: ModelOptions): Promise<string> {
-		return new Promise((resolve, reject) => {
-
-			this.llmRequestId = this.llmMessageService.sendLLMMessage({
-				messagesType: 'chatMessages',
-				messages,
-				separateSystemMessage,
-				chatMode: null,
-				modelSelection: modelOptions.modelSelection,
-				modelSelectionOptions: modelOptions.modelSelectionOptions,
-				overridesOfModel: modelOptions.overridesOfModel,
-				onText: () => { },
-				onFinalMessage: (params: { fullText: string }) => {
-					const match = params.fullText.match(/<output>([\s\S]*?)<\/output>/i)
-					const commitMessage = match ? match[1].trim() : ''
-					resolve(commitMessage)
-				},
-				onError: (error) => {
-					console.error(error)
-					reject(error)
-				},
-				onAbort: () => {
-					reject(new CancellationError())
-				},
-				logging: { loggingName: 'VoidSCM - Commit Message' },
-			})
+	private async sendLLMMessage(messages: LLMChatMessage[], separateSystemMessage: string, modelOptions: ModelOptions): Promise<string> {
+		return new Promise(async (resolve, reject) => {
+
+			try {
+				this.llmRequestId = await this.llmMessageService.sendLLMMessage({
+					messagesType: 'chatMessages',
+					messages,
+					separateSystemMessage,
+					chatMode: null,
+					modelSelection: modelOptions.modelSelection,
+					modelSelectionOptions: modelOptions.modelSelectionOptions,
+					overridesOfModel: modelOptions.overridesOfModel,
+					onText: () => { },
+					onFinalMessage: (params: { fullText: string }) => {
+						const match = params.fullText.match(/<output>([\s\S]*?)<\/output>/i)
+						const commitMessage = match ? match[1].trim() : ''
+						resolve(commitMessage)
+					},
+					onError: (error) => {
+						console.error(error)
+						reject(error)
+					},
+					onAbort: () => {
+						reject(new CancellationError())
+					},
+					logging: { loggingName: 'VoidSCM - Commit Message' },
+				})
+			} catch (error) {
+				reject(error)
+			}
 		})
 	}
 
diff --git a/src/vs/workbench/contrib/void/common/rateLimiterService.ts b/src/vs/workbench/contrib/void/common/rateLimiterService.ts
new file mode 100644
index 00000000000..b8a08172af2
--- /dev/null
+++ b/src/vs/workbench/contrib/void/common/rateLimiterService.ts
@@ -0,0 +1,92 @@
+import { createDecorator } from '../../../../platform/instantiation/common/instantiation.js';
+import { IVoidSettingsService } from './voidSettingsService.js';
+import { ILogService } from '../../../../platform/log/common/log.js';
+import { Disposable } from '../../../../base/common/lifecycle.js';
+import { INotificationService } from '../../../../platform/notification/common/notification.js';
+
+export const IRateLimiterService = createDecorator<IRateLimiterService>('rateLimiterService');
+
+export interface IRateLimiterService {
+	readonly _serviceBrand: undefined;
+	checkLLMRateLimit(): Promise<boolean>;
+	// testRateLimit(): Promise<void>; //For testing
+}
+
+export class RateLimiterService extends Disposable implements IRateLimiterService {
+	declare readonly _serviceBrand: undefined;
+
+	private llmRequestTimestamps: number[] = [];
+	private readonly windowSize = 60 * 1000; // 1 minute in milliseconds
+
+	constructor(
+		@IVoidSettingsService private readonly voidSettingsService: IVoidSettingsService,
+		@ILogService private readonly logService: ILogService,
+		@INotificationService private readonly notificationService: INotificationService
+	) {
+		super();
+		this.logService.info('[RateLimiterService] LLM Rate Limiter Service initialized');
+
+		// Wait for settings to be initialized
+		this.voidSettingsService.waitForInitState.then(() => {
+			this.logService.info(`[RateLimiterService] Initial maxLLMRequests: ${this.voidSettingsService.state.globalSettings.maxRequestsPerMinute}`);
+		}).catch(error => {
+			this.logService.error('[RateLimiterService] Error waiting for settings:', error);
+		});
+	}
+
+	async checkLLMRateLimit(): Promise<boolean> {
+		// Wait for settings to be initialized
+		await this.voidSettingsService.waitForInitState;
+
+		const maxRequests = this.voidSettingsService.state.globalSettings.maxRequestsPerMinute;
+
+		this.logService.info(`[RateLimiterService] Current maxLLMRequests: ${maxRequests}`);
+		this.logService.info(`[RateLimiterService] Current LLM request count: ${this.llmRequestTimestamps.length}`);
+
+		// Clean up old timestamps
+		const now = Date.now();
+		this.llmRequestTimestamps = this.llmRequestTimestamps.filter(timestamp => now - timestamp < this.windowSize);
+		this.logService.info(`[RateLimiterService] LLM request count after cleanup: ${this.llmRequestTimestamps.length}`);
+
+		// Check if we've hit the rate limit
+		if (this.llmRequestTimestamps.length >= maxRequests) {
+			const oldestTimestamp = this.llmRequestTimestamps[0];
+			const timeToWait = this.windowSize - (now - oldestTimestamp);
+
+			this.logService.warn(`[RateLimiterService] LLM rate limit reached. Waiting ${timeToWait}ms before next request.`);
+			this.logService.info(`[RateLimiterService] Oldest LLM request: ${new Date(oldestTimestamp).toISOString()}`);
+			this.logService.info(`[RateLimiterService] Current time: ${new Date(now).toISOString()}`);
+
+			// Notify user
+			this.notificationService.info(`LLM rate limit reached. Waiting ${Math.ceil(timeToWait / 1000)} seconds before next request.`);
+
+			// Wait until we can make another request
+			await new Promise(resolve => setTimeout(resolve, timeToWait));
+
+			this.logService.info('[RateLimiterService] Wait period completed, proceeding with LLM request');
+		}
+
+		// Add current timestamp
+		this.llmRequestTimestamps.push(now);
+		this.logService.info(`[RateLimiterService] Added new LLM request timestamp. Total LLM requests: ${this.llmRequestTimestamps.length}`);
+
+		return true;
+	}
+	// Commented out for now, but can be used for testing
+	// async testRateLimit(): Promise<void> {
+	// 	this.logService.info('[RateLimiterService] Starting LLM rate limit test...');
+
+	// 	// Simulate multiple rapid LLM requests
+	// 	const promises = [];
+	// 	for (let i = 0; i < 5; i++) {
+	// 		promises.push(this.checkLLMRateLimit());
+	// 	}
+
+	// 	try {
+	// 		await Promise.all(promises);
+	// 		this.logService.info('[RateLimiterService] LLM rate limit test completed successfully');
+	// 	} catch (error) {
+	// 		this.logService.error('[RateLimiterService] LLM rate limit test failed:', error);
+	// 	}
+	// }
+}
diff --git a/src/vs/workbench/contrib/void/common/sendLLMMessageService.ts b/src/vs/workbench/contrib/void/common/sendLLMMessageService.ts
index 7618e7365ac..5d09d60ca9f 100644
--- a/src/vs/workbench/contrib/void/common/sendLLMMessageService.ts
+++ b/src/vs/workbench/contrib/void/common/sendLLMMessageService.ts
@@ -14,13 +14,14 @@ import { Event } from '../../../../base/common/event.js';
 import { Disposable } from '../../../../base/common/lifecycle.js';
 import { IVoidSettingsService } from './voidSettingsService.js';
 import { IMCPService } from './mcpService.js';
+import { IRateLimiterService } from './rateLimiterService.js';
 
 // calls channel to implement features
 export const ILLMMessageService = createDecorator<ILLMMessageService>('llmMessageService');
 
 export interface ILLMMessageService {
 	readonly _serviceBrand: undefined;
-	sendLLMMessage: (params: ServiceSendLLMMessageParams) => string | null;
+	sendLLMMessage: (params: ServiceSendLLMMessageParams) => Promise<string | null>;
 	abort: (requestId: string) => void;
 	ollamaList: (params: ServiceModelListParams) => void;
 	openAICompatibleList: (params: ServiceModelListParams) => void;
@@ -63,6 +64,7 @@ export class LLMMessageService extends Disposable implements ILLMMessageService
 		@IVoidSettingsService private readonly voidSettingsService: IVoidSettingsService,
 		// @INotificationService private readonly notificationService: INotificationService,
 		@IMCPService private readonly mcpService: IMCPService,
+		@IRateLimiterService private readonly rateLimiterService: IRateLimiterService,
 	) {
 		super()
 
@@ -100,7 +102,7 @@
 	}
 
-	sendLLMMessage(params: ServiceSendLLMMessageParams) {
+	async sendLLMMessage(params: ServiceSendLLMMessageParams) {
 		const { onText, onFinalMessage, onError, onAbort, modelSelection, ...proxyParams } = params;
 
 		// throw an error if no model/provider selected (this should usually never be reached, the UI should check this first, but might happen in cases like Apply where we haven't built much UI/checks yet, good practice to have check logic on backend)
 
@@ -116,6 +118,14 @@
 			return null
 		}
 
+		// Check rate limit before making LLM call
+		try {
+			await this.rateLimiterService.checkLLMRateLimit();
+		} catch (error) {
+			onError({ message: `Rate limit check failed: ${error}`, fullError: error });
+			return null;
+		}
+
 		const { settingsOfProvider, } = this.voidSettingsService.state
 
 		const mcpTools = this.mcpService.getMCPTools()
diff --git a/src/vs/workbench/contrib/void/common/voidSettingsTypes.ts b/src/vs/workbench/contrib/void/common/voidSettingsTypes.ts
index 549b6534024..cea78d79eab 100644
--- a/src/vs/workbench/contrib/void/common/voidSettingsTypes.ts
+++ b/src/vs/workbench/contrib/void/common/voidSettingsTypes.ts
@@ -455,6 +455,7 @@ export type GlobalSettings = {
 	isOnboardingComplete: boolean;
 	disableSystemMessage: boolean;
 	autoAcceptLLMChanges: boolean;
+	maxRequestsPerMinute: number;
 }
 
 export const defaultGlobalSettings: GlobalSettings = {
@@ -471,6 +472,7 @@
 	isOnboardingComplete: false,
 	disableSystemMessage: false,
 	autoAcceptLLMChanges: false,
+	maxRequestsPerMinute: 60, // Default to 60 requests per minute
 }
 
 export type GlobalSettingName = keyof GlobalSettings
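Stripped of its logging and the user notification, `checkLLMRateLimit` above is a sliding-window throttle over request timestamps: keep only the timestamps from the last `windowSize` milliseconds, and if there are already `maxRequestsPerMinute` of them, sleep until the oldest one falls out of the window. A minimal standalone sketch of that idea; the class and demo below are illustrative, not part of the diff:

```typescript
// Sliding-window throttle: at most maxPerWindow acquisitions per windowMs.
class SlidingWindowLimiter {
	private timestamps: number[] = [];

	constructor(private readonly maxPerWindow: number, private readonly windowMs = 60_000) { }

	async acquire(): Promise<void> {
		const now = Date.now();
		// Drop timestamps that have fallen out of the window.
		this.timestamps = this.timestamps.filter(t => now - t < this.windowMs);

		if (this.timestamps.length >= this.maxPerWindow) {
			// Wait until the oldest request leaves the window, as the service does.
			const waitMs = this.windowMs - (now - this.timestamps[0]);
			await new Promise<void>(resolve => setTimeout(resolve, waitMs));
		}
		this.timestamps.push(Date.now());
	}
}

// Example: with a window of 2 requests per second, the third acquire() resolves roughly 1s after the first.
async function demo() {
	const limiter = new SlidingWindowLimiter(2, 1_000);
	for (let i = 0; i < 3; i++) {
		await limiter.acquire();
		console.log(`request ${i + 1} allowed at ${new Date().toISOString()}`);
	}
}
demo();
```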