diff --git a/src/vs/workbench/contrib/void/browser/autocompleteService.ts b/src/vs/workbench/contrib/void/browser/autocompleteService.ts index 22c86eb6afc..366292469be 100644 --- a/src/vs/workbench/contrib/void/browser/autocompleteService.ts +++ b/src/vs/workbench/contrib/void/browser/autocompleteService.ts @@ -795,67 +795,71 @@ export class AutocompleteService extends Disposable implements IAutocompleteServ const modelSelectionOptions = modelSelection ? this._settingsService.state.optionsOfModelSelection[featureName][modelSelection.providerName]?.[modelSelection.modelName] : undefined // set parameters of `newAutocompletion` appropriately - newAutocompletion.llmPromise = new Promise((resolve, reject) => { - - const requestId = this._llmMessageService.sendLLMMessage({ - messagesType: 'FIMMessage', - messages: this._convertToLLMMessageService.prepareFIMMessage({ - messages: { - prefix: llmPrefix, - suffix: llmSuffix, - stopTokens: stopTokens, - } - }), - modelSelection, - modelSelectionOptions, - overridesOfModel, - logging: { loggingName: 'Autocomplete' }, - onText: () => { }, // unused in FIMMessage - // onText: async ({ fullText, newText }) => { - - // newAutocompletion.insertText = fullText - - // // count newlines in newText - // const numNewlines = newText.match(/\n|\r\n/g)?.length || 0 - // newAutocompletion._newlineCount += numNewlines - - // // if too many newlines, resolve up to last newline - // if (newAutocompletion._newlineCount > 10) { - // const lastNewlinePos = fullText.lastIndexOf('\n') - // newAutocompletion.insertText = fullText.substring(0, lastNewlinePos) - // resolve(newAutocompletion.insertText) - // return - // } - - // // if (!getAutocompletionMatchup({ prefix: this._lastPrefix, autocompletion: newAutocompletion })) { - // // reject('LLM response did not match user\'s text.') - // // } - // }, - onFinalMessage: ({ fullText }) => { - - // console.log('____res: ', JSON.stringify(newAutocompletion.insertText)) - - newAutocompletion.endTime = Date.now() - newAutocompletion.status = 'finished' - const [text, _] = extractCodeFromRegular({ text: fullText, recentlyAddedTextLen: 0 }) - newAutocompletion.insertText = processStartAndEndSpaces(text) - - // handle special case for predicting starting on the next line, add a newline character - if (newAutocompletion.type === 'multi-line-start-on-next-line') { - newAutocompletion.insertText = _ln + newAutocompletion.insertText - } - - resolve(newAutocompletion.insertText) - - }, - onError: ({ message }) => { - newAutocompletion.endTime = Date.now() - newAutocompletion.status = 'error' - reject(message) - }, - onAbort: () => { reject('Aborted autocomplete') }, - }) - newAutocompletion.requestId = requestId + newAutocompletion.llmPromise = new Promise(async (resolve, reject) => { + + try { + const requestId = await this._llmMessageService.sendLLMMessage({ + messagesType: 'FIMMessage', + messages: this._convertToLLMMessageService.prepareFIMMessage({ + messages: { + prefix: llmPrefix, + suffix: llmSuffix, + stopTokens: stopTokens, + } + }), + modelSelection, + modelSelectionOptions, + overridesOfModel, + logging: { loggingName: 'Autocomplete' }, + onText: () => { }, // unused in FIMMessage + // onText: async ({ fullText, newText }) => { + + // newAutocompletion.insertText = fullText + + // // count newlines in newText + // const numNewlines = newText.match(/\n|\r\n/g)?.length || 0 + // newAutocompletion._newlineCount += numNewlines + + // // if too many newlines, resolve up to last newline + // if (newAutocompletion._newlineCount > 
10) { + // const lastNewlinePos = fullText.lastIndexOf('\n') + // newAutocompletion.insertText = fullText.substring(0, lastNewlinePos) + // resolve(newAutocompletion.insertText) + // return + // } + + // // if (!getAutocompletionMatchup({ prefix: this._lastPrefix, autocompletion: newAutocompletion })) { + // // reject('LLM response did not match user\'s text.') + // // } + // }, + onFinalMessage: ({ fullText }) => { + + // console.log('____res: ', JSON.stringify(newAutocompletion.insertText)) + + newAutocompletion.endTime = Date.now() + newAutocompletion.status = 'finished' + const [text, _] = extractCodeFromRegular({ text: fullText, recentlyAddedTextLen: 0 }) + newAutocompletion.insertText = processStartAndEndSpaces(text) + + // handle special case for predicting starting on the next line, add a newline character + if (newAutocompletion.type === 'multi-line-start-on-next-line') { + newAutocompletion.insertText = _ln + newAutocompletion.insertText + } + + resolve(newAutocompletion.insertText) + + }, + onError: ({ message }) => { + newAutocompletion.endTime = Date.now() + newAutocompletion.status = 'error' + reject(message) + }, + onAbort: () => { reject('Aborted autocomplete') }, + }) + newAutocompletion.requestId = requestId + } catch (error) { + reject(error) + } // if the request hasnt resolved in TIMEOUT_TIME seconds, reject it setTimeout(() => { diff --git a/src/vs/workbench/contrib/void/browser/chatThreadService.ts b/src/vs/workbench/contrib/void/browser/chatThreadService.ts index 30f38f10ba8..09fb2693195 100644 --- a/src/vs/workbench/contrib/void/browser/chatThreadService.ts +++ b/src/vs/workbench/contrib/void/browser/chatThreadService.ts @@ -799,33 +799,38 @@ class ChatThreadService extends Disposable implements IChatThreadService { | { type: 'llmError', error?: { message: string; fullError: Error | null; } } | { type: 'llmAborted' } - let resMessageIsDonePromise: (res: ResTypes) => void // resolves when user approves this tool use (or if tool doesn't require approval) + let resMessageIsDonePromise: (res: ResTypes) => void = () => { } // resolves when user approves this tool use (or if tool doesn't require approval) const messageIsDonePromise = new Promise((res, rej) => { resMessageIsDonePromise = res }) - const llmCancelToken = this._llmMessageService.sendLLMMessage({ - messagesType: 'chatMessages', - chatMode, - messages: messages, - modelSelection, - modelSelectionOptions, - overridesOfModel, - logging: { loggingName: `Chat - ${chatMode}`, loggingExtras: { threadId, nMessagesSent, chatMode } }, - separateSystemMessage: separateSystemMessage, - onText: ({ fullText, fullReasoning, toolCall }) => { - this._setStreamState(threadId, { isRunning: 'LLM', llmInfo: { displayContentSoFar: fullText, reasoningSoFar: fullReasoning, toolCallSoFar: toolCall ?? 
null }, interrupt: Promise.resolve(() => { if (llmCancelToken) this._llmMessageService.abort(llmCancelToken) }) }) - }, - onFinalMessage: async ({ fullText, fullReasoning, toolCall, anthropicReasoning, }) => { - resMessageIsDonePromise({ type: 'llmDone', toolCall, info: { fullText, fullReasoning, anthropicReasoning } }) // resolve with tool calls - }, - onError: async (error) => { - resMessageIsDonePromise({ type: 'llmError', error: error }) - }, - onAbort: () => { - // stop the loop to free up the promise, but don't modify state (already handled by whatever stopped it) - resMessageIsDonePromise({ type: 'llmAborted' }) - this._metricsService.capture('Agent Loop Done (Aborted)', { nMessagesSent, chatMode }) - }, - }) + let llmCancelToken: string | null = null; + try { + llmCancelToken = await this._llmMessageService.sendLLMMessage({ + messagesType: 'chatMessages', + chatMode, + messages: messages, + modelSelection, + modelSelectionOptions, + overridesOfModel, + logging: { loggingName: `Chat - ${chatMode}`, loggingExtras: { threadId, nMessagesSent, chatMode } }, + separateSystemMessage: separateSystemMessage, + onText: ({ fullText, fullReasoning, toolCall }) => { + this._setStreamState(threadId, { isRunning: 'LLM', llmInfo: { displayContentSoFar: fullText, reasoningSoFar: fullReasoning, toolCallSoFar: toolCall ?? null }, interrupt: Promise.resolve(() => { if (llmCancelToken) this._llmMessageService.abort(llmCancelToken) }) }) + }, + onFinalMessage: async ({ fullText, fullReasoning, toolCall, anthropicReasoning, }) => { + resMessageIsDonePromise({ type: 'llmDone', toolCall, info: { fullText, fullReasoning, anthropicReasoning } }) // resolve with tool calls + }, + onError: async (error) => { + resMessageIsDonePromise({ type: 'llmError', error: error }) + }, + onAbort: () => { + // stop the loop to free up the promise, but don't modify state (already handled by whatever stopped it) + resMessageIsDonePromise({ type: 'llmAborted' }) + this._metricsService.capture('Agent Loop Done (Aborted)', { nMessagesSent, chatMode }) + }, + }) + } catch (error) { + resMessageIsDonePromise({ type: 'llmError', error: { message: `Rate limit check failed: ${error}`, fullError: error } }) + } // mark as streaming if (!llmCancelToken) { diff --git a/src/vs/workbench/contrib/void/browser/editCodeService.ts b/src/vs/workbench/contrib/void/browser/editCodeService.ts index 80ee4bc9925..7cb43c18dd0 100644 --- a/src/vs/workbench/contrib/void/browser/editCodeService.ts +++ b/src/vs/workbench/contrib/void/browser/editCodeService.ts @@ -1503,54 +1503,59 @@ class EditCodeService extends Disposable implements IEditCodeService { let aborted = false let weAreAborting = false + try { + streamRequestIdRef.current = await this._llmMessageService.sendLLMMessage({ + messagesType: 'chatMessages', + logging: { loggingName: `Edit (Writeover) - ${from}` }, + messages, + modelSelection, + modelSelectionOptions, + overridesOfModel, + separateSystemMessage, + chatMode: null, // not chat + onText: (params) => { + const { fullText: fullText_ } = params + const newText_ = fullText_.substring(fullTextSoFar.length, Infinity) + + const newText = prevIgnoredSuffix + newText_ // add the previously ignored suffix because it's no longer the suffix! 
+ fullTextSoFar += newText // full text, including ```, etc + + const [croppedText, deltaCroppedText, croppedSuffix] = extractText(fullTextSoFar, newText.length) + const { endLineInLlmTextSoFar } = this._writeStreamedDiffZoneLLMText(uri, originalCode, croppedText, deltaCroppedText, latestStreamLocationMutable) + diffZone._streamState.line = (diffZone.startLine - 1) + endLineInLlmTextSoFar // change coordinate systems from originalCode to full file + + this._refreshStylesAndDiffsInURI(uri) + + prevIgnoredSuffix = croppedSuffix + }, + onFinalMessage: (params) => { + const { fullText } = params + // console.log('DONE! FULL TEXT\n', extractText(fullText), diffZone.startLine, diffZone.endLine) + // at the end, re-write whole thing to make sure no sync errors + const [croppedText, _1, _2] = extractText(fullText, 0) + this._writeURIText(uri, croppedText, + { startLineNumber: diffZone.startLine, startColumn: 1, endLineNumber: diffZone.endLine, endColumn: Number.MAX_SAFE_INTEGER }, // 1-indexed + { shouldRealignDiffAreas: true } + ) - streamRequestIdRef.current = this._llmMessageService.sendLLMMessage({ - messagesType: 'chatMessages', - logging: { loggingName: `Edit (Writeover) - ${from}` }, - messages, - modelSelection, - modelSelectionOptions, - overridesOfModel, - separateSystemMessage, - chatMode: null, // not chat - onText: (params) => { - const { fullText: fullText_ } = params - const newText_ = fullText_.substring(fullTextSoFar.length, Infinity) - - const newText = prevIgnoredSuffix + newText_ // add the previously ignored suffix because it's no longer the suffix! - fullTextSoFar += newText // full text, including ```, etc - - const [croppedText, deltaCroppedText, croppedSuffix] = extractText(fullTextSoFar, newText.length) - const { endLineInLlmTextSoFar } = this._writeStreamedDiffZoneLLMText(uri, originalCode, croppedText, deltaCroppedText, latestStreamLocationMutable) - diffZone._streamState.line = (diffZone.startLine - 1) + endLineInLlmTextSoFar // change coordinate systems from originalCode to full file - - this._refreshStylesAndDiffsInURI(uri) + onDone() + resMessageDonePromise() + }, + onError: (e) => { + onError(e) + }, + onAbort: () => { + if (weAreAborting) return + // stop the loop to free up the promise, but don't modify state (already handled by whatever stopped it) + aborted = true + resMessageDonePromise() + }, + }) + } catch (error) { + onError({ message: `Rate limit check failed: ${error}`, fullError: error }) + return + } - prevIgnoredSuffix = croppedSuffix - }, - onFinalMessage: (params) => { - const { fullText } = params - // console.log('DONE! 
FULL TEXT\n', extractText(fullText), diffZone.startLine, diffZone.endLine) - // at the end, re-write whole thing to make sure no sync errors - const [croppedText, _1, _2] = extractText(fullText, 0) - this._writeURIText(uri, croppedText, - { startLineNumber: diffZone.startLine, startColumn: 1, endLineNumber: diffZone.endLine, endColumn: Number.MAX_SAFE_INTEGER }, // 1-indexed - { shouldRealignDiffAreas: true } - ) - - onDone() - resMessageDonePromise() - }, - onError: (e) => { - onError(e) - }, - onAbort: () => { - if (weAreAborting) return - // stop the loop to free up the promise, but don't modify state (already handled by whatever stopped it) - aborted = true - resMessageDonePromise() - }, - }) // should never happen, just for safety if (streamRequestIdRef.current === null) { return } @@ -1950,7 +1955,7 @@ class EditCodeService extends Disposable implements IEditCodeService { this._refreshStylesAndDiffsInURI(uri) } - streamRequestIdRef.current = this._llmMessageService.sendLLMMessage({ + streamRequestIdRef.current = await this._llmMessageService.sendLLMMessage({ messagesType: 'chatMessages', logging: { loggingName: `Edit (Search/Replace) - ${from}` }, messages, diff --git a/src/vs/workbench/contrib/void/browser/rateLimiter.contribution.ts b/src/vs/workbench/contrib/void/browser/rateLimiter.contribution.ts new file mode 100644 index 00000000000..8bbaf215047 --- /dev/null +++ b/src/vs/workbench/contrib/void/browser/rateLimiter.contribution.ts @@ -0,0 +1,6 @@ +import { registerSingleton, InstantiationType } from '../../../../platform/instantiation/common/extensions.js'; +import { IRateLimiterService } from '../common/rateLimiterService.js'; +import { RateLimiterService } from '../common/rateLimiterService.js'; + +// Register the rate limiter service +registerSingleton(IRateLimiterService, RateLimiterService, InstantiationType.Delayed); diff --git a/src/vs/workbench/contrib/void/browser/react/src/void-settings-tsx/Settings.tsx b/src/vs/workbench/contrib/void/browser/react/src/void-settings-tsx/Settings.tsx index c8ba4da475c..2169c1b4b8a 100644 --- a/src/vs/workbench/contrib/void/browser/react/src/void-settings-tsx/Settings.tsx +++ b/src/vs/workbench/contrib/void/browser/react/src/void-settings-tsx/Settings.tsx @@ -1387,6 +1387,21 @@ export const Settings = () => { {/* General section */}
+ {/* API Rate Limiting */} +
+

API Rate Limiting

+

+ Control the maximum number of API requests per minute to manage usage and costs. +

+ + +
+ Max Requests per Minute: + +
+
+
+ {/* One-Click Switch section */}
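The call sites above (autocomplete, the chat-thread loop, and both edit-code paths) are all converted to the same shape: `sendLLMMessage` now returns a Promise for the request id, so each caller awaits it inside a try/catch and routes a rejection into its existing `onError` handling. A condensed sketch of that calling pattern, with an illustrative helper name that is not part of the diff:

```typescript
// Illustrative only: mirrors the calling convention the diff applies at each call site.
type OnError = (e: { message: string; fullError: Error | null }) => void;

async function sendWithRateLimit(
	llmMessageService: { sendLLMMessage(params: unknown): Promise<string | null> },
	params: unknown,
	onError: OnError,
): Promise<string | null> {
	try {
		// May suspend inside the rate limiter before the request id is returned.
		return await llmMessageService.sendLLMMessage(params);
	} catch (error) {
		// Same shape the diff uses when the rate-limit check rejects.
		onError({ message: `Rate limit check failed: ${error}`, fullError: error instanceof Error ? error : null });
		return null;
	}
}
```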
@@ -1531,3 +1546,32 @@ Use Model Context Protocol to provide Agent mode with more tools.
 	);
 }
+
+const RateLimitSlider = () => {
+	const accessor = useAccessor()
+	const voidSettingsService = accessor.get('IVoidSettingsService')
+	const settingsState = useSettingsState()
+
+
+	// console.log('RateLimitSlider value:', settingsState.globalSettings.maxRequestsPerMinute)
+
+	const onChangeValue = useCallback((newVal: number) => {
+		voidSettingsService.setGlobalSetting('maxRequestsPerMinute', newVal)
+	}, [voidSettingsService])
+
+	return (
+
+			onChangeValue(parseInt(e.target.value))}
+			className="w-32 bg-void-bg-2"
+		/>
+
+			{settingsState.globalSettings.maxRequestsPerMinute} requests/min
+
+
+	)
+}
diff --git a/src/vs/workbench/contrib/void/browser/void.contribution.ts b/src/vs/workbench/contrib/void/browser/void.contribution.ts
index 35c89184c08..4292aa1ac7f 100644
--- a/src/vs/workbench/contrib/void/browser/void.contribution.ts
+++ b/src/vs/workbench/contrib/void/browser/void.contribution.ts
@@ -14,6 +14,8 @@ import './sidebarPane.js'
 
 // register quick edit (Ctrl+K)
 import './quickEditActions.js'
+// register rate limiter service
+import './rateLimiter.contribution.js'
 
 // register Autocomplete
 import './autocompleteService.js'
diff --git a/src/vs/workbench/contrib/void/browser/voidSCMService.ts b/src/vs/workbench/contrib/void/browser/voidSCMService.ts
index e93c4882935..a026c351cb8 100644
--- a/src/vs/workbench/contrib/void/browser/voidSCMService.ts
+++ b/src/vs/workbench/contrib/void/browser/voidSCMService.ts
@@ -135,32 +135,36 @@ class GenerateCommitMessageService extends Disposable implements IGenerateCommit
 
 	/** LLM Functions */
 
-	private sendLLMMessage(messages: LLMChatMessage[], separateSystemMessage: string, modelOptions: ModelOptions): Promise<string> {
-		return new Promise((resolve, reject) => {
-
-			this.llmRequestId = this.llmMessageService.sendLLMMessage({
-				messagesType: 'chatMessages',
-				messages,
-				separateSystemMessage,
-				chatMode: null,
-				modelSelection: modelOptions.modelSelection,
-				modelSelectionOptions: modelOptions.modelSelectionOptions,
-				overridesOfModel: modelOptions.overridesOfModel,
-				onText: () => { },
-				onFinalMessage: (params: { fullText: string }) => {
-					const match = params.fullText.match(/<output>([\s\S]*?)<\/output>/i)
-					const commitMessage = match ? match[1].trim() : ''
-					resolve(commitMessage)
-				},
-				onError: (error) => {
-					console.error(error)
-					reject(error)
-				},
-				onAbort: () => {
-					reject(new CancellationError())
-				},
-				logging: { loggingName: 'VoidSCM - Commit Message' },
-			})
+	private async sendLLMMessage(messages: LLMChatMessage[], separateSystemMessage: string, modelOptions: ModelOptions): Promise<string> {
+		return new Promise(async (resolve, reject) => {
+
+			try {
+				this.llmRequestId = await this.llmMessageService.sendLLMMessage({
+					messagesType: 'chatMessages',
+					messages,
+					separateSystemMessage,
+					chatMode: null,
+					modelSelection: modelOptions.modelSelection,
+					modelSelectionOptions: modelOptions.modelSelectionOptions,
+					overridesOfModel: modelOptions.overridesOfModel,
+					onText: () => { },
+					onFinalMessage: (params: { fullText: string }) => {
+						const match = params.fullText.match(/<output>([\s\S]*?)<\/output>/i)
+						const commitMessage = match ? match[1].trim() : ''
+						resolve(commitMessage)
+					},
+					onError: (error) => {
+						console.error(error)
+						reject(error)
+					},
+					onAbort: () => {
+						reject(new CancellationError())
+					},
+					logging: { loggingName: 'VoidSCM - Commit Message' },
+				})
+			} catch (error) {
+				reject(error)
+			}
 		})
 	}
 
diff --git a/src/vs/workbench/contrib/void/common/rateLimiterService.ts b/src/vs/workbench/contrib/void/common/rateLimiterService.ts
new file mode 100644
index 00000000000..b8a08172af2
--- /dev/null
+++ b/src/vs/workbench/contrib/void/common/rateLimiterService.ts
@@ -0,0 +1,92 @@
+import { createDecorator } from '../../../../platform/instantiation/common/instantiation.js';
+import { IVoidSettingsService } from './voidSettingsService.js';
+import { ILogService } from '../../../../platform/log/common/log.js';
+import { Disposable } from '../../../../base/common/lifecycle.js';
+import { INotificationService } from '../../../../platform/notification/common/notification.js';
+
+export const IRateLimiterService = createDecorator<IRateLimiterService>('rateLimiterService');
+
+export interface IRateLimiterService {
+	readonly _serviceBrand: undefined;
+	checkLLMRateLimit(): Promise<boolean>;
+	// testRateLimit(): Promise<void>; //For testing
+}
+
+export class RateLimiterService extends Disposable implements IRateLimiterService {
+	declare readonly _serviceBrand: undefined;
+
+	private llmRequestTimestamps: number[] = [];
+	private readonly windowSize = 60 * 1000; // 1 minute in milliseconds
+
+	constructor(
+		@IVoidSettingsService private readonly voidSettingsService: IVoidSettingsService,
+		@ILogService private readonly logService: ILogService,
+		@INotificationService private readonly notificationService: INotificationService
+	) {
+		super();
+		this.logService.info('[RateLimiterService] LLM Rate Limiter Service initialized');
+
+		// Wait for settings to be initialized
+		this.voidSettingsService.waitForInitState.then(() => {
+			this.logService.info(`[RateLimiterService] Initial maxLLMRequests: ${this.voidSettingsService.state.globalSettings.maxRequestsPerMinute}`);
+		}).catch(error => {
+			this.logService.error('[RateLimiterService] Error waiting for settings:', error);
+		});
+	}
+
+	async checkLLMRateLimit(): Promise<boolean> {
+		// Wait for settings to be initialized
+		await this.voidSettingsService.waitForInitState;
+
+		const maxRequests = this.voidSettingsService.state.globalSettings.maxRequestsPerMinute;
+
+		this.logService.info(`[RateLimiterService] Current maxLLMRequests: ${maxRequests}`);
+		this.logService.info(`[RateLimiterService] Current LLM request count: ${this.llmRequestTimestamps.length}`);
+
+		// Clean up old timestamps
+		const now = Date.now();
+		this.llmRequestTimestamps = this.llmRequestTimestamps.filter(timestamp => now - timestamp < this.windowSize);
+		this.logService.info(`[RateLimiterService] LLM request count after cleanup: ${this.llmRequestTimestamps.length}`);
+
+		// Check if we've hit the rate limit
+		if (this.llmRequestTimestamps.length >= maxRequests) {
+			const oldestTimestamp = this.llmRequestTimestamps[0];
+			const timeToWait = this.windowSize - (now - oldestTimestamp);
+
+			this.logService.warn(`[RateLimiterService] LLM rate limit reached. Waiting ${timeToWait}ms before next request.`);
+			this.logService.info(`[RateLimiterService] Oldest LLM request: ${new Date(oldestTimestamp).toISOString()}`);
+			this.logService.info(`[RateLimiterService] Current time: ${new Date(now).toISOString()}`);
+
+			// Notify user
+			this.notificationService.info(`LLM rate limit reached. Waiting ${Math.ceil(timeToWait / 1000)} seconds before next request.`);
+
+			// Wait until we can make another request
+			await new Promise(resolve => setTimeout(resolve, timeToWait));
+
+			this.logService.info('[RateLimiterService] Wait period completed, proceeding with LLM request');
+		}
+
+		// Add current timestamp
+		this.llmRequestTimestamps.push(now);
+		this.logService.info(`[RateLimiterService] Added new LLM request timestamp. Total LLM requests: ${this.llmRequestTimestamps.length}`);
+
+		return true;
+	}
+	// Commented out for now, but can be used for testing
+	// async testRateLimit(): Promise<void> {
+	// 	this.logService.info('[RateLimiterService] Starting LLM rate limit test...');
+
+	// 	// Simulate multiple rapid LLM requests
+	// 	const promises = [];
+	// 	for (let i = 0; i < 5; i++) {
+	// 		promises.push(this.checkLLMRateLimit());
+	// 	}
+
+	// 	try {
+	// 		await Promise.all(promises);
+	// 		this.logService.info('[RateLimiterService] LLM rate limit test completed successfully');
+	// 	} catch (error) {
+	// 		this.logService.error('[RateLimiterService] LLM rate limit test failed:', error);
+	// 	}
+	// }
+}
diff --git a/src/vs/workbench/contrib/void/common/sendLLMMessageService.ts b/src/vs/workbench/contrib/void/common/sendLLMMessageService.ts
index 7618e7365ac..5d09d60ca9f 100644
--- a/src/vs/workbench/contrib/void/common/sendLLMMessageService.ts
+++ b/src/vs/workbench/contrib/void/common/sendLLMMessageService.ts
@@ -14,13 +14,14 @@ import { Event } from '../../../../base/common/event.js';
 import { Disposable } from '../../../../base/common/lifecycle.js';
 import { IVoidSettingsService } from './voidSettingsService.js';
 import { IMCPService } from './mcpService.js';
+import { IRateLimiterService } from './rateLimiterService.js';
 
 // calls channel to implement features
 export const ILLMMessageService = createDecorator<ILLMMessageService>('llmMessageService');
 
 export interface ILLMMessageService {
 	readonly _serviceBrand: undefined;
-	sendLLMMessage: (params: ServiceSendLLMMessageParams) => string | null;
+	sendLLMMessage: (params: ServiceSendLLMMessageParams) => Promise<string | null>;
 	abort: (requestId: string) => void;
 	ollamaList: (params: ServiceModelListParams) => void;
 	openAICompatibleList: (params: ServiceModelListParams) => void;
@@ -63,6 +64,7 @@ export class LLMMessageService extends Disposable implements ILLMMessageService
 		@IVoidSettingsService private readonly voidSettingsService: IVoidSettingsService,
 		// @INotificationService private readonly notificationService: INotificationService,
 		@IMCPService private readonly mcpService: IMCPService,
+		@IRateLimiterService private readonly rateLimiterService: IRateLimiterService,
 	) {
 		super()
 
@@ -100,7 +102,7 @@
 	}
 
-	sendLLMMessage(params: ServiceSendLLMMessageParams) {
+	async sendLLMMessage(params: ServiceSendLLMMessageParams) {
 		const { onText, onFinalMessage, onError, onAbort, modelSelection, ...proxyParams } = params;
 
 		// throw an error if no model/provider selected (this should usually never be reached, the UI should check this first, but might happen in cases like Apply where we haven't built much UI/checks yet, good practice to have check logic on backend)
 
@@ -116,6 +118,14 @@
 			return null
 		}
 
+		// Check rate limit before making LLM call
+		try {
+			await this.rateLimiterService.checkLLMRateLimit();
+		} catch (error) {
+			onError({ message: `Rate limit check failed: ${error}`, fullError: error });
+			return null;
+		}
+
 		const { settingsOfProvider, } = this.voidSettingsService.state
 
 		const mcpTools = this.mcpService.getMCPTools()
diff --git a/src/vs/workbench/contrib/void/common/voidSettingsTypes.ts b/src/vs/workbench/contrib/void/common/voidSettingsTypes.ts
index 549b6534024..cea78d79eab 100644
--- a/src/vs/workbench/contrib/void/common/voidSettingsTypes.ts
+++ b/src/vs/workbench/contrib/void/common/voidSettingsTypes.ts
@@ -455,6 +455,7 @@ export type GlobalSettings = {
 	isOnboardingComplete: boolean;
 	disableSystemMessage: boolean;
 	autoAcceptLLMChanges: boolean;
+	maxRequestsPerMinute: number;
 }
 
 export const defaultGlobalSettings: GlobalSettings = {
@@ -471,6 +472,7 @@
 	isOnboardingComplete: false,
 	disableSystemMessage: false,
 	autoAcceptLLMChanges: false,
+	maxRequestsPerMinute: 60, // Default to 60 requests per minute
 }
 
 export type GlobalSettingName = keyof GlobalSettings
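Stripped of its logging and the user notification, `checkLLMRateLimit` above is a sliding-window throttle over request timestamps: keep only the timestamps from the last `windowSize` milliseconds, and if there are already `maxRequestsPerMinute` of them, sleep until the oldest one falls out of the window. A minimal standalone sketch of that idea; the class and demo below are illustrative, not part of the diff:

```typescript
// Sliding-window throttle: at most maxPerWindow acquisitions per windowMs.
class SlidingWindowLimiter {
	private timestamps: number[] = [];

	constructor(private readonly maxPerWindow: number, private readonly windowMs = 60_000) { }

	async acquire(): Promise<void> {
		const now = Date.now();
		// Drop timestamps that have fallen out of the window.
		this.timestamps = this.timestamps.filter(t => now - t < this.windowMs);

		if (this.timestamps.length >= this.maxPerWindow) {
			// Wait until the oldest request leaves the window, as the service does.
			const waitMs = this.windowMs - (now - this.timestamps[0]);
			await new Promise<void>(resolve => setTimeout(resolve, waitMs));
		}
		this.timestamps.push(Date.now());
	}
}

// Example: with a window of 2 requests per second, the third acquire() resolves roughly 1s after the first.
async function demo() {
	const limiter = new SlidingWindowLimiter(2, 1_000);
	for (let i = 0; i < 3; i++) {
		await limiter.acquire();
		console.log(`request ${i + 1} allowed at ${new Date().toISOString()}`);
	}
}
demo();
```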