gradio-app · aliabd · Aug 27, 2025 · Aug 27, 2025 · Aug 28, 2025 · Aug 28, 2025
diff --git a/.changeset/soft-baths-chew.md b/.changeset/soft-baths-chew.md
@@ -0,0 +1,6 @@
+---
+"@gradio/vibeeditor": minor
+"gradio": minor
+---
+
+feat:Adding images to vibe editor 
diff --git a/gradio/data_classes.py b/gradio/data_classes.py
@@ -127,6 +127,7 @@ class ResetBody(BaseModel):
 
 class VibeEditBody(BaseModel):
+    # Request body for the vibe-edit route: the user's text prompt plus any attached files.
     prompt: str
+    # Files sent along with the prompt; an empty list when the client sends none.
+    files: list[FileData] = []
 
 
 class VibeCodeBody(BaseModel):

diff --git a/gradio/routes.py b/gradio/routes.py
@@ -2003,7 +2003,54 @@ async def vibe_edit(body: VibeEditBody):
             client = InferenceClient()
 
             content = ""
-            prompt = f"""
+            system_prompt = load_system_prompt()
+
+            has_images = len(body.files) > 0
+
+            if has_images:
+                # Use GLM-4.5V model for image processing
+                model = "zai-org/GLM-4.5V"
+
+                # Function-scope import, hoisted out of the loop so the import statement is
+                # not re-executed for every attachment.
+                import base64
+
+                # Inline each image attachment as a base64 data URL content part.
+                # NOTE(review): file.path and file.mime_type are taken from the request body
+                # as-is; nothing visible here checks that the path lies inside Gradio's
+                # upload/cache directory. Confirm that is enforced elsewhere -- otherwise any
+                # server-readable file could be encoded and forwarded to the model.
+                image_messages = []
+                for file in body.files:
+                    if file.mime_type and "image" in file.mime_type:
+                        # Only the read needs the file handle; close it before building the part.
+                        with open(file.path, "rb") as img_file:
+                            img_data = base64.b64encode(img_file.read()).decode("utf-8")
+                        image_messages.append(
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:{file.mime_type};base64,{img_data}"
+                                },
+                            }
+                        )
+
+                prompt = f"""
+You are a code generator for Gradio apps. Given the following existing code, prompt, and images, return the full new code.
+Existing code:
+```python
+{original_code}
+```
+
+Prompt:
+{body.prompt}
+
+Please analyze the provided images and generate code based on the visual content and the text prompt.
+"""
+
+                messages = [
+                    {"role": "system", "content": system_prompt},
+                    {
+                        "role": "user",
+                        "content": [{"type": "text", "text": prompt}] + image_messages,
+                    },
+                ]
+            else:
+                # Use GPT-OSS for text-only prompts
+                model = "openai/gpt-oss-120b"
+                prompt = f"""
 You are a code generator for Gradio apps. Given the following existing code and prompt, return the full new code.
 Existing code:
 ```python
@@ -2012,15 +2059,15 @@ async def vibe_edit(body: VibeEditBody):
 
 Prompt:
 {body.prompt}"""
-            system_prompt = load_system_prompt()
+                messages = [
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": prompt},
+                ]
             content = (
                 client.chat_completion(
-                    model="openai/gpt-oss-120b",
-                    messages=[
-                        {"role": "system", "content": system_prompt},
-                        {"role": "user", "content": prompt},
-                    ],
-                    max_tokens=1000,
+                    model=model,
+                    messages=messages,
+                    max_tokens=3000,
                 )
                 .choices[0]
                 .message.content

diff --git a/js/vibeeditor/Index.svelte b/js/vibeeditor/Index.svelte
@@ -2,20 +2,24 @@
 	import { Client } from "@gradio/client";
 	import { onMount } from "svelte";
 	import { BaseCode } from "@gradio/code";
+	import { BaseMultimodalTextbox } from "@gradio/multimodaltextbox";
+	import type { FileData } from "@gradio/client";
 
 	export let app: Client;
 	export let root: string;
-	let prompt = "";
+	let prompt: { text: string; files: FileData[] } = { text: "", files: [] };
 	let editorWidth = 350;
 	let isResizing = false;
 	let editorElement: HTMLDivElement;
 	let activeTab: "chat" | "code" = "chat";
 
 	let codeValue = "";
 	let diffStats: { lines_added: number; lines_removed: number } | null = null;
+	let hasImages = false;
 
 	interface Message {
 		text: string;
+		files: FileData[];
 		isBot: boolean;
 		isPending?: boolean;
 		hash?: string;
@@ -24,25 +28,35 @@
 	let message_history: Message[] = [];
 
 	const submit = (): void => {
-		if (prompt.trim() === "") return;
+		if (prompt.text.trim() === "" && prompt.files.length === 0) return;
 
 		// Clear diff stats when submitting new prompt
 		diffStats = null;
 
 		const userMessageIndex = message_history.length;
-		message_history = [...message_history, { text: prompt, isBot: false }];
+		message_history = [
+			...message_history,
+			{ text: prompt.text, files: prompt.files, isBot: false }
+		];
 
 		const botMessageIndex = message_history.length;
 		message_history = [
 			...message_history,
-			{ text: "Working...", isBot: true, isPending: true }
+			{ text: "Working...", files: [], isBot: true, isPending: true }
 		];
 
-		const userPrompt = prompt;
-		prompt = "";
+		const userPrompt = prompt.text;
+		const userFiles = prompt.files;
+		prompt = { text: "", files: [] };
+
+		// Check if there are images to determine which model to use
+		hasImages = userFiles.some(
+			(file) => file.mime_type && file.mime_type.includes("image")
+		);
 
 		const post = app.post_data(`${root}/gradio_api/vibe-edit/`, {
-			prompt: userPrompt
+			prompt: userPrompt,
+			files: userFiles
 		});
 		post
 			.then(([response, status_code]) => {
@@ -70,6 +84,7 @@
 					index === botMessageIndex
 						? {
 								text: responseData.reasoning ? responseData.reasoning : "Done.",
+								files: [],
 								isBot: true,
 								isPending: false
 							}
@@ -79,7 +94,12 @@
 			.catch((error) => {
 				message_history = message_history.map((msg, index) =>
 					index === botMessageIndex
-						? { text: "Error occurred.", isBot: true, isPending: false }
+						? {
+								text: "Error occurred.",
+								files: [],
+								isBot: true,
+								isPending: false
+							}
 						: msg
 				);
 			});
@@ -96,7 +116,7 @@
 			diffStats = null;
 
 			const messageToUndo = message_history[messageIndex];
-			prompt = messageToUndo.text;
+			prompt = { text: messageToUndo.text, files: messageToUndo.files };
 
 			message_history = message_history.slice(0, messageIndex);
 		} catch (error) {
@@ -222,7 +242,18 @@
 						class:user-message={!message.isBot}
 					>
 						<div class="message-content">
-							<span class="message-text">{message.text}</span>
+							<div class="message-text">
+								<span>{message.text}</span>
+								{#if message.files && message.files.length > 0}
+									<div class="message-files">
+										{#each message.files as file}
+											{#if file.mime_type && file.mime_type.includes("image")}
+												<img src={file.url} alt="" class="message-image" />
+											{/if}
+										{/each}
+									</div>
+								{/if}
+							</div>
 							{#if !message.isBot && message.hash && !message.isPending}
 								<button
 									class="undo-button"
@@ -263,22 +294,36 @@
 	</div>
 
 	<div class="input-section">
-		<div class="powered-by">Powered by: <code>gpt-oss</code></div>
-		<textarea
-			on:keydown={(e) => {
-				if (e.key === "Enter" && !e.shiftKey) {
-					e.preventDefault();
-					submit();
-				}
-			}}
+		<!-- Shows the model chosen for the last submitted prompt (hasImages is set in submit()),
+		     not the attachments currently sitting in the input box. -->
+		<div class="powered-by">
+			Powered by: <a
+				style="text-decoration: underline;"
+				href={hasImages
+					? "https://hf.co/zai-org/GLM-4.5V"
+					: "https://hf.co/openai/gpt-oss-120b"}
+				target="_blank"
+				rel="noopener noreferrer">{hasImages ? "GLM-4.5V" : "gpt-oss"}</a
+			>
+		</div>
+		<!-- Image-only, multi-file prompt box. Its built-in submit button is turned off in favour
+		     of the Send button below. on:submit restores Enter-to-send, which the textarea this
+		     replaces handled in its keydown listener; this assumes BaseMultimodalTextbox
+		     dispatches a `submit` event, as the MultimodalTextbox component documents. -->
+		<BaseMultimodalTextbox
 			bind:value={prompt}
+			on:submit={submit}
 			placeholder="What can I add or change?"
+			lines={1}
+			max_lines={10}
+			file_types={["image"]}
+			file_count="multiple"
+			sources={["upload"]}
+			submit_btn={false}
+			show_label={false}
+			label=""
+			i18n={(value) => value || ""}
+			waveform_settings={{}}
+			{root}
+			upload={(...args) => app.upload(...args)}
+			stream_handler={(...args) => app.stream(...args)}
 		/>
 		<button
 			on:click={submit}
 			class="submit-button"
-			disabled={prompt.trim() === ""}
+			disabled={prompt.text.trim() === "" && prompt.files.length === 0}
 		>
 			Send
 		</button>
@@ -433,6 +478,20 @@
 		flex: 1;
 	}
 
+	/* Container for a message's attached images: a vertical flex stack below the text. */
+	.message-files {
+		display: flex;
+		flex-direction: column;
+		gap: 4px;
+		margin-top: 8px;
+	}
+
+	/* Image attachment shown in the chat history, capped at 200px by 150px. */
+	.message-image {
+		object-fit: cover;
+		border-radius: var(--radius-sm);
+		max-height: 150px;
+		max-width: 200px;
+	}
+
 	.undo-button {
 		background: var(--button-secondary-background-fill);
 		color: var(--button-secondary-text-color);
@@ -466,24 +525,6 @@
 		gap: 12px;
 	}
 
-	.prompt-input {
-		width: 100%;
-		min-height: 80px;
-		background: var(--input-background-fill);
-		border: 1px solid var(--border-color-primary);
-		border-radius: var(--input-radius);
-		padding: 12px;
-		resize: vertical;
-		outline: none;
-		font-family: inherit;
-		font-size: 14px;
-		color: var(--body-text-color);
-	}
-
-	.prompt-input:focus {
-		border-color: var(--color-accent);
-	}
-
 	.submit-button {
 		background: var(--button-primary-background-fill);
 		color: var(--button-primary-text-color);
@@ -526,4 +567,8 @@
 	.diff-stats .removed {
 		color: #ef4444;
 	}
+
+	/* :global() opts this selector out of Svelte's per-component style scoping, so it matches
+	   every .upload-button on the page. Presumably aimed at the upload button rendered inside
+	   BaseMultimodalTextbox; confirm no other .upload-button is affected. */
+	:global(.upload-button) {
+		margin-right: 0.5rem !important;
+	}
 </style>