Skip to content
Closed
6 changes: 6 additions & 0 deletions .changeset/soft-baths-chew.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
"@gradio/vibeeditor": minor
"gradio": minor
---

feat:Adding images to vibe editor
1 change: 1 addition & 0 deletions gradio/data_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ class ResetBody(BaseModel):

# Request body for the `vibe_edit` route handler (`async def vibe_edit(body: VibeEditBody)`).
# The vibe editor frontend posts `{prompt, files}` to `/gradio_api/vibe-edit/`.
class VibeEditBody(BaseModel):
# User's text instruction; the handler interpolates it into the prompt sent to the LLM.
prompt: str
# Optional attachments, empty by default. The handler switches to the image-capable
# model whenever this list is non-empty, but only forwards entries whose `mime_type`
# contains "image" (read from `file.path` and sent as base64 data URLs).
files: list[FileData] = []


class VibeCodeBody(BaseModel):
Expand Down
63 changes: 55 additions & 8 deletions gradio/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2003,7 +2003,54 @@ async def vibe_edit(body: VibeEditBody):
client = InferenceClient()

content = ""
prompt = f"""
system_prompt = load_system_prompt()

has_images = len(body.files) > 0

if has_images:
# Use GLM-4.5V model for image processing
model = "zai-org/GLM-4.5V"

image_messages = []
for file in body.files:
if file.mime_type and "image" in file.mime_type:
import base64

with open(file.path, "rb") as img_file:
img_data = base64.b64encode(img_file.read()).decode("utf-8")
image_messages.append(
{
"type": "image_url",
"image_url": {
"url": f"data:{file.mime_type};base64,{img_data}"
},
}
)

prompt = f"""
You are a code generator for Gradio apps. Given the following existing code, prompt, and images, return the full new code.
Existing code:
```python
{original_code}
```

Prompt:
{body.prompt}

Please analyze the provided images and generate code based on the visual content and the text prompt.
"""

messages = [
{"role": "system", "content": system_prompt},
{
"role": "user",
"content": [{"type": "text", "text": prompt}] + image_messages,
},
]
else:
# Use GPT-OSS for text-only prompts
model = "openai/gpt-oss-120b"
prompt = f"""
You are a code generator for Gradio apps. Given the following existing code and prompt, return the full new code.
Existing code:
```python
Expand All @@ -2012,15 +2059,15 @@ async def vibe_edit(body: VibeEditBody):

Prompt:
{body.prompt}"""
system_prompt = load_system_prompt()
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": prompt},
]
content = (
client.chat_completion(
model="openai/gpt-oss-120b",
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": prompt},
],
max_tokens=1000,
model=model,
messages=messages,
max_tokens=3000,
)
.choices[0]
.message.content
Expand Down
121 changes: 83 additions & 38 deletions js/vibeeditor/Index.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,24 @@
import { Client } from "@gradio/client";
import { onMount } from "svelte";
import { BaseCode } from "@gradio/code";
import { BaseMultimodalTextbox } from "@gradio/multimodaltextbox";
import type { FileData } from "@gradio/client";

export let app: Client;
export let root: string;
let prompt = "";
let prompt: { text: string; files: FileData[] } = { text: "", files: [] };
let editorWidth = 350;
let isResizing = false;
let editorElement: HTMLDivElement;
let activeTab: "chat" | "code" = "chat";

let codeValue = "";
let diffStats: { lines_added: number; lines_removed: number } | null = null;
let hasImages = false;

interface Message {
text: string;
files: FileData[];
isBot: boolean;
isPending?: boolean;
hash?: string;
Expand All @@ -24,25 +28,35 @@
let message_history: Message[] = [];

const submit = (): void => {
if (prompt.trim() === "") return;
if (prompt.text.trim() === "" && prompt.files.length === 0) return;

// Clear diff stats when submitting new prompt
diffStats = null;

const userMessageIndex = message_history.length;
message_history = [...message_history, { text: prompt, isBot: false }];
message_history = [
...message_history,
{ text: prompt.text, files: prompt.files, isBot: false }
];

const botMessageIndex = message_history.length;
message_history = [
...message_history,
{ text: "Working...", isBot: true, isPending: true }
{ text: "Working...", files: [], isBot: true, isPending: true }
];

const userPrompt = prompt;
prompt = "";
const userPrompt = prompt.text;
const userFiles = prompt.files;
prompt = { text: "", files: [] };

// Record whether this prompt includes images so the "Powered by" link shows the matching model
hasImages = userFiles.some(
(file) => file.mime_type && file.mime_type.includes("image")
);

const post = app.post_data(`${root}/gradio_api/vibe-edit/`, {
prompt: userPrompt
prompt: userPrompt,
files: userFiles
});
post
.then(([response, status_code]) => {
Expand Down Expand Up @@ -70,6 +84,7 @@
index === botMessageIndex
? {
text: responseData.reasoning ? responseData.reasoning : "Done.",
files: [],
isBot: true,
isPending: false
}
Expand All @@ -79,7 +94,12 @@
.catch((error) => {
message_history = message_history.map((msg, index) =>
index === botMessageIndex
? { text: "Error occurred.", isBot: true, isPending: false }
? {
text: "Error occurred.",
files: [],
isBot: true,
isPending: false
}
: msg
);
});
Expand All @@ -96,7 +116,7 @@
diffStats = null;

const messageToUndo = message_history[messageIndex];
prompt = messageToUndo.text;
prompt = { text: messageToUndo.text, files: messageToUndo.files };

message_history = message_history.slice(0, messageIndex);
} catch (error) {
Expand Down Expand Up @@ -222,7 +242,18 @@
class:user-message={!message.isBot}
>
<div class="message-content">
<span class="message-text">{message.text}</span>
<div class="message-text">
<span>{message.text}</span>
{#if message.files && message.files.length > 0}
<div class="message-files">
{#each message.files as file}
{#if file.mime_type && file.mime_type.includes("image")}
<img src={file.url} alt="" class="message-image" />
{/if}
{/each}
</div>
{/if}
</div>
{#if !message.isBot && message.hash && !message.isPending}
<button
class="undo-button"
Expand Down Expand Up @@ -263,22 +294,36 @@
</div>

<div class="input-section">
<div class="powered-by">Powered by: <code>gpt-oss</code></div>
<textarea
on:keydown={(e) => {
if (e.key === "Enter" && !e.shiftKey) {
e.preventDefault();
submit();
}
}}
<div class="powered-by">
Powered by: <a
style="text-decoration: underline;"
href={hasImages
? "https://hf.co/zai-org/GLM-4.5V"
: "https://hf.co/openai/gpt-oss-120b"}
target="_blank">{hasImages ? "GLM-4.5V" : "gpt-oss"}</a
>
</div>
<BaseMultimodalTextbox
bind:value={prompt}
placeholder="What can I add or change?"
class="prompt-input"
lines={1}
max_lines={10}
file_types={["image"]}
file_count="multiple"
sources={["upload"]}
submit_btn={false}
show_label={false}
label=""
i18n={(value) => value || ""}
waveform_settings={{}}
{root}
upload={(...args) => app.upload(...args)}
stream_handler={(...args) => app.stream(...args)}
/>
<button
on:click={submit}
class="submit-button"
disabled={prompt.trim() === ""}
disabled={prompt.text.trim() === "" && prompt.files.length === 0}
>
Send
</button>
Expand Down Expand Up @@ -433,6 +478,20 @@
flex: 1;
}

.message-files {
margin-top: 8px;
display: flex;
flex-direction: column;
gap: 4px;
}

.message-image {
max-width: 200px;
max-height: 150px;
border-radius: var(--radius-sm);
object-fit: cover;
}

.undo-button {
background: var(--button-secondary-background-fill);
color: var(--button-secondary-text-color);
Expand Down Expand Up @@ -466,24 +525,6 @@
gap: 12px;
}

.prompt-input {
width: 100%;
min-height: 80px;
background: var(--input-background-fill);
border: 1px solid var(--border-color-primary);
border-radius: var(--input-radius);
padding: 12px;
resize: vertical;
outline: none;
font-family: inherit;
font-size: 14px;
color: var(--body-text-color);
}

.prompt-input:focus {
border-color: var(--color-accent);
}

.submit-button {
background: var(--button-primary-background-fill);
color: var(--button-primary-text-color);
Expand Down Expand Up @@ -526,4 +567,8 @@
.diff-stats .removed {
color: #ef4444;
}

:global(.upload-button) {
margin-right: 0.5rem !important;
}
</style>
Loading