From dc0692f4eb71a4f20eff77e44705da6f5ae51c30 Mon Sep 17 00:00:00 2001 From: aliabd Date: Wed, 27 Aug 2025 19:17:41 +0300 Subject: [PATCH 1/9] uploading images --- gradio/data_classes.py | 1 + gradio/routes.py | 61 ++++++++++++++-- js/vibeeditor/Index.svelte | 138 +++++++++++++++++++++++++++---------- 3 files changed, 155 insertions(+), 45 deletions(-) diff --git a/gradio/data_classes.py b/gradio/data_classes.py index 3e167baeb1..e17e087b85 100644 --- a/gradio/data_classes.py +++ b/gradio/data_classes.py @@ -128,6 +128,7 @@ class ResetBody(BaseModel): class VibeEditBody(BaseModel): prompt: str + files: list[FileData] = [] class VibeCodeBody(BaseModel): diff --git a/gradio/routes.py b/gradio/routes.py index 4a59ee9f01..8424db6a19 100644 --- a/gradio/routes.py +++ b/gradio/routes.py @@ -1969,7 +1969,54 @@ async def vibe_edit(body: VibeEditBody): client = InferenceClient() content = "" - prompt = f""" + system_prompt = load_system_prompt() + + + has_images = len(body.files) > 0 + + if has_images: + # Use ERNIE model for image processing + model = "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT" + + image_messages = [] + for file in body.files: + if file.mime_type and "image" in file.mime_type: + import base64 + with open(file.path, "rb") as img_file: + img_data = base64.b64encode(img_file.read()).decode('utf-8') + image_messages.append({ + "type": "image_url", + "image_url": { + "url": f"data:{file.mime_type};base64,{img_data}" + } + }) + + prompt = f""" +You are a code generator for Gradio apps. Given the following existing code, prompt, and images, return the full new code. +Existing code: +```python +{original_code} +``` + +Prompt: +{body.prompt} + +Please analyze the provided images and generate code based on the visual content and the text prompt. +""" + + messages = [ + {"role": "system", "content": system_prompt}, + { + "role": "user", + "content": [ + {"type": "text", "text": prompt} + ] + image_messages + } + ] + else: + # Use GPT-OSS for text-only prompts + model = "openai/gpt-oss-120b" + prompt = f""" You are a code generator for Gradio apps. Given the following existing code and prompt, return the full new code. Existing code: ```python @@ -1978,14 +2025,14 @@ async def vibe_edit(body: VibeEditBody): Prompt: {body.prompt}""" - system_prompt = load_system_prompt() + messages = [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": prompt}, + ] content = ( client.chat_completion( - model="openai/gpt-oss-120b", - messages=[ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": prompt}, - ], + model=model, + messages=messages, max_tokens=1000, ) .choices[0] diff --git a/js/vibeeditor/Index.svelte b/js/vibeeditor/Index.svelte index 6693864be3..2d9d9bbab6 100644 --- a/js/vibeeditor/Index.svelte +++ b/js/vibeeditor/Index.svelte @@ -2,10 +2,12 @@ import { Client } from "@gradio/client"; import { onMount } from "svelte"; import { BaseCode } from "@gradio/code"; + import { BaseMultimodalTextbox } from "@gradio/multimodaltextbox"; + import type { FileData } from "@gradio/client"; export let app: Client; export let root: string; - let prompt = ""; + let prompt: { text: string; files: FileData[] } = { text: "", files: [] }; let editorWidth = 350; let isResizing = false; let editorElement: HTMLDivElement; @@ -13,9 +15,11 @@ let codeValue = ""; let diffStats: { lines_added: number; lines_removed: number } | null = null; + let hasImages = false; interface Message { text: string; + files: FileData[]; isBot: boolean; isPending?: boolean; hash?: string; @@ -24,25 +28,35 @@ let message_history: Message[] = []; const submit = (): void => { - if (prompt.trim() === "") return; + if (prompt.text.trim() === "" && prompt.files.length === 0) return; // Clear diff stats when submitting new prompt diffStats = null; const userMessageIndex = message_history.length; - message_history = [...message_history, { text: prompt, isBot: false }]; + message_history = [ + ...message_history, + { text: prompt.text, files: prompt.files, isBot: false } + ]; const botMessageIndex = message_history.length; message_history = [ ...message_history, - { text: "Working...", isBot: true, isPending: true } + { text: "Working...", files: [], isBot: true, isPending: true } ]; - const userPrompt = prompt; - prompt = ""; + const userPrompt = prompt.text; + const userFiles = prompt.files; + prompt = { text: "", files: [] }; + + // Check if there are images to determine which model to use + hasImages = userFiles.some( + (file) => file.mime_type && file.mime_type.includes("image") + ); const post = app.post_data(`${root}/gradio_api/vibe-edit/`, { - prompt: userPrompt + prompt: userPrompt, + files: userFiles }); post .then(([response, status_code]) => { @@ -70,6 +84,7 @@ index === botMessageIndex ? { text: responseData.reasoning ? responseData.reasoning : "Done.", + files: [], isBot: true, isPending: false } @@ -79,7 +94,12 @@ .catch((error) => { message_history = message_history.map((msg, index) => index === botMessageIndex - ? { text: "Error occurred.", isBot: true, isPending: false } + ? { + text: "Error occurred.", + files: [], + isBot: true, + isPending: false + } : msg ); }); @@ -96,7 +116,7 @@ diffStats = null; const messageToUndo = message_history[messageIndex]; - prompt = messageToUndo.text; + prompt = { text: messageToUndo.text, files: messageToUndo.files }; message_history = message_history.slice(0, messageIndex); } catch (error) { @@ -222,7 +242,22 @@ class:user-message={!message.isBot} >
- {message.text} +
+ {message.text} + {#if message.files && message.files.length > 0} +
+ {#each message.files as file} + {#if file.mime_type && file.mime_type.includes("image")} + Uploaded image + {/if} + {/each} +
+ {/if} +
{#if !message.isBot && message.hash && !message.isPending}