Commit 5617f82

feat: Include images in call to LLM (#971)

cecheta and frtibble authored
Co-authored-by: Frances Tibble <[email protected]>
1 parent 59f55ea commit 5617f82

File tree

9 files changed: 315 additions, 45 deletions

code/backend/batch/utilities/tools/question_answer_tool.py

Lines changed: 56 additions & 6 deletions
@@ -4,11 +4,13 @@
 
 from ..common.answer import Answer
 from ..common.source_document import SourceDocument
+from ..helpers.azure_blob_storage_client import AzureBlobStorageClient
 from ..helpers.config.config_helper import ConfigHelper
 from ..helpers.env_helper import EnvHelper
 from ..helpers.llm_helper import LLMHelper
 from ..search.search import Search
 from .answering_tool_base import AnsweringToolBase
+from openai.types.chat import ChatCompletion
 
 logger = logging.getLogger(__name__)
 
@@ -62,6 +64,7 @@ def generate_on_your_data_messages(
         question: str,
         chat_history: list[dict],
         sources: list[SourceDocument],
+        image_urls: list[str] = [],
     ) -> list[dict]:
         examples = []
 
@@ -122,20 +125,41 @@ def generate_on_your_data_messages(
             },
             *QuestionAnswerTool.clean_chat_history(chat_history),
             {
-                "content": self.config.prompts.answering_user_prompt.format(
-                    sources=documents,
-                    question=question,
-                ),
+                "content": [
+                    {
+                        "type": "text",
+                        "text": self.config.prompts.answering_user_prompt.format(
+                            sources=documents,
+                            question=question,
+                        ),
+                    },
+                    *(
+                        [
+                            {
+                                "type": "image_url",
+                                "image_url": image_url,
+                            }
+                            for image_url in image_urls
+                        ]
+                    ),
+                ],
                 "role": "user",
             },
         ]
 
     def answer_question(self, question: str, chat_history: list[dict], **kwargs):
         source_documents = Search.get_source_documents(self.search_handler, question)
 
+        if self.env_helper.USE_ADVANCED_IMAGE_PROCESSING:
+            image_urls = self.create_image_url_list(source_documents)
+        else:
+            image_urls = []
+
+        model = self.env_helper.AZURE_OPENAI_VISION_MODEL if image_urls else None
+
         if self.config.prompts.use_on_your_data_format:
             messages = self.generate_on_your_data_messages(
-                question, chat_history, source_documents
+                question, chat_history, source_documents, image_urls
             )
         else:
             warnings.warn(
@@ -145,8 +169,33 @@ def answer_question(self, question: str, chat_history: list[dict], **kwargs):
 
         llm_helper = LLMHelper()
 
-        response = llm_helper.get_chat_completion(messages, temperature=0)
+        response = llm_helper.get_chat_completion(messages, model=model, temperature=0)
+        clean_answer = self.format_answer_from_response(
+            response, question, source_documents
+        )
 
+        return clean_answer
+
+    def create_image_url_list(self, source_documents):
+        image_types = self.config.get_advanced_image_processing_image_types()
+
+        blob_client = AzureBlobStorageClient()
+        container_sas = blob_client.get_container_sas()
+
+        image_urls = [
+            doc.source.replace("_SAS_TOKEN_PLACEHOLDER_", container_sas)
+            for doc in source_documents
+            if doc.title is not None and doc.title.split(".")[-1] in image_types
+        ]
+
+        return image_urls
+
+    def format_answer_from_response(
+        self,
+        response: ChatCompletion,
+        question: str,
+        source_documents: list[SourceDocument],
+    ):
         answer = response.choices[0].message.content
         logger.debug(f"Answer: {answer}")
 
@@ -158,4 +207,5 @@ def answer_question(self, question: str, chat_history: list[dict], **kwargs):
             prompt_tokens=response.usage.prompt_tokens,
             completion_tokens=response.usage.completion_tokens,
         )
+
         return clean_answer
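Note on the message shape: the reworked "content" field is the multi-part text/image format accepted by the chat completions API, so a vision deployment receives the retrieved documents and the images in a single user turn. A minimal sketch of the kind of request this produces, assuming an openai>=1.x client; the endpoint, key, deployment name, and image URL are illustrative placeholders, not values from this repo:

```python
# Minimal sketch of the multi-part user message this change builds.
# Endpoint, key, deployment name, and image URL are placeholders.
from openai import AzureOpenAI

client = AzureOpenAI(
    azure_endpoint="https://example.openai.azure.com",  # placeholder
    api_key="<api-key>",  # placeholder
    api_version="2024-02-01",
)

response = client.chat.completions.create(
    model="gpt-4-vision",  # the AZURE_OPENAI_VISION_MODEL deployment; name assumed
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "## Retrieved Documents\n...\n## User Question\nWhat is in this image?",
                },
                # The tool passes the SAS-signed blob URL as a plain string,
                # mirroring '"image_url": image_url' in the diff above.
                {
                    "type": "image_url",
                    "image_url": "https://example.blob.core.windows.net/documents/doc.png?<sas>",
                },
            ],
        }
    ],
    temperature=0,
)
print(response.choices[0].message.content)
```

Because image_urls defaults to an empty list, a text-only question still yields a single text part, and model stays None, which presumably leaves LLMHelper on its default deployment.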

code/tests/functional/conftest.py

Lines changed: 4 additions & 28 deletions
@@ -1,3 +1,4 @@
+import re
 import pytest
 from pytest_httpserver import HTTPServer
 from tests.functional.app_config import AppConfig
@@ -56,7 +57,9 @@ def setup_default_mocking(httpserver: HTTPServer, app_config: AppConfig):
     )
 
     httpserver.expect_request(
-        f"/openai/deployments/{app_config.get('AZURE_OPENAI_MODEL')}/chat/completions",
+        re.compile(
+            f"/openai/deployments/({app_config.get('AZURE_OPENAI_MODEL')}|{app_config.get('AZURE_OPENAI_VISION_MODEL')})/chat/completions"
+        ),
         method="POST",
     ).respond_with_json(
         {
@@ -82,33 +85,6 @@ def setup_default_mocking(httpserver: HTTPServer, app_config: AppConfig):
         }
     )
 
-    httpserver.expect_request(
-        f"/openai/deployments/{app_config.get('AZURE_OPENAI_VISION_MODEL')}/chat/completions",
-        method="POST",
-    ).respond_with_json(
-        {
-            "id": "chatcmpl-6v7mkQj980V1yBec6ETrKPRqFjNw9",
-            "object": "chat.completion",
-            "created": 1679072642,
-            "model": app_config.get("AZURE_OPENAI_VISION_MODEL"),
-            "usage": {
-                "prompt_tokens": 58,
-                "completion_tokens": 68,
-                "total_tokens": 126,
-            },
-            "choices": [
-                {
-                    "message": {
-                        "role": "assistant",
-                        "content": "This is a caption for the image",
-                    },
-                    "finish_reason": "stop",
-                    "index": 0,
-                }
-            ],
-        }
-    )
-
     httpserver.expect_request(
         f"/indexes('{app_config.get('AZURE_SEARCH_CONVERSATIONS_LOG_INDEX')}')/docs/search.index",
         method="POST",
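Worth noting: pytest-httpserver accepts a compiled regular expression in place of a string URI, which is why the dedicated vision-model mock above could be deleted. A standalone sketch of the pattern, with made-up deployment names:

```python
# Standalone sketch of regex URI matching in pytest-httpserver.
# Deployment names are made up for illustration.
import re

from pytest_httpserver import HTTPServer


def configure_chat_mock(httpserver: HTTPServer) -> None:
    # One handler matches chat/completions for either deployment,
    # replacing two separate string-path handlers.
    httpserver.expect_request(
        re.compile(r"/openai/deployments/(gpt-35-turbo|gpt-4v)/chat/completions"),
        method="POST",
    ).respond_with_json(
        {"choices": [{"message": {"role": "assistant", "content": "ok"}}]}
    )
```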

code/tests/functional/tests/backend_api/default/test_advanced_image_processing.py

Lines changed: 122 additions & 2 deletions
@@ -1,3 +1,6 @@
+import json
+import re
+from unittest.mock import ANY
 import pytest
 import requests
 from pytest_httpserver import HTTPServer
@@ -68,7 +71,9 @@ def completions_mocking(httpserver: HTTPServer, app_config: AppConfig):
     )
 
     httpserver.expect_oneshot_request(
-        f"/openai/deployments/{app_config.get('AZURE_OPENAI_MODEL')}/chat/completions",
+        re.compile(
+            f"/openai/deployments/({app_config.get('AZURE_OPENAI_MODEL')}|{app_config.get('AZURE_OPENAI_VISION_MODEL')})/chat/completions"
+        ),
         method="POST",
     ).respond_with_json(
         {
@@ -112,6 +117,30 @@ def completions_mocking(httpserver: HTTPServer, app_config: AppConfig):
         }
     )
 
+    httpserver.expect_oneshot_request(
+        f"/indexes('{app_config.get('AZURE_SEARCH_INDEX')}')/docs/search.post.search",
+        method="POST",
+    ).respond_with_json(
+        {
+            "value": [
+                {
+                    "@search.score": 0.02916666865348816,
+                    "id": "doc_1",
+                    "content": "content",
+                    "content_vector": [
+                        -0.012909674,
+                        0.00838491,
+                    ],
+                    "metadata": '{"id": "doc_1", "source": "https://source_SAS_TOKEN_PLACEHOLDER_", "title": "/documents/doc.png", "chunk": 95, "offset": 202738, "page_number": null}',
+                    "title": "/documents/doc.png",
+                    "source": "https://source_SAS_TOKEN_PLACEHOLDER_",
+                    "chunk": 95,
+                    "offset": 202738,
+                }
+            ]
+        }
+    )
+
 
 def test_post_responds_successfully(app_url: str, app_config: AppConfig):
     # when
@@ -124,7 +153,7 @@ def test_post_responds_successfully(app_url: str, app_config: AppConfig):
             {
                 "messages": [
                     {
-                        "content": r'{"citations": [{"content": "[/documents/doc.pdf](https://source)\n\n\ncontent", "id": "doc_1", "chunk_id": 95, "title": "/documents/doc.pdf", "filepath": "source", "url": "[/documents/doc.pdf](https://source)", "metadata": {"offset": 202738, "source": "https://source", "markdown_url": "[/documents/doc.pdf](https://source)", "title": "/documents/doc.pdf", "original_url": "https://source", "chunk": 95, "key": "doc_1", "filename": "source"}}], "intent": "What is the meaning of life?"}',
+                        "content": ANY,  # SAS URL changes each time
                         "end_turn": False,
                         "role": "tool",
                     },
@@ -143,6 +172,32 @@ def test_post_responds_successfully(app_url: str, app_config: AppConfig):
     }
     assert response.headers["Content-Type"] == "application/json"
 
+    content = json.loads(response.json()["choices"][0]["messages"][0]["content"])
+
+    assert content == {
+        "citations": [
+            {
+                "content": ANY,
+                "id": "doc_1",
+                "chunk_id": 95,
+                "title": "/documents/doc.png",
+                "filepath": "source",
+                "url": ANY,
+                "metadata": {
+                    "offset": 202738,
+                    "source": "https://source_SAS_TOKEN_PLACEHOLDER_",
+                    "markdown_url": ANY,
+                    "title": "/documents/doc.png",
+                    "original_url": "https://source_SAS_TOKEN_PLACEHOLDER_",
+                    "chunk": 95,
+                    "key": "doc_1",
+                    "filename": "source",
+                },
+            }
+        ],
+        "intent": "What is the meaning of life?",
+    }
+
 
 def test_text_passed_to_computer_vision_to_generate_text_embeddings(
     app_url: str, httpserver: HTTPServer, app_config: AppConfig
@@ -169,3 +224,68 @@
             times=1,
         ),
     )
+
+
+def test_image_urls_included_in_call_to_openai(
+    app_url: str, app_config: AppConfig, httpserver: HTTPServer
+):
+    # when
+    requests.post(f"{app_url}{path}", json=body)
+
+    # then
+    request = verify_request_made(
+        mock_httpserver=httpserver,
+        request_matcher=RequestMatcher(
+            path=f"/openai/deployments/{app_config.get('AZURE_OPENAI_VISION_MODEL')}/chat/completions",
+            method="POST",
+            json={
+                "messages": [
+                    {
+                        "content": "system prompt",
+                        "role": "system",
+                    },
+                    {
+                        "content": '## Retrieved Documents\n{"retrieved_documents":[{"[doc1]":{"content":"content"}}]}\n\n## User Question\nuser question',
+                        "name": "example_user",
+                        "role": "system",
+                    },
+                    {
+                        "content": "answer",
+                        "name": "example_assistant",
+                        "role": "system",
+                    },
+                    {
+                        "content": "You are an AI assistant that helps people find information.",
+                        "role": "system",
+                    },
+                    {"content": "Hello", "role": "user"},
+                    {"content": "Hi, how can I help?", "role": "assistant"},
+                    {
+                        "content": [
+                            {
+                                "type": "text",
+                                "text": '## Retrieved Documents\n{"retrieved_documents":[{"[doc1]":{"content":"content"}}]}\n\n## User Question\nWhat is the meaning of life?',
+                            },
+                            {"type": "image_url", "image_url": ANY},
+                        ],
+                        "role": "user",
+                    },
+                ],
+                "model": app_config.get("AZURE_OPENAI_VISION_MODEL"),
+                "max_tokens": int(app_config.get("AZURE_OPENAI_MAX_TOKENS")),
+                "temperature": 0,
+            },
+            headers={
+                "Accept": "application/json",
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {app_config.get('AZURE_OPENAI_API_KEY')}",
+                "Api-Key": app_config.get("AZURE_OPENAI_API_KEY"),
+            },
+            query_string="api-version=2024-02-01",
+            times=1,
+        ),
+    )[0]
+
+    assert request.json["messages"][6]["content"][1]["image_url"].startswith(
+        "https://source"
+    )
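The content and image_url fields are matched with ANY, and the final assertion checks only the URL prefix, because the SAS token substituted for _SAS_TOKEN_PLACEHOLDER_ differs on every run. A small illustration of the substitution create_image_url_list performs; the SAS query string below is fabricated:

```python
# Illustration of the _SAS_TOKEN_PLACEHOLDER_ substitution done by
# create_image_url_list; the SAS query string is fabricated.
source = "https://source_SAS_TOKEN_PLACEHOLDER_"
container_sas = "?sv=2021-08-06&se=2024-01-01&sig=abc123"  # varies per run
image_url = source.replace("_SAS_TOKEN_PLACEHOLDER_", container_sas)

assert image_url == "https://source?sv=2021-08-06&se=2024-01-01&sig=abc123"
assert image_url.startswith("https://source")  # the only stable prefix to assert on
```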

code/tests/functional/tests/backend_api/default/test_conversation.py

Lines changed: 6 additions & 1 deletion
@@ -570,7 +570,12 @@ def test_post_makes_correct_call_to_openai_chat_completions_with_documents(
                     {"content": "Hello", "role": "user"},
                     {"content": "Hi, how can I help?", "role": "assistant"},
                     {
-                        "content": '## Retrieved Documents\n{"retrieved_documents":[{"[doc1]":{"content":"content"}}]}\n\n## User Question\nWhat is the meaning of life?',
+                        "content": [
+                            {
+                                "type": "text",
+                                "text": '## Retrieved Documents\n{"retrieved_documents":[{"[doc1]":{"content":"content"}}]}\n\n## User Question\nWhat is the meaning of life?',
+                            }
+                        ],
                         "role": "user",
                     },
                 ],

code/tests/functional/tests/backend_api/default/test_post_prompt_tool.py

Lines changed: 4 additions & 1 deletion
@@ -1,4 +1,5 @@
 import json
+import re
 
 import pytest
 import requests
@@ -90,7 +91,9 @@ def completions_mocking(httpserver: HTTPServer, app_config: AppConfig):
     )
 
     httpserver.expect_oneshot_request(
-        f"/openai/deployments/{app_config.get('AZURE_OPENAI_MODEL')}/chat/completions",
+        re.compile(
+            f"/openai/deployments/({app_config.get('AZURE_OPENAI_MODEL')}|{app_config.get('AZURE_OPENAI_VISION_MODEL')})/chat/completions"
+        ),
         method="POST",
     ).respond_with_json(
         {

code/tests/functional/tests/backend_api/integrated_vectorization_custom_conversation/test_iv_question_answer_tool.py

Lines changed: 6 additions & 1 deletion
@@ -228,7 +228,12 @@ def test_post_makes_correct_call_to_openai_chat_completions_in_question_answer_t
                     {"content": "Hello", "role": "user"},
                     {"content": "Hi, how can I help?", "role": "assistant"},
                     {
-                        "content": '## Retrieved Documents\n{"retrieved_documents":[{"[doc1]":{"content":"content"}}]}\n\n## User Question\nWhat is the meaning of life?',
+                        "content": [
+                            {
+                                "type": "text",
+                                "text": '## Retrieved Documents\n{"retrieved_documents":[{"[doc1]":{"content":"content"}}]}\n\n## User Question\nWhat is the meaning of life?',
+                            }
+                        ],
                         "role": "user",
                     },
                 ],

code/tests/functional/tests/backend_api/sk_orchestrator/test_response_with_search_documents_tool.py

Lines changed: 6 additions & 1 deletion
@@ -243,7 +243,12 @@ def test_post_makes_correct_call_to_openai_chat_completions_in_question_answer_t
                     {"content": "Hello", "role": "user"},
                     {"content": "Hi, how can I help?", "role": "assistant"},
                     {
-                        "content": '## Retrieved Documents\n{"retrieved_documents":[{"[doc1]":{"content":"content"}}]}\n\n## User Question\nWhat is the meaning of life?',
+                        "content": [
+                            {
+                                "type": "text",
+                                "text": '## Retrieved Documents\n{"retrieved_documents":[{"[doc1]":{"content":"content"}}]}\n\n## User Question\nWhat is the meaning of life?',
+                            }
+                        ],
                         "role": "user",
                     },
                 ],
