Azure
diff --git a/‎sdk/evaluation/azure-ai-evaluation/samples/agent_evaluators/response_completeness.ipynb‎
Lines changed: 12 additions & 119 deletions b/‎sdk/evaluation/azure-ai-evaluation/samples/agent_evaluators/response_completeness.ipynb‎
Lines changed: 12 additions & 119 deletions
@@ -78,16 +78,6 @@
     "response_completeness_evaluator = ResponseCompletenessEvaluator(model_config=model_config)\n"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from azure.ai.evaluation import ResponseCompletenessEvaluator , AzureOpenAIModelConfiguration\n",
-    "from pprint import pprint\n"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -104,30 +94,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'response_completeness': 1,\n",
-       " 'response_completeness_result': 'fail',\n",
-       " 'response_completeness_threshold': 3,\n",
-       " 'response_completeness_reason': 'The response does not provide the answer (\"Tokyo\") and is missing all relevant information from the ground truth.',\n",
-       " 'response_completeness_prompt_tokens': 1354,\n",
-       " 'response_completeness_completion_tokens': 107,\n",
-       " 'response_completeness_total_tokens': 1461,\n",
-       " 'response_completeness_finish_reason': 'stop',\n",
-       " 'response_completeness_model': 'gpt-4.1-2025-04-14',\n",
-       " 'response_completeness_sample_input': '[{\"role\": \"user\", \"content\": \"{\\\\\"response\\\\\": \\\\\"The capital of Japan\\\\\", \\\\\"ground_truth\\\\\": \\\\\"The capital of Japan is Tokyo.\\\\\"}\"}]',\n",
-       " 'response_completeness_sample_output': '[{\"role\": \"assistant\", \"content\": \"<S0>Let\\'s think step by step: The ground truth states \\\\\"The capital of Japan is Tokyo.\\\\\" The response is \\\\\"The capital of Japan.\\\\\" The response does not specify what the capital is; it only repeats part of the question and omits the key information (\\\\\"Tokyo\\\\\"). Therefore, none of the necessary information is present in the response.</S0>\\\\n<S1>The response does not provide the answer (\\\\\"Tokyo\\\\\") and is missing all relevant information from the ground truth.</S1>\\\\n<S2>1</S2>\"}]'}"
-      ]
-     },
-     "execution_count": 15,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "result = response_completeness_evaluator(\n",
     "    response=\"The capital of Japan\",\n",
@@ -138,30 +107,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'response_completeness': 5,\n",
-       " 'response_completeness_result': 'pass',\n",
-       " 'response_completeness_threshold': 3,\n",
-       " 'response_completeness_reason': 'The response is fully complete as it perfectly matches the ground truth statement.',\n",
-       " 'response_completeness_prompt_tokens': 1356,\n",
-       " 'response_completeness_completion_tokens': 85,\n",
-       " 'response_completeness_total_tokens': 1441,\n",
-       " 'response_completeness_finish_reason': 'stop',\n",
-       " 'response_completeness_model': 'gpt-4.1-2025-04-14',\n",
-       " 'response_completeness_sample_input': '[{\"role\": \"user\", \"content\": \"{\\\\\"response\\\\\": \\\\\"The capital of Japan is Tokyo.\\\\\", \\\\\"ground_truth\\\\\": \\\\\"The capital of Japan is Tokyo.\\\\\"}\"}]',\n",
-       " 'response_completeness_sample_output': '[{\"role\": \"assistant\", \"content\": \"<S0>Let\\'s think step by step: The ground truth contains a single statement: \\\\\"The capital of Japan is Tokyo.\\\\\" The response exactly matches this statement without omitting or altering any information. There are no missing or incorrect details, and all relevant information is included.</S0>\\\\n<S1>The response is fully complete as it perfectly matches the ground truth statement.</S1>\\\\n<S2>5</S2>\"}]'}"
-      ]
-     },
-     "execution_count": 16,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "result = response_completeness_evaluator(\n",
     "    response=\"The capital of Japan is Tokyo.\",\n",
@@ -179,30 +127,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'response_completeness': 5,\n",
-       " 'response_completeness_result': 'pass',\n",
-       " 'response_completeness_threshold': 3,\n",
-       " 'response_completeness_reason': 'The response perfectly matches the ground truth and includes all necessary information.',\n",
-       " 'response_completeness_prompt_tokens': 1356,\n",
-       " 'response_completeness_completion_tokens': 95,\n",
-       " 'response_completeness_total_tokens': 1451,\n",
-       " 'response_completeness_finish_reason': 'stop',\n",
-       " 'response_completeness_model': 'gpt-4.1-2025-04-14',\n",
-       " 'response_completeness_sample_input': '[{\"role\": \"user\", \"content\": \"{\\\\\"response\\\\\": \\\\\"The capital of Japan is Tokyo.\\\\\", \\\\\"ground_truth\\\\\": \\\\\"The capital of Japan is Tokyo.\\\\\"}\"}]',\n",
-       " 'response_completeness_sample_output': '[{\"role\": \"assistant\", \"content\": \"<S0>Let\\'s think step by step: The ground truth contains a single statement: \\\\\"The capital of Japan is Tokyo.\\\\\" The response exactly matches this statement without missing any information or introducing errors. There are no additional claims or details in the ground truth that need to be included, and the response is fully accurate and complete.</S0>\\\\n<S1>The response perfectly matches the ground truth and includes all necessary information.</S1>\\\\n<S2>5</S2>\"}]'}"
-      ]
-     },
-     "execution_count": 17,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "from azure.ai.evaluation import ResponseCompletenessEvaluator , AzureOpenAIModelConfiguration\n",
     "from pprint import pprint\n",
@@ -227,30 +154,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'response_completeness': 3,\n",
-       " 'response_completeness_result': 'pass',\n",
-       " 'response_completeness_threshold': 3,\n",
-       " 'response_completeness_reason': 'The response provides only the Saturday forecast and omits the Sunday temperature and rain chance, so it is moderately complete.',\n",
-       " 'response_completeness_prompt_tokens': 1398,\n",
-       " 'response_completeness_completion_tokens': 150,\n",
-       " 'response_completeness_total_tokens': 1548,\n",
-       " 'response_completeness_finish_reason': 'stop',\n",
-       " 'response_completeness_model': 'gpt-4.1-2025-04-14',\n",
-       " 'response_completeness_sample_input': '[{\"role\": \"user\", \"content\": \"{\\\\\"response\\\\\": \\\\\"The weather in Seattle this weekend will be partly cloudy with temperatures around 15\\\\\\\\u00b0C on Saturday.\\\\\", \\\\\"ground_truth\\\\\": \\\\\"The weather in Seattle this weekend will be partly cloudy with temperatures around 15\\\\\\\\u00b0C on Saturday and 17\\\\\\\\u00b0C on Sunday, with a 20% chance of rain on Sunday afternoon.\\\\\"}\"}]',\n",
-       " 'response_completeness_sample_output': '[{\"role\": \"assistant\", \"content\": \"<S0>Let\\'s think step by step: First, I will identify the key statements in the ground truth: (1) partly cloudy weather in Seattle this weekend, (2) temperatures around 15\\\\u00b0C on Saturday, (3) temperatures around 17\\\\u00b0C on Sunday, (4) 20% chance of rain on Sunday afternoon. The response includes (1) and (2) but omits (3) and (4), which are important details for a complete weekend forecast. Therefore, the response contains about half of the necessary information.</S0>\\\\n<S1>The response provides only the Saturday forecast and omits the Sunday temperature and rain chance, so it is moderately complete.</S1>\\\\n<S2>3</S2>\"}]'}"
-      ]
-     },
-     "execution_count": 18,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Conversation format with ground truth in context\n",
     "conversation = {\n",
@@ -281,22 +187,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "ename": "KeyError",
-     "evalue": "'AZURE_SUBSCRIPTION_ID'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[19], line 28\u001b[0m\n\u001b[1;32m     21\u001b[0m pd\u001b[38;5;241m.\u001b[39mDataFrame(data)\u001b[38;5;241m.\u001b[39mto_json(\n\u001b[1;32m     22\u001b[0m     file_path, orient\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrecords\u001b[39m\u001b[38;5;124m\"\u001b[39m, lines\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m     23\u001b[0m )\n\u001b[1;32m     25\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mazure\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mai\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mevaluation\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m evaluate\n\u001b[1;32m     27\u001b[0m azure_ai_project\u001b[38;5;241m=\u001b[39m{\n\u001b[0;32m---> 28\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msubscription_id\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43menviron\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mAZURE_SUBSCRIPTION_ID\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m,\n\u001b[1;32m     29\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mproject_name\u001b[39m\u001b[38;5;124m\"\u001b[39m: os\u001b[38;5;241m.\u001b[39menviron[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPROJECT_NAME\u001b[39m\u001b[38;5;124m\"\u001b[39m],\n\u001b[1;32m     30\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mresource_group_name\u001b[39m\u001b[38;5;124m\"\u001b[39m: os\u001b[38;5;241m.\u001b[39menviron[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRESOURCE_GROUP_NAME\u001b[39m\u001b[38;5;124m\"\u001b[39m],\n\u001b[1;32m     31\u001b[0m     }\n\u001b[1;32m     33\u001b[0m response \u001b[38;5;241m=\u001b[39m evaluate(\n\u001b[1;32m     34\u001b[0m     data\u001b[38;5;241m=\u001b[39mfile_path,\n\u001b[1;32m     35\u001b[0m     evaluators\u001b[38;5;241m=\u001b[39m{\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     38\u001b[0m     azure_ai_project\u001b[38;5;241m=\u001b[39mazure_ai_project,\n\u001b[1;32m     39\u001b[0m )\n\u001b[1;32m     41\u001b[0m pprint(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mAI Foundry URL: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstudio_url\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n",
-      "File \u001b[0;32m~/miniconda3/envs/samples/lib/python3.10/os.py:680\u001b[0m, in \u001b[0;36m_Environ.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m    677\u001b[0m     value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_data[\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mencodekey(key)]\n\u001b[1;32m    678\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[1;32m    679\u001b[0m     \u001b[38;5;66;03m# raise KeyError with the original key value\u001b[39;00m\n\u001b[0;32m--> 680\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m    681\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdecodevalue(value)\n",
-      "\u001b[0;31mKeyError\u001b[0m: 'AZURE_SUBSCRIPTION_ID'"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import json\n",
     "import pandas as pd\n",
@@ -344,7 +237,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "test_agent_evaluator_prp",
+   "display_name": "samples",
    "language": "python",
    "name": "python3"
   },
@@ -358,7 +251,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.9"
+   "version": "3.10.19"
   }
  },
  "nbformat": 4,