diff --git a/ads/llm/deploy.py b/ads/llm/deploy.py index ddae9eea5..1b1cf497e 100644 --- a/ads/llm/deploy.py +++ b/ads/llm/deploy.py @@ -19,13 +19,19 @@ class ChainDeployment(GenericModel): + """Represents a model deployment with LangChain. + """ def __init__(self, chain, **kwargs): self.chain = chain + if "model_input_serializer" not in kwargs: + kwargs["model_input_serializer"] = self.model_input_serializer_type.JSON super().__init__(**kwargs) def prepare(self, **kwargs) -> GenericModel: """Prepares the model artifact.""" chain_yaml_uri = os.path.join(self.artifact_dir, "chain.yaml") + if not os.path.exists(self.artifact_dir): + os.makedirs(self.artifact_dir) with open(chain_yaml_uri, "w", encoding="utf-8") as f: f.write(yaml.safe_dump(dump(self.chain))) diff --git a/ads/llm/guardrails/base.py b/ads/llm/guardrails/base.py index 555503afc..61d6e4714 100644 --- a/ads/llm/guardrails/base.py +++ b/ads/llm/guardrails/base.py @@ -156,7 +156,6 @@ class Guardrail(BaseTool): class Config: arbitrary_types_allowed = True - underscore_attrs_are_private = True name: str = "" description: str = "Guardrail" diff --git a/ads/llm/langchain/plugins/chat_models/oci_data_science.py b/ads/llm/langchain/plugins/chat_models/oci_data_science.py index 89d812b6e..a19dacc8f 100644 --- a/ads/llm/langchain/plugins/chat_models/oci_data_science.py +++ b/ads/llm/langchain/plugins/chat_models/oci_data_science.py @@ -3,23 +3,24 @@ # Copyright (c) 2023 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ +"""Chat model for OCI data science model deployment endpoint.""" - +import importlib import json import logging from operator import itemgetter from typing import ( Any, AsyncIterator, + Callable, Dict, Iterator, List, Literal, Optional, + Sequence, Type, Union, - Sequence, - Callable, ) from langchain_core.callbacks import ( @@ -33,21 +34,16 @@ generate_from_stream, ) from langchain_core.messages import AIMessageChunk, BaseMessage, BaseMessageChunk -from langchain_core.tools import BaseTool from langchain_core.output_parsers import ( JsonOutputParser, PydanticOutputParser, ) from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult from langchain_core.runnables import Runnable, RunnableMap, RunnablePassthrough +from langchain_core.tools import BaseTool from langchain_core.utils.function_calling import convert_to_openai_tool -from langchain_openai.chat_models.base import ( - _convert_delta_to_message_chunk, - _convert_message_to_dict, - _convert_dict_to_message, -) +from pydantic import BaseModel, Field, model_validator -from pydantic import BaseModel, Field from ads.llm.langchain.plugins.llms.oci_data_science_model_deployment_endpoint import ( DEFAULT_MODEL_NAME, BaseOCIModelDeployment, @@ -63,15 +59,40 @@ def _is_pydantic_class(obj: Any) -> bool: class ChatOCIModelDeployment(BaseChatModel, BaseOCIModelDeployment): """OCI Data Science Model Deployment chat model integration. - To use, you must provide the model HTTP endpoint from your deployed - chat model, e.g. https://modeldeployment..oci.customer-oci.com//predict. + Setup: + Install ``oracle-ads`` and ``langchain-openai``. - To authenticate, `oracle-ads` has been used to automatically load - credentials: https://accelerated-data-science.readthedocs.io/en/latest/user_guide/cli/authentication.html + .. code-block:: bash - Make sure to have the required policies to access the OCI Data - Science Model Deployment endpoint. 
See: - https://docs.oracle.com/en-us/iaas/data-science/using/model-dep-policies-auth.htm#model_dep_policies_auth__predict-endpoint + pip install -U oracle-ads langchain-openai + + Use `ads.set_auth()` to configure authentication. + For example, to use OCI resource_principal for authentication: + + .. code-block:: python + + import ads + ads.set_auth("resource_principal") + + For more details on authentication, see: + https://accelerated-data-science.readthedocs.io/en/latest/user_guide/cli/authentication.html + + Make sure to have the required policies to access the OCI Data + Science Model Deployment endpoint. See: + https://docs.oracle.com/en-us/iaas/data-science/using/model-dep-policies-auth.htm + + + Key init args - completion params: + endpoint: str + The OCI model deployment endpoint. + temperature: float + Sampling temperature. + max_tokens: Optional[int] + Max number of tokens to generate. + + Key init args — client params: + auth: dict + ADS auth dictionary for OCI authentication. Instantiate: .. code-block:: python @@ -79,7 +100,7 @@ class ChatOCIModelDeployment(BaseChatModel, BaseOCIModelDeployment): from langchain_community.chat_models import ChatOCIModelDeployment chat = ChatOCIModelDeployment( - endpoint="https://modeldeployment.us-ashburn-1.oci.customer-oci.com//predict", + endpoint="https://modeldeployment..oci.customer-oci.com//predict", model="odsc-llm", streaming=True, max_retries=3, @@ -94,7 +115,7 @@ class ChatOCIModelDeployment(BaseChatModel, BaseOCIModelDeployment): .. code-block:: python messages = [ - ("system", "You are a helpful translator. Translate the user sentence to French."), + ("system", "Translate the user sentence to French."), ("human", "Hello World!"), ] chat.invoke(messages) @@ -102,7 +123,19 @@ class ChatOCIModelDeployment(BaseChatModel, BaseOCIModelDeployment): .. code-block:: python AIMessage( - content='Bonjour le monde!',response_metadata={'token_usage': {'prompt_tokens': 40, 'total_tokens': 50, 'completion_tokens': 10},'model_name': 'odsc-llm','system_fingerprint': '','finish_reason': 'stop'},id='run-cbed62da-e1b3-4abd-9df3-ec89d69ca012-0') + content='Bonjour le monde!', + response_metadata={ + 'token_usage': { + 'prompt_tokens': 40, + 'total_tokens': 50, + 'completion_tokens': 10 + }, + 'model_name': 'odsc-llm', + 'system_fingerprint': '', + 'finish_reason': 'stop' + }, + id='run-cbed62da-e1b3-4abd-9df3-ec89d69ca012-0' + ) Streaming: .. code-block:: python @@ -112,18 +145,18 @@ class ChatOCIModelDeployment(BaseChatModel, BaseOCIModelDeployment): .. code-block:: python - content='' id='run-23df02c6-c43f-42de-87c6-8ad382e125c3' - content='\n' id='run-23df02c6-c43f-42de-87c6-8ad382e125c3' - content='B' id='run-23df02c6-c43f-42de-87c6-8ad382e125c3' - content='on' id='run-23df02c6-c43f-42de-87c6-8ad382e125c3' - content='j' id='run-23df02c6-c43f-42de-87c6-8ad382e125c3' - content='our' id='run-23df02c6-c43f-42de-87c6-8ad382e125c3' - content=' le' id='run-23df02c6-c43f-42de-87c6-8ad382e125c3' - content=' monde' id='run-23df02c6-c43f-42de-87c6-8ad382e125c3' - content='!' id='run-23df02c6-c43f-42de-87c6-8ad382e125c3' - content='' response_metadata={'finish_reason': 'stop'} id='run-23df02c6-c43f-42de-87c6-8ad382e125c3' - - Asyc: + content='' id='run-02c6-c43f-42de' + content='\n' id='run-02c6-c43f-42de' + content='B' id='run-02c6-c43f-42de' + content='on' id='run-02c6-c43f-42de' + content='j' id='run-02c6-c43f-42de' + content='our' id='run-02c6-c43f-42de' + content=' le' id='run-02c6-c43f-42de' + content=' monde' id='run-02c6-c43f-42de' + content='!' 
id='run-02c6-c43f-42de' + content='' response_metadata={'finish_reason': 'stop'} id='run-02c6-c43f-42de' + + Async: .. code-block:: python await chat.ainvoke(messages) @@ -133,7 +166,11 @@ class ChatOCIModelDeployment(BaseChatModel, BaseOCIModelDeployment): .. code-block:: python - AIMessage(content='Bonjour le monde!', response_metadata={'finish_reason': 'stop'}, id='run-8657a105-96b7-4bb6-b98e-b69ca420e5d1-0') + AIMessage( + content='Bonjour le monde!', + response_metadata={'finish_reason': 'stop'}, + id='run-8657a105-96b7-4bb6-b98e-b69ca420e5d1-0' + ) Structured output: .. code-block:: python @@ -147,19 +184,22 @@ class Joke(BaseModel): structured_llm = chat.with_structured_output(Joke, method="json_mode") structured_llm.invoke( - "Tell me a joke about cats, respond in JSON with `setup` and `punchline` keys" + "Tell me a joke about cats, " + "respond in JSON with `setup` and `punchline` keys" ) .. code-block:: python - Joke(setup='Why did the cat get stuck in the tree?',punchline='Because it was chasing its tail!') + Joke( + setup='Why did the cat get stuck in the tree?', + punchline='Because it was chasing its tail!' + ) See ``ChatOCIModelDeployment.with_structured_output()`` for more. Customized Usage: - - You can inherit from base class and overwrite the `_process_response`, `_process_stream_response`, - `_construct_json_body` for satisfying customized needed. + You can inherit from base class and overwrite the `_process_response`, + `_process_stream_response`, `_construct_json_body` for customized usage. .. code-block:: python @@ -180,12 +220,31 @@ def _construct_json_body(self, messages: list, params: dict) -> dict: } chat = MyChatModel( - endpoint=f"https://modeldeployment.us-ashburn-1.oci.customer-oci.com/{ocid}/predict", + endpoint=f"https://modeldeployment..oci.customer-oci.com/{ocid}/predict", model="odsc-llm", } chat.invoke("tell me a joke") + Response metadata + .. code-block:: python + + ai_msg = chat.invoke(messages) + ai_msg.response_metadata + + .. code-block:: python + + { + 'token_usage': { + 'prompt_tokens': 40, + 'total_tokens': 50, + 'completion_tokens': 10 + }, + 'model_name': 'odsc-llm', + 'system_fingerprint': '', + 'finish_reason': 'stop' + } + """ # noqa: E501 model_kwargs: Dict[str, Any] = Field(default_factory=dict) @@ -198,6 +257,17 @@ def _construct_json_body(self, messages: list, params: dict) -> dict: """Stop words to use when generating. Model output is cut off at the first occurrence of any of these substrings.""" + @model_validator(mode="before") + @classmethod + def validate_openai(cls, values: Any) -> Any: + """Checks if langchain_openai is installed.""" + if not importlib.util.find_spec("langchain_openai"): + raise ImportError( + "Could not import langchain_openai package. " + "Please install it with `pip install langchain_openai`." + ) + return values + @property def _llm_type(self) -> str: """Return type of llm.""" @@ -552,6 +622,8 @@ def _construct_json_body(self, messages: list, params: dict) -> dict: converted messages and additional parameters. """ + from langchain_openai.chat_models.base import _convert_message_to_dict + return { "messages": [_convert_message_to_dict(m) for m in messages], **params, @@ -578,6 +650,8 @@ def _process_stream_response( ValueError: If the response JSON is not well-formed or does not contain the expected structure. 
""" + from langchain_openai.chat_models.base import _convert_delta_to_message_chunk + try: choice = response_json["choices"][0] if not isinstance(choice, dict): @@ -616,6 +690,8 @@ def _process_response(self, response_json: dict) -> ChatResult: contain the expected structure. """ + from langchain_openai.chat_models.base import _convert_dict_to_message + generations = [] try: choices = response_json["choices"] @@ -760,8 +836,9 @@ class ChatOCIModelDeploymentVLLM(ChatOCIModelDeployment): tool_choice: Optional[str] = None """Whether to use tool calling. Defaults to None, tool calling is disabled. - Tool calling requires model support and vLLM to be configured with `--tool-call-parser`. - Set this to `auto` for the model to determine whether to make tool calls automatically. + Tool calling requires model support and the vLLM to be configured + with `--tool-call-parser`. + Set this to `auto` for the model to make tool calls automatically. Set this to `required` to force the model to always call one or more tools. """ diff --git a/ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py b/ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py index 134266644..cca1da6f1 100644 --- a/ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py +++ b/ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py @@ -5,8 +5,11 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ +"""LLM for OCI data science model deployment endpoint.""" + import json import logging +import traceback from typing import ( Any, AsyncIterator, @@ -21,7 +24,6 @@ import aiohttp import requests -import traceback from langchain_core.callbacks import ( AsyncCallbackManagerForLLMRun, CallbackManagerForLLMRun, @@ -29,9 +31,10 @@ from langchain_core.language_models.llms import BaseLLM, create_base_retry_decorator from langchain_core.load.serializable import Serializable from langchain_core.outputs import Generation, GenerationChunk, LLMResult -from langchain_core.utils import get_from_dict_or_env, pre_init +from langchain_core.utils import get_from_dict_or_env +from pydantic import Field, model_validator + from langchain_community.utilities.requests import Requests -from pydantic import Field logger = logging.getLogger(__name__) @@ -83,11 +86,12 @@ class BaseOCIModelDeployment(Serializable): max_retries: int = 3 """Maximum number of retries to make when generating.""" - @pre_init - def validate_environment( # pylint: disable=no-self-argument - cls, values: Dict - ) -> Dict: - """Validate that python package exists in environment.""" + @model_validator(mode="before") + @classmethod + def validate_environment(cls, values: Dict) -> Dict: + """Checks if oracle-ads is installed and + get credentials/endpoint from environment. 
+ """ try: import ads @@ -256,7 +260,7 @@ def _check_response(self, response: Any) -> None: if hasattr(response, "status_code") else response.status ) - if status_code == 401 and self._refresh_signer(): + if status_code in [401, 404] and self._refresh_signer(): raise TokenExpiredError() from http_err raise ServerError( @@ -353,6 +357,11 @@ def _refresh_signer(self) -> bool: self.auth["signer"].refresh_security_token() return True return False + + @classmethod + def is_lc_serializable(cls) -> bool: + """Return whether this model can be serialized by LangChain.""" + return True class OCIModelDeploymentLLM(BaseLLM, BaseOCIModelDeployment): @@ -445,11 +454,6 @@ def _llm_type(self) -> str: """Return type of llm.""" return "oci_model_deployment_endpoint" - @classmethod - def is_lc_serializable(cls) -> bool: - """Return whether this model can be serialized by Langchain.""" - return True - @property def _default_params(self) -> Dict[str, Any]: """Get the default parameters.""" diff --git a/ads/llm/templates/score_chain.jinja2 b/ads/llm/templates/score_chain.jinja2 index 2a2d5c008..ccabefe52 100644 --- a/ads/llm/templates/score_chain.jinja2 +++ b/ads/llm/templates/score_chain.jinja2 @@ -3,7 +3,6 @@ import os import sys import json from functools import lru_cache -from langchain.chains import LLMChain from ads import set_auth from ads.llm.deploy import ChainDeployment from ads.llm.chain import LOG_ADS_GUARDRAIL_INFO diff --git a/docs/source/user_guide/large_language_model/deploy_langchain_application.rst b/docs/source/user_guide/large_language_model/deploy_langchain_application.rst index 153ecffd5..8f5a2a2a4 100644 --- a/docs/source/user_guide/large_language_model/deploy_langchain_application.rst +++ b/docs/source/user_guide/large_language_model/deploy_langchain_application.rst @@ -1,171 +1,224 @@ -############################ -Deploy LangChain Application -############################ +################################## +Deploy LLM Applications and Agents +################################## -Oracle ADS supports the deployment of LangChain application to OCI data science model deployment and you can easily do so just by writing a few lines of code. +Oracle ADS supports the deployment of LLM applications and agents, including LangChain application to OCI data science model deployment. -.. versionadded:: 2.9.1 - -.. admonition:: Installation +.. admonition:: IAM Policies :class: note - It is important to note that for ADS to serialize and deploy the LangChain application, all components used to build the application must be serializable. For more information regarding LLMs model serialization, see `here `_. + Ensure that you have configured the necessary `policies for model deployments `_. + For example, the following policy allows the dynamic group to use ``resource_principal`` to create model deployment. -Configuration -************* + .. code-block:: shell -Ensure that you have created the necessary `policies, authentication, and authorization for model deployments `_. -For example, the following policy allows the dynamic group to use ``resource_principal`` to create model deployment. + allow dynamic-group to manage data-science-model-deployments in compartment -.. 
code-block:: shell +The process of deploying LLM apps and agents involves: - allow dynamic-group to manage data-science-model-deployments in compartment +* Prepare your applications as model artifact +* Register the model artifact with OCI Data Science Model Catalog +* Build container image with dependencies, and push the image to OCI Container Registry +* Deploy the model artifact using the container image with OCI Data Science Model Deployment -LangChain Application -********************* +To get you started, we provide templates for `model artifacts `_ and `container image `_, so that you can focus on building you applications and agents. -Following is a simple LangChain application that build with a prompt template and large language model API. Here the ``Cohere`` model is used as an example. You may replace it with any other LangChain compatible LLM, including OCI Generative AI service. +.. figure:: figures/workflow.png + :width: 800 -.. code-block:: python3 +Prepare Model Artifacts +*********************** - import os - from langchain.llms import Cohere - from langchain.chains import LLMChain - from langchain.prompts import PromptTemplate - # Remember to replace the ```` with the actual cohere api key. - os.environ["COHERE_API_KEY"] = "" - - cohere = Cohere() - prompt = PromptTemplate.from_template("Tell me a joke about {subject}") - llm_chain = LLMChain(prompt=prompt, llm=cohere, verbose=True) +You can prepare your model artifact based on the `model artifact template `_ -Now you have a LangChain object ``llm_chain``. Try running it with the input ``{"subject": "animals"}`` and it should return a joke about animals. +First, create a template folder locally with the `score.py `_ file. For example, we can call it ``llm_apps_template``. -.. code-block:: python3 +.. code-block:: - llm_chain.invoke({"subject": "animals"}) + llm_apps_template + ├── score.py -Initialize the ChainDeployment -****************************** +The ``score.py`` serves as an agent for invoking your application with JSON payload. -ADS provides the ``ChainDeployment`` to handle the deployment of LangChain applications. -You can initialize ``ChainDeployment`` with the LangChain object ``llm_chain`` from previous section as parameter. -The ``artifact_dir`` is an optional parameter which points to the folder where the model artifacts will be put locally. -In this example, we're using a temporary folder generated by ``tempfile.mkdtemp()``. +Next, you can use ADS to create a generic model and save a copy of the template to a anther folder (e.g. ``my_apps``), which will be uploaded as model artifact. -.. code-block:: python3 +.. code-block:: python - import tempfile - from ads.llm.deploy import ChainDeployment - - artifact_dir = tempfile.mkdtemp() - - chain_deployment = ChainDeployment( - chain=llm_chain, - artifact_dir=artifact_dir - ) - -Prepare the Model Artifacts -*************************** + from ads.model.generic_model import GenericModel -Call ``prepare`` from ``ChainDeployment`` to generate the ``score.py`` and serialize the LangChain application to ``chain.yaml`` file under ``artifact_dir`` folder. -Parameters ``inference_conda_env`` and ``inference_python_version`` are passed to define the conda environment where your LangChain application will be running on OCI cloud. -Here, replace ``custom_conda_environment_uri`` with your conda environment uri that has the latest ADS 2.9.1 and replace ``python_version`` with your conda environment python version. 
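+    # Note (added for clarity): per the surrounding text, ``GenericModel.from_model_artifact()``
+    # copies the artifact template from ``uri`` into ``artifact_dir`` and returns a model
+    # object that can be verified locally, saved to the model catalog, and deployed.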
+
+    llm_app = GenericModel.from_model_artifact(
+        uri="llm_apps_template",  # Contains the model artifact templates
+        artifact_dir="my_apps",   # Location for the new model artifacts
+        model_input_serializer="cloudpickle"
+    )
+    llm_app.reload_runtime_info()
-.. note::
-
-  For how to customize and publish conda environment, take reference to `Publishing a Conda Environment to an Object Storage Bucket `_
+
+Then, you can add your own applications to the ``my_apps`` folder. Here are some requirements:
+
+* Each application should be a Python module.
+* Each module should have an ``invoke()`` function as the entrypoint.
+* The ``invoke()`` function should take a dictionary and return another dictionary.
-
-.. code-block:: python3
+
+For example, the following LangChain application translates English into French using a prompt template and an output parser:
-
-    chain_deployment.prepare(
-        inference_conda_env="",
-        inference_python_version="",
-    )
-
-Below is the ``chain.yaml`` file that was saved from ``llm_chain`` object.
-
-.. code-block:: YAML
-
-    _type: llm_chain
-    llm:
-        _type: cohere
-        frequency_penalty: 0.0
-        k: 0
-        max_tokens: 256
-        model: null
-        p: 1
-        presence_penalty: 0.0
-        temperature: 0.75
-        truncate: null
-    llm_kwargs: {}
-    memory: null
-    metadata: null
-    output_key: text
-    output_parser:
-        _type: default
-    prompt:
-        _type: prompt
-        input_types: {}
-        input_variables:
-        - subject
-        output_parser: null
-        partial_variables: {}
-        template: Tell me a joke about {subject}
-        template_format: f-string
-        validate_template: false
-    return_final_only: true
-    tags: null
-    verbose: true
-
-Verify the Serialized Application
-*********************************
-
-Verify the serialized application by calling ``verify()`` to make sure it is working as expected.
-There will be error if your application is not fully serializable.
+.. code-block:: python
+
+    import os
+    import ads
+    from langchain_core.prompts import ChatPromptTemplate
+    from langchain_core.output_parsers import StrOutputParser
+    from ads.llm import ChatOCIModelDeploymentVLLM
-
-.. code-block:: python3
-
-    chain_deployment.verify({"subject": "animals"})
+
+    ads.set_auth(auth="resource_principal")
-
-Save Artifacts to OCI Model Catalog
-***********************************
-
-Call ``save`` to pack and upload the artifacts under ``artifact_dir`` to OCI data science model catalog. Once the artifacts are successfully uploaded, you should be able to see the id of the model.
+
+    llm = ChatOCIModelDeploymentVLLM(
+        model="odsc-llm",
+        # The LLM_ENDPOINT environment variable should be set to a model deployment endpoint.
+        endpoint=os.environ["LLM_ENDPOINT"],
+        # Optionally you can specify additional keyword arguments for the model, e.g. temperature.
+        temperature=0.1,
+    )
-
-.. code-block:: python3
+
+    prompt = ChatPromptTemplate.from_messages(
+        [
+            (
+                "human",
+                "You are a helpful assistant to translate English into French. Respond with only the translation.\n"
+                "{input}",
+            ),
+        ]
+    )
+
+    chain = prompt | llm | StrOutputParser()
+
+    def invoke(message):
+        return chain.invoke({"input": message})
+
+The ``llm`` in this example is a chat model deployed with `AI Quick Actions `_.
+
+You can find a few example applications in the `model artifact template `_, including `tool calling with OCI generative AI `_ and a `LangGraph multi-agent example `_.
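+An application module does not have to use LangChain; any module that follows the
+``invoke()`` contract above can be served the same way. As a purely illustrative
+sketch (the module name ``echo.py`` is hypothetical and not part of the template),
+a minimal module could look like this:
+
+.. code-block:: python
+
+    # echo.py -- hypothetical minimal module, for illustration only.
+    # ``invoke()`` is the entrypoint that the scoring script calls with the request payload.
+
+    def invoke(inputs):
+        # Return a dictionary so the result serializes cleanly into the JSON response.
+        return {"echo": inputs}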
+Once you have added your application, you can call the ``verify()`` function to test/debug it locally:
+
+.. code-block:: python
+
+    llm_app.verify({
+        "inputs": "Hello!",
+        "module": "translate.py"
+    })
+
+Note that with the default ``score.py`` template, you will invoke your application with two keys:
+
+* ``module``: The module in the model artifact (the ``my_apps`` folder) containing the application to be invoked. Here we are using the ``translate.py`` example. You can specify a default module using the ``DEFAULT_MODULE`` environment variable.
+* ``inputs``: The payload for your application module. This example uses a string, but you can use a list or any other JSON payload for your application.
+
+The response will have the following format:
-    chain_deployment.save(display_name="LangChain Model")
+
+.. code-block:: python
-
-Deploy the Model
-****************
+
+    {
+        "outputs": "The outputs returned by invoking your app/agent",
+        "error": "Error message, if any.",
+        "traceback": "Traceback, if any.",
+        "id": "The ID for identifying the request.",
+    }
-
-Deploy the LangChain model from previous step by calling ``deploy``. Remember to replace the ```` with the actual cohere api key in the ``environment_variables``.
-It usually takes a couple of minutes to deploy the model and you should see the model deployment in the output once the process completes.
+
+If there is an error when invoking your app/agent, the ``error`` message along with the ``traceback`` will be returned in the response.
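+Conceptually, the default ``score.py`` does little more than resolve the requested
+module and call its ``invoke()`` function. The following is a much-simplified sketch
+of that dispatch logic (not the actual template code, which handles additional details
+such as logging and loading modules from the artifact directory):
+
+.. code-block:: python
+
+    # Simplified, illustrative dispatcher -- not the actual score.py template.
+    import importlib
+    import os
+    import traceback
+    import uuid
+
+    def predict(data: dict) -> dict:
+        """Resolve the requested module, call its invoke(), and wrap the result."""
+        result = {"outputs": None, "error": None, "traceback": None, "id": str(uuid.uuid4())}
+        try:
+            # Fall back to DEFAULT_MODULE when the request does not name a module.
+            module_name = data.get("module") or os.environ.get("DEFAULT_MODULE", "")
+            module = importlib.import_module(module_name.removesuffix(".py"))
+            result["outputs"] = module.invoke(data.get("inputs"))
+        except Exception as ex:
+            result["error"] = str(ex)
+            result["traceback"] = traceback.format_exc()
+        return result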
+Register the Model Artifact
+***************************
+
+Once your apps and agents are ready, you need to save them to the OCI Data Science Model Catalog before deployment:

.. code-block:: python3

-    chain_deployment.deploy(
-        display_name="LangChain Model Deployment",
-        environment_variables={"COHERE_API_KEY":""},
-    )
+    llm_app.save(display_name="LLM Apps", ignore_introspection=True)
+
+
+Build Container Image
+*********************
+
+Before deploying the model, you will need to build a container image with the dependencies for your apps and agents.
+
+To configure your environment for pushing images to the OCI Container Registry (OCIR), refer to the OCIR documentation on `Pushing Images Using the Docker CLI `.
+
+The `container image template `_ contains the files for building a container image for the OCI Data Science Model Deployment service. Add your dependencies to the ``requirement.txt`` file. You may also modify the ``Dockerfile`` if you need to add system libraries.
+
+.. code-block:: bash
+
+    docker build -t .
-
-Invoke the Deployed Model
-*************************
+
+Once the image is built, push it to the OCI Container Registry:
+
+.. code-block:: bash
+
+    docker push
-
-Now the OCI data science model deployment endpoint is ready and you can invoke it to ``tell a joke about animals``.
+
+Deploy as Model Deployment
+**************************
+
+To deploy the model, call the ``deploy()`` function with your settings, as shown in the example below:
+
+* For most applications, a CPU shape is sufficient.
+* Specify the log group and log OCIDs to enable logging for the deployment.
+* `Custom networking `_ with internet access is required for accessing external APIs or OCI Generative AI APIs in a different region.
+* Add environment variables as needed by your application, including any API keys or endpoints.
+* You may set ``DEFAULT_MODULE`` to select the module that is invoked by default.

.. code-block:: python3

+    import os
+
+    llm_app.deploy(
+        display_name="LLM Apps",
+        deployment_instance_shape="VM.Standard.E4.Flex",
+        deployment_log_group_id="",
+        deployment_predict_log_id="",
+        deployment_access_log_id="",
+        deployment_image="",
+        # Custom networking with internet access is needed for external API calls.
+        deployment_instance_subnet_id="",
+        # Add environment variables as needed by your application.
+        # The following are just examples.
+        environment_variables={
+            "TAVILY_API_KEY": os.environ["TAVILY_API_KEY"],
+            "PROJECT_COMPARTMENT_OCID": os.environ["PROJECT_COMPARTMENT_OCID"],
+            "LLM_ENDPOINT": os.environ["LLM_ENDPOINT"],
+            "DEFAULT_MODULE": "translate.py",
+        }
+    )
+
+Invoking the Deployment
+***********************
+
+Once the deployment is active, you can invoke the application with HTTP requests. For example:
-
-.. figure:: figures/prediction.png
-    :width: 800
+
+.. code-block:: python3
+
+    import oci
+    import requests
+
+    # Set ``endpoint`` to the predict endpoint of your model deployment.
+    response = requests.post(
+        endpoint,
+        json={
+            "inputs": "Hello!",
+        },
+        auth=oci.auth.signers.get_resource_principals_signer()
+    )
+    response.json()
+
+The response will be similar to the following:
-
-Alternatively, you can use OCI CLI to invoke the model deployment. Remember to replace the ``langchain_application_model_deployment_url`` with the actual model deployment url which you can find in the output from deploy step.
+
+.. code-block:: python3
-
-.. code-block:: shell
+
+    {
+        'error': None,
+        'id': 'fa3d7111-326f-4736-a8f4-ed5b21654534',
+        'outputs': 'Bonjour!',
+        'traceback': None
+    }
-
-    oci raw-request --http-method POST --target-uri /predict --request-body '{"subject": "animals"}' --auth resource_principal
+
+Alternatively, you can use the OCI CLI to invoke the model deployment. Remember to replace the ``model_deployment_url`` with the actual model deployment URL, which you can find in the output of the deploy step.
-
-.. figure:: figures/cli_prediction.png
-    :width: 800
\ No newline at end of file
+
+.. code-block:: shell
+
+    oci raw-request --http-method POST --target-uri /predict --request-body '{"inputs": "Hello!"}' --auth resource_principal
diff --git a/docs/source/user_guide/large_language_model/figures/cli_prediction.png b/docs/source/user_guide/large_language_model/figures/cli_prediction.png
deleted file mode 100644
index d29960290..000000000
Binary files a/docs/source/user_guide/large_language_model/figures/cli_prediction.png and /dev/null differ
diff --git a/docs/source/user_guide/large_language_model/figures/prediction.png b/docs/source/user_guide/large_language_model/figures/prediction.png
deleted file mode 100644
index 3b9b4cba1..000000000
Binary files a/docs/source/user_guide/large_language_model/figures/prediction.png and /dev/null differ
diff --git a/docs/source/user_guide/large_language_model/figures/workflow.png b/docs/source/user_guide/large_language_model/figures/workflow.png
new file mode 100644
index 000000000..218bee377
Binary files /dev/null and b/docs/source/user_guide/large_language_model/figures/workflow.png differ
diff --git a/docs/source/user_guide/large_language_model/langchain_models.rst b/docs/source/user_guide/large_language_model/langchain_models.rst
index 72e162a78..f079f5d74 100644
--- a/docs/source/user_guide/large_language_model/langchain_models.rst
+++ b/docs/source/user_guide/large_language_model/langchain_models.rst
@@ -1,7 +1,7 @@
 LangChain Integration
 *********************

-.. versionadded:: 2.11.19
+.. versionadded:: 2.12.0

 ..
admonition:: LangChain Community :class: note @@ -96,13 +96,12 @@ Chat models takes `chat messages /predict", + endpoint=f">/predict", # Optionally you can specify additional keyword arguments for the model. max_tokens=32, ) messages = [ - SystemMessage(content="You're a helpful assistant providing concise answers."), HumanMessage(content="Who's the first president of United States?"), ] diff --git a/tests/unitary/with_extras/langchain/chat_models/test_oci_data_science.py b/tests/unitary/with_extras/langchain/chat_models/test_oci_data_science.py index 89ebce844..34152d3e4 100644 --- a/tests/unitary/with_extras/langchain/chat_models/test_oci_data_science.py +++ b/tests/unitary/with_extras/langchain/chat_models/test_oci_data_science.py @@ -7,16 +7,19 @@ """Test OCI Data Science Model Deployment Endpoint.""" import sys +from typing import Any, AsyncGenerator, Dict, Generator from unittest import mock + import pytest + + +if sys.version_info < (3, 9): + pytest.skip(allow_module_level=True) + from langchain_core.messages import AIMessage, AIMessageChunk from requests.exceptions import HTTPError -from ads.llm import ChatOCIModelDeploymentVLLM, ChatOCIModelDeploymentTGI - -pytestmark = pytest.mark.skipif( - sys.version_info < (3, 9), reason="Requires Python 3.9 or higher" -) +from ads.llm import ChatOCIModelDeploymentVLLM, ChatOCIModelDeploymentTGI CONST_MODEL_NAME = "odsc-vllm" @@ -71,35 +74,37 @@ ) -def mocked_requests_post(self, **kwargs): - """Method to mock post requests""" +class MockResponse: + """Represents a mocked response.""" - class MockResponse: - """Represents a mocked response.""" + def __init__(self, json_data: Dict, status_code: int = 200): + self.json_data = json_data + self.status_code = status_code - def __init__(self, json_data, status_code=200): - self.json_data = json_data - self.status_code = status_code + def raise_for_status(self) -> None: + """Mocked raise for status.""" + if 400 <= self.status_code < 600: + raise HTTPError() - def raise_for_status(self): - """Mocked raise for status.""" - if 400 <= self.status_code < 600: - raise HTTPError("", response=self) + def json(self) -> Dict: + """Returns mocked json data.""" + return self.json_data - def json(self): - """Returns mocked json data.""" - return self.json_data + def iter_lines(self, chunk_size: int = 4096) -> Generator[bytes, None, None]: + """Returns a generator of mocked streaming response.""" + return CONST_STREAM_RESPONSE - def iter_lines(self, chunk_size=4096): - """Returns a generator of mocked streaming response.""" - return CONST_STREAM_RESPONSE + @property + def text(self) -> str: + """Returns the mocked text representation.""" + return "" - @property - def text(self): - return "" - payload = kwargs.get("json") - messages = payload.get("messages") +def mocked_requests_post(url: str, **kwargs: Any) -> MockResponse: + """Method to mock post requests""" + + payload: dict = kwargs.get("json", {}) + messages: list = payload.get("messages", []) prompt = messages[0].get("content") if prompt == CONST_PROMPT: @@ -112,9 +117,10 @@ def text(self): @pytest.mark.requires("ads") +@pytest.mark.requires("langchain_openai") @mock.patch("ads.common.auth.default_signer", return_value=dict(signer=None)) @mock.patch("requests.post", side_effect=mocked_requests_post) -def test_invoke_vllm(mock_post, mock_auth) -> None: +def test_invoke_vllm(*args: Any) -> None: """Tests invoking vLLM endpoint.""" llm = ChatOCIModelDeploymentVLLM(endpoint=CONST_ENDPOINT, model=CONST_MODEL_NAME) output = llm.invoke(CONST_PROMPT) @@ -123,9 
+129,10 @@ def test_invoke_vllm(mock_post, mock_auth) -> None: @pytest.mark.requires("ads") +@pytest.mark.requires("langchain_openai") @mock.patch("ads.common.auth.default_signer", return_value=dict(signer=None)) @mock.patch("requests.post", side_effect=mocked_requests_post) -def test_invoke_tgi(mock_post, mock_auth) -> None: +def test_invoke_tgi(*args: Any) -> None: """Tests invoking TGI endpoint using OpenAI Spec.""" llm = ChatOCIModelDeploymentTGI(endpoint=CONST_ENDPOINT, model=CONST_MODEL_NAME) output = llm.invoke(CONST_PROMPT) @@ -134,24 +141,32 @@ def test_invoke_tgi(mock_post, mock_auth) -> None: @pytest.mark.requires("ads") +@pytest.mark.requires("langchain_openai") @mock.patch("ads.common.auth.default_signer", return_value=dict(signer=None)) @mock.patch("requests.post", side_effect=mocked_requests_post) -def test_stream_vllm(mock_post, mock_auth) -> None: +def test_stream_vllm(*args: Any) -> None: """Tests streaming with vLLM endpoint using OpenAI spec.""" llm = ChatOCIModelDeploymentVLLM( endpoint=CONST_ENDPOINT, model=CONST_MODEL_NAME, streaming=True ) - output = AIMessageChunk("") + output = None count = 0 for chunk in llm.stream(CONST_PROMPT): assert isinstance(chunk, AIMessageChunk) - output += chunk + if output is None: + output = chunk + else: + output += chunk count += 1 assert count == 5 - assert output.content.strip() == CONST_COMPLETION + assert output is not None + if output is not None: + assert str(output.content).strip() == CONST_COMPLETION -async def mocked_async_streaming_response(*args, **kwargs): +async def mocked_async_streaming_response( + *args: Any, **kwargs: Any +) -> AsyncGenerator[bytes, None]: """Returns mocked response for async streaming.""" for item in CONST_ASYNC_STREAM_RESPONSE: yield item @@ -159,6 +174,7 @@ async def mocked_async_streaming_response(*args, **kwargs): @pytest.mark.asyncio @pytest.mark.requires("ads") +@pytest.mark.requires("langchain_openai") @mock.patch( "ads.common.auth.default_signer", return_value=dict(signer=mock.MagicMock()) ) @@ -166,7 +182,7 @@ async def mocked_async_streaming_response(*args, **kwargs): "langchain_community.utilities.requests.Requests.apost", mock.MagicMock(), ) -async def test_stream_async(mock_auth): +async def test_stream_async(*args: Any) -> None: """Tests async streaming.""" llm = ChatOCIModelDeploymentVLLM( endpoint=CONST_ENDPOINT, model=CONST_MODEL_NAME, streaming=True @@ -176,6 +192,5 @@ async def test_stream_async(mock_auth): "_aiter_sse", mock.MagicMock(return_value=mocked_async_streaming_response()), ): - - chunks = [chunk.content async for chunk in llm.astream(CONST_PROMPT)] + chunks = [str(chunk.content) async for chunk in llm.astream(CONST_PROMPT)] assert "".join(chunks).strip() == CONST_COMPLETION diff --git a/tests/unitary/with_extras/langchain/llms/test_oci_model_deployment_endpoint.py b/tests/unitary/with_extras/langchain/llms/test_oci_model_deployment_endpoint.py index 16e2f04e6..ce7bce482 100644 --- a/tests/unitary/with_extras/langchain/llms/test_oci_model_deployment_endpoint.py +++ b/tests/unitary/with_extras/langchain/llms/test_oci_model_deployment_endpoint.py @@ -7,15 +7,18 @@ """Test OCI Data Science Model Deployment Endpoint.""" import sys +from typing import Any, AsyncGenerator, Dict, Generator from unittest import mock + import pytest + +if sys.version_info < (3, 9): + pytest.skip(allow_module_level=True) + + from requests.exceptions import HTTPError from ads.llm import OCIModelDeploymentTGI, OCIModelDeploymentVLLM -pytestmark = pytest.mark.skipif( - sys.version_info < (3, 9), 
reason="Requires Python 3.9 or higher" -) - CONST_MODEL_NAME = "odsc-vllm" CONST_ENDPOINT = "https://oci.endpoint/ocid/predict" @@ -51,34 +54,36 @@ ) -def mocked_requests_post(self, **kwargs): - """Method to mock post requests""" +class MockResponse: + """Represents a mocked response.""" - class MockResponse: - """Represents a mocked response.""" + def __init__(self, json_data: Dict, status_code: int = 200) -> None: + self.json_data = json_data + self.status_code = status_code - def __init__(self, json_data, status_code=200): - self.json_data = json_data - self.status_code = status_code + def raise_for_status(self) -> None: + """Mocked raise for status.""" + if 400 <= self.status_code < 600: + raise HTTPError() - def raise_for_status(self): - """Mocked raise for status.""" - if 400 <= self.status_code < 600: - raise HTTPError("", response=self) + def json(self) -> Dict: + """Returns mocked json data.""" + return self.json_data - def json(self): - """Returns mocked json data.""" - return self.json_data + def iter_lines(self, chunk_size: int = 4096) -> Generator[bytes, None, None]: + """Returns a generator of mocked streaming response.""" + return CONST_STREAM_RESPONSE - def iter_lines(self, chunk_size=4096): - """Returns a generator of mocked streaming response.""" - return CONST_STREAM_RESPONSE + @property + def text(self) -> str: + """Returns the mocked text representation.""" + return "" - @property - def text(self): - return "" - payload = kwargs.get("json") +def mocked_requests_post(url: str, **kwargs: Any) -> MockResponse: + """Method to mock post requests""" + + payload: dict = kwargs.get("json", {}) if "inputs" in payload: prompt = payload.get("inputs") is_tgi = True @@ -97,7 +102,9 @@ def text(self): ) -async def mocked_async_streaming_response(*args, **kwargs): +async def mocked_async_streaming_response( + *args: Any, **kwargs: Any +) -> AsyncGenerator[bytes, None]: """Returns mocked response for async streaming.""" for item in CONST_ASYNC_STREAM_RESPONSE: yield item @@ -106,7 +113,7 @@ async def mocked_async_streaming_response(*args, **kwargs): @pytest.mark.requires("ads") @mock.patch("ads.common.auth.default_signer", return_value=dict(signer=None)) @mock.patch("requests.post", side_effect=mocked_requests_post) -def test_invoke_vllm(mock_post, mock_auth) -> None: +def test_invoke_vllm(*args: Any) -> None: """Tests invoking vLLM endpoint.""" llm = OCIModelDeploymentVLLM(endpoint=CONST_ENDPOINT, model=CONST_MODEL_NAME) output = llm.invoke(CONST_PROMPT) @@ -116,7 +123,7 @@ def test_invoke_vllm(mock_post, mock_auth) -> None: @pytest.mark.requires("ads") @mock.patch("ads.common.auth.default_signer", return_value=dict(signer=None)) @mock.patch("requests.post", side_effect=mocked_requests_post) -def test_stream_tgi(mock_post, mock_auth) -> None: +def test_stream_tgi(*args: Any) -> None: """Tests streaming with TGI endpoint using OpenAI spec.""" llm = OCIModelDeploymentTGI( endpoint=CONST_ENDPOINT, model=CONST_MODEL_NAME, streaming=True @@ -133,7 +140,7 @@ def test_stream_tgi(mock_post, mock_auth) -> None: @pytest.mark.requires("ads") @mock.patch("ads.common.auth.default_signer", return_value=dict(signer=None)) @mock.patch("requests.post", side_effect=mocked_requests_post) -def test_generate_tgi(mock_post, mock_auth) -> None: +def test_generate_tgi(*args: Any) -> None: """Tests invoking TGI endpoint using TGI generate spec.""" llm = OCIModelDeploymentTGI( endpoint=CONST_ENDPOINT, api="/generate", model=CONST_MODEL_NAME @@ -151,7 +158,7 @@ def test_generate_tgi(mock_post, mock_auth) -> 
None: "langchain_community.utilities.requests.Requests.apost", mock.MagicMock(), ) -async def test_stream_async(mock_auth): +async def test_stream_async(*args: Any) -> None: """Tests async streaming.""" llm = OCIModelDeploymentTGI( endpoint=CONST_ENDPOINT, model=CONST_MODEL_NAME, streaming=True @@ -161,6 +168,5 @@ async def test_stream_async(mock_auth): "_aiter_sse", mock.MagicMock(return_value=mocked_async_streaming_response()), ): - chunks = [chunk async for chunk in llm.astream(CONST_PROMPT)] assert "".join(chunks).strip() == CONST_COMPLETION