Skip to content

Commit 437cdc8

Browse files
authored
migrating code from community (#94)
1 parent be5dc2b commit 437cdc8

File tree

20 files changed

+1357
-12
lines changed

20 files changed

+1357
-12
lines changed
Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,31 @@
1+
from langchain_google_community.bigquery import BigQueryLoader
12
from langchain_google_community.bigquery_vector_search import BigQueryVectorSearch
23
from langchain_google_community.docai import DocAIParser, DocAIParsingResults
34
from langchain_google_community.documentai_warehouse import DocumentAIWarehouseRetriever
5+
from langchain_google_community.drive import GoogleDriveLoader
46
from langchain_google_community.gcs_directory import GCSDirectoryLoader
57
from langchain_google_community.gcs_file import GCSFileLoader
68
from langchain_google_community.gmail.loader import GMailLoader
79
from langchain_google_community.gmail.toolkit import GmailToolkit
8-
from langchain_google_community.google_speech_to_text import GoogleSpeechToTextLoader
9-
from langchain_google_community.googledrive import GoogleDriveLoader
10+
from langchain_google_community.google_speech_to_text import SpeechToTextLoader
11+
from langchain_google_community.places_api import (
12+
GooglePlacesAPIWrapper,
13+
GooglePlacesTool,
14+
)
15+
from langchain_google_community.search import (
16+
GoogleSearchAPIWrapper,
17+
GoogleSearchResults,
18+
GoogleSearchRun,
19+
)
20+
from langchain_google_community.texttospeech import TextToSpeechTool
21+
from langchain_google_community.translate import GoogleTranslateTransformer
1022
from langchain_google_community.vertex_ai_search import (
1123
VertexAIMultiTurnSearchRetriever,
1224
VertexAISearchRetriever,
1325
)
1426

1527
__all__ = [
28+
"BigQueryLoader",
1629
"BigQueryVectorSearch",
1730
"DocAIParser",
1831
"DocAIParsingResults",
@@ -22,7 +35,14 @@
2235
"GMailLoader",
2336
"GmailToolkit",
2437
"GoogleDriveLoader",
25-
"GoogleSpeechToTextLoader",
38+
"GooglePlacesAPIWrapper",
39+
"GooglePlacesTool",
40+
"GoogleSearchAPIWrapper",
41+
"GoogleSearchResults",
42+
"GoogleSearchRun",
43+
"GoogleTranslateTransformer",
44+
"SpeechToTextLoader",
45+
"TextToSpeechTool",
2646
"VertexAIMultiTurnSearchRetriever",
2747
"VertexAISearchRetriever",
2848
]
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
from __future__ import annotations
2+
3+
from typing import TYPE_CHECKING, List, Optional
4+
5+
from langchain_core.document_loaders import BaseLoader
6+
from langchain_core.documents import Document
7+
8+
from langchain_google_community._utils import get_client_info
9+
10+
if TYPE_CHECKING:
11+
from google.auth.credentials import Credentials # type: ignore[import]
12+
13+
14+
class BigQueryLoader(BaseLoader):
    """Document loader that runs a query against Google Cloud `BigQuery`.

    One document is produced per result row. Columns named in
    `page_content_columns` are rendered into the document's `page_content`;
    columns named in `metadata_columns` are copied into its `metadata`.
    When neither is given, every column goes into `page_content` and the
    `metadata` stays empty.
    """

    def __init__(
        self,
        query: str,
        project: Optional[str] = None,
        page_content_columns: Optional[List[str]] = None,
        metadata_columns: Optional[List[str]] = None,
        credentials: Optional[Credentials] = None,
    ):
        """Store the query and the settings used when it is executed.

        Args:
            query: The query to run in BigQuery.
            project: Optional. The project to run the query in.
            page_content_columns: Optional. Names of the columns rendered into
                each document's `page_content`.
            metadata_columns: Optional. Names of the columns copied into each
                document's `metadata`.
            credentials: Optional `google.auth.credentials.Credentials` passed
                to the BigQuery client in place of the default credentials
                (for example Compute Engine or Service Account credentials).
        """
        self.query = query
        self.project = project
        self.page_content_columns = page_content_columns
        self.metadata_columns = metadata_columns
        self.credentials = credentials

    def load(self) -> List[Document]:
        """Run the query and convert every result row into a `Document`.

        Returns:
            One document per row, in the order the rows are returned.

        Raises:
            ImportError: If `google-cloud-bigquery` is not installed.
            ValueError: If the client ends up without a project.
        """
        # Imported lazily so the package stays importable without BigQuery.
        try:
            from google.cloud import bigquery  # type: ignore[attr-defined]
        except ImportError as import_error:
            install_hint = (
                "Could not import google-cloud-bigquery python package. "
                "Please install it with `pip install google-cloud-bigquery`."
            )
            raise ImportError(install_hint) from import_error

        client = bigquery.Client(
            credentials=self.credentials,
            project=self.project,
            client_info=get_client_info(module="bigquery"),
        )
        if not client.project:
            raise ValueError(
                "GCP project for Big Query is not set! Either provide a "
                "`project` argument during BigQueryLoader instantiation, "
                "or set a default project with `gcloud config set project` "
                "command."
            )

        rows = client.query(self.query).result()

        # Fall back to "all columns as content, no metadata" when unset.
        content_names = self.page_content_columns
        if content_names is None:
            content_names = [field.name for field in rows.schema]
        metadata_names = self.metadata_columns
        if metadata_names is None:
            metadata_names = []

        documents: List[Document] = []
        for row in rows:
            content_lines = []
            row_metadata = {}
            for name, value in row.items():
                if name in content_names:
                    content_lines.append(f"{name}: {value}")
                if name in metadata_names:
                    row_metadata[name] = value
            documents.append(
                Document(
                    page_content="\n".join(content_lines),
                    metadata=row_metadata,
                )
            )
        return documents
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
"""Base class for Gmail tools."""
2+
from __future__ import annotations
3+
4+
from typing import TYPE_CHECKING
5+
6+
from langchain_core.pydantic_v1 import Field
7+
from langchain_core.tools import BaseTool
8+
9+
from langchain_google_community.gmail.utils import build_resource_service
10+
11+
if TYPE_CHECKING:
12+
# This is for linting and IDE typehints
13+
from googleapiclient.discovery import Resource # type: ignore[import]
14+
else:
15+
try:
16+
# We do this so pydantic can resolve the types when instantiating
17+
from googleapiclient.discovery import Resource
18+
except ImportError:
19+
pass
20+
21+
22+
class GmailBaseTool(BaseTool):
    """Base class for Gmail tools."""

    # Gmail API resource used by subclasses to issue requests. When it is not
    # supplied explicitly, `build_resource_service` is called to create one.
    api_resource: Resource = Field(default_factory=build_resource_service)

    @classmethod
    def from_api_resource(cls, api_resource: Resource) -> "GmailBaseTool":
        """Create a tool from an api resource.

        Args:
            api_resource: The api resource to use.

        Returns:
            A tool bound to the given api resource.
        """
        # The field is named `api_resource`; passing it under any other
        # keyword would leave the supplied resource unused.
        return cls(api_resource=api_resource)
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
import base64
2+
from email.message import EmailMessage
3+
from typing import List, Optional, Type
4+
5+
from langchain_core.callbacks import CallbackManagerForToolRun
6+
from langchain_core.pydantic_v1 import BaseModel, Field
7+
8+
from langchain_google_community.gmail.base import GmailBaseTool
9+
10+
11+
class CreateDraftSchema(BaseModel):
    """Input for CreateDraftTool."""

    # Required fields: body text, recipients and subject line.
    message: str = Field(default=..., description="The message to include in the draft.")
    to: List[str] = Field(default=..., description="The list of recipients.")
    subject: str = Field(default=..., description="The subject of the message.")

    # Optional carbon-copy recipients; both default to None.
    cc: Optional[List[str]] = Field(default=None, description="The list of CC recipients.")
    bcc: Optional[List[str]] = Field(default=None, description="The list of BCC recipients.")
34+
35+
36+
class GmailCreateDraft(GmailBaseTool):
    """Tool that creates a draft email for Gmail."""

    name: str = "create_gmail_draft"
    description: str = (
        "Use this tool to create a draft email with the provided message fields."
    )
    args_schema: Type[CreateDraftSchema] = CreateDraftSchema

    def _prepare_draft_message(
        self,
        message: str,
        to: List[str],
        subject: str,
        cc: Optional[List[str]] = None,
        bcc: Optional[List[str]] = None,
    ) -> dict:
        """Build the request body for creating a draft.

        Args:
            message: Text used as the content of the email.
            to: Recipient addresses, joined into the "To" header.
            subject: Value of the "Subject" header.
            cc: Optional addresses joined into the "Cc" header.
            bcc: Optional addresses joined into the "Bcc" header.

        Returns:
            A dict of the form `{"message": {"raw": <encoded email>}}` where
            the email bytes are URL-safe base64 encoded.
        """
        draft_message = EmailMessage()
        draft_message.set_content(message)

        draft_message["To"] = ", ".join(to)
        draft_message["Subject"] = subject
        if cc is not None:
            draft_message["Cc"] = ", ".join(cc)

        if bcc is not None:
            draft_message["Bcc"] = ", ".join(bcc)

        encoded_message = base64.urlsafe_b64encode(draft_message.as_bytes()).decode()
        return {"message": {"raw": encoded_message}}

    def _run(
        self,
        message: str,
        to: List[str],
        subject: str,
        cc: Optional[List[str]] = None,
        bcc: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Create the draft through the api resource and report its ID.

        Args:
            message: Text used as the content of the email.
            to: Recipient addresses.
            subject: Subject of the email.
            cc: Optional CC addresses.
            bcc: Optional BCC addresses.
            run_manager: Optional callback manager; not used by this tool.

        Returns:
            A confirmation string containing the ID of the created draft.

        Raises:
            Exception: If building or creating the draft fails; the original
                error is attached as the cause.
        """
        try:
            create_message = self._prepare_draft_message(message, to, subject, cc, bcc)
            draft = (
                self.api_resource.users()
                .drafts()
                .create(userId="me", body=create_message)
                .execute()
            )
            return f'Draft created. Draft Id: {draft["id"]}'
        except Exception as e:
            # Chain explicitly so the underlying failure stays visible as the
            # direct cause in tracebacks.
            raise Exception(f"An error occurred: {e}") from e
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import base64
2+
import email
3+
from typing import Dict, Optional, Type
4+
5+
from langchain_core.callbacks import CallbackManagerForToolRun
6+
from langchain_core.pydantic_v1 import BaseModel, Field
7+
8+
from langchain_google_community.gmail.base import GmailBaseTool
9+
from langchain_google_community.gmail.utils import clean_email_body
10+
11+
12+
class SearchArgsSchema(BaseModel):
    """Input for GetMessageTool."""

    # Required: identifies the single message to fetch.
    message_id: str = Field(
        default=...,
        description="The unique ID of the email message, retrieved from a search.",
    )
19+
20+
21+
class GmailGetMessage(GmailBaseTool):
    """Tool that gets a message by ID from Gmail."""

    name: str = "get_gmail_message"
    description: str = (
        "Use this tool to fetch an email by message ID."
        " Returns the thread ID, snippet, body, subject, and sender."
    )
    args_schema: Type[SearchArgsSchema] = SearchArgsSchema

    def _decode_part(self, part: "email.message.Message") -> str:
        """Decode the payload of a single message part into text.

        The charset declared by the part is used when present; otherwise
        UTF-8 is assumed. If the declared charset is unknown or the bytes do
        not match it, the payload is decoded as UTF-8 with undecodable bytes
        replaced, so one badly encoded email cannot fail the whole tool call.

        Args:
            part: The message (or sub-part) whose payload should be decoded.

        Returns:
            The decoded text, or an empty string when there is no payload.
        """
        payload = part.get_payload(decode=True)
        if payload is None:
            return ""
        charset = part.get_content_charset() or "utf-8"
        try:
            return payload.decode(charset)
        except (LookupError, UnicodeDecodeError):
            return payload.decode("utf-8", errors="replace")

    def _run(
        self,
        message_id: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> Dict:
        """Fetch a message in raw format and extract its main fields.

        Args:
            message_id: ID of the message to fetch.
            run_manager: Optional callback manager; not used by this tool.

        Returns:
            A dict with the keys "id", "threadId", "snippet", "body",
            "subject" and "sender".
        """
        query = (
            self.api_resource.users()
            .messages()
            .get(userId="me", format="raw", id=message_id)
        )
        message_data = query.execute()
        raw_message = base64.urlsafe_b64decode(message_data["raw"])

        email_msg = email.message_from_bytes(raw_message)

        subject = email_msg["Subject"]
        sender = email_msg["From"]

        message_body = ""
        if email_msg.is_multipart():
            # Use the first plain-text part that is not an attachment.
            for part in email_msg.walk():
                ctype = part.get_content_type()
                cdispo = str(part.get("Content-Disposition"))
                if ctype == "text/plain" and "attachment" not in cdispo:
                    message_body = self._decode_part(part)
                    break
        else:
            message_body = self._decode_part(email_msg)

        body = clean_email_body(message_body)

        return {
            "id": message_id,
            "threadId": message_data["threadId"],
            "snippet": message_data["snippet"],
            "body": body,
            "subject": subject,
            "sender": sender,
        }
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
from typing import Dict, Optional, Type
2+
3+
from langchain_core.callbacks import CallbackManagerForToolRun
4+
from langchain_core.pydantic_v1 import BaseModel, Field
5+
6+
from langchain_google_community.gmail.base import GmailBaseTool
7+
8+
9+
class GetThreadSchema(BaseModel):
    """Input for GetThreadTool."""

    # Required: identifies the thread to fetch.
    thread_id: str = Field(
        ...,
        description="The thread ID.",
    )
17+
18+
19+
class GmailGetThread(GmailBaseTool):
    """Tool that gets a thread by ID from Gmail."""

    name: str = "get_gmail_thread"
    description: str = (
        "Use this tool to fetch an email thread by thread ID."
        " The output is a JSON object describing the thread, including"
        " the ID and snippet of each message in it."
    )
    args_schema: Type[GetThreadSchema] = GetThreadSchema

    def _run(
        self,
        thread_id: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> Dict:
        """Fetch a thread and trim each of its messages to a few fields.

        Args:
            thread_id: ID of the thread to fetch.
            run_manager: Optional callback manager; not used by this tool.

        Returns:
            The thread data returned by the query, with its "messages" entry
            replaced by a list of dicts holding only the "id" and "snippet"
            of each message (when present).

        Raises:
            ValueError: If the query result is not a dict.
        """
        query = self.api_resource.users().threads().get(userId="me", id=thread_id)
        thread_data = query.execute()
        if not isinstance(thread_data, dict):
            raise ValueError("The output of the query must be a dict.")

        keys_to_keep = ("id", "snippet")
        # TODO: Parse body.
        # A thread without a "messages" entry yields an empty list.
        thread_data["messages"] = [
            {k: message[k] for k in keys_to_keep if k in message}
            for message in thread_data.get("messages", [])
        ]
        return thread_data

0 commit comments

Comments
 (0)