
Commit 4da765b

Add diskcache for OpenAI requests; speeds up repeated requests to the OpenAI API by 10 times. (#236)
1 parent 5bcf7a1 commit 4da765b

File tree: 2 files changed (+22, −34 lines)


requirements.txt

Lines changed: 7 additions & 7 deletions
@@ -1,7 +1,7 @@
 datasets>=2.14.2
 rouge-score>=0.0.4
 nlpaug>=1.1.10
-scikit-learn
+scikit-learn>=1.5.1
 tqdm>=4.64.1
 matplotlib>=3.6
 pandas>=1.3.5
@@ -11,7 +11,7 @@ transformers>=4.40
 nltk>=3.6.5
 sacrebleu>=1.5.0
 sentencepiece>=0.1.97
-hf-lfs==0.0.3
+hf-lfs>=0.0.3
 pytest>=4.4.1
 pytreebank>=0.2.7
 setuptools>=60.2.0
@@ -23,15 +23,15 @@ protobuf>=4.23
 fschat>=0.2.3
 hydra-core>=1.3.2
 einops
-accelerate
+accelerate>=0.32.1
 bitsandbytes
-openai==0.28.0
-filelock==3.12.2
+openai>=0.28.0
 wget
 sentence-transformers
-bert-score
+bert-score>=0.3.13
 unbabel-comet==2.2.1
 nltk>=3.7,<4
-evaluate
+evaluate>=0.4.2
 spacy>=3.4.0,<4
 fastchat
+diskcache>=5.6.3
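
For context, a minimal sketch of what the new diskcache dependency provides (the path and values below are illustrative, not from this commit). diskcache is an SQLite-backed, thread- and process-safe key-value store, which is why the separate filelock pin can be dropped:

import diskcache as dc

# Open (or create) a persistent cache directory. Writes go through
# SQLite, so concurrent processes can share the cache without an
# explicit lock file.
cache = dc.Cache("/tmp/demo_openai_cache")

# Any picklable object can serve as a key, so a (model, message)
# tuple works directly.
cache[("gpt-4o", "hello")] = "hi there"

assert ("gpt-4o", "hello") in cache
assert cache[("gpt-4o", "hello")] == "hi there"

cache.close()

Because a lookup touches only the requested key instead of re-reading an entire JSON file under a lock, repeated requests skip both the lock acquisition and the full-file parse, which is where the speedup comes from.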

src/lm_polygraph/utils/openai_chat.py

Lines changed: 15 additions & 27 deletions
@@ -1,10 +1,8 @@
 import openai
-import json
 import os
 import time
 import logging
-
-from filelock import FileLock
+import diskcache as dc
 
 
 log = logging.getLogger()
@@ -17,7 +15,7 @@ class OpenAIChat:
 
     def __init__(
         self,
-        openai_model: str = "gpt-4",
+        openai_model: str = "gpt-4o",
         cache_path: str = os.path.expanduser("~") + "/.cache",
     ):
         """
@@ -31,22 +29,20 @@ def __init__(
         openai.api_key = api_key
         self.openai_model = openai_model
 
-        self.cache_path = os.path.join(cache_path, "openai_chat_cache.json")
-        self.cache_lock = FileLock(self.cache_path + ".lock")
-        with self.cache_lock:
-            if not os.path.exists(self.cache_path):
-                if not os.path.exists(cache_path):
-                    os.makedirs(cache_path)
-                with open(self.cache_path, "w") as f:
-                    json.dump({}, f)
+        self.cache_path = os.path.join(cache_path, "openai_chat_cache.diskcache")
+        if not os.path.exists(cache_path):
+            os.makedirs(cache_path)
 
     def ask(self, message: str) -> str:
-        # check if the message is cached
-        with open(self.cache_path, "r") as f:
-            openai_responses = json.load(f)
+        cache_settings = dc.DEFAULT_SETTINGS.copy()
+        cache_settings["eviction_policy"] = "none"
+        cache_settings["size_limit"] = int(1e12)
+        cache_settings["cull_limit"] = 0
+        openai_responses = dc.Cache(self.cache_path, **cache_settings)
+
+        if (self.openai_model, message) in openai_responses:
+            reply = openai_responses[(self.openai_model, message)]
 
-        if message in openai_responses.get(self.openai_model, {}).keys():
-            reply = openai_responses[self.openai_model][message]
         else:
             # Ask openai
             if openai.api_key is None:
@@ -59,18 +55,10 @@ def ask(self, message: str) -> str:
                 {"role": "user", "content": message},
             ]
             chat = self._send_request(messages)
-
             reply = chat.choices[0].message.content
 
-            # add reply to cache
-            with self.cache_lock:
-                with open(self.cache_path, "r") as f:
-                    openai_responses = json.load(f)
-                if self.openai_model not in openai_responses.keys():
-                    openai_responses[self.openai_model] = {}
-                openai_responses[self.openai_model][message] = reply
-                with open(self.cache_path, "w") as f:
-                    json.dump(openai_responses, f)
+            openai_responses[(self.openai_model, message)] = reply
+        openai_responses.close()
 
         if "please provide" in reply.lower():
             return ""
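
A minimal usage sketch of the updated class (the prompt is hypothetical, and an OpenAI API key is assumed to be configured as OpenAIChat expects): the first ask() call hits the API and persists the reply; repeating the same (model, message) pair is then served from the on-disk cache without a network round-trip.

from lm_polygraph.utils.openai_chat import OpenAIChat

chat = OpenAIChat(openai_model="gpt-4o")

# First call goes to the OpenAI API and stores the reply on disk
# under the key (openai_model, message).
reply = chat.ask("Summarize diskcache in one sentence.")

# The identical request is answered from ~/.cache/openai_chat_cache.diskcache,
# skipping the network entirely.
cached = chat.ask("Summarize diskcache in one sentence.")
assert reply == cached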
