Switched to OpenAI instead of Azure OpenAI
Browse files
- app.py +1 -1
- climateqa/engine/llm.py +0 -26
- climateqa/engine/llm/__init__.py +15 -0
- climateqa/engine/llm/azure.py +99 -0
- climateqa/engine/llm/mistral.py +0 -0
- climateqa/engine/llm/openai.py +22 -0
- requirements.txt +3 -1
app.py
CHANGED
|
@@ -90,7 +90,7 @@ def parse_output_llm_with_sources(output):
|
|
| 90 |
|
| 91 |
# Create vectorstore and retriever
|
| 92 |
vectorstore = get_pinecone_vectorstore(embeddings_function)
|
| 93 |
-
llm = get_llm(max_tokens = 1024,temperature = 0.0)
|
| 94 |
|
| 95 |
|
| 96 |
def make_pairs(lst):
|
|
|
|
| 90 |
|
| 91 |
# Create vectorstore and retriever
|
| 92 |
vectorstore = get_pinecone_vectorstore(embeddings_function)
|
| 93 |
+
llm = get_llm(provider="openai",max_tokens = 1024,temperature = 0.0)
|
| 94 |
|
| 95 |
|
| 96 |
def make_pairs(lst):
|
climateqa/engine/llm.py
DELETED
|
@@ -1,26 +0,0 @@
|
|
| 1 |
-
from langchain_community.chat_models import AzureChatOpenAI
|
| 2 |
-
import os
|
| 3 |
-
# LOAD ENVIRONMENT VARIABLES
|
| 4 |
-
try:
|
| 5 |
-
from dotenv import load_dotenv
|
| 6 |
-
load_dotenv()
|
| 7 |
-
except:
|
| 8 |
-
pass
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
def get_llm(max_tokens = 1024,temperature = 0.0,verbose = True,streaming = False, **kwargs):
    """Create an ``AzureChatOpenAI`` chat model configured from the environment.

    The endpoint, API version, deployment name and API key are read from the
    ``AZURE_OPENAI_API_*`` environment variables; a missing variable raises
    ``KeyError``. The request timeout is fixed at 60.

    Args:
        max_tokens: Passed through as ``max_tokens``.
        temperature: Passed through as ``temperature``.
        verbose: Passed through as ``verbose``.
        streaming: Passed through as ``streaming``.
        **kwargs: Extra keyword arguments forwarded to ``AzureChatOpenAI``.

    Returns:
        The constructed ``AzureChatOpenAI`` instance.
    """
    env = os.environ

    # Connection settings, looked up in the same order as they are passed on.
    azure_settings = {
        "openai_api_base": env["AZURE_OPENAI_API_BASE_URL"],
        "openai_api_version": env["AZURE_OPENAI_API_VERSION"],
        "deployment_name": env["AZURE_OPENAI_API_DEPLOYMENT_NAME"],
        "openai_api_key": env["AZURE_OPENAI_API_KEY"],
        "openai_api_type": "azure",
    }

    return AzureChatOpenAI(
        **azure_settings,
        max_tokens=max_tokens,
        temperature=temperature,
        request_timeout=60,
        verbose=verbose,
        streaming=streaming,
        **kwargs,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
climateqa/engine/llm/__init__.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from climateqa.engine.llm.openai import get_llm as get_openai_llm
|
| 2 |
+
from climateqa.engine.llm.azure import get_llm as get_azure_llm
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def get_llm(provider="openai",**kwargs):
    """Build a chat model for the requested backend.

    Args:
        provider: Backend name, ``"openai"`` (default) or ``"azure"``.
            Matching ignores letter case and surrounding whitespace.
        **kwargs: Forwarded unchanged to the selected backend's ``get_llm``.

    Returns:
        Whatever the selected backend's ``get_llm`` returns.

    Raises:
        ValueError: If ``provider`` does not name a supported backend.
    """
    # Normalise strings only, so None or other objects still reach the
    # ValueError below instead of failing with AttributeError.
    key = provider.strip().lower() if isinstance(provider, str) else provider

    if key == "openai":
        return get_openai_llm(**kwargs)
    if key == "azure":
        return get_azure_llm(**kwargs)

    # Report the value exactly as the caller supplied it.
    raise ValueError(f"Unknown provider: {provider}")
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
|
climateqa/engine/llm/azure.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import time
|
| 3 |
+
|
| 4 |
+
from langchain_openai import AzureChatOpenAI
|
| 5 |
+
from msal import ConfidentialClientApplication
|
| 6 |
+
|
| 7 |
+
# Default token duration is 1 hour (3600 s); refresh after 3300 s so the
# cached token is replaced before that hour is up.
DEFAULT_TOKEN_UPDATE_FREQUENCY = 3300

# Best-effort load of a .env file: any failure (including python-dotenv not
# being installed) is deliberately ignored.
try:
    from dotenv import load_dotenv

    load_dotenv()
except Exception:
    pass


# Azure AD client-credential settings; each is None when its variable is unset.
client_id = os.environ.get("AZURE_CLIENT_ID")
client_credential = os.environ.get("AZURE_CLIENT_CREDENTIAL")
tenant_name = os.environ.get("AZURE_TENANT_NAME")
# Always a one-element list; the element is None when AZURE_SCOPE is unset.
scopes = [os.environ.get("AZURE_SCOPE")]

# Seconds a cached token is reused before a new one is requested.
azure_ad_token_frequency = int(
    os.environ.get("TOKEN_UPDATE_FREQUENCY", DEFAULT_TOKEN_UPDATE_FREQUENCY)
)

# Token cache: the last token obtained and the time.time() value recorded
# when it was stored.
azure_ad_token = None
azure_ad_token_timestamp = 0.0
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def _get_azure_ad_token():
    """Return an Azure AD access token, reusing the module-level cached one.

    The cached token is returned while it is younger than
    ``azure_ad_token_frequency`` seconds. Otherwise a new token is requested
    via ``ConfidentialClientApplication.acquire_token_for_client`` using the
    module-level ``client_id``, ``client_credential``, ``tenant_name`` and
    ``scopes``.

    Returns:
        The access token string.

    Raises:
        ValueError: If the response carries no ``access_token``. The message
            includes the response's ``error`` / ``error_description`` fields
            when they are present.
    """
    global azure_ad_token
    global azure_ad_token_timestamp
    now = time.time()

    # Return current token if not outdated:
    if (azure_ad_token is not None) and (
        azure_ad_token_timestamp + azure_ad_token_frequency > now
    ):
        print("Using current token (not expired)...")
        return azure_ad_token

    # Else, generate a new token:
    print("Generating new token...")
    app = ConfidentialClientApplication(
        client_id=client_id,
        client_credential=client_credential,
        authority=f"https://login.microsoftonline.com/{tenant_name}",
    )
    result = app.acquire_token_for_client(scopes=scopes)
    if "access_token" not in result:
        # Surface whatever diagnostics the response provides instead of
        # discarding them; the original message is kept as the prefix.
        details = ": ".join(
            str(result[field])
            for field in ("error", "error_description")
            if result.get(field)
        )
        message = "No access token in result"
        if details:
            message = f"{message}: {details}"
        raise ValueError(message)

    if result["access_token"] != azure_ad_token:
        print("New token received.")
        azure_ad_token = result["access_token"]
        azure_ad_token_timestamp = now
    else:
        # The timestamp is intentionally left untouched here, so the next call
        # asks again until a different token is actually returned.
        print("Same token received.")

    return azure_ad_token
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def get_llm(
    max_tokens: int = 1024,
    temperature: float = 0.0,
    verbose: bool = True,
    streaming: bool = False,
    **kwargs,
) -> AzureChatOpenAI:
    """Create an ``AzureChatOpenAI`` model using one of two auth set-ups.

    * Azure AD token: chosen when ``AZURE_CLIENT_ID``,
      ``AZURE_CLIENT_CREDENTIAL`` and ``AZURE_TENANT_NAME`` were all found at
      import time. Requires ``AZURE_OPENAI_API_BASE_URL`` and hands
      ``_get_azure_ad_token`` over as the token provider.
    * API key: chosen otherwise. Requires
      ``AZURE_OPENAI_API_DEPLOYMENT_NAME``.

    Per the original author's notes, ``OPENAI_API_VERSION`` (both set-ups) and
    ``AZURE_OPENAI_ENDPOINT`` / ``AZURE_OPENAI_API_KEY`` (API-key set-up) are
    picked up from the environment automatically rather than passed here.

    Args:
        max_tokens: Passed through as ``max_tokens``.
        temperature: Passed through as ``temperature``.
        verbose: Passed through as ``verbose``.
        streaming: Passed through as ``streaming``.
        **kwargs: Extra keyword arguments forwarded to ``AzureChatOpenAI``.

    Returns:
        The constructed ``AzureChatOpenAI`` instance.

    Raises:
        KeyError: If the environment variable required by the selected
            set-up is missing.
    """
    ad_credentials = (client_id, client_credential, tenant_name)
    use_ad_token = all(value is not None for value in ad_credentials)

    if use_ad_token:
        print("Using Azure AD token")
        auth_settings = {
            "openai_api_type": "azure",
            "openai_api_base": os.environ["AZURE_OPENAI_API_BASE_URL"],
            # Passed as a callable, not called here.
            "azure_ad_token_provider": _get_azure_ad_token,
        }
    else:
        print("Using AZURE_OPENAI_API_DEPLOYMENT_NAME and AZURE_OPENAI_API_KEY")
        auth_settings = {
            "openai_api_type": "azure",
            "deployment_name": os.environ["AZURE_OPENAI_API_DEPLOYMENT_NAME"],
        }

    return AzureChatOpenAI(
        **auth_settings,
        max_tokens=max_tokens,
        temperature=temperature,
        verbose=verbose,
        streaming=streaming,
        **kwargs,
    )
|
climateqa/engine/llm/mistral.py
ADDED
|
File without changes
|
climateqa/engine/llm/openai.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_openai import ChatOpenAI
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
try:
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
load_dotenv()
|
| 7 |
+
except Exception:
|
| 8 |
+
pass
|
| 9 |
+
|
| 10 |
+
def get_llm(model="gpt-3.5-turbo-0125",max_tokens=1024, temperature=0.0, streaming=True,timeout=30, **kwargs):
    """Create a ``ChatOpenAI`` chat model.

    The API key is read from the ``THEO_API_KEY`` environment variable and is
    ``None`` when that variable is unset.
    NOTE(review): presumably ``ChatOpenAI`` then applies its own default key
    lookup; confirm against the langchain_openai documentation.

    Args:
        model: Model name passed to ``ChatOpenAI``.
        max_tokens: Passed through as ``max_tokens``.
        temperature: Passed through as ``temperature``.
        streaming: Passed through as ``streaming``.
        timeout: Passed through as ``timeout``.
        **kwargs: Extra keyword arguments forwarded to ``ChatOpenAI``.

    Returns:
        The constructed ``ChatOpenAI`` instance.
    """
    key_from_env = os.environ.get("THEO_API_KEY", None)

    return ChatOpenAI(
        model=model,
        api_key=key_from_env,
        max_tokens=max_tokens,
        streaming=streaming,
        temperature=temperature,
        timeout=timeout,
        **kwargs,
    )
|
requirements.txt
CHANGED
|
@@ -4,6 +4,8 @@ azure-storage-file-share==12.11.1
|
|
| 4 |
azure-storage-blob
|
| 5 |
python-dotenv==1.0.0
|
| 6 |
langchain==0.1.4
|
|
|
|
| 7 |
pinecone-client==3.0.2
|
| 8 |
sentence-transformers
|
| 9 |
-
huggingface-hub
|
|
|
|
|
|
| 4 |
azure-storage-blob
|
| 5 |
python-dotenv==1.0.0
|
| 6 |
langchain==0.1.4
|
| 7 |
+
langchain_openai==0.0.6
|
| 8 |
pinecone-client==3.0.2
|
| 9 |
sentence-transformers
|
| 10 |
+
huggingface-hub
|
| 11 |
+
msal
|