# CNC_QA / app.py
# Author: Keisuke Yamanaka - CNC
# update app.py (commit 374c014)
import gradio as gr
import os
import time
import sys
import pinecone
from openai import OpenAI
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import CharacterTextSplitter
import glob
from langdetect import detect
from googletrans import Translator
class CNC_QA:
    """Retrieval-augmented QA bot for Mitsubishi CNC manuals.

    Embeds the PDF manuals under ./Doc into a Pinecone index and answers
    questions through a LangChain ``RetrievalQA`` chain backed by an OpenAI
    chat model. Non-English questions are translated to English before
    retrieval and the answer is translated back.
    """

    def __init__(self):
        print("Initialing CNC_QA ")
        # Enable allocation tracing for memory diagnostics.
        # BUG FIX: original set 'PYTHINTRACEMALLOC' (typo); the variable
        # Python actually honours is PYTHONTRACEMALLOC.
        os.environ['PYTHONTRACEMALLOC'] = '1'
        # API keys are expected in the environment:
        # OPENAI_API_KEY and PINECONE_API_KEY.
        self.PINECONE_INDEX = "meldas"
        self.PINECONE_ENV = "gcp-starter"
        # Flipped to True by initialize_vectorstore() when the index is newly
        # created, so the source PDFs are ingested exactly once.
        self.add_files = False
        self.files = glob.glob(f'./Doc/*')
        self.vectorstore = self.initialize_vectorstore(index_name=self.PINECONE_INDEX)
        if self.add_files:
            self.add_documents()
        llm = self.load_llm(model_id="gpt-4o")
        # 'stuff' chain: all retrieved chunks are stuffed into one prompt.
        self.bot = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type='stuff',
            retriever=self.vectorstore.as_retriever(),
            return_source_documents=True,
        )

    def load_embedding_model(self, model_name):
        """Return an OpenAIEmbeddings instance for *model_name*."""
        print(f'loading embedding model:{model_name}')
        embeddings = OpenAIEmbeddings(
            model=model_name,
        )
        return embeddings

    def initialize_vectorstore(self, index_name):
        """Connect to the Pinecone index (creating it if missing).

        Returns a ``PineconeVectorStore`` wrapping the index. Side effect:
        sets ``self.add_files = True`` when the index had to be created.
        """
        model_name = "text-embedding-3-small"
        embeddings = self.load_embedding_model(model_name=model_name)
        print(f'loading vectorstore:{index_name}')
        self.pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))
        existing_indexes = [index_info["name"] for index_info in self.pc.list_indexes()]
        if index_name not in existing_indexes:
            print(f'Index:{self.PINECONE_INDEX} is not found....')
            print(f'Creating new Index:{self.PINECONE_INDEX}')
            self.add_files = True
            # dimension=1536 matches text-embedding-3-small's output size.
            self.pc.create_index(
                name=self.PINECONE_INDEX,
                dimension=1536,
                metric="cosine",
                spec=ServerlessSpec(cloud="aws", region="us-east-1"),
            )
            # Poll until the serverless index is ready to accept requests.
            while not self.pc.describe_index(self.PINECONE_INDEX).status["ready"]:
                time.sleep(1)
            print(f'Created new Index:{self.PINECONE_INDEX}')
        self.show_index()
        index = self.pc.Index(self.PINECONE_INDEX)
        vectorstore = PineconeVectorStore(index=index, embedding=embeddings)
        return vectorstore

    def show_index(self):
        """Wait for the index to be ready and print its statistics."""
        print(f'detail of Index:{self.PINECONE_INDEX}')
        index = self.pc.Index(self.PINECONE_INDEX)
        while not self.pc.describe_index(self.PINECONE_INDEX).status["ready"]:
            time.sleep(1)
        print(index.describe_index_stats())

    def delete_documents(self):
        """Delete the whole Pinecone index."""
        print('delete documents.....')
        self.pc.delete_index(self.PINECONE_INDEX)
        # BUG FIX: the original ended with `self.show_index` (no parentheses),
        # a silent no-op; calling it would fail anyway because the index no
        # longer exists, so just confirm the deletion.
        print(f'Index:{self.PINECONE_INDEX} deleted')

    def add_documents(self):
        """Load every file in ``self.files`` as PDF, chunk it, and upsert it."""
        print(f'add documents.....')
        for i, file in enumerate(self.files):
            print(f'{i+1}/{len(self.files)}:{file}')
            loader = PyPDFLoader(file)
            documents = loader.load_and_split()
            # Re-split with overlap so context is not cut at chunk edges.
            text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
            docs = text_splitter.split_documents(documents)
            self.vectorstore.add_documents(documents=docs)
        print('Documents are loaded on database')
        self.show_index()

    def load_llm(self, model_id):
        """Return a deterministic (temperature=0) ChatOpenAI for *model_id*."""
        print(f'load llm:{model_id}')
        llm = ChatOpenAI(
            model=model_id,
            temperature=0,
            max_tokens=None,
            timeout=None,
            max_retries=2,
        )
        return llm

    def echo(self, message, history):
        """Gradio chat handler: translate, query the QA chain, format the reply.

        Special commands: 'meldas_del' wipes the index; "Who are you?" returns
        a canned introduction. ``history`` is supplied by Gradio and unused.
        """
        if message == 'meldas_del':
            # Maintenance command; the command text itself is echoed back.
            self.delete_documents()
        elif message == "Who are you?":
            message = "I am MELDAS. I will answer your question about Mitsubishi CNC"
        else:
            # Query in English, then translate the answer back to the
            # language the question was asked in.
            text_en, lang_original = self.translate_message(message, 'en')
            ans = self.bot(text_en)
            try:
                result, lang = self.translate_message(ans['result'], lang_original)
            except Exception:
                # Was a bare `except:`; keep the best-effort fallback but stop
                # swallowing KeyboardInterrupt/SystemExit.
                result = ''
            # Guard against a missing/empty source list instead of bare excepts.
            source_docs = ans.get('source_documents', [])
            source = source_docs[0].metadata.get('source', '') if source_docs else ''
            page_content = source_docs[0].page_content if source_docs else ''
            OtherSource = ''
            for source_documents in source_docs:
                OtherSource += source_documents.metadata['source'] + '\n'
            message = result + '\n\n' + '■Document\n' + source + '\n\n' + '■Page\n' + page_content + '\n\n' + '■References Documents\n' + OtherSource
        return message

    def translate_message(self, message, lang_dest):
        """Translate *message* to *lang_dest*.

        Returns ``(translated_text, detected_source_language)``.
        """
        import asyncio  # local import: only needed for this one call

        async def translate_text(text, dest='en'):
            # googletrans >= 4 exposes an async Translator context manager;
            # e.g. <Translated src=ko dest=en text=Good evening. ...>
            async with Translator() as translator:
                result = await translator.translate(text, dest=dest)
                print(result)
                return result.text, result.src

        text, lang = asyncio.run(translate_text(text=message, dest=lang_dest))
        # Log "source language -> target language" (message is in Japanese).
        print(f'元言語:{lang} -> 翻訳言語:{lang_dest}')
        print(message)
        print(text)
        return text, lang
if __name__ == "__main__":
    print("start")
    # Build the QA bot (connects to Pinecone/OpenAI on construction) and
    # expose its chat handler through a Gradio chat UI.
    meldas = CNC_QA()
    demo = gr.ChatInterface(fn=meldas.echo, examples=["Who are you?"], title="MELDAS AI")
    demo.launch(debug=True)