kanninian committed
Commit d6cfea3 · verified · 1 Parent(s): 1f44489

Update app.py

Files changed (1)
  1. app.py +11 -26
app.py CHANGED
@@ -10,9 +10,6 @@ import torch
 import numpy as np
 from qa_vector_store import build_qa_vector_store, retrieve_and_rerank, generate_response_from_local_llm
 
-# Create the FastAPI app
-app = FastAPI()
-
 # Initialize the models and the database
 model_name = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
 collection_name = model_name.split("/")[-1]
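
The hunk above drops the FastAPI entry point but keeps the initialization block, which this diff does not show in full. For orientation, here is a hypothetical sketch of what that block plausibly contains; only the variable names (cross_encoder_model, tokenizer, llm_model, build_qa_vector_store) are confirmed by the rest of the diff, and the checkpoints and the build_qa_vector_store signature are placeholders, not the author's choices:

# Hypothetical initialization sketch; names come from the diff, checkpoints are assumed.
from sentence_transformers import CrossEncoder
from transformers import AutoModelForCausalLM, AutoTokenizer

# Reranker passed to retrieve_and_rerank (assumed checkpoint).
cross_encoder_model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

# Local LLM passed to generate_response_from_local_llm (assumed checkpoint).
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
llm_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")

# Populate the vector store for the embedding model above (assumed signature).
build_qa_vector_store(model_name, collection_name)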
@@ -34,21 +31,15 @@ class SearchResult(BaseModel):
     score: float
 
 # Search + rerank API
-@app.post("/chat", response_model=List[SearchResult])
-def search_and_generate(input: QueryInput):
-    reranked = retrieve_and_rerank(input.query, model_name, collection_name, cross_encoder_model, score_threshold=0.5, search_top_k=20, rerank_top_k=input.top_k)
-
-    # If no relevant answer is found, return a 404 error
-    if not reranked:
-        raise HTTPException(status_code=404, detail="No relevant answer found; please try a different question or lower the threshold.")
-
+def search_and_generate:
+    reranked = retrieve_and_rerank(input.query, model_name, collection_name, cross_encoder_model, score_threshold=0.5, search_top_k=20, rerank_top_k=5)
     final_passages = [r[0] for r in reranked]
 
     # Generate the answer with the LLM
     answer = generate_response_from_local_llm(input.query, final_passages, tokenizer, llm_model, max_new_tokens=256)
 
     if not answer:
-        raise HTTPException(status_code=404, detail="Unable to generate an answer; please check the input or model settings.")
+        return "No data found; please ask again."
     return answer
 
 # demo = gr.ChatInterface(
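
As committed, the new function does not parse: `def search_and_generate:` has no parameter list, and the body still references `input`, which belonged to the deleted FastAPI handler. Since the function is handed to `gr.ChatInterface` at the bottom of the file, and Gradio calls that `fn` as `fn(message, history)`, a runnable version would look roughly like the sketch below. It reuses the module-level names from app.py and restores an empty-result guard in place of the removed 404 check; the fallback wording is taken from the commit's own message string.

def search_and_generate(message, history):
    # Retrieve candidates and rerank them with the cross-encoder.
    reranked = retrieve_and_rerank(
        message, model_name, collection_name, cross_encoder_model,
        score_threshold=0.5, search_top_k=20, rerank_top_k=5,
    )
    # The commit removed the HTTP 404 guard; without it, an empty result
    # would be passed straight to the LLM, so fail softly here instead.
    if not reranked:
        return "No data found; please ask again."

    final_passages = [r[0] for r in reranked]

    # Generate the answer with the local LLM.
    answer = generate_response_from_local_llm(
        message, final_passages, tokenizer, llm_model, max_new_tokens=256
    )
    return answer or "No data found; please ask again."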
@@ -73,22 +64,16 @@ def search_and_generate(input: QueryInput):
 
 import gradio as gr
 
-# def respond(message, history, system_message, max_tokens, temperature, top_p):
-#     try:
-#         llm.temperature = temperature
-#         llm.max_output_tokens = max_tokens
-
-#         search_results = hybrid_search(message)
-
-#         rerank_response = rerank_chunks_with_llm(message, search_results, llm, top_n=3)
-#         reranked_indices = [int(i.strip()) - 1 for i in rerank_response.content.split(",") if i.strip().isdigit()]
-#         reranked_docs = [search_results[i] for i in reranked_indices]
+def respond(message, history, system_message, max_tokens, temperature, top_p):
+    try:
+        llm.temperature = temperature
+        llm.max_output_tokens = max_tokens
 
-#         answer = generate_answer_with_rag(message, reranked_docs, llm)
-#         return answer.content
+        answer = search_and_generate(message)
+        return answer.content
 
-#     except Exception as e:
-#         return f"[Error] {str(e)}"
+    except Exception as e:
+        return f"[Error] {str(e)}"
 
 chat_interface = gr.ChatInterface(
     fn=search_and_generate,
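
Note that the commit defines `respond` but still wires `fn=search_and_generate` into the interface, leaving `respond` as dead code; its body would also fail if called, since it mutates a global `llm` that does not appear in this diff and reads `.content` from `search_and_generate`, which returns a plain string. If `respond` was the intended handler, the wiring would look roughly like the sketch below, using Gradio's `additional_inputs` so the extra parameters are actually supplied; the labels and ranges are assumptions.

def respond(message, history, system_message, max_tokens, temperature, top_p):
    try:
        # The committed body also sets llm.temperature / llm.max_output_tokens,
        # but `llm` is not visible in this diff, so the knobs stay unused here.
        return search_and_generate(message, history)
    except Exception as e:
        return f"[Error] {e}"

# gr.ChatInterface passes (message, history, *additional_inputs) to fn, in order.
chat_interface = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(1, 1024, value=256, step=1, label="Max new tokens"),
        gr.Slider(0.0, 2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(0.0, 1.0, value=0.95, step=0.05, label="Top-p"),
    ],
)
chat_interface.launch()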
 