Update app.py
app.py CHANGED
@@ -3,12 +3,6 @@ import gradio as gr
 import shutil
 import os
 import subprocess
-import sys
-import platform
-import transformers
-import numpy
-import torch
-
 from llama_cpp import Llama
 from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Settings
 from llama_index.core.llms import ChatMessage
@@ -17,14 +11,31 @@ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 from huggingface_hub import hf_hub_download
 from llama_index.core.node_parser import SentenceSplitter
 
-# Function to
-def
-
-
-
-
-
-
+# Function to reinstall llama-cpp-python with CUDA support
+def install_llama_with_cuda():
+    try:
+        # Read the requirements.txt file
+        with open("requirements.txt", "r") as f:
+            packages = f.read().splitlines()
+
+        # Install each package with CMAKE_ARGS for CUDA support
+        for package in packages:
+            subprocess.run(
+                ["pip", "install", package],
+                env={**os.environ, "CMAKE_ARGS": "-DGGML_CUDA=on"}, check=True
+            )
+        # Check whether the CUDA Toolkit is available
+        if not shutil.which("nvcc"):
+            print("CUDA Toolkit not found. Make sure it is installed.")
+            return
+
+        print("Reinstalling llama-cpp-python with CUDA support...")
+
+        print("llama-cpp-python reinstalled successfully with CUDA support.")
+    except subprocess.CalledProcessError as e:
+        print(f"Error while reinstalling llama-cpp-python: {e}")
+    except Exception as e:
+        print(f"General error: {e}")
 
 # Function to download the Llama model
 def initialize_llama_model():
@@ -36,19 +47,14 @@ def initialize_llama_model():
     )
     return model_path
 
-# Function to configure Settings
+# Function to configure Settings
 def initialize_settings(model_path):
-
+    Settings.llm = Llama(
         model_path=model_path,
-
+        n_gpu_layers=1,  # Adjust to your device's needs
         temperature=0.7,  # Tune for faster responses
        top_p=0.9  # Reduce token exploration
     )
-# Make sure the created Llama model is as expected
-print(f"Model type: {type(llama_model)}")
-
-# Store the model in Settings
-Settings.llm = llama_model
 
 # Function to initialize the index
 def initialize_index():
@@ -94,7 +100,7 @@ def generate_response(message, history, chat_engine):
 
 def clear_history(chat_engine):
     chat_engine.clear()
-
+
 # Initialize the Gradio UI components
 def launch_gradio(chat_engine):
     with gr.Blocks() as demo:
@@ -110,7 +116,7 @@ def launch_gradio(chat_engine):
 
 # Main function to run the application
 def main():
-
+    install_llama_with_cuda()
     # Download the model and initialize settings
     model_path = initialize_llama_model()
     initialize_settings(model_path)  # Pass model_path to initialize_settings
@@ -119,6 +125,6 @@ def main():
     chat_engine = initialize_chat_engine(index)
     # Launch the interface
     launch_gradio(chat_engine)
-
+
 if __name__ == "__main__":
     main()
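A note on the new install_llama_with_cuda(): the command inside the loop does not survive in the rendered diff, and pip with CMAKE_ARGS in the environment is the natural reading. More conventionally, the CUDA rebuild is done once, directly against llama-cpp-python, with the flags its docs give for a forced source build. A minimal sketch of that step (the helper name here is hypothetical):

    import os
    import subprocess

    def rebuild_llama_cpp_with_cuda():
        # CMAKE_ARGS is read at build time by llama-cpp-python's build backend;
        # -DGGML_CUDA=on compiles the CUDA kernels into the wheel.
        env = {**os.environ, "CMAKE_ARGS": "-DGGML_CUDA=on"}
        subprocess.run(
            ["pip", "install", "llama-cpp-python",
             "--force-reinstall", "--no-cache-dir"],
            env=env,
            check=True,
        )

Extending the inherited environment (rather than replacing it, as the committed code does) matters here: a bare env dict would drop PATH and the pip subprocess would not start.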
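One caveat with the new initialize_settings: Settings.llm is assigned a raw llama_cpp.Llama object, while LlamaIndex chat engines expect one of LlamaIndex's own LLM wrappers. A hedged sketch of the wrapper route, assuming the llama-index-llms-llama-cpp integration is installed (n_gpu_layers travels via model_kwargs in that package):

    from llama_index.core import Settings
    from llama_index.llms.llama_cpp import LlamaCPP

    # Wrap the GGUF model so Settings.llm satisfies LlamaIndex's LLM interface.
    Settings.llm = LlamaCPP(
        model_path=model_path,             # path from initialize_llama_model()
        temperature=0.7,
        model_kwargs={"n_gpu_layers": 1},  # forwarded to llama_cpp.Llama
    )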