File size: 6,787 Bytes
f24cd0e
 
 
 
 
 
 
 
e7a54b6
dbae4d1
e7a54b6
 
 
 
f24cd0e
7a790c8
f24cd0e
 
 
 
 
 
 
 
 
 
 
 
 
 
622c74d
f24cd0e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a69dde2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
import json
import random
import pickle
import numpy as np
import re
from flask import Flask, request, jsonify
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# import os

# os.environ['HF_HOME'] = '/tmp/huggingface'
# os.environ['TRANSFORMERS_CACHE'] = '/tmp/huggingface/transformers'
# os.environ['HF_DATASETS_CACHE'] = '/tmp/huggingface/datasets'
# os.environ['HF_METRICS_CACHE'] = '/tmp/huggingface/metrics'


class ImprovedBPJSChatbot:
    """Intent-based chatbot answering BPJS Ketenagakerjaan questions.

    A message is embedded with a sentence-transformer model and classified by
    a pickled classifier. When the classifier is not confident, the message is
    compared by cosine similarity against every known intent pattern. The
    reply is then picked from the responses configured for the chosen tag.
    """

    # Classifier probability below which the similarity fallback is consulted.
    CONFIDENCE_THRESHOLD = 0.6

    # (regex, expansion) pairs applied in this order by preprocess_text.
    _ABBREVIATIONS = (
        (r'\bjkk\b', 'jaminan kecelakaan kerja'),
        (r'\bjkm\b', 'jaminan kematian'),
        (r'\bjht\b', 'jaminan hari tua'),
        (r'\bjp\b', 'jaminan pensiun'),
        (r'\bbpjs\b', 'bpjs ketenagakerjaan'),
    )

    def __init__(self):
        """Load the models first, then the intents (which need the models)."""
        self.load_models()
        self.load_intents()

    def load_models(self):
        """Load the configuration, embedding model, classifier and label encoder.

        Reads ``model_config.pkl``, ``svm_model.pkl`` and ``label_encoder.pkl``
        from the current working directory.

        Raises:
            OSError: if one of the pickle files cannot be opened.
            KeyError: if the config has no ``preprocessing_enabled`` entry.
        """
        print("Memuat model dan konfigurasi...")

        # SECURITY NOTE: pickle.load executes arbitrary code embedded in the
        # file. These three files must only ever come from a trusted source.
        with open('model_config.pkl', 'rb') as f:
            config = pickle.load(f)

        # Sentence-transformer used to embed both patterns and user messages.
        self.st_model = SentenceTransformer("Dyna-99/local-st-model")
        self.preprocessing_enabled = config['preprocessing_enabled']

        # Intent classifier; must expose predict_proba (see
        # get_prediction_confidence).
        with open('svm_model.pkl', 'rb') as f:
            self.clf = pickle.load(f)

        # Maps class indices back to intent tags.
        with open('label_encoder.pkl', 'rb') as f:
            self.label_encoder = pickle.load(f)

        print("Semua model berhasil dimuat!")

    def load_intents(self):
        """Load ``intents.json`` and pre-compute the pattern embeddings.

        Populates:
            tag_responses: mapping of intent tag -> list of responses.
            pattern_tags: tag of each pattern, aligned with pattern_embeddings.
            pattern_embeddings: array with one embedding row per pattern,
                used by similarity_fallback.

        Intents without a ``patterns`` or ``responses`` key are treated as
        having an empty list.
        """
        with open('intents.json', 'r', encoding='utf-8') as f:
            self.intents_data = json.load(f)

        intents = self.intents_data['intents']
        self.tag_responses = {
            intent['tag']: intent.get('responses', []) for intent in intents
        }

        # Collect every pattern (preprocessed the same way user messages are)
        # together with the tag it belongs to.
        patterns = []
        self.pattern_tags = []
        for intent in intents:
            for pattern in intent.get('patterns', []):
                if self.preprocessing_enabled:
                    pattern = self.preprocess_text(pattern)
                patterns.append(pattern)
                self.pattern_tags.append(intent['tag'])

        if patterns:
            # One batched encode call instead of one model call per pattern.
            self.pattern_embeddings = np.asarray(self.st_model.encode(patterns))
        else:
            self.pattern_embeddings = np.array([])

    def preprocess_text(self, text):
        """Normalize text the same way the training data was normalized.

        Lower-cases the text, expands known abbreviations, replaces every
        character that is neither a word character nor whitespace with a
        space, and collapses runs of whitespace.

        NOTE(review): text that already contains "bpjs ketenagakerjaan" ends
        up as "bpjs ketenagakerjaan ketenagakerjaan". This is left unchanged
        on purpose because this step has to mirror the training preprocessing;
        fix it there first if it is unintended.
        """
        text = text.lower()

        # Expand abbreviations.
        for pattern, expansion in self._ABBREVIATIONS:
            text = re.sub(pattern, expansion, text)

        # Drop special characters, then normalize whitespace.
        text = re.sub(r'[^\w\s]', ' ', text)
        text = re.sub(r'\s+', ' ', text).strip()

        return text

    @staticmethod
    def _tokenize(text):
        """Return the set of lower-cased word tokens in *text*."""
        return set(re.findall(r'\w+', text.lower()))

    def get_prediction_confidence(self, msg_embedding):
        """Classify an embedded message.

        Args:
            msg_embedding: embedding reshaped to a single row (1, dim), as
                produced in generate_response.

        Returns:
            ``(predicted_tag, max_prob)``: the most probable intent tag and
            its probability.
        """
        # Row 0 because exactly one message is classified per call.
        probabilities = self.clf.predict_proba(msg_embedding)[0]
        max_prob = np.max(probabilities)
        predicted_class = np.argmax(probabilities)
        # NOTE(review): assumes the position in the probability vector equals
        # the label-encoder code of the class; confirm against training code.
        predicted_tag = self.label_encoder.inverse_transform([predicted_class])[0]

        return predicted_tag, max_prob

    def similarity_fallback(self, msg_embedding, threshold=0.7):
        """Pick an intent by cosine similarity to the known patterns.

        Args:
            msg_embedding: embedding reshaped to a single row (1, dim).
            threshold: minimum similarity for a pattern's tag to be accepted.

        Returns:
            ``(tag, max_similarity)``. ``tag`` is ``'fallback'`` when the best
            similarity is below *threshold* or when no patterns are loaded
            (in which case the similarity is ``0.0``).
        """
        # Without patterns there is nothing to compare against.
        if not self.pattern_tags:
            return 'fallback', 0.0

        similarities = cosine_similarity(msg_embedding, self.pattern_embeddings)[0]
        max_similarity_idx = np.argmax(similarities)
        max_similarity = similarities[max_similarity_idx]

        if max_similarity >= threshold:
            return self.pattern_tags[max_similarity_idx], max_similarity

        return 'fallback', max_similarity

    def get_contextual_response(self, tag, user_message):
        """Choose the response for *tag* that best matches the user message.

        The response sharing the most words with the message wins; ties go to
        the response listed first. If no response shares a word, one is picked
        at random. An unknown tag, or a tag without responses, uses the
        responses of the ``'fallback'`` intent.

        Raises:
            KeyError: if the fallback responses are needed but no
                ``'fallback'`` intent is configured.
        """
        responses = self.tag_responses.get(tag)
        if not responses:
            # Looked up lazily so a missing 'fallback' intent only matters
            # when the fallback is actually needed.
            responses = self.tag_responses['fallback']

        # A single response needs no ranking.
        if len(responses) == 1:
            return responses[0]

        # Tokenize on word characters so trailing punctuation ("jkk?") does
        # not prevent a match.
        user_words = self._tokenize(user_message)
        scores = [len(user_words & self._tokenize(response)) for response in responses]
        best_score = max(scores)

        # Nothing in common with any response: pick one at random.
        if best_score == 0:
            return random.choice(responses)

        # index() returns the first maximum, i.e. the earliest-listed response.
        return responses[scores.index(best_score)]

    def generate_response(self, message):
        """Produce a reply for *message* using the strategies in sequence.

        1. Classifier prediction with its probability.
        2. If the probability is below CONFIDENCE_THRESHOLD, the similarity
           fallback; its tag replaces the prediction when its similarity
           exceeds the classifier probability.
        3. Keyword-based selection among the tag's responses.

        A message that is not a string, or is empty or whitespace-only,
        yields a prompt asking the user to send a message.
        """
        if not isinstance(message, str) or not message.strip():
            return "Tolong kirim sebuah pesan."

        # Preprocess (when enabled) and embed as a single-row matrix.
        processed_msg = self.preprocess_text(message) if self.preprocessing_enabled else message
        msg_embedding = self.st_model.encode(processed_msg).reshape(1, -1)

        # Strategy 1: classifier prediction with confidence.
        predicted_tag, confidence = self.get_prediction_confidence(msg_embedding)

        # Strategy 2: similarity fallback when the classifier is unsure.
        if confidence < self.CONFIDENCE_THRESHOLD:
            fallback_tag, similarity = self.similarity_fallback(msg_embedding)
            if similarity > confidence:
                predicted_tag = fallback_tag

        # Strategy 3: contextual response selection.
        response = self.get_contextual_response(predicted_tag, message)

        # Debug logging.
        print(f"Input: {message}")
        print(f"Processed: {processed_msg}")
        print(f"Predicted tag: {predicted_tag} (confidence: {confidence:.3f})")

        return response

# Build the chatbot once at import time; its constructor loads the models and
# intents, and the /chat handler below reuses this single instance.
chatbot = ImprovedBPJSChatbot()

# Flask application exposing the chatbot over HTTP
app = Flask(__name__)

@app.route('/chat', methods=['POST'])
def chat():
    """Answer a chat message sent as JSON ``{"message": "..."}``.

    Returns JSON ``{"reply": "..."}``. A body that is not JSON, is not a JSON
    object, or has a missing or non-string ``message`` is treated as an empty
    message, so the chatbot answers with its "please send a message" prompt
    rather than a misleading system-error reply. Any unexpected failure is
    logged with its traceback and answered with a generic error reply.
    """
    try:
        # silent=True returns None for a wrong content type or malformed JSON
        # instead of raising.
        payload = request.get_json(silent=True)
        raw_message = payload.get("message", "") if isinstance(payload, dict) else ""
        msg = raw_message.strip() if isinstance(raw_message, str) else ""
        response = chatbot.generate_response(msg)
        return jsonify({"reply": response})
    except Exception:
        # Top-level boundary: keep the service responding, but record the
        # full traceback so the failure can be diagnosed.
        app.logger.exception("Error while handling /chat request")
        return jsonify({"reply": "Maaf, terjadi kesalahan sistem. Silakan coba lagi."})

@app.route('/health', methods=['GET'])
def health():
    """Health-check endpoint: report a static status and model name as JSON."""
    payload = dict(status="healthy", model="BPJS Chatbot Improved")
    return jsonify(payload)

# Start Flask's built-in server only when this file is run as a script.
if __name__ == '__main__':
    app.run(host='0.0.0.0',port=7860, debug=False)  # port changed from 5000 to 7860