Spaces:

liloge
/

Group7

Sleeping

App Files Files Community

loge-dot committed on Mar 3

Commit

d88ff3b

1 Parent(s): 06f70ac

change1

Browse files

Files changed (13) hide show

app.py +13 -9
components/__pycache__/__init__.cpython-313.pyc +0 -0
components/__pycache__/audio_player.cpython-313.pyc +0 -0
components/__pycache__/debug_tools.cpython-313.pyc +0 -0
components/__pycache__/visualizations.cpython-313.pyc +0 -0
pages/__pycache__/__init__.cpython-313.pyc +0 -0
pages/__pycache__/chatbot.cpython-313.pyc +0 -0
pages/__pycache__/emotion_analyzer.cpython-313.pyc +0 -0
pages/chatbot.py +1 -1
utils/__pycache__/__init__.cpython-313.pyc +0 -0
utils/__pycache__/audio_processing.cpython-313.pyc +0 -0
utils/__pycache__/model_inference.cpython-313.pyc +0 -0
utils/model_inference.py +75 -23

app.py CHANGED Viewed

@@ -1,12 +1,21 @@
 import streamlit as st
 import os
-import sys
 from pathlib import Path
 # 确保能找到项目模块
 sys.path.append(str(Path(__file__).parent))
-from pages import emotion_analyzer, chatbot  # 导入情绪分析页面和 Chatbot 页面
 def main():
     st.set_page_config(
@@ -18,13 +27,8 @@ def main():
     st.title("Audio Emotion Recognition System")
     st.write("This is a web application for audio emotion recognition.")
-    # 选择页面
-    page = st.sidebar.selectbox("Select a page", ["Emotion Analyzer", "Chatbot"])
-    if page == "Emotion Analyzer":
-        emotion_analyzer.show()
-    elif page == "Chatbot":
-        chatbot.show_chatbot()
 if __name__ == "__main__":
     main()

+import asyncio
+import sys
 import streamlit as st
 import os
 from pathlib import Path
+try:
+    asyncio.get_running_loop()
+except RuntimeError:
+    asyncio.set_event_loop(asyncio.new_event_loop())
+sys.path.append(str(Path(__file__).parent))
 # 确保能找到项目模块
 sys.path.append(str(Path(__file__).parent))
+from pages import emotion_analyzer # 导入情绪分析页面和 Chatbot 页面
 def main():
     st.set_page_config(
     st.title("Audio Emotion Recognition System")
     st.write("This is a web application for audio emotion recognition.")
+    # 先只测试情绪分析页面
+    emotion_analyzer.show()
 if __name__ == "__main__":
     main()

components/__pycache__/__init__.cpython-313.pyc CHANGED Viewed

Binary files a/components/__pycache__/__init__.cpython-313.pyc and b/components/__pycache__/__init__.cpython-313.pyc differ

components/__pycache__/audio_player.cpython-313.pyc CHANGED Viewed

Binary files a/components/__pycache__/audio_player.cpython-313.pyc and b/components/__pycache__/audio_player.cpython-313.pyc differ

components/__pycache__/debug_tools.cpython-313.pyc CHANGED Viewed

Binary files a/components/__pycache__/debug_tools.cpython-313.pyc and b/components/__pycache__/debug_tools.cpython-313.pyc differ

components/__pycache__/visualizations.cpython-313.pyc CHANGED Viewed

Binary files a/components/__pycache__/visualizations.cpython-313.pyc and b/components/__pycache__/visualizations.cpython-313.pyc differ

pages/__pycache__/__init__.cpython-313.pyc CHANGED Viewed

Binary files a/pages/__pycache__/__init__.cpython-313.pyc and b/pages/__pycache__/__init__.cpython-313.pyc differ

pages/__pycache__/chatbot.cpython-313.pyc CHANGED Viewed

Binary files a/pages/__pycache__/chatbot.cpython-313.pyc and b/pages/__pycache__/chatbot.cpython-313.pyc differ

pages/__pycache__/emotion_analyzer.cpython-313.pyc CHANGED Viewed

Binary files a/pages/__pycache__/emotion_analyzer.cpython-313.pyc and b/pages/__pycache__/emotion_analyzer.cpython-313.pyc differ

pages/chatbot.py CHANGED Viewed

@@ -12,7 +12,7 @@ from utils import model_inference
 import os
 # 加载环境变量
-load_dotenv(".env")
 api_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
 api_key = os.getenv("AZURE_OPENAI_API_KEY")
 api_version = os.getenv("AZURE_OPENAI_API_VERSION")

 import os
 # 加载环境变量
+load_dotenv("Group7/.env")
 api_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
 api_key = os.getenv("AZURE_OPENAI_API_KEY")
 api_version = os.getenv("AZURE_OPENAI_API_VERSION")

utils/__pycache__/__init__.cpython-313.pyc CHANGED Viewed

Binary files a/utils/__pycache__/__init__.cpython-313.pyc and b/utils/__pycache__/__init__.cpython-313.pyc differ

utils/__pycache__/audio_processing.cpython-313.pyc CHANGED Viewed

Binary files a/utils/__pycache__/audio_processing.cpython-313.pyc and b/utils/__pycache__/audio_processing.cpython-313.pyc differ

utils/__pycache__/model_inference.cpython-313.pyc CHANGED Viewed

Binary files a/utils/__pycache__/model_inference.cpython-313.pyc and b/utils/__pycache__/model_inference.cpython-313.pyc differ

utils/model_inference.py CHANGED Viewed

@@ -4,52 +4,104 @@ import os
 from transformers import AutoTokenizer, BertModel, Wav2Vec2Model
 from utils.audio_processing import AudioProcessor
 import torchaudio
 import torch.nn.functional as F
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file
 # 下载模型
-model_path = hf_hub_download(repo_id="liloge/Group7_model_test", filename="model.safetensors")
-class MultimodalClassifier(torch.nn.Module):
-    def __init__(self):
-        super(MultimodalClassifier, self).__init__()
-        self.bert = BertModel.from_pretrained("bert-base-uncased")
-        self.wav2vec2 = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-base-960h")
-        self.classifier = torch.nn.Sequential(
-            torch.nn.Linear(self.bert.config.hidden_size + self.wav2vec2.config.hidden_size, 256),
-            torch.nn.ReLU(),
-            torch.nn.Dropout(0.7),
-            torch.nn.Linear(256, 7)  # 7分类任务
         )
     def forward(self, text_input, audio_input):
         text_outputs = self.bert(**text_input, output_hidden_states=True)
-        text_features = text_outputs.hidden_states[-1][:, 0, :]  # [CLS] token
         audio_outputs = self.wav2vec2(audio_input, output_hidden_states=True)
         audio_features = audio_outputs.hidden_states[-1][:, 0, :]
         combined_features = torch.cat((text_features, audio_features), dim=-1)
         logits = self.classifier(combined_features)
         return logits
-# 加载模型
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model = MultimodalClassifier().to(device)
-# 加载 SafeTensors 权重
-state_dict = load_file(model_path)
-print("state_dict:", state_dict)
-model.load_state_dict(state_dict)
-model.eval()  # 设置为评估模式
 tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
 def preprocess_text(text):
-    return tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128).to(device)
 def preprocess_audio(audio_path):
     waveform, sample_rate = torchaudio.load(audio_path)
     waveform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)(waveform)
     return waveform.to(device)
 labels = ["Neutral", "Happy", "Sad", "Angry", "Fearful", "Disgusted", "Surprised"]
@@ -59,7 +111,7 @@ def predict_emotion(text, audio):
     audio_inputs = preprocess_audio(audio)
     with torch.no_grad():
-        output = model(audio_inputs,text_inputs)  # (1, 7) logits
         probabilities = F.softmax(output, dim=1).squeeze().tolist()  # 归一化为概率
     return {labels[i]: f"{probabilities[i]*100:.2f}%" for i in range(len(labels))}
@@ -87,4 +139,4 @@ def save_history(audio_file, transcript, emotions, probabilities):
     })
     with open(history_file, 'w') as f:
-        json.dump(history, f, indent=4)

 from transformers import AutoTokenizer, BertModel, Wav2Vec2Model
 from utils.audio_processing import AudioProcessor
 import torchaudio
+import torch.nn as nn
 import torch.nn.functional as F
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file
+from transformers import AutoModelForSequenceClassification, AutoConfig, Wav2Vec2ForPreTraining
 # 下载模型
+# huggingface_hub 仓库下载
+# model_path = hf_hub_download(repo_id="liloge/Group7_model_test", filename="model.safetensors")
+# 本地下载
+import torch
+import torch.nn as nn
+from transformers import AutoModelForSequenceClassification, AutoConfig, Wav2Vec2ForPreTraining
+class MultimodalClassifier(nn.Module):
+    def __init__(self, bert_ckpt_path, wav2vec2_config_path, wav2vec2_safetensors_path):
+        super().__init__()
+        # **加载微调后的 BERT**
+        self.bert = AutoModelForSequenceClassification.from_pretrained(
+            "bert-base-uncased", num_labels=7
+        )
+        self.bert.classifier = nn.Sequential(
+            nn.Dropout(0.5),
+            nn.Linear(self.bert.config.hidden_size, self.bert.config.num_labels)
+        )
+        try:
+            self.bert.load_state_dict(torch.load(bert_ckpt_path, map_location=torch.device("cpu")), strict=True)
+        except Exception as e:
+            print(f"❌ 加载 `{bert_ckpt_path}` 失败: {e}")
+        # **先加载 Wav2Vec2**
+        config = AutoConfig.from_pretrained(wav2vec2_config_path, num_labels=7)
+        self.wav2vec2 = Wav2Vec2ForPreTraining.from_pretrained("facebook/wav2vec2-base", config=config)
+        # **再修改 Wav2Vec2 的分类头**
+        self.wav2vec2.classifier = nn.Sequential(
+            nn.Dropout(0.5),
+            nn.Linear(self.wav2vec2.config.hidden_size, self.wav2vec2.config.num_labels)
+        )
+        # **加载 safetensors 权重**
+        from safetensors.torch import load_file
+        state_dict = load_file(wav2vec2_safetensors_path)
+        try:
+            self.wav2vec2.load_state_dict(state_dict, strict=False)
+        except Exception as e:
+            print(f"❌ 加载 `{wav2vec2_safetensors_path}` 失败: {e}")
+        # **拼接特征的分类头**
+        self.classifier = nn.Sequential(
+            nn.Linear(self.bert.config.hidden_size + self.wav2vec2.config.hidden_size, 256),
+            nn.ReLU(),
+            nn.Dropout(0.7),
+            nn.Linear(256, 7)  # 7分类任务
         )
     def forward(self, text_input, audio_input):
+        # **文本特征**
         text_outputs = self.bert(**text_input, output_hidden_states=True)
+        text_features = text_outputs.hidden_states[-1][:, 0, :]
+        # **音频特征**
         audio_outputs = self.wav2vec2(audio_input, output_hidden_states=True)
         audio_features = audio_outputs.hidden_states[-1][:, 0, :]
+        # **拼接特征**
         combined_features = torch.cat((text_features, audio_features), dim=-1)
+        # **分类**
         logits = self.classifier(combined_features)
         return logits
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# **定义路径**
+bert_ckpt_path = "bert_meld_finetune_model.pth"
+wav2vec2_config_path = "config.json"
+wav2vec2_safetensors_path = "wav2vec2.safetensors"
+# **加载模型**
+model = MultimodalClassifier(bert_ckpt_path, wav2vec2_config_path, wav2vec2_safetensors_path).to(device)
+model.eval()
+print("✅ 微调的 BERT + Wav2Vec2 模型加载成功！")
 tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
 def preprocess_text(text):
+    text_inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
+    print(text_inputs)
+    return text_inputs.to(device)
 def preprocess_audio(audio_path):
     waveform, sample_rate = torchaudio.load(audio_path)
     waveform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)(waveform)
+    print(waveform)
     return waveform.to(device)
 labels = ["Neutral", "Happy", "Sad", "Angry", "Fearful", "Disgusted", "Surprised"]
     audio_inputs = preprocess_audio(audio)
     with torch.no_grad():
+        output = model(text_input=text_inputs, audio_input=audio_inputs)  # (1, 7) logits
         probabilities = F.softmax(output, dim=1).squeeze().tolist()  # 归一化为概率
     return {labels[i]: f"{probabilities[i]*100:.2f}%" for i in range(len(labels))}
     })
     with open(history_file, 'w') as f:
+        json.dump(history, f, indent=4)