Upload folder using huggingface_hub

Files changed (8) hide show

README.md ADDED Viewed

+---
+language: zh
+license: mit
+tags:
+- bert
+- sentiment-analysis
+- chinese
+datasets:
+- custom
+---
+# 中文情感分析模型
+這是一個基於 BERT 的中文情感分析模型，可用於判斷文本的情感傾向（正面、負面或中性）。
+## 模型描述
+- 模型基於 bert-base-chinese 微調
+- 適用於中文文本的情感分析
+- 輸出標籤：0（負面），1（正面），2（中性）
+- 使用 Focal Loss 訓練，以處理類別不平衡問題
+## 使用方法
+```python
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+import torch
+# 載入模型和分詞器
+model = AutoModelForSequenceClassification.from_pretrained("jackietung/bert-base-chinese-sentiment-finetuned")
+tokenizer = AutoTokenizer.from_pretrained("jackietung/bert-base-chinese-sentiment-finetuned")
+# 準備輸入
+text = "這個App使用體驗很差！"
+inputs = tokenizer(text, return_tensors="pt")
+# 進行預測
+with torch.no_grad():
+    outputs = model(**inputs)
+    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
+    # 獲取預測結果
+    label_names = ["負面", "正面", "中性"]
+    predicted_class = torch.argmax(predictions, dim=1).item()
+    print(f"預測類別: {label_names[predicted_class]}")
+    print(f"預測分數: {predictions[0][predicted_class].item():.4f}")
+    # 顯示所有類別的分數
+    for i, label in enumerate(label_names):
+        print(f"{label} 分數: {predictions[0][i].item():.4f}")

config.json ADDED Viewed

+{
+  "_name_or_path": "bert-base-chinese",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "\u8ca0\u9762",
+    "1": "\u6b63\u9762",
+    "2": "\u4e2d\u6027"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "\u4e2d\u6027": 2,
+    "\u6b63\u9762": 1,
+    "\u8ca0\u9762": 0
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.3",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 21128
+}

examples.py ADDED Viewed

+from transformers import pipeline
+# 載入情感分析管道
+classifier = pipeline(
+    "sentiment-analysis",
+    model="jackietung/bert-base-chinese-sentiment-finetuned",
+    return_all_scores=True
+)
+# 測試文本
+texts = [
+    "這款 App 的界面設計非常直觀，使用起來很順暢！",
+    "客服回應速度太慢，問題遲遲得不到解決，很失望。",
+    "功能還算齊全，但偶爾會閃退，希望能改進。",
+    "雖然有些小bug，但整體來說是個實用的工具App。",
+    "完全不推薦下載，廣告太多而且耗電量驚人。"
+]
+# 進行預測
+for text in texts:
+    result = classifier(text)[0]
+    print(f"文本: {text}")
+    # 按分數排序
+    sorted_scores = sorted(result, key=lambda x: x['score'], reverse=True)
+    # 獲取最高分數的情感
+    top_sentiment = sorted_scores[0]
+    print(f"預測情感: {top_sentiment['label']} (分數: {top_sentiment['score']:.4f})")
+    # 顯示所有情感分數
+    print("所有情感分數:")
+    for score_item in sorted_scores:
+        print(f"  {score_item['label']}: {score_item['score']:.4f}")
+    print("-" * 50)

metadata.json ADDED Viewed

File without changes

model.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:5ead3f78962d41b506f182b3fdc6ca023f9d72b3367d789b677014c198cf16b9
+size 409103316

special_tokens_map.json ADDED Viewed

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer_config.json ADDED Viewed

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": false,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff