{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "988dc733",
   "metadata": {
    "papermill": {
     "duration": 0.007087,
     "end_time": "2024-06-07T07:59:47.120323",
     "exception": false,
     "start_time": "2024-06-07T07:59:47.113236",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "## 导入库"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "e45a4da1",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-07T07:59:47.134597Z",
     "iopub.status.busy": "2024-06-07T07:59:47.134196Z",
     "iopub.status.idle": "2024-06-07T07:59:49.742368Z",
     "shell.execute_reply": "2024-06-07T07:59:49.741337Z"
    },
    "papermill": {
     "duration": 2.618114,
     "end_time": "2024-06-07T07:59:49.744905",
     "exception": false,
     "start_time": "2024-06-07T07:59:47.126791",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import re\n",
    "import nltk\n",
    "from nltk.corpus.reader.tagged import ToktokTokenizer\n",
    "from bs4 import BeautifulSoup\n",
    "import pandas as pd\n",
    "import joblib\n",
    "\n",
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "from sklearn.preprocessing import LabelBinarizer\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import accuracy_score, classification_report\n",
    "from sklearn.linear_model import LogisticRegression"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "521ea494",
   "metadata": {
    "papermill": {
     "duration": 0.006048,
     "end_time": "2024-06-07T07:59:49.757454",
     "exception": false,
     "start_time": "2024-06-07T07:59:49.751406",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "## 读取数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "cf510c24",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-07T07:59:49.771974Z",
     "iopub.status.busy": "2024-06-07T07:59:49.770991Z",
     "iopub.status.idle": "2024-06-07T07:59:52.074581Z",
     "shell.execute_reply": "2024-06-07T07:59:52.073532Z"
    },
    "papermill": {
     "duration": 2.313203,
     "end_time": "2024-06-07T07:59:52.076814",
     "exception": false,
     "start_time": "2024-06-07T07:59:49.763611",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>text</th>\n",
       "      <th>Emotion</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>i seriously hate one subject to death but now ...</td>\n",
       "      <td>hate</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>im so full of life i feel appalled</td>\n",
       "      <td>neutral</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>i sit here to write i start to dig out my feel...</td>\n",
       "      <td>neutral</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>ive been really angry with r and i feel like a...</td>\n",
       "      <td>anger</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>i feel suspicious if there is no one outside l...</td>\n",
       "      <td>neutral</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Unnamed: 0                                               text  Emotion\n",
       "0           0  i seriously hate one subject to death but now ...     hate\n",
       "1           1                 im so full of life i feel appalled  neutral\n",
       "2           2  i sit here to write i start to dig out my feel...  neutral\n",
       "3           3  ive been really angry with r and i feel like a...    anger\n",
       "4           4  i feel suspicious if there is no one outside l...  neutral"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Location of the emotion dataset CSV inside the Kaggle input directory.\n",
    "DATA_PATH = '/kaggle/input/emotion-analysis-based-on-text/emotion_sentimen_dataset.csv'\n",
    "\n",
    "df = pd.read_csv(DATA_PATH, encoding='utf-8')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cdad6e1a",
   "metadata": {
    "papermill": {
     "duration": 0.006328,
     "end_time": "2024-06-07T07:59:52.089627",
     "exception": false,
     "start_time": "2024-06-07T07:59:52.083299",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "## 数据预处理"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "980ecdc4",
   "metadata": {
    "papermill": {
     "duration": 0.006119,
     "end_time": "2024-06-07T07:59:52.102058",
     "exception": false,
     "start_time": "2024-06-07T07:59:52.095939",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "### 去掉 'Unnamed: 0' 列"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "b6b98b5e",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-07T07:59:52.116308Z",
     "iopub.status.busy": "2024-06-07T07:59:52.115936Z",
     "iopub.status.idle": "2024-06-07T07:59:52.125497Z",
     "shell.execute_reply": "2024-06-07T07:59:52.124514Z"
    },
    "papermill": {
     "duration": 0.019378,
     "end_time": "2024-06-07T07:59:52.127784",
     "exception": false,
     "start_time": "2024-06-07T07:59:52.108406",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'i seriously hate one subject to death but now i feel reluctant to drop it'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Remove the 'Unnamed: 0' column when the frame has it, then show the first text.\n",
    "df = df.drop(columns=['Unnamed: 0'], errors='ignore')\n",
    "df.loc[0, 'text']"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "13ba4d1b",
   "metadata": {
    "papermill": {
     "duration": 0.006843,
     "end_time": "2024-06-07T07:59:52.141633",
     "exception": false,
     "start_time": "2024-06-07T07:59:52.134790",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "### 检查缺失值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "aaf443cd",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-07T07:59:52.156361Z",
     "iopub.status.busy": "2024-06-07T07:59:52.155702Z",
     "iopub.status.idle": "2024-06-07T07:59:52.260417Z",
     "shell.execute_reply": "2024-06-07T07:59:52.259372Z"
    },
    "papermill": {
     "duration": 0.114358,
     "end_time": "2024-06-07T07:59:52.262476",
     "exception": false,
     "start_time": "2024-06-07T07:59:52.148118",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "text       False\n",
       "Emotion    False\n",
       "dtype: bool"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-column flag: True when the column holds at least one missing value.\n",
    "df.isna().any()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d210bd7d",
   "metadata": {
    "papermill": {
     "duration": 0.006379,
     "end_time": "2024-06-07T07:59:52.275724",
     "exception": false,
     "start_time": "2024-06-07T07:59:52.269345",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "没有缺失值"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b020d1da",
   "metadata": {
    "papermill": {
     "duration": 0.006849,
     "end_time": "2024-06-07T07:59:52.289204",
     "exception": false,
     "start_time": "2024-06-07T07:59:52.282355",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "### 统计标签个数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "beeb611f",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-07T07:59:52.304207Z",
     "iopub.status.busy": "2024-06-07T07:59:52.303413Z",
     "iopub.status.idle": "2024-06-07T07:59:52.372271Z",
     "shell.execute_reply": "2024-06-07T07:59:52.371311Z"
    },
    "papermill": {
     "duration": 0.078711,
     "end_time": "2024-06-07T07:59:52.374395",
     "exception": false,
     "start_time": "2024-06-07T07:59:52.295684",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Emotion\n",
       "neutral       674538\n",
       "love           39553\n",
       "happiness      27175\n",
       "sadness        17481\n",
       "relief         16729\n",
       "hate           15267\n",
       "anger          12336\n",
       "fun            10075\n",
       "enthusiasm      9304\n",
       "surprise        6954\n",
       "empty           5542\n",
       "worry           4475\n",
       "boredom          126\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of rows per emotion label.\n",
    "emotion_counts = df['Emotion'].value_counts()\n",
    "emotion_counts"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c4325963",
   "metadata": {
    "papermill": {
     "duration": 0.006512,
     "end_time": "2024-06-07T07:59:52.387868",
     "exception": false,
     "start_time": "2024-06-07T07:59:52.381356",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "neutral 情绪居多(约占全部样本的 80%),类别分布严重不平衡"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "42a255ea",
   "metadata": {
    "papermill": {
     "duration": 0.00651,
     "end_time": "2024-06-07T07:59:52.401107",
     "exception": false,
     "start_time": "2024-06-07T07:59:52.394597",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "### 过滤HTML标签和方括号内容"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "f6340f4d",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-07T07:59:52.416407Z",
     "iopub.status.busy": "2024-06-07T07:59:52.415653Z",
     "iopub.status.idle": "2024-06-07T07:59:52.420831Z",
     "shell.execute_reply": "2024-06-07T07:59:52.419846Z"
    },
    "papermill": {
     "duration": 0.014951,
     "end_time": "2024-06-07T07:59:52.422798",
     "exception": false,
     "start_time": "2024-06-07T07:59:52.407847",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "def noiseremovel_text(text):\n",
    "    \"\"\"Strip HTML markup and square-bracketed spans from ``text``.\n",
    "\n",
    "    The input is parsed with BeautifulSoup's ``html.parser`` and reduced to its\n",
    "    text content; every ``[...]`` span is then deleted (surrounding whitespace\n",
    "    is left untouched).\n",
    "\n",
    "    Returns the cleaned string.\n",
    "    \"\"\"\n",
    "    plain = BeautifulSoup(text, \"html.parser\").get_text()\n",
    "    return re.sub(r'\\[[^]]*\\]', '', plain)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "324c0fd5",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-07T07:59:52.439372Z",
     "iopub.status.busy": "2024-06-07T07:59:52.438551Z",
     "iopub.status.idle": "2024-06-07T07:59:52.445213Z",
     "shell.execute_reply": "2024-06-07T07:59:52.444197Z"
    },
    "papermill": {
     "duration": 0.016475,
     "end_time": "2024-06-07T07:59:52.447222",
     "exception": false,
     "start_time": "2024-06-07T07:59:52.430747",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'I really enjoyed the latest episode of my favorite show!  Check out this link for a recap. #bestshowever '"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Quick check of noiseremovel_text on a string wrapped in <div> with two [...] spans.\n",
    "sample_text = '<div>I really enjoyed the latest episode of my favorite show! [https://t.co/xyz123] Check out this link for a recap. #bestshowever [Ad: Stream now on MyStreamingService for 50% off!]</div>'\n",
    "trans_sample_text = noiseremovel_text(sample_text)\n",
    "trans_sample_text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "532f7045",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-07T07:59:52.462719Z",
     "iopub.status.busy": "2024-06-07T07:59:52.461914Z",
     "iopub.status.idle": "2024-06-07T08:00:34.124522Z",
     "shell.execute_reply": "2024-06-07T08:00:34.123423Z"
    },
    "papermill": {
     "duration": 41.673227,
     "end_time": "2024-06-07T08:00:34.127258",
     "exception": false,
     "start_time": "2024-06-07T07:59:52.454031",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Clean every row of the text column with noiseremovel_text.\n",
    "cleaned_text = df['text'].apply(noiseremovel_text)\n",
    "df['text'] = cleaned_text"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c3b24680",
   "metadata": {
    "papermill": {
     "duration": 0.006637,
     "end_time": "2024-06-07T08:00:34.141272",
     "exception": false,
     "start_time": "2024-06-07T08:00:34.134635",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "### 移除stopwords"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "e48353c3",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-07T08:00:34.157003Z",
     "iopub.status.busy": "2024-06-07T08:00:34.156235Z",
     "iopub.status.idle": "2024-06-07T08:00:34.382132Z",
     "shell.execute_reply": "2024-06-07T08:00:34.381037Z"
    },
    "papermill": {
     "duration": 0.23647,
     "end_time": "2024-06-07T08:00:34.384580",
     "exception": false,
     "start_time": "2024-06-07T08:00:34.148110",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[nltk_data] Downloading package stopwords to /usr/share/nltk_data...\n",
      "[nltk_data]   Package stopwords is already up-to-date!\n"
     ]
    }
   ],
   "source": [
    "# Fetch the NLTK stop-word corpus and keep the English list as a set\n",
    "# so membership checks during filtering are fast.\n",
    "nltk.download('stopwords')\n",
    "stopwords = nltk.corpus.stopwords.words('english')\n",
    "stop_wr = set(stopwords)\n",
    "\n",
    "def remove_stopwords(text, stop_words):\n",
    "    \"\"\"Remove stop words from ``text`` and return the remaining tokens joined by spaces.\n",
    "\n",
    "    Args:\n",
    "        text: String to filter.\n",
    "        stop_words: Collection of lowercase stop words. A ``set``/``frozenset`` is\n",
    "            used as-is; any other iterable is converted to a set once per call.\n",
    "\n",
    "    Returns:\n",
    "        The tokens whose lowercase form is not in ``stop_words``, in their original\n",
    "        order and casing, separated by single spaces.\n",
    "    \"\"\"\n",
    "    # A token is a run of word characters, or a run of two or more dots.\n",
    "    words = re.findall(r'\\w+|\\.\\.+', text)\n",
    "    # Callers pass a ready-made set for every row; rebuilding it each call is wasted work.\n",
    "    if not isinstance(stop_words, (set, frozenset)):\n",
    "        stop_words = set(stop_words)\n",
    "    return ' '.join(word for word in words if word.lower() not in stop_words)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "f328854d",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-07T08:00:34.402759Z",
     "iopub.status.busy": "2024-06-07T08:00:34.401319Z",
     "iopub.status.idle": "2024-06-07T08:00:34.411254Z",
     "shell.execute_reply": "2024-06-07T08:00:34.410112Z"
    },
    "papermill": {
     "duration": 0.021777,
     "end_time": "2024-06-07T08:00:34.413600",
     "exception": false,
     "start_time": "2024-06-07T08:00:34.391823",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'seriously hate one subject death feel reluctant drop'"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Quick check of remove_stopwords on the first row's text (reuses the sample_text names from above).\n",
    "sample_text = 'i seriously hate one subject to death but now i feel reluctant to drop it'\n",
    "trans_sample_text = remove_stopwords(sample_text, stop_wr)\n",
    "trans_sample_text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "36a11862",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-07T08:00:34.429514Z",
     "iopub.status.busy": "2024-06-07T08:00:34.429146Z",
     "iopub.status.idle": "2024-06-07T08:00:45.852555Z",
     "shell.execute_reply": "2024-06-07T08:00:45.851505Z"
    },
    "papermill": {
     "duration": 11.434069,
     "end_time": "2024-06-07T08:00:45.855050",
     "exception": false,
     "start_time": "2024-06-07T08:00:34.420981",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Filter stop words out of every row using the prepared English stop-word set.\n",
    "df['text'] = df['text'].apply(lambda row_text: remove_stopwords(row_text, stop_wr))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "59754b96",
   "metadata": {
    "papermill": {
     "duration": 0.006971,
     "end_time": "2024-06-07T08:00:45.869210",
     "exception": false,
     "start_time": "2024-06-07T08:00:45.862239",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "## 对情绪标签编码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "0d093d66",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-07T08:00:45.885460Z",
     "iopub.status.busy": "2024-06-07T08:00:45.885088Z",
     "iopub.status.idle": "2024-06-07T08:00:49.595642Z",
     "shell.execute_reply": "2024-06-07T08:00:49.594472Z"
    },
    "papermill": {
     "duration": 3.721209,
     "end_time": "2024-06-07T08:00:49.597909",
     "exception": false,
     "start_time": "2024-06-07T08:00:45.876700",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(839555, 13)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# One-hot encode the emotion labels (one column per distinct label).\n",
    "# NOTE(review): emotion_encoded is not consumed by any later cell in this notebook;\n",
    "# the classifier below is trained on the raw string labels in df['Emotion'].\n",
    "label_binarizer = LabelBinarizer()\n",
    "\n",
    "emotion_encoded = label_binarizer.fit_transform(df['Emotion'])\n",
    "emotion_encoded.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f39d81be",
   "metadata": {
    "papermill": {
     "duration": 0.00708,
     "end_time": "2024-06-07T08:00:49.612623",
     "exception": false,
     "start_time": "2024-06-07T08:00:49.605543",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "## 文本特征向量化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "67333b0c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-07T08:00:49.629513Z",
     "iopub.status.busy": "2024-06-07T08:00:49.628471Z",
     "iopub.status.idle": "2024-06-07T08:00:58.957626Z",
     "shell.execute_reply": "2024-06-07T08:00:58.956558Z"
    },
    "papermill": {
     "duration": 9.340419,
     "end_time": "2024-06-07T08:00:58.960340",
     "exception": false,
     "start_time": "2024-06-07T08:00:49.619921",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Split the raw text first so the vocabulary is learned from training rows only;\n",
    "# fitting the vectorizer on the full corpus would let test-set tokens shape the features.\n",
    "text_train, text_test, y_train, y_test = train_test_split(\n",
    "    df['text'],\n",
    "    df['Emotion'],\n",
    "    test_size=0.2,\n",
    "    random_state=42,\n",
    "    stratify=df['Emotion'],  # labels are heavily imbalanced; keep rare classes in both splits\n",
    ")\n",
    "\n",
    "# Bag-of-words counts: fit on the training text, reuse the same vocabulary for the test text.\n",
    "vectorizer = CountVectorizer()\n",
    "X_train = vectorizer.fit_transform(text_train)\n",
    "X_test = vectorizer.transform(text_test)\n",
    "\n",
    "logistic = LogisticRegression(penalty='l2', max_iter=500, C=1, random_state=42)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b6e72f70",
   "metadata": {
    "papermill": {
     "duration": 0.007038,
     "end_time": "2024-06-07T08:00:58.974906",
     "exception": false,
     "start_time": "2024-06-07T08:00:58.967868",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "## 训练"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "c02112ed",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-07T08:00:58.991472Z",
     "iopub.status.busy": "2024-06-07T08:00:58.991101Z",
     "iopub.status.idle": "2024-06-07T08:04:00.220701Z",
     "shell.execute_reply": "2024-06-07T08:04:00.219324Z"
    },
    "papermill": {
     "duration": 181.241697,
     "end_time": "2024-06-07T08:04:00.224168",
     "exception": false,
     "start_time": "2024-06-07T08:00:58.982471",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# fit() trains the estimator in place and returns it, so lr_bow and logistic\n",
    "# refer to the same fitted model.\n",
    "logistic.fit(X_train, y_train)\n",
    "lr_bow = logistic"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a933590f",
   "metadata": {
    "papermill": {
     "duration": 0.011677,
     "end_time": "2024-06-07T08:04:00.248922",
     "exception": false,
     "start_time": "2024-06-07T08:04:00.237245",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "## 保存"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "c7529929",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-07T08:04:00.265919Z",
     "iopub.status.busy": "2024-06-07T08:04:00.264742Z",
     "iopub.status.idle": "2024-06-07T08:04:01.081469Z",
     "shell.execute_reply": "2024-06-07T08:04:01.080435Z"
    },
    "papermill": {
     "duration": 0.82782,
     "end_time": "2024-06-07T08:04:01.084194",
     "exception": false,
     "start_time": "2024-06-07T08:04:00.256374",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Persist the fitted vectorizer and classifier, then read both back from disk.\n",
    "for artifact, filename in ((vectorizer, 'vectorizer.joblib'), (lr_bow, 'model.joblib')):\n",
    "    joblib.dump(artifact, filename)\n",
    "\n",
    "loaded_vectorizer = joblib.load('vectorizer.joblib')\n",
    "loaded_model = joblib.load('model.joblib')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "57657a5e",
   "metadata": {
    "papermill": {
     "duration": 0.007039,
     "end_time": "2024-06-07T08:04:01.098694",
     "exception": false,
     "start_time": "2024-06-07T08:04:01.091655",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "## 评估"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "688b5e38",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-07T08:04:01.114633Z",
     "iopub.status.busy": "2024-06-07T08:04:01.113895Z",
     "iopub.status.idle": "2024-06-07T08:04:08.550523Z",
     "shell.execute_reply": "2024-06-07T08:04:08.549515Z"
    },
    "papermill": {
     "duration": 7.447225,
     "end_time": "2024-06-07T08:04:08.552950",
     "exception": false,
     "start_time": "2024-06-07T08:04:01.105725",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 0.9983324499288313\n",
      "Classification Report:\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "       anger       1.00      0.99      0.99      2489\n",
      "     boredom       1.00      0.95      0.98        21\n",
      "       empty       1.00      0.98      0.99      1096\n",
      "  enthusiasm       1.00      0.99      1.00      1839\n",
      "         fun       1.00      0.98      0.99      1977\n",
      "   happiness       1.00      1.00      1.00      5370\n",
      "        hate       1.00      0.99      1.00      3018\n",
      "        love       1.00      1.00      1.00      8001\n",
      "     neutral       1.00      1.00      1.00    134999\n",
      "      relief       1.00      0.98      0.99      3396\n",
      "     sadness       1.00      0.99      0.99      3428\n",
      "    surprise       1.00      0.99      1.00      1372\n",
      "       worry       1.00      0.99      1.00       905\n",
      "\n",
      "    accuracy                           1.00    167911\n",
      "   macro avg       1.00      0.99      0.99    167911\n",
      "weighted avg       1.00      1.00      1.00    167911\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Evaluate the model reloaded from disk on the held-out test split.\n",
    "y_pred = loaded_model.predict(X_test)\n",
    "\n",
    "accuracy = accuracy_score(y_test, y_pred)\n",
    "print(\"Accuracy:\", accuracy)\n",
    "\n",
    "print(\"Classification Report:\")\n",
    "print(classification_report(y_test, y_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ea741f3a",
   "metadata": {
    "papermill": {
     "duration": 0.007129,
     "end_time": "2024-06-07T08:04:08.567812",
     "exception": false,
     "start_time": "2024-06-07T08:04:08.560683",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kaggle": {
   "accelerator": "none",
   "dataSources": [
    {
     "datasetId": 4540583,
     "sourceId": 7763359,
     "sourceType": "datasetVersion"
    }
   ],
   "dockerImageVersionId": 30732,
   "isGpuEnabled": false,
   "isInternetEnabled": true,
   "language": "python",
   "sourceType": "notebook"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  },
  "papermill": {
   "default_parameters": {},
   "duration": 264.700867,
   "end_time": "2024-06-07T08:04:09.296576",
   "environment_variables": {},
   "exception": null,
   "input_path": "__notebook__.ipynb",
   "output_path": "__notebook__.ipynb",
   "parameters": {},
   "start_time": "2024-06-07T07:59:44.595709",
   "version": "2.5.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}