{ "cells": [ { "cell_type": "markdown", "id": "988dc733", "metadata": { "papermill": { "duration": 0.007087, "end_time": "2024-06-07T07:59:47.120323", "exception": false, "start_time": "2024-06-07T07:59:47.113236", "status": "completed" }, "tags": [] }, "source": [ "## 导入库" ] }, { "cell_type": "code", "execution_count": 1, "id": "e45a4da1", "metadata": { "execution": { "iopub.execute_input": "2024-06-07T07:59:47.134597Z", "iopub.status.busy": "2024-06-07T07:59:47.134196Z", "iopub.status.idle": "2024-06-07T07:59:49.742368Z", "shell.execute_reply": "2024-06-07T07:59:49.741337Z" }, "papermill": { "duration": 2.618114, "end_time": "2024-06-07T07:59:49.744905", "exception": false, "start_time": "2024-06-07T07:59:47.126791", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "import re\n", "import nltk\n", "from nltk.corpus.reader.tagged import ToktokTokenizer\n", "from bs4 import BeautifulSoup\n", "import pandas as pd\n", "import joblib\n", "\n", "from sklearn.feature_extraction.text import CountVectorizer\n", "from sklearn.preprocessing import LabelBinarizer\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import accuracy_score, classification_report\n", "from sklearn.linear_model import LogisticRegression" ] }, { "cell_type": "markdown", "id": "521ea494", "metadata": { "papermill": { "duration": 0.006048, "end_time": "2024-06-07T07:59:49.757454", "exception": false, "start_time": "2024-06-07T07:59:49.751406", "status": "completed" }, "tags": [] }, "source": [ "## 读取数据" ] }, { "cell_type": "code", "execution_count": 2, "id": "cf510c24", "metadata": { "execution": { "iopub.execute_input": "2024-06-07T07:59:49.771974Z", "iopub.status.busy": "2024-06-07T07:59:49.770991Z", "iopub.status.idle": "2024-06-07T07:59:52.074581Z", "shell.execute_reply": "2024-06-07T07:59:52.073532Z" }, "papermill": { "duration": 2.313203, "end_time": "2024-06-07T07:59:52.076814", "exception": false, "start_time": "2024-06-07T07:59:49.763611", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", " | Unnamed: 0 | \n", "text | \n", "Emotion | \n", "
---|---|---|---|
0 | \n", "0 | \n", "i seriously hate one subject to death but now ... | \n", "hate | \n", "
1 | \n", "1 | \n", "im so full of life i feel appalled | \n", "neutral | \n", "
2 | \n", "2 | \n", "i sit here to write i start to dig out my feel... | \n", "neutral | \n", "
3 | \n", "3 | \n", "ive been really angry with r and i feel like a... | \n", "anger | \n", "
4 | \n", "4 | \n", "i feel suspicious if there is no one outside l... | \n", "neutral | \n", "