{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Membangun sistem AI yang belajar dari data harga kendaraan bekas dan dapat memprediksi harga masa depan berdasarkan tren depresiasi" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Packages" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2025-05-07T19:49:16.104468Z", "iopub.status.busy": "2025-05-07T19:49:16.103716Z", "iopub.status.idle": "2025-05-07T19:51:00.330501Z", "shell.execute_reply": "2025-05-07T19:51:00.329776Z", "shell.execute_reply.started": "2025-05-07T19:49:16.104435Z" }, "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: transformers in /usr/local/lib/python3.11/dist-packages (4.51.1)\n", "Requirement already satisfied: datasets in /usr/local/lib/python3.11/dist-packages (3.5.0)\n", "Requirement already satisfied: torch in /usr/local/lib/python3.11/dist-packages (2.5.1+cu124)\n", "Collecting scikit-learn==1.6.1\n", " Downloading scikit_learn-1.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)\n", "Requirement already satisfied: numpy>=1.19.5 in /usr/local/lib/python3.11/dist-packages (from scikit-learn==1.6.1) (1.26.4)\n", "Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn==1.6.1) (1.15.2)\n", "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn==1.6.1) (1.4.2)\n", "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn==1.6.1) (3.6.0)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from transformers) (3.18.0)\n", "Requirement already satisfied: huggingface-hub<1.0,>=0.30.0 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.30.2)\n", "Requirement already satisfied: packaging>=20.0 in 
/usr/local/lib/python3.11/dist-packages (from transformers) (24.2)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from transformers) (6.0.2)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers) (2024.11.6)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from transformers) (2.32.3)\n", "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.21.0)\n", "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.5.2)\n", "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.11/dist-packages (from transformers) (4.67.1)\n", "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.11/dist-packages (from datasets) (19.0.1)\n", "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.11/dist-packages (from datasets) (0.3.8)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from datasets) (2.2.3)\n", "Requirement already satisfied: xxhash in /usr/local/lib/python3.11/dist-packages (from datasets) (3.5.0)\n", "Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.11/dist-packages (from datasets) (0.70.16)\n", "Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)\n", " Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)\n", "Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from datasets) (3.11.16)\n", "Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.11/dist-packages (from torch) (4.13.1)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch) (3.4.2)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages 
(from torch) (3.1.6)\n", "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch) (12.4.127)\n", "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch) (12.4.127)\n", "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch) (12.4.127)\n", "Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)\n", " Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)\n", " Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)\n", " Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-curand-cu12==10.3.5.147 (from torch)\n", " Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch)\n", " Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch)\n", " Downloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch) (2.21.5)\n", "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch) (12.4.127)\n", "Collecting nvidia-nvjitlink-cu12==12.4.127 (from torch)\n", " Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Requirement already satisfied: triton==3.1.0 in /usr/local/lib/python3.11/dist-packages (from torch) (3.1.0)\n", "Requirement already satisfied: sympy==1.13.1 in 
/usr/local/lib/python3.11/dist-packages (from torch) (1.13.1)\n", "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch) (1.3.0)\n", "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (2.6.1)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (1.3.2)\n", "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (25.3.0)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (1.5.0)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (6.2.0)\n", "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (0.3.1)\n", "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (1.19.0)\n", "Requirement already satisfied: mkl_fft in /usr/local/lib/python3.11/dist-packages (from numpy>=1.19.5->scikit-learn==1.6.1) (1.3.8)\n", "Requirement already satisfied: mkl_random in /usr/local/lib/python3.11/dist-packages (from numpy>=1.19.5->scikit-learn==1.6.1) (1.2.4)\n", "Requirement already satisfied: mkl_umath in /usr/local/lib/python3.11/dist-packages (from numpy>=1.19.5->scikit-learn==1.6.1) (0.1.1)\n", "Requirement already satisfied: mkl in /usr/local/lib/python3.11/dist-packages (from numpy>=1.19.5->scikit-learn==1.6.1) (2025.1.0)\n", "Requirement already satisfied: tbb4py in /usr/local/lib/python3.11/dist-packages (from numpy>=1.19.5->scikit-learn==1.6.1) (2022.1.0)\n", "Requirement already satisfied: mkl-service in /usr/local/lib/python3.11/dist-packages (from numpy>=1.19.5->scikit-learn==1.6.1) (2.4.1)\n", "Requirement already satisfied: 
charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (3.4.1)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (3.10)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (2.3.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (2025.1.31)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch) (3.0.2)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets) (2.9.0.post0)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets) (2025.2)\n", "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets) (2025.2)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.17.0)\n", "Requirement already satisfied: intel-openmp<2026,>=2024 in /usr/local/lib/python3.11/dist-packages (from mkl->numpy>=1.19.5->scikit-learn==1.6.1) (2024.2.0)\n", "Requirement already satisfied: tbb==2022.* in /usr/local/lib/python3.11/dist-packages (from mkl->numpy>=1.19.5->scikit-learn==1.6.1) (2022.1.0)\n", "Requirement already satisfied: tcmlib==1.* in /usr/local/lib/python3.11/dist-packages (from tbb==2022.*->mkl->numpy>=1.19.5->scikit-learn==1.6.1) (1.2.0)\n", "Requirement already satisfied: intel-cmplr-lib-rt in /usr/local/lib/python3.11/dist-packages (from mkl_umath->numpy>=1.19.5->scikit-learn==1.6.1) (2024.2.0)\n", "Requirement already satisfied: intel-cmplr-lib-ur==2024.2.0 in /usr/local/lib/python3.11/dist-packages (from intel-openmp<2026,>=2024->mkl->numpy>=1.19.5->scikit-learn==1.6.1) (2024.2.0)\n", 
"Downloading scikit_learn-1.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.5/13.5 MB\u001b[0m \u001b[31m89.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m:01\u001b[0m\n", "\u001b[?25hDownloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl (363.4 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m0:00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hDownloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m0:00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hDownloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl (211.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m0:00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hDownloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl (56.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m28.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hDownloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl (127.9 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m12.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hDownloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl (207.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 
MB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0mm\n", "\u001b[?25hDownloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m50.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hDownloading fsspec-2024.12.0-py3-none-any.whl (183 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m183.9/183.9 kB\u001b[0m \u001b[31m10.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hInstalling collected packages: nvidia-nvjitlink-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cublas-cu12, fsspec, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nvidia-cusolver-cu12, scikit-learn\n", " Attempting uninstall: nvidia-nvjitlink-cu12\n", " Found existing installation: nvidia-nvjitlink-cu12 12.8.93\n", " Uninstalling nvidia-nvjitlink-cu12-12.8.93:\n", " Successfully uninstalled nvidia-nvjitlink-cu12-12.8.93\n", " Attempting uninstall: nvidia-curand-cu12\n", " Found existing installation: nvidia-curand-cu12 10.3.9.90\n", " Uninstalling nvidia-curand-cu12-10.3.9.90:\n", " Successfully uninstalled nvidia-curand-cu12-10.3.9.90\n", " Attempting uninstall: nvidia-cufft-cu12\n", " Found existing installation: nvidia-cufft-cu12 11.3.3.83\n", " Uninstalling nvidia-cufft-cu12-11.3.3.83:\n", " Successfully uninstalled nvidia-cufft-cu12-11.3.3.83\n", " Attempting uninstall: nvidia-cublas-cu12\n", " Found existing installation: nvidia-cublas-cu12 12.8.4.1\n", " Uninstalling nvidia-cublas-cu12-12.8.4.1:\n", " Successfully uninstalled nvidia-cublas-cu12-12.8.4.1\n", " Attempting uninstall: fsspec\n", " Found existing installation: fsspec 2025.3.2\n", " Uninstalling fsspec-2025.3.2:\n", " Successfully uninstalled fsspec-2025.3.2\n", " Attempting uninstall: nvidia-cusparse-cu12\n", " Found existing installation: 
nvidia-cusparse-cu12 12.5.8.93\n", " Uninstalling nvidia-cusparse-cu12-12.5.8.93:\n", " Successfully uninstalled nvidia-cusparse-cu12-12.5.8.93\n", " Attempting uninstall: nvidia-cudnn-cu12\n", " Found existing installation: nvidia-cudnn-cu12 9.3.0.75\n", " Uninstalling nvidia-cudnn-cu12-9.3.0.75:\n", " Successfully uninstalled nvidia-cudnn-cu12-9.3.0.75\n", " Attempting uninstall: nvidia-cusolver-cu12\n", " Found existing installation: nvidia-cusolver-cu12 11.7.3.90\n", " Uninstalling nvidia-cusolver-cu12-11.7.3.90:\n", " Successfully uninstalled nvidia-cusolver-cu12-11.7.3.90\n", " Attempting uninstall: scikit-learn\n", " Found existing installation: scikit-learn 1.2.2\n", " Uninstalling scikit-learn-1.2.2:\n", " Successfully uninstalled scikit-learn-1.2.2\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "category-encoders 2.7.0 requires scikit-learn<1.6.0,>=1.0.0, but you have scikit-learn 1.6.1 which is incompatible.\n", "gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.12.0 which is incompatible.\n", "bigframes 1.36.0 requires rich<14,>=12.4.4, but you have rich 14.0.0 which is incompatible.\n", "pylibcugraph-cu12 24.12.0 requires pylibraft-cu12==24.12.*, but you have pylibraft-cu12 25.2.0 which is incompatible.\n", "pylibcugraph-cu12 24.12.0 requires rmm-cu12==24.12.*, but you have rmm-cu12 25.2.0 which is incompatible.\u001b[0m\u001b[31m\n", "\u001b[0mSuccessfully installed fsspec-2024.12.0 nvidia-cublas-cu12-12.4.5.8 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.2.1.3 nvidia-curand-cu12-10.3.5.147 nvidia-cusolver-cu12-11.6.1.9 nvidia-cusparse-cu12-12.3.1.170 nvidia-nvjitlink-cu12-12.4.127 scikit-learn-1.6.1\n", "Requirement already satisfied: sentence-transformers in /usr/local/lib/python3.11/dist-packages (3.4.1)\n", "Requirement already satisfied: transformers<5.0.0,>=4.41.0 in 
/usr/local/lib/python3.11/dist-packages (from sentence-transformers) (4.51.1)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from sentence-transformers) (4.67.1)\n", "Requirement already satisfied: torch>=1.11.0 in /usr/local/lib/python3.11/dist-packages (from sentence-transformers) (2.5.1+cu124)\n", "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.11/dist-packages (from sentence-transformers) (1.6.1)\n", "Requirement already satisfied: scipy in /usr/local/lib/python3.11/dist-packages (from sentence-transformers) (1.15.2)\n", "Requirement already satisfied: huggingface-hub>=0.20.0 in /usr/local/lib/python3.11/dist-packages (from sentence-transformers) (0.30.2)\n", "Requirement already satisfied: Pillow in /usr/local/lib/python3.11/dist-packages (from sentence-transformers) (11.1.0)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.20.0->sentence-transformers) (3.18.0)\n", "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.20.0->sentence-transformers) (2024.12.0)\n", "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.20.0->sentence-transformers) (24.2)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.20.0->sentence-transformers) (6.0.2)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.20.0->sentence-transformers) (2.32.3)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.20.0->sentence-transformers) (4.13.1)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (3.4.2)\n", "Requirement already satisfied: jinja2 in 
/usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (3.1.6)\n", "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (12.4.127)\n", "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (12.4.127)\n", "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (12.4.127)\n", "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (9.1.0.70)\n", "Requirement already satisfied: nvidia-cublas-cu12==12.4.5.8 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (12.4.5.8)\n", "Requirement already satisfied: nvidia-cufft-cu12==11.2.1.3 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (11.2.1.3)\n", "Requirement already satisfied: nvidia-curand-cu12==10.3.5.147 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (10.3.5.147)\n", "Requirement already satisfied: nvidia-cusolver-cu12==11.6.1.9 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (11.6.1.9)\n", "Requirement already satisfied: nvidia-cusparse-cu12==12.3.1.170 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (12.3.1.170)\n", "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (2.21.5)\n", "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (12.4.127)\n", "Requirement already satisfied: nvidia-nvjitlink-cu12==12.4.127 in 
/usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (12.4.127)\n", "Requirement already satisfied: triton==3.1.0 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (3.1.0)\n", "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers) (1.13.1)\n", "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch>=1.11.0->sentence-transformers) (1.3.0)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.11/dist-packages (from transformers<5.0.0,>=4.41.0->sentence-transformers) (1.26.4)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers<5.0.0,>=4.41.0->sentence-transformers) (2024.11.6)\n", "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers<5.0.0,>=4.41.0->sentence-transformers) (0.21.0)\n", "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.11/dist-packages (from transformers<5.0.0,>=4.41.0->sentence-transformers) (0.5.2)\n", "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn->sentence-transformers) (1.4.2)\n", "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn->sentence-transformers) (3.6.0)\n", "Requirement already satisfied: mkl_fft in /usr/local/lib/python3.11/dist-packages (from numpy>=1.17->transformers<5.0.0,>=4.41.0->sentence-transformers) (1.3.8)\n", "Requirement already satisfied: mkl_random in /usr/local/lib/python3.11/dist-packages (from numpy>=1.17->transformers<5.0.0,>=4.41.0->sentence-transformers) (1.2.4)\n", "Requirement already satisfied: mkl_umath in /usr/local/lib/python3.11/dist-packages (from numpy>=1.17->transformers<5.0.0,>=4.41.0->sentence-transformers) 
(0.1.1)\n", "Requirement already satisfied: mkl in /usr/local/lib/python3.11/dist-packages (from numpy>=1.17->transformers<5.0.0,>=4.41.0->sentence-transformers) (2025.1.0)\n", "Requirement already satisfied: tbb4py in /usr/local/lib/python3.11/dist-packages (from numpy>=1.17->transformers<5.0.0,>=4.41.0->sentence-transformers) (2022.1.0)\n", "Requirement already satisfied: mkl-service in /usr/local/lib/python3.11/dist-packages (from numpy>=1.17->transformers<5.0.0,>=4.41.0->sentence-transformers) (2.4.1)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch>=1.11.0->sentence-transformers) (3.0.2)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub>=0.20.0->sentence-transformers) (3.4.1)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub>=0.20.0->sentence-transformers) (3.10)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub>=0.20.0->sentence-transformers) (2.3.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub>=0.20.0->sentence-transformers) (2025.1.31)\n", "Requirement already satisfied: intel-openmp<2026,>=2024 in /usr/local/lib/python3.11/dist-packages (from mkl->numpy>=1.17->transformers<5.0.0,>=4.41.0->sentence-transformers) (2024.2.0)\n", "Requirement already satisfied: tbb==2022.* in /usr/local/lib/python3.11/dist-packages (from mkl->numpy>=1.17->transformers<5.0.0,>=4.41.0->sentence-transformers) (2022.1.0)\n", "Requirement already satisfied: tcmlib==1.* in /usr/local/lib/python3.11/dist-packages (from tbb==2022.*->mkl->numpy>=1.17->transformers<5.0.0,>=4.41.0->sentence-transformers) (1.2.0)\n", "Requirement already satisfied: intel-cmplr-lib-rt in 
/usr/local/lib/python3.11/dist-packages (from mkl_umath->numpy>=1.17->transformers<5.0.0,>=4.41.0->sentence-transformers) (2024.2.0)\n", "Requirement already satisfied: intel-cmplr-lib-ur==2024.2.0 in /usr/local/lib/python3.11/dist-packages (from intel-openmp<2026,>=2024->mkl->numpy>=1.17->transformers<5.0.0,>=4.41.0->sentence-transformers) (2024.2.0)\n" ] } ], "source": [ "!pip install transformers datasets torch scikit-learn==1.6.1\n", "!pip install sentence-transformers\n", "# !pip uninstall -y scikit-learn\n", "# !pip install scikit-learn==1.6.1" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2025-05-07T19:51:00.332413Z", "iopub.status.busy": "2025-05-07T19:51:00.332155Z", "iopub.status.idle": "2025-05-07T19:51:00.617776Z", "shell.execute_reply": "2025-05-07T19:51:00.617208Z", "shell.execute_reply.started": "2025-05-07T19:51:00.332391Z" }, "trusted": true }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from datetime import datetime" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Preprocessing Data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Rules\n", "- Tidak menggunakan drop duplicates karena merk bisa sama dan harga berbeda\n", "- Menggunakan fungsi untuk feature engineering yaitu tahun, nama merk dan harga (integer)\n", "- normalisasi harga\n", "- tahun akan diubah menjadi one-hot-encoding\n", "- ada logic untuk depresiasi harga dengan 10% penurunan tiap tahun\n", " harga = harga_awal * (0.9) ** umur" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-05-06T16:38:53.637390Z", "iopub.status.busy": "2025-05-06T16:38:53.636652Z", "iopub.status.idle": "2025-05-06T16:38:53.678424Z", "shell.execute_reply": "2025-05-06T16:38:53.677330Z", "shell.execute_reply.started": "2025-05-06T16:38:53.637360Z" }, "trusted": true }, "outputs": [], "source": [ "df 
= pd.read_csv('/kaggle/input/legoas-scrapping-otomotif/results.csv')\n", "df" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-05-06T16:38:56.063738Z", "iopub.status.busy": "2025-05-06T16:38:56.063438Z", "iopub.status.idle": "2025-05-06T16:38:56.334665Z", "shell.execute_reply": "2025-05-06T16:38:56.333648Z", "shell.execute_reply.started": "2025-05-06T16:38:56.063716Z" }, "trusted": true }, "outputs": [], "source": [ "# Reference year for the age computation. It is read at run time, so ages shift when the notebook is re-run in a later year.\n", "current_year = datetime.now().year\n", "curent_year = current_year  # original misspelled name, kept as an alias in case other cells use it\n", "\n", "# Feature engineering\n", "# year: first standalone 4-digit token of the listing title (NaN when there is none).\n", "df['year'] = df['car_names'].str.extract(r'(\\b\\d{4}\\b)', expand=False)\n", "# car_names_clean: title without the leading year and without anything after ' - '.\n", "df['car_names_clean'] = df['car_names'].str.replace(r'^\\d{4}\\s+', '', regex=True).str.split(r' - ').str[0].str.strip()\n", "# prices: digits only; empty or missing values become NaN instead of raising inside int().\n", "df['prices'] = pd.to_numeric(df['car_prices'].str.replace(r'\\D', '', regex=True), errors='coerce')\n", "\n", "# Rows without a parsable year or price cannot be aged or depreciated, so drop them and report how many.\n", "n_rows_before = len(df)\n", "df = df.dropna(subset=['year', 'prices']).reset_index(drop=True)\n", "print(f'Dropped {n_rows_before - len(df)} of {n_rows_before} rows without a parsable year or price')\n", "df['prices'] = df['prices'].astype('int64')\n", "\n", "# ages: whole years since the model year, clipped at 0 so a year later than the current one counts as new.\n", "df['ages'] = (current_year - df['year'].astype('int64')).clip(lower=0)\n", "# Invert price = original * 0.9 ** age (10% depreciation per year); astype truncates the same way int() did.\n", "# No special case is needed for age 0 because 0.9 ** 0 == 1.\n", "df['estimated_original_price'] = (df['prices'] / 0.9 ** df['ages']).astype('int64')\n", "# Fraction of the estimated original price that has been lost so far.\n", "df['deprecate_percentage'] = (df['estimated_original_price'] - df['prices']) / df['estimated_original_price']\n", "df" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-05-06T16:39:17.065222Z", "iopub.status.busy": "2025-05-06T16:39:17.064476Z", "iopub.status.idle": "2025-05-06T16:39:17.080912Z", "shell.execute_reply": "2025-05-06T16:39:17.079946Z", "shell.execute_reply.started": "2025-05-06T16:39:17.065190Z" }, "trusted": true }, "outputs": [], "source": [ "feature_selected = df[['car_names_clean', 'prices', 'ages', 'estimated_original_price', 'deprecate_percentage','year']]\n", "feature_selected" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-05-06T16:40:50.642557Z", "iopub.status.busy": "2025-05-06T16:40:50.641742Z", 
"iopub.status.idle": "2025-05-06T16:40:50.702984Z", "shell.execute_reply": "2025-05-06T16:40:50.702219Z", "shell.execute_reply.started": "2025-05-06T16:40:50.642528Z" }, "trusted": true }, "outputs": [], "source": [ "feature_selected.to_csv('/kaggle/working/pemrosesan.csv', index=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Modeling" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2025-05-07T19:51:00.618979Z", "iopub.status.busy": "2025-05-07T19:51:00.618498Z", "iopub.status.idle": "2025-05-07T19:51:40.777507Z", "shell.execute_reply": "2025-05-07T19:51:40.776631Z", "shell.execute_reply.started": "2025-05-07T19:51:00.618958Z" }, "trusted": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2025-05-07 19:51:20.403766: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n", "E0000 00:00:1746647480.840255 31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "E0000 00:00:1746647480.968271 31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n" ] } ], "source": [ "import torch, joblib\n", "from torch import nn\n", "from torch.utils.data import Dataset, DataLoader\n", "from transformers import RobertaTokenizer, RobertaModel\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import MinMaxScaler\n", "from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score\n", "from sentence_transformers import SentenceTransformer\n", "from sklearn.ensemble import RandomForestRegressor\n", "import seaborn as sns" ] }, { 
"cell_type": "code", "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2025-05-07T19:51:40.779745Z", "iopub.status.busy": "2025-05-07T19:51:40.779227Z", "iopub.status.idle": "2025-05-07T19:51:40.849371Z", "shell.execute_reply": "2025-05-07T19:51:40.848610Z", "shell.execute_reply.started": "2025-05-07T19:51:40.779724Z" }, "trusted": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
car_names_cleanpricesagesestimated_original_pricedeprecate_percentageyear
0Mazda CX-5 2.5 Elite SUV41500000035692729760.2710002022
1Mitsubishi Xpander Cross 1.5 Premium MPV22200000064177321650.4685592019
2Wuling Alvez 1.5 EX SUV28600000002860000000.0000002025
3Mitsubishi Pajero Sport 2.4 Dakar 4x2 SUV45000000036172839500.2710002022
4Daihatsu Terios 1.5 TX SUV111000000165990220750.8146982009
.....................
11820Hyundai IONIQ 6 Fastback1020000000111333333330.1000002024
11821Toyota Raize 1.0 GR Sport (2 Tone) SUV20000000032743484220.2710002022
11822Toyota Sienta 1.5 G MPV15000000073136127370.5217032018
11823Mitsubishi Pajero Sport 2.4 Dakar 4X2 SUV41500000057028061440.4095102020
11824Toyota Calya 1.2 G MPV10800000072258011700.5217032018
\n", "

11825 rows × 6 columns

\n", "
" ], "text/plain": [ " car_names_clean prices ages \\\n", "0 Mazda CX-5 2.5 Elite SUV 415000000 3 \n", "1 Mitsubishi Xpander Cross 1.5 Premium MPV 222000000 6 \n", "2 Wuling Alvez 1.5 EX SUV 286000000 0 \n", "3 Mitsubishi Pajero Sport 2.4 Dakar 4x2 SUV 450000000 3 \n", "4 Daihatsu Terios 1.5 TX SUV 111000000 16 \n", "... ... ... ... \n", "11820 Hyundai IONIQ 6 Fastback 1020000000 1 \n", "11821 Toyota Raize 1.0 GR Sport (2 Tone) SUV 200000000 3 \n", "11822 Toyota Sienta 1.5 G MPV 150000000 7 \n", "11823 Mitsubishi Pajero Sport 2.4 Dakar 4X2 SUV 415000000 5 \n", "11824 Toyota Calya 1.2 G MPV 108000000 7 \n", "\n", " estimated_original_price deprecate_percentage year \n", "0 569272976 0.271000 2022 \n", "1 417732165 0.468559 2019 \n", "2 286000000 0.000000 2025 \n", "3 617283950 0.271000 2022 \n", "4 599022075 0.814698 2009 \n", "... ... ... ... \n", "11820 1133333333 0.100000 2024 \n", "11821 274348422 0.271000 2022 \n", "11822 313612737 0.521703 2018 \n", "11823 702806144 0.409510 2020 \n", "11824 225801170 0.521703 2018 \n", "\n", "[11825 rows x 6 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv('/kaggle/working/pemrosesan.csv')\n", "df" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2025-05-07T19:51:40.850347Z", "iopub.status.busy": "2025-05-07T19:51:40.850100Z", "iopub.status.idle": "2025-05-07T19:51:40.883221Z", "shell.execute_reply": "2025-05-07T19:51:40.882458Z", "shell.execute_reply.started": "2025-05-07T19:51:40.850328Z" }, "trusted": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
car_names_cleanpricesagesestimated_original_pricedeprecate_percentageyearp_normeop_norm
0Mazda CX-5 2.5 Elite SUV41500000035692729760.27100020220.0016520.001338
1Mitsubishi Xpander Cross 1.5 Premium MPV22200000064177321650.46855920190.0008840.000982
2Wuling Alvez 1.5 EX SUV28600000002860000000.00000020250.0011390.000672
3Mitsubishi Pajero Sport 2.4 Dakar 4x2 SUV45000000036172839500.27100020220.0017920.001451
4Daihatsu Terios 1.5 TX SUV111000000165990220750.81469820090.0004420.001408
...........................
11820Hyundai IONIQ 6 Fastback1020000000111333333330.10000020240.0040610.002665
11821Toyota Raize 1.0 GR Sport (2 Tone) SUV20000000032743484220.27100020220.0007960.000645
11822Toyota Sienta 1.5 G MPV15000000073136127370.52170320180.0005970.000737
11823Mitsubishi Pajero Sport 2.4 Dakar 4X2 SUV41500000057028061440.40951020200.0016520.001652
11824Toyota Calya 1.2 G MPV10800000072258011700.52170320180.0004300.000531
\n", "

11825 rows × 8 columns

\n", "
" ], "text/plain": [ " car_names_clean prices ages \\\n", "0 Mazda CX-5 2.5 Elite SUV 415000000 3 \n", "1 Mitsubishi Xpander Cross 1.5 Premium MPV 222000000 6 \n", "2 Wuling Alvez 1.5 EX SUV 286000000 0 \n", "3 Mitsubishi Pajero Sport 2.4 Dakar 4x2 SUV 450000000 3 \n", "4 Daihatsu Terios 1.5 TX SUV 111000000 16 \n", "... ... ... ... \n", "11820 Hyundai IONIQ 6 Fastback 1020000000 1 \n", "11821 Toyota Raize 1.0 GR Sport (2 Tone) SUV 200000000 3 \n", "11822 Toyota Sienta 1.5 G MPV 150000000 7 \n", "11823 Mitsubishi Pajero Sport 2.4 Dakar 4X2 SUV 415000000 5 \n", "11824 Toyota Calya 1.2 G MPV 108000000 7 \n", "\n", " estimated_original_price deprecate_percentage year p_norm \\\n", "0 569272976 0.271000 2022 0.001652 \n", "1 417732165 0.468559 2019 0.000884 \n", "2 286000000 0.000000 2025 0.001139 \n", "3 617283950 0.271000 2022 0.001792 \n", "4 599022075 0.814698 2009 0.000442 \n", "... ... ... ... ... \n", "11820 1133333333 0.100000 2024 0.004061 \n", "11821 274348422 0.271000 2022 0.000796 \n", "11822 313612737 0.521703 2018 0.000597 \n", "11823 702806144 0.409510 2020 0.001652 \n", "11824 225801170 0.521703 2018 0.000430 \n", "\n", " eop_norm \n", "0 0.001338 \n", "1 0.000982 \n", "2 0.000672 \n", "3 0.001451 \n", "4 0.001408 \n", "... ... 
\n", "11820 0.002665 \n", "11821 0.000645 \n", "11822 0.000737 \n", "11823 0.001652 \n", "11824 0.000531 \n", "\n", "[11825 rows x 8 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# normalisasi, karena komputer sebatas bisa membaca angka 0-1 dan memudahkan komputasi \n", "p_scaler = MinMaxScaler()\n", "eop_scaler = MinMaxScaler()\n", "df['p_norm'] = p_scaler.fit_transform(df[['prices']])\n", "df['eop_norm'] = eop_scaler.fit_transform(df[['estimated_original_price']])\n", "df" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2025-05-07T19:51:40.884278Z", "iopub.status.busy": "2025-05-07T19:51:40.884008Z", "iopub.status.idle": "2025-05-07T19:51:40.893468Z", "shell.execute_reply": "2025-05-07T19:51:40.892785Z", "shell.execute_reply.started": "2025-05-07T19:51:40.884259Z" }, "trusted": true }, "outputs": [ { "data": { "text/plain": [ "['/kaggle/working/eop_norm.pkl']" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "joblib.dump(p_scaler, \"/kaggle/working/price_norm.pkl\")\n", "joblib.dump(eop_scaler, \"/kaggle/working/eop_norm.pkl\")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "execution": { "iopub.execute_input": "2025-05-07T19:51:40.894668Z", "iopub.status.busy": "2025-05-07T19:51:40.894391Z", "iopub.status.idle": "2025-05-07T19:51:51.842095Z", "shell.execute_reply": "2025-05-07T19:51:51.841383Z", "shell.execute_reply.started": "2025-05-07T19:51:40.894635Z" }, "trusted": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "546eeae9f79f476ba920558002287196", "version_major": 2, "version_minor": 0 }, "text/plain": [ "modules.json: 0%| | 0.00/349 [00:00" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "p_norm 1.000000\n", "eop_norm 0.730175\n", "deprecate_percentage -0.022325\n", "ages -0.028863\n", "Name: p_norm, dtype: 
float64" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "analysis_df = pd.concat([df[['p_norm' , 'eop_norm', 'ages', 'deprecate_percentage']]], axis=1)\n", "\n", "correlations = analysis_df.corr(numeric_only=True)['p_norm'].sort_values(ascending=False)\n", "\n", "plt.figure(figsize=(10, 6))\n", "sns.barplot(x=correlations.values, y=correlations.index)\n", "plt.title('Korelasi Fitur terhadap Target p_norm')\n", "plt.xlabel('Korelasi')\n", "plt.ylabel('Fitur')\n", "plt.grid(True)\n", "plt.tight_layout()\n", "plt.show()\n", "\n", "correlations.head(10)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "execution": { "iopub.execute_input": "2025-05-07T19:51:52.136742Z", "iopub.status.busy": "2025-05-07T19:51:52.135970Z", "iopub.status.idle": "2025-05-07T19:51:52.165009Z", "shell.execute_reply": "2025-05-07T19:51:52.164196Z", "shell.execute_reply.started": "2025-05-07T19:51:52.136717Z" }, "trusted": true }, "outputs": [], "source": [ "X = pd.concat([car_names_df, df[['eop_norm', 'ages']].reset_index(drop=True)], axis=1)\n", "y = df['p_norm']" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "execution": { "iopub.execute_input": "2025-05-07T19:51:52.167635Z", "iopub.status.busy": "2025-05-07T19:51:52.167277Z", "iopub.status.idle": "2025-05-07T19:51:52.188251Z", "shell.execute_reply": "2025-05-07T19:51:52.187478Z", "shell.execute_reply.started": "2025-05-07T19:51:52.167618Z" }, "trusted": true }, "outputs": [], "source": [ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "execution": { "iopub.execute_input": "2025-05-07T19:51:52.189278Z", "iopub.status.busy": "2025-05-07T19:51:52.189023Z", "iopub.status.idle": "2025-05-07T19:51:52.212945Z", "shell.execute_reply": "2025-05-07T19:51:52.212299Z", "shell.execute_reply.started": "2025-05-07T19:51:52.189256Z" }, "trusted": true }, "outputs": [ { "data": { 
"text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
bert_0bert_1bert_2bert_3bert_4bert_5bert_6bert_7bert_8bert_9...bert_376bert_377bert_378bert_379bert_380bert_381bert_382bert_383eop_normages
11378-0.029470-0.013234-0.041143-0.0770630.0264630.027834-0.0502740.031605-0.0038440.016589...-0.0631930.065644-0.057080-0.0188100.0411550.026669-0.0237110.0135440.0012364
10383-0.1148980.106321-0.0012280.049948-0.0840370.012905-0.0190980.118853-0.006946-0.044849...-0.0385720.093288-0.036840-0.0130680.0226240.001857-0.011609-0.0081820.0028012
6106-0.056078-0.041918-0.029557-0.0823100.0183890.034702-0.0722290.045174-0.078204-0.050956...-0.0103630.088272-0.067078-0.0786140.004449-0.020617-0.0072530.0903850.0009876
11605-0.0137870.090064-0.0601550.045836-0.021725-0.0359840.0420960.078376-0.027304-0.036993...0.0110800.027116-0.027053-0.0044180.0485150.030234-0.1051040.0067960.0010581
1451-0.0176720.009427-0.010428-0.0342530.0123390.030820-0.0083250.042181-0.021712-0.017776...-0.0443560.066460-0.077470-0.0443590.0219460.009875-0.0393960.0268650.0007200
..................................................................
9079-0.0065890.065311-0.0840010.045546-0.001115-0.023570-0.0045200.047788-0.083888-0.026300...-0.004477-0.015251-0.071699-0.046150-0.0105360.025932-0.0772950.0215110.0031561
89660.0007380.005126-0.0247870.0882500.054263-0.008861-0.047321-0.011977-0.0557150.041752...0.007776-0.033882-0.043081-0.022249-0.047681-0.022898-0.0350750.0225170.0019591
18180.002087-0.030689-0.024057-0.0349050.0547940.021325-0.0032450.047044-0.0266610.024959...-0.0524930.039645-0.044388-0.0237840.004011-0.025987-0.0157620.0032200.0039328
1629-0.0283340.006627-0.034478-0.0087970.0416490.0385200.0301500.124965-0.0360340.007961...0.033675-0.004588-0.044272-0.0524770.076229-0.0102240.0114230.0488180.0003632
107680.020114-0.015322-0.012969-0.0667820.0631440.054644-0.0214720.0172500.015408-0.004987...-0.0480610.102294-0.041882-0.0412730.020969-0.0154160.0169270.0260580.0013680
\n", "

9460 rows × 386 columns

\n", "
" ], "text/plain": [ " bert_0 bert_1 bert_2 bert_3 bert_4 bert_5 bert_6 \\\n", "11378 -0.029470 -0.013234 -0.041143 -0.077063 0.026463 0.027834 -0.050274 \n", "10383 -0.114898 0.106321 -0.001228 0.049948 -0.084037 0.012905 -0.019098 \n", "6106 -0.056078 -0.041918 -0.029557 -0.082310 0.018389 0.034702 -0.072229 \n", "11605 -0.013787 0.090064 -0.060155 0.045836 -0.021725 -0.035984 0.042096 \n", "1451 -0.017672 0.009427 -0.010428 -0.034253 0.012339 0.030820 -0.008325 \n", "... ... ... ... ... ... ... ... \n", "9079 -0.006589 0.065311 -0.084001 0.045546 -0.001115 -0.023570 -0.004520 \n", "8966 0.000738 0.005126 -0.024787 0.088250 0.054263 -0.008861 -0.047321 \n", "1818 0.002087 -0.030689 -0.024057 -0.034905 0.054794 0.021325 -0.003245 \n", "1629 -0.028334 0.006627 -0.034478 -0.008797 0.041649 0.038520 0.030150 \n", "10768 0.020114 -0.015322 -0.012969 -0.066782 0.063144 0.054644 -0.021472 \n", "\n", " bert_7 bert_8 bert_9 ... bert_376 bert_377 bert_378 \\\n", "11378 0.031605 -0.003844 0.016589 ... -0.063193 0.065644 -0.057080 \n", "10383 0.118853 -0.006946 -0.044849 ... -0.038572 0.093288 -0.036840 \n", "6106 0.045174 -0.078204 -0.050956 ... -0.010363 0.088272 -0.067078 \n", "11605 0.078376 -0.027304 -0.036993 ... 0.011080 0.027116 -0.027053 \n", "1451 0.042181 -0.021712 -0.017776 ... -0.044356 0.066460 -0.077470 \n", "... ... ... ... ... ... ... ... \n", "9079 0.047788 -0.083888 -0.026300 ... -0.004477 -0.015251 -0.071699 \n", "8966 -0.011977 -0.055715 0.041752 ... 0.007776 -0.033882 -0.043081 \n", "1818 0.047044 -0.026661 0.024959 ... -0.052493 0.039645 -0.044388 \n", "1629 0.124965 -0.036034 0.007961 ... 0.033675 -0.004588 -0.044272 \n", "10768 0.017250 0.015408 -0.004987 ... 
-0.048061 0.102294 -0.041882 \n", "\n", " bert_379 bert_380 bert_381 bert_382 bert_383 eop_norm ages \n", "11378 -0.018810 0.041155 0.026669 -0.023711 0.013544 0.001236 4 \n", "10383 -0.013068 0.022624 0.001857 -0.011609 -0.008182 0.002801 2 \n", "6106 -0.078614 0.004449 -0.020617 -0.007253 0.090385 0.000987 6 \n", "11605 -0.004418 0.048515 0.030234 -0.105104 0.006796 0.001058 1 \n", "1451 -0.044359 0.021946 0.009875 -0.039396 0.026865 0.000720 0 \n", "... ... ... ... ... ... ... ... \n", "9079 -0.046150 -0.010536 0.025932 -0.077295 0.021511 0.003156 1 \n", "8966 -0.022249 -0.047681 -0.022898 -0.035075 0.022517 0.001959 1 \n", "1818 -0.023784 0.004011 -0.025987 -0.015762 0.003220 0.003932 8 \n", "1629 -0.052477 0.076229 -0.010224 0.011423 0.048818 0.000363 2 \n", "10768 -0.041273 0.020969 -0.015416 0.016927 0.026058 0.001368 0 \n", "\n", "[9460 rows x 386 columns]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_train" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "execution": { "iopub.execute_input": "2025-05-07T19:51:52.214334Z", "iopub.status.busy": "2025-05-07T19:51:52.213827Z", "iopub.status.idle": "2025-05-07T19:54:15.197146Z", "shell.execute_reply": "2025-05-07T19:54:15.196397Z", "shell.execute_reply.started": "2025-05-07T19:51:52.214306Z" }, "trusted": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "MAE: 0.0001\n", "RMSE: 0.0006\n", "R²: 0.9754\n" ] } ], "source": [ "model = RandomForestRegressor(random_state=42)\n", "model.fit(X_train, y_train)\n", "y_pred = model.predict(X_test)\n", "\n", "mae = mean_absolute_error(y_test, y_pred)\n", "rmse = np.sqrt(mean_squared_error(y_test, y_pred))\n", "r2 = r2_score(y_test, y_pred)\n", "\n", "print(f\"MAE: {mae:.4f}\")\n", "print(f\"RMSE: {rmse:.4f}\")\n", "print(f\"R²: {r2:.4f}\")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "execution": { "iopub.execute_input": "2025-05-07T19:54:20.181580Z", 
"iopub.status.busy": "2025-05-07T19:54:20.180891Z", "iopub.status.idle": "2025-05-07T19:54:20.191401Z", "shell.execute_reply": "2025-05-07T19:54:20.190419Z", "shell.execute_reply.started": "2025-05-07T19:54:20.181557Z" }, "trusted": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
predlabel
29870.0006610.000661
60750.0022700.002270
41560.0011190.001119
81110.0013940.001394
80800.0011130.001115
.........
17040.0011990.001194
51500.0014160.001425
117310.0010970.001095
110020.0005490.000549
106630.0028860.002887
\n", "

2365 rows × 2 columns

\n", "
" ], "text/plain": [ " pred label\n", "2987 0.000661 0.000661\n", "6075 0.002270 0.002270\n", "4156 0.001119 0.001119\n", "8111 0.001394 0.001394\n", "8080 0.001113 0.001115\n", "... ... ...\n", "1704 0.001199 0.001194\n", "5150 0.001416 0.001425\n", "11731 0.001097 0.001095\n", "11002 0.000549 0.000549\n", "10663 0.002886 0.002887\n", "\n", "[2365 rows x 2 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "check_df = pd.DataFrame({'pred': y_pred, 'label': y_test})\n", "check_df" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "execution": { "iopub.execute_input": "2025-05-07T19:54:23.741639Z", "iopub.status.busy": "2025-05-07T19:54:23.741083Z", "iopub.status.idle": "2025-05-07T19:54:23.846173Z", "shell.execute_reply": "2025-05-07T19:54:23.845423Z", "shell.execute_reply.started": "2025-05-07T19:54:23.741616Z" }, "trusted": true }, "outputs": [ { "data": { "text/plain": [ "['model.pkl']" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "joblib.dump(model, 'model.pkl')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Test" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "execution": { "iopub.execute_input": "2025-05-07T19:54:27.229964Z", "iopub.status.busy": "2025-05-07T19:54:27.229464Z", "iopub.status.idle": "2025-05-07T19:54:27.237833Z", "shell.execute_reply": "2025-05-07T19:54:27.237002Z", "shell.execute_reply.started": "2025-05-07T19:54:27.229942Z" }, "trusted": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
car_namescar_prices
02025 Mitsubishi Pajero Sport Dakar Ultimate (4...Rp 764.200.000
12022 Mitsubishi Expander Ultimate CVTRp331.950.000
\n", "
" ], "text/plain": [ " car_names car_prices\n", "0 2025 Mitsubishi Pajero Sport Dakar Ultimate (4... Rp 764.200.000\n", "1 2022 Mitsubishi Expander Ultimate CVT Rp331.950.000" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# contoh testing\n", "test = {\n", " 'car_names': ['2025 Mitsubishi Pajero Sport Dakar Ultimate (4X4) AT', '2022 Mitsubishi Expander Ultimate CVT'],\n", " 'car_prices': ['Rp 764.200.000', 'Rp331.950.000']\n", "}\n", "test_df = pd.DataFrame(test)\n", "test_df" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "execution": { "iopub.execute_input": "2025-05-07T19:54:32.122104Z", "iopub.status.busy": "2025-05-07T19:54:32.121342Z", "iopub.status.idle": "2025-05-07T19:54:32.148102Z", "shell.execute_reply": "2025-05-07T19:54:32.147175Z", "shell.execute_reply.started": "2025-05-07T19:54:32.122068Z" }, "trusted": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
car_namescar_pricesyearcar_names_cleanpricesagesestimated_original_pricedeprecate_percentage
02025 Mitsubishi Pajero Sport Dakar Ultimate (4...Rp 764.200.0002025Mitsubishi Pajero Sport Dakar Ultimate (4X4) AT76420000007642000000.000
12022 Mitsubishi Expander Ultimate CVTRp331.950.0002022Mitsubishi Expander Ultimate CVT33195000034553497940.271
\n", "
" ], "text/plain": [ " car_names car_prices year \\\n", "0 2025 Mitsubishi Pajero Sport Dakar Ultimate (4... Rp 764.200.000 2025 \n", "1 2022 Mitsubishi Expander Ultimate CVT Rp331.950.000 2022 \n", "\n", " car_names_clean prices ages \\\n", "0 Mitsubishi Pajero Sport Dakar Ultimate (4X4) AT 764200000 0 \n", "1 Mitsubishi Expander Ultimate CVT 331950000 3 \n", "\n", " estimated_original_price deprecate_percentage \n", "0 764200000 0.000 \n", "1 455349794 0.271 " ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# current year datetime dynamic\n", "curent_year = datetime.now().year\n", "\n", "# feature engineering\n", "test_df['year'] = test_df['car_names'].str.extract(r'(\\b\\d{4}\\b)')\n", "test_df['car_names_clean'] = test_df['car_names'].str.replace(r'^\\d{4}\\s+', '', regex=True).str.split(r' - ').str[0].str.strip()\n", "test_df['prices'] = test_df['car_prices'].str.replace(r'\\D', '', regex=True).apply(lambda x: int(x) if x else None)\n", "test_df['ages'] = test_df['year'].apply(lambda x: curent_year - int(x))\n", "test_df['estimated_original_price'] = test_df.apply(\n", " lambda row: row['prices'] if row['ages'] == 0 else int(row['prices'] / (0.9 ** row['ages'])),\n", " axis=1\n", ")\n", "test_df['deprecate_percentage'] = ((test_df['estimated_original_price'] - test_df['prices']) / test_df['estimated_original_price'])\n", "test_df" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "execution": { "iopub.execute_input": "2025-05-07T19:54:37.131755Z", "iopub.status.busy": "2025-05-07T19:54:37.130977Z", "iopub.status.idle": "2025-05-07T19:54:37.148763Z", "shell.execute_reply": "2025-05-07T19:54:37.148110Z", "shell.execute_reply.started": "2025-05-07T19:54:37.131732Z" }, "trusted": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
car_namescar_pricesyearcar_names_cleanpricesagesestimated_original_pricedeprecate_percentagep_normeop_norm
02025 Mitsubishi Pajero Sport Dakar Ultimate (4...Rp 764.200.0002025Mitsubishi Pajero Sport Dakar Ultimate (4X4) AT76420000007642000000.0000.0030430.001797
12022 Mitsubishi Expander Ultimate CVTRp331.950.0002022Mitsubishi Expander Ultimate CVT33195000034553497940.2710.0013220.001071
\n", "
" ], "text/plain": [ " car_names car_prices year \\\n", "0 2025 Mitsubishi Pajero Sport Dakar Ultimate (4... Rp 764.200.000 2025 \n", "1 2022 Mitsubishi Expander Ultimate CVT Rp331.950.000 2022 \n", "\n", " car_names_clean prices ages \\\n", "0 Mitsubishi Pajero Sport Dakar Ultimate (4X4) AT 764200000 0 \n", "1 Mitsubishi Expander Ultimate CVT 331950000 3 \n", "\n", " estimated_original_price deprecate_percentage p_norm eop_norm \n", "0 764200000 0.000 0.003043 0.001797 \n", "1 455349794 0.271 0.001322 0.001071 " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p_scaler = joblib.load('price_norm.pkl')\n", "eop_scaler = joblib.load('eop_norm.pkl')\n", "\n", "test_df['p_norm'] = p_scaler.transform(test_df[['prices']])\n", "test_df['eop_norm'] = eop_scaler.transform(test_df[['estimated_original_price']])\n", "test_df" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "execution": { "iopub.execute_input": "2025-05-07T19:54:40.021134Z", "iopub.status.busy": "2025-05-07T19:54:40.020566Z", "iopub.status.idle": "2025-05-07T19:54:40.598197Z", "shell.execute_reply": "2025-05-07T19:54:40.597443Z", "shell.execute_reply.started": "2025-05-07T19:54:40.021108Z" }, "trusted": true }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b3c7fbcafddf4971ad925b043f669452", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Batches: 0%| | 0/1 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
bert_0bert_1bert_2bert_3bert_4bert_5bert_6bert_7bert_8bert_9...bert_376bert_377bert_378bert_379bert_380bert_381bert_382bert_383eop_normages
00.0140660.086013-0.082927-0.061399-0.0537580.035179-0.0107060.091782-0.0158370.025600...-0.0135590.080615-0.132412-0.0479830.037217-0.044200-0.0404560.0874680.0017970
1-0.099028-0.024163-0.014358-0.0337320.055197-0.001037-0.0474340.204618-0.023997-0.052195...0.0081760.049285-0.0471410.001278-0.053189-0.036602-0.0176180.0667200.0010713
\n", "

2 rows × 386 columns

\n", "" ], "text/plain": [ " bert_0 bert_1 bert_2 bert_3 bert_4 bert_5 bert_6 \\\n", "0 0.014066 0.086013 -0.082927 -0.061399 -0.053758 0.035179 -0.010706 \n", "1 -0.099028 -0.024163 -0.014358 -0.033732 0.055197 -0.001037 -0.047434 \n", "\n", " bert_7 bert_8 bert_9 ... bert_376 bert_377 bert_378 bert_379 \\\n", "0 0.091782 -0.015837 0.025600 ... -0.013559 0.080615 -0.132412 -0.047983 \n", "1 0.204618 -0.023997 -0.052195 ... 0.008176 0.049285 -0.047141 0.001278 \n", "\n", " bert_380 bert_381 bert_382 bert_383 eop_norm ages \n", "0 0.037217 -0.044200 -0.040456 0.087468 0.001797 0 \n", "1 -0.053189 -0.036602 -0.017618 0.066720 0.001071 3 \n", "\n", "[2 rows x 386 columns]" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_input = pd.concat([car_names_df, test_df[['eop_norm', 'ages']].reset_index(drop=True)], axis=1)\n", "test_input" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "execution": { "iopub.execute_input": "2025-05-07T19:54:50.894730Z", "iopub.status.busy": "2025-05-07T19:54:50.894336Z", "iopub.status.idle": "2025-05-07T19:54:51.015278Z", "shell.execute_reply": "2025-05-07T19:54:51.014547Z", "shell.execute_reply.started": "2025-05-07T19:54:50.894700Z" }, "trusted": true }, "outputs": [], "source": [ "model = joblib.load('model.pkl')\n", "test_result = model.predict(test_input)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "execution": { "iopub.execute_input": "2025-05-07T19:54:53.094448Z", "iopub.status.busy": "2025-05-07T19:54:53.093810Z", "iopub.status.idle": "2025-05-07T19:54:53.102418Z", "shell.execute_reply": "2025-05-07T19:54:53.101518Z", "shell.execute_reply.started": "2025-05-07T19:54:53.094425Z" }, "trusted": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
predlabel
00.0030070.003043
10.0013820.001322
\n", "
" ], "text/plain": [ " pred label\n", "0 0.003007 0.003043\n", "1 0.001382 0.001322" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# hasil prediksi\n", "check_df_1 = pd.DataFrame({'pred': test_result, 'label': test_df['p_norm']})\n", "check_df_1" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "1. Features -> normalized estimated original price (`eop_norm`), car age (`ages`), and car-name embeddings (`bert_0` to `bert_383`)\n", "2. Results (20% hold-out test set, measured on the normalized price `p_norm`) -> MAE: 0.0001, RMSE: 0.0006, R²: 0.9754\n", "3. Caveat -> `estimated_original_price` is computed from the listed price itself (`prices / 0.9 ** ages` in the feature-engineering cell above), so the target is encoded in the features and these scores are likely optimistic" ] } ], "metadata": { "kaggle": { "accelerator": "nvidiaTeslaT4", "dataSources": [ { "datasetId": 7345863, "sourceId": 11703231, "sourceType": "datasetVersion" } ], "dockerImageVersionId": 31011, "isGpuEnabled": true, "isInternetEnabled": true, "language": "python", "sourceType": "notebook" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.11" } }, "nbformat": 4, "nbformat_minor": 4 }