{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 1. Loading data\n", "## Installing packages" ] }, { "cell_type": "code", "execution_count": 79, "metadata": { "id": "E7Ae3ZiczQVT" }, "outputs": [], "source": [ "%pip install seaborn numpy pandas xgboost -qqU" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Importing libraries" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "Sesct3fTzQVW" }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "from sklearn.model_selection import train_test_split, GridSearchCV\n", "from xgboost import XGBRegressor\n", "import xgboost\n", "from sklearn.metrics import mean_squared_error\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "import calendar\n", "from sklearn.preprocessing import OneHotEncoder\n", "import pickle" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Reading the data and discarding records from years after 2019" ] }, { "cell_type": "code", "execution_count": 122, "metadata": { "id": "WGqXaM-XzQVX" }, "outputs": [], "source": [ "data = pd.read_csv('data.csv')" ] }, { "cell_type": "code", "execution_count": 123, "metadata": { "id": "zBBpsXqszQVY" }, "outputs": [], "source": [ "data = data[~(data['JAHR'] > 2019)]\n", "data = data[data.columns[:5]]" ] }, { "cell_type": "code", "execution_count": 124, "metadata": { "id": "r5SG6rjMzQVY" }, "outputs": [], "source": [ "data.reset_index(drop=True, inplace=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Here I removed the outliers that fall outside the z-score threshold, because such values often don't provide useful information or help the model converge" ] }, { "cell_type": "code", "execution_count": 125, "metadata": { "id": "jJVSlcgGzQVZ" }, "outputs": [], "source": [ "def remove_outliers(df, columns=None, z_threshold=3):\n", "\n", " if columns is None:\n", " columns = 
df.select_dtypes(include=[np.number]).columns\n", "\n", "\n", " df_clean = df.copy()\n", "\n", "\n", " for col in columns:\n", " z_scores = np.abs((df_clean[col] - df_clean[col].mean()) / df_clean[col].std())\n", " df_clean = df_clean[z_scores < z_threshold]\n", "\n", " return df_clean\n", "\n", "\n", "data = remove_outliers(data)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 234 }, "id": "hYPXBmXfzQVa", "outputId": "a153268a-04a1-483a-f4f1-7fd250224248" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0
MONATSZAHL0
AUSPRAEGUNG0
JAHR0
MONAT0
WERT0
\n", "

" ], "text/plain": [ "MONATSZAHL 0\n", "AUSPRAEGUNG 0\n", "JAHR 0\n", "MONAT 0\n", "WERT 0\n", "dtype: int64" ] }, "execution_count": 126, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# checked for null values\n", "data.isna().sum()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 2 . visualizing the data as per the assignment" ] }, { "cell_type": "code", "execution_count": 127, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 421 }, "id": "2KxsVKNB4bCE", "outputId": "06590c0a-c2b2-44a9-f293-5513eeae6a56" }, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAA1IAAAIjCAYAAAAJLyrXAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAABZDklEQVR4nO3deXgN1+PH8c/NdmNLCJIIEWsrsRYtUWoLsbbaqJ1QS6toUapKLdFWq7W1tbT9aqJItVrUUmpfSijxpa3ti9paEmqLNUjm90efzM+VhEyaiPJ+Pc88j3vOuWfO3Ny57ufOzBmbYRiGAAAAAAAZ5pTTAwAAAACAfxuCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighSALDNq1CjZbLZ7sq569eqpXr165uN169bJZrPp22+/vSfr79q1q0qUKHFP1pVZly5dUo8ePeTr6yubzab+/fvf8zEcOXJENptNUVFR93zdOen29+f9JCoqSjabTUeOHMnpoQDAvxpBCkCaUr5spSzu7u7y8/NTaGioPvroI128eDFL1nPixAmNGjVKO3fuzJL+stL9PLaMePfddxUVFaXevXtr1qxZ6ty5c7ptbTab+vbtm2bdt99+K5vNpnXr1mXTSHPO/PnzZbPZ9J///CfdNitXrpTNZtNHH310D0f275LyOdGjR48064cNG2a2+euvv1LVL1myRE2aNFHBggXl7u6uRx55RIMGDdKZM2dSte3atatsNpsqVaokwzDSHEt67+W9e/ean2fnz59P1efdlq5du0qSkpOT9eWXX6pGjRry8vJSvnz59Mgjj6hLly7asmWL2W+JEiUy3OetkpKS5OfnJ5vNpmXLlqW5LSk/XKX1eqasu0WLFhl+bQBY55LTAwBwf4uIiFDJkiV148YNxcXFad26derfv78mTJigRYsWqVKlSmbb4cOH64033rDU/4kTJzR69GiVKFFCVapUyfDzVqxYYWk9mXGnsX3++edKTk7O9jH8E2vWrFHNmjU1cuTIHBtDQECArl69KldX1xwbw500b95cnp6eio6OTjcEREdHy9nZWe3atbvHo8senTt3Vrt27WS327O0X3d3d3333XeaOnWq3NzcHOq++uorubu769q1a6meN2jQII0fP16VK1fWkCFD5OXlpR07duiTTz7R3LlztXr1aj366KOpnvfrr79q/vz5CgsLy/AYZ8+eLV9fX507d07ffvut+Td/8cUXFRISYrY7fPiwRowYoV69eqlOnTpmeenSpSVJr7zyiqZMmaJnnnlGHTt2lIuLi/bv369ly5apVKlSqlmzpiRp0qRJunTpUppj+eSTT7
R161az7a3WrFmjkydPqkSJEpozZ46aNm2a4W0EcO8QpADcUdOmTVW9enXz8dChQ7VmzRq1aNFCTz/9tPbu3atcuXJJklxcXOTikr0fK1euXFHu3LlTfVG71+7XYHCrU6dOKSgoKEfHkPLr//3KbrerdevWioyM1IkTJ+Tn5+dQf+3aNS1YsECNGjWSt7d3Do0yazk7O8vZ2TnL+23SpIkWLVqkZcuW6ZlnnjHLN2/erMOHDyssLEzfffedw3O++uorjR8/Xm3bttWcOXMcxtW1a1fVr19fzz//vHbs2OHw2ZIrVy75+/srIiJCzz33XIZOKTYMQ9HR0erQoYMOHz6sOXPmmEEqODhYwcHBZtvt27drxIgRCg4OVqdOnRz6iY+P19SpU9WzZ0999tlnDnWTJk3S6dOnzcetWrVKcywrVqzQzz//rKefflovvfRSqvrZs2eratWqCg8P15tvvqnLly8rT548d91GAPcWp/YBsKxBgwZ66623dPToUc2ePdssT+saqZUrV6p27drKnz+/8ubNq0cffVRvvvmmpL+va3r88cclSd26dTNPdUm5nqZevXqqUKGCYmNj9dRTTyl37tzmc9O7BiUpKUlvvvmmfH19lSdPHj399NM6fvy4Q5sSJUqkeTrNrX3ebWxpXSN1+fJlvfbaa/L395fdbtejjz6qDz/8MNXpRymn1yxcuFAVKlSQ3W5X+fLltXz58rRf8NucOnVK3bt3l4+Pj9zd3VW5cmXNnDnTrE+5Xuzw4cNaunSpOfasvCYm5W+zZ88e1a9fX7lz51bRokU1btw4h3bpXSOVsu3u7u6qUKGCFixYkOo1TdmO208pTK/Pffv2qXXr1vLy8pK7u7uqV6+uRYsW3XVbOnXqpOTkZM2dOzdV3dKlS3XhwgV17NhRkhQZGakGDRrI29tbdrtdQUFBmjZt2l3Xkd51Selt49atW9WkSRN5enoqd+7cqlu3rjZt2uTQ5uLFi+rfv79KlCghu90ub29vNWrUSDt27LA8lpTTwH766Sc98cQTcnd3V6lSpfTll1/eddtSFC1aVE899ZSio6MdyufMmaOKFSuqQoUKqZ4zevRoFShQQJ999lmqcPfEE09oyJAh+vXXX1Nd++jk5KThw4frl19+0YIFCzI0vk2bNunIkSNq166d2rVrpw0bNuiPP/7I8PalOHz4sAzD0JNPPpmqzmaz3TVwx8XFqXPnzipatKgiIyNT1V+9elULFixQu3bt1KZNG129elXff/+95XECyH4EKQCZknK9zZ1Osdu9e7datGihxMRERUREaPz48Xr66afNL4SBgYGKiIiQJPXq1UuzZs3SrFmz9NRTT5l9nDlzRk2bNlWVKlU0adIk1a9f/47jeuedd7R06VINGTJEr7zyilauXKmQkBBdvXrV0vZlZGy3MgxDTz/9tCZOnKgmTZpowoQJevTRRzV48GANHDgwVfuffvpJL7/8stq1a6dx48bp2rVrCgsLS/OakFtdvXpV9erV06xZs9SxY0d98MEH8vT0VNeuXTV58mRz7LNmzVKhQoVUpUoVc+yFCxe29Brczblz59SkSRNVrlxZ48ePV7ly5TRkyJB0r+lIsWLFCoWFhclms2ns2LFq1aqVunXrpu3bt2d6LLt371bNmjW1d+9evfHGGxo/frzy5MmjVq1a3fWL9lNPPaVixYqlCgDS36f15c6d2zyyMG3aNAUEBOjNN9/U+PHj5e/vr5dffllTpkzJ9Nhvt2bNGj311FNKSEjQyJEj9e677+r8+fNq0KCBfv75Z7PdSy+9pGnTpiksLExTp07VoEGDlCtXLu3duzdT6z148KBat26tRo0aafz48SpQoIC6du2q3bt3Z7iPDh06aPHixebpbDdv3tS8efPUoUOHVG0PHDig/fv365lnnpGHh0ea/XXp0kXS39dQpbWusmXLKiIiIs1rpW43Z84clS5dWo8//rhatmyp3Llz66uvvsrwtqUICAiQJM
2bN09Xrlyx9Nzk5GR16tRJZ86cUXR0tLy8vFK1WbRokS5duqR27drJ19dX9erV05w5c9Lt8+zZs/rrr79SLff7qcfAA8EAgDRERkYakoxt27al28bT09N47LHHzMcjR440bv1YmThxoiHJOH36dLp9bNu2zZBkREZGpqqrW7euIcmYPn16mnV169Y1H69du9aQZBQtWtRISEgwy7/55htDkjF58mSzLCAgwAgPD79rn3caW3h4uBEQEGA+XrhwoSHJePvttx3atW7d2rDZbMbBgwfNMkmGm5ubQ9muXbsMScbHH3+cal23mjRpkiHJmD17tll2/fp1Izg42MibN6/DtgcEBBjNmze/Y3+3jqlPnz5p1s2bN8+QZKxdu9YsS/nbfPnll2ZZYmKi4evra4SFhZllhw8fTvUaVqlSxShSpIhx/vx5s2zFihWGJIfXNOVveut60+uzYcOGRsWKFY1r166ZZcnJyUatWrWMsmXL3nX7Bw8ebEgy9u/fb5ZduHDBcHd3N9q3b2+WXblyJdVzQ0NDjVKlSjmU3f5eStmfDh8+7NDu9m1MTk42ypYta4SGhhrJyckO6y1ZsqTRqFEjs8zT0zPdv9mdpDWWgIAAQ5KxYcMGs+zUqVOG3W43Xnvttbv2mfL+OXv2rOHm5mbMmjXLMAzDWLp0qWGz2YwjR46Ynw8pnwcp+8zEiRPv2LeHh4dRtWpV83F4eLiRJ08ewzAMY+bMmYYkY/78+anGcqvr168bBQsWNIYNG2aWdejQwahcuXKa67zTvm8YhtGlSxdDklGgQAHj2WefNT788ENj7969d9wOwzCMiIgIQ5IxevTodNu0aNHCePLJJ83Hn332meHi4mKcOnXKoV3K63mn5fb9/077OQDrOCIFINPy5s17x9n78ufPL0n6/vvvM/3rqN1uV7du3TLcvkuXLsqXL5/5uHXr1ipSpIh++OGHTK0/o3744Qc5OzvrlVdecSh/7bXXZBhGqqM0ISEh5oXrklSpUiV5eHjo999/v+t6fH191b59e7PM1dVVr7zyii5duqT169dnwdZkTN68eR2uH3Fzc9MTTzxxx204efKkdu7cqfDwcHl6eprljRo1yvT1XGfPntWaNWvUpk0bXbx40fxF/syZMwoNDdWBAwf0559/3rGPlO249ajUd999p2vXrpmn9UkyrweUpAsXLuivv/5S3bp19fvvv+vChQuZGv+tdu7cqQMHDqhDhw46c+aMuS2XL19Ww4YNtWHDBnNfyp8/v7Zu3aoTJ0784/VKUlBQkMPECoULF9ajjz561/fkrQoUKKAmTZqYR3qio6NVq1Yt8yjOrVI+O27dX9OSL18+JSQkpFnXsWPHDB2VWrZsmc6cOeOw37Rv3167du2ydMQtRWRkpD755BOVLFlSCxYs0KBBgxQYGKiGDRum+17buHGjRo8erXr16mn48OFptjlz5ox+/PFHh3GmHL395ptv0nzOd999p5UrV6ZafHx8LG8XAGsIUgAy7dKlS3f8EtS2bVs9+eST6tGjh3x8fNSuXTt98803lkJV0aJFLU0sUbZsWYfHNptNZcqUyfZ75hw9elR+fn6pXo/AwECz/lbFixdP1UeBAgV07ty5u66nbNmycnJy/PhObz1Z6fbr34oVK5aq7G7bkDK+2/9OktKcmS0jDh48KMMw9NZbb6lw4cIOS8qMhadOnbpjH5UqVVKFChUcTvWKjo5WoUKFFBoaapZt2rRJISEhypMnj/Lnz6/ChQub1+1lRZA6cOCAJCk8PDzVtvznP/9RYmKiuZ5x48bpt99+k7+/v5544gmNGjXKUui5XWbfk7fr0KGDVq5cqWPHjmnhwoVpntYn/X+AututFC5evJju54yzs7OGDx+unTt3auHChen2MXv2bJUsWVJ2u10HDx7UwYMHVbp0aeXOnfuOp82lx8nJSX369FFsbKz++usvff/992ratKnWrFmT5uyOKSGuQIECmjNnTqr9N8XXX3
+tGzdu6LHHHjPHefbsWdWoUSPdcT711FMKCQlJtdzPk7wADwpm7QOQKX/88YcuXLigMmXKpNsmV65c2rBhg9auXaulS5dq+fLl+vrrr9WgQQOtWLEiQzOH3XoEIKukN8NXUlJStsxmlpb01nOnX9Wzk91uT/c6spTrQG7/Ypbd23Cnv9OtUoL5oEGDHELPre70Pk3RqVMnvfHGG9q+fbuKFSumtWvX6sUXXzRnizt06JAaNmyocuXKacKECfL395ebm5t++OEHTZw48Y4/EFjdlg8++CDd2wHkzZtXktSmTRvVqVNHCxYs0IoVK/TBBx/o/fff1/z58zM1XXZW/T2ffvpp2e12hYeHKzExUW3atEmzXUr4/+WXX9Lt6+jRo0pISLjj0cqOHTtqzJgxioiISHOWvISEBC1evFjXrl1LM8BHR0frnXfeyfTNxAsWLKinn35aTz/9tOrVq6f169fr6NGj5lE4wzAUHh6uEydOaPHixalmhrxVSlhKayILSfr9999VqlSpTI0TQNYjSAHIlFmzZklSul9cUzg5Oalhw4Zq2LChJkyYoHfffVfDhg3T2rVrFRISkukvL+lJ+UU/hWEYOnjwoMP9rgoUKOBwM84UR48edfiSYmVsAQEBWrVqVapfz/ft22fWZ4WAgAD98ssvSk5OdvhV+5+uJyAgQPv370+zLqU8K7YhpY/b/063ridFgQIFJCnV3+r2o24pfzNXV1eHewFZ1b59ew0dOlTR0dEKCAhQUlKSw2l9ixcvVmJiohYtWuRw9Gbt2rV37Tuj25JyuqeHh0eGtqVIkSJ6+eWX9fLLL+vUqVOqWrWq3nnnnRy971CuXLnUqlUrzZ49W02bNlWhQoXSbPfII4/okUce0cKFCzV58uQ0jzqlzBp4+41lb5VyVKpr165pzm43f/58Xbt2TdOmTUs1lv3792v48OHatGmTateubWUz01S9enWtX79eJ0+eNN/rEyZM0NKlSzVgwAA1b9483ecePnxYmzdvVt++fVW3bl2HuuTkZHXu3FnR0dHpnhYI4N7j1D4Alq1Zs0ZjxoxRyZIlHb5o3u7s2bOpylJ+ZU9MTJQk894oaQWbzPjyyy8dThX69ttvdfLkSYcvlqVLl9aWLVt0/fp1s2zJkiWppkm3MrZmzZopKSlJn3zyiUP5xIkTZbPZsuyLbbNmzRQXF6evv/7aLLt586Y+/vhj5c2bN9UXMCv9btmyRbGxsQ7l58+f15w5c1SlShX5+vr+o7FLf3/xr1KlimbOnOlwKtzKlSu1Z88eh7YBAQFydnbWhg0bHMqnTp3q8Njb21v16tXTp59+qpMnT6Za56339bmT4sWLq06dOvr666/NU8Fq1apl1qccsbn1CM2FCxfSnML6dikB6dZtSUpKSnUfomrVqql06dL68MMP07yRa8q2JCUlpTqV0NvbW35+fua+lZMGDRqkkSNH6q233rpjuxEjRujcuXN66aWXUh2di42N1fvvv68KFSrc9aa7nTp1UpkyZTR69OhUdbNnz1apUqX00ksvqXXr1g7LoEGDlDdvXkun98XFxaV6r0rS9evXtXr1ajk5OZlHQLdt26ahQ4eqWrVqeu+99+7Yb8oYXn/99VTjbNOmjerWrZup0xABZB+OSAG4o2XLlmnfvn26efOm4uPjtWbNGq1cuVIBAQFatGjRHc/Dj4iI0IYNG9S8eXMFBATo1KlTmjp1qooVK2b++lu6dGnlz59f06dPV758+ZQnTx7VqFFDJUuWzNR4vby8VLt2bXXr1k3x8fGaNGmSypQpo549e5ptevTooW+//VZNmjRRmzZtdOjQIc2ePdth8gerY2vZsqXq16+vYcOG6ciRI6pcubJWrFih77//Xv3790/Vd2b16tVLn376qbp27arY2FiVKFFC3377rTZt2qRJkybd9cL99LzxxhuaN2+ennrqKb344osqV66cTpw4oaioKJ08eTJDYSGjxo4dq+bNm6t27dp64Y
UXdPbsWX388ccqX768Q3jw9PTU888/r48//lg2m02lS5fWkiVL0rzeacqUKapdu7YqVqyonj17qlSpUoqPj1dMTIz++OMP7dq1K0Nj69Spk3r16qUTJ05o2LBhDnWNGzeWm5ubWrZsqRdffFGXLl3S559/Lm9v7zQD3K3Kly+vmjVraujQoTp79qy8vLw0d+5c3bx506Gdk5OT/vOf/6hp06YqX768unXrpqJFi+rPP//U2rVr5eHhocWLF+vixYsqVqyYWrdurcqVKytv3rxatWqVtm3bpvHjx2doW7NT5cqVVbly5bu269ixo7Zt26bJkydrz5496tixowoUKKAdO3boiy++UMGCBfXtt9/e9QbYzs7OGjZsWKqJaU6cOKG1a9emmgQmhd1uV2hoqObNm6ePPvooQzfa/uOPP/TEE0+oQYMGatiwoXx9fXXq1Cl99dVX2rVrl/r3769ChQrpypUratu2rW7cuKEWLVqkO1mEj4+PGjVqZP5g4e/vn2a7p59+Wv369dOOHTtUtWrVu44zPdu3b9fbb7+dqrxevXpZclQOeKjk2HyBAO5rKVMkpyxubm6Gr6+v0ahRI2Py5MkO02ynuH3689WrVxvPPPOM4efnZ7i5uRl+fn5G+/btjf/9738Oz/v++++NoKAgw8XFxWHK4bp16xrly5dPc3zpTX/+1VdfGUOHDjW8vb2NXLlyGc2bNzeOHj2a6vnjx483ihYtatjtduPJJ580tm/fnqrPO43t9unPDcMwLl68aAwYMMDw8/MzXF1djbJlyxoffPCBwzTWhpH+FMTpTct+u/j4eKNbt25GoUKFDDc3N6NixYppTtNsZfpzwzCMP/74w+jRo4dRtGhRw8XFxfDy8jJatGhhbNmyJVXb9P42t78uaU1VbhiG8d133xmBgYGG3W43goKCjPnz56f5mp4+fdoICwszcufObRQoUMB48cUXjd9++y3NPg8dOmR06dLF8PX1NVxdXY2iRYsaLVq0ML799tsMvwZnz5417Ha7IcnYs2dPqvpFixYZlSpVMtzd3Y0SJUoY77//vvHFF1+kmk48rffSoUOHjJCQEMNutxs+Pj7Gm2++aaxcuTLNKd7/+9//Gs8995xRsGBBw263GwEBAUabNm2M1atXG4bx91TzgwcPNipXrmzky5fPyJMnj1G5cmVj6tSpd93G9KY/T+u9ktZ2pCW99/Stbp/+/FYLFy40GjVqZBQoUMCw2+1GmTJljNdeey3NtrdOf36rGzduGKVLl3YYy/jx4w1J5uuWlqioKEOS8f3335tld5r+PCEhwZg8ebIRGhpqFCtWzHB1dTXy5ctnBAcHG59//rm5v6e89++21K1b14iNjTUkGW+99Va64zxy5IghyRgwYMBdX0/DSPtveqdxjBkzJt11A0ibzTBy6MpmAABu0bVrV61bty7bZ1gEACArcI0UAAAAAFhEkAIAAAAAiwhSAAAAAGAR10gBAAAAgEUckQIAAAAAiwhSAAAAAGARN+SVlJycrBMnTihfvnyy2Ww5PRwAAAAAOcQwDF28eFF+fn5yckr/uBNBSn/f+Ty9O4kDAAAAePgcP35cxYoVS7eeICUpX758kv5+sTw8PHJ4NAAAAABySkJCgvz9/c2MkB6ClGSezufh4UGQAgAAAHDXS36YbAIAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwyCWnB/AwqTb4y5weAvCvFPtBl5weAgAAgAOOSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiC
AFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWJSjQapEiRKy2Wyplj59+kiSrl27pj59+qhgwYLKmzevwsLCFB8f79DHsWPH1Lx5c+XOnVve3t4aPHiwbt68mRObAwAAAOAhkaNBatu2bTp58qS5rFy5UpL0/PPPS5IGDBigxYsXa968eVq/fr1OnDih5557znx+UlKSmjdvruvXr2vz5s2aOXOmoqKiNGLEiBzZHgAAAAAPB5thGEZODyJF//79tWTJEh04cEAJCQkqXLiwoqOj1bp1a0nSvn37FBgYqJiYGNWsWVPLli1TixYtdOLECfn4+EiSpk+friFDhuj06dNyc3NLcz2JiYlKTEw0HyckJMjf318XLlyQh4dHtm1ftcFfZlvfwIMs9oMuOT0EAADwkEhISJCnp+dds8F9c43U9evXNXv2bL3wwguy2WyKjY3VjRs3FBISYrYpV66cihcvrpiYGElSTEyMKlasaIYoSQoNDVVCQoJ2796d7rrGjh0rT09Pc/H398++DQMAAADwwLlvgtTChQt1/vx5de3aVZIUFxcnNzc35c+f36Gdj4+P4uLizDa3hqiU+pS69AwdOlQXLlwwl+PHj2fdhgAAAAB44Lnk9ABSzJgxQ02bNpWfn1+2r8tut8tut2f7egAAAAA8mO6LI1JHjx7VqlWr1KNHD7PM19dX169f1/nz5x3axsfHy9fX12xz+yx+KY9T2gAAAABAVrsvglRkZKS8vb3VvHlzs6xatWpydXXV6tWrzbL9+/fr2LFjCg4OliQFBwfr119/1alTp8w2K1eulIeHh4KCgu7dBgAAAAB4qOT4qX3JycmKjIxUeHi4XFz+fzienp7q3r27Bg4cKC8vL3l4eKhfv34KDg5WzZo1JUmNGzdWUFCQOnfurHHjxikuLk7Dhw9Xnz59OHUPAAAAQLbJ8SC1atUqHTt2TC+88EKquokTJ8rJyUlhYWFKTExUaGiopk6datY7OztryZIl6t27t4KDg5UnTx6Fh4crIiLiXm4CAAAAgIfMfXUfqZyS0bni/ynuIwVkDveRAgAA98q/7j5SAAAAAPBvQZACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAW5XiQ+vPPP9WpUycVLFhQuXLlUsWKFbV9+3az3jAMjRgxQkWKFFGuXLkUEhKiAwcOOPRx9uxZdezYUR4eHsqfP7+6d++uS5cu3etNAQAAAPCQyNEgde7cOT355JNydXXVsmXLtGfPHo0fP14FChQw24wbN04fffSRpk+frq1btypPnjwKDQ3VtWvXzDYdO3bU7t27tXLlSi1ZskQbNmxQr169cmKTAAAAADwEbIZhGDm18jfeeEObNm3Sxo0b06w3DEN+fn567bXXNGjQIEnShQsX5OPjo6ioKLVr10
579+5VUFCQtm3bpurVq0uSli9frmbNmumPP/6Qn59fqn4TExOVmJhoPk5ISJC/v78uXLggDw+PbNjSv1Ub/GW29Q08yGI/6JLTQwAAAA+JhIQEeXp63jUb5OgRqUWLFql69ep6/vnn5e3trccee0yff/65WX/48GHFxcUpJCTELPP09FSNGjUUExMjSYqJiVH+/PnNECVJISEhcnJy0tatW9Nc79ixY+Xp6Wku/v7+2bSFAAAAAB5EORqkfv/9d02bNk1ly5bVjz/+qN69e+uVV17RzJkzJUlxcXGSJB8fH4fn+fj4mHVxcXHy9vZ2qHdxcZGXl5fZ5nZDhw7VhQsXzOX48eNZvWkAAAAAHmAuObny5ORkVa9eXe+++64k6bHHHtNvv/2m6dOnKzw8PNvWa7fbZbfbs61/AAAAAA+2HD0iVaRIEQUFBTmUBQYG6tixY5IkX19fSVJ8fLxDm/j4eLPO19dXp06dcqi/efOmzp49a7YBAAAAgKyUo0HqySef1P79+x3K/ve//ykgIECSVLJkSfn6+mr16tVmfUJCgrZu3arg4GBJUnBwsM6fP6/Y2FizzZo1a5ScnKwaNWrcg60AAAAA8LDJ0VP7BgwYoFq1aundd99VmzZt9PPPP+uzzz7TZ599Jkmy2Wzq37+/3n77bZUtW1YlS5bUW2+9JT8/P7Vq1UrS30ewmjRpop49e2r69Om6ceOG+vbtq3bt2qU5Yx8AAAAA/FM5GqQef/xxLViwQEOHDlVERIRKliypSZMmqWPHjmab119/XZcvX1avXr10/vx51a5dW8uXL5e7u7vZZs6cOerbt68aNmwoJycnhYWF6aOPPsqJTQIAAADwEMjR+0jdLzI6V/w/xX2kgMzhPlIAAOBe+VfcRwoAAAAA/o0IUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFjkktMDAICHTbXBX+b0EIB/ndgPuuT0ELIUnwNA5txPnwUckQIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAW5WiQGjVqlGw2m8NSrlw5s/7atWvq06ePChYsqLx58yosLEzx8fEOfRw7dkzNmzdX7ty55e3trcGDB+vmzZv3elMAAAAAPERy/D5S5cuX16pVq8zHLi7/P6QBAwZo6dKlmjdvnjw9PdW3b18999xz2rRpkyQpKSlJzZs3l6+vrzZv3qyTJ0+qS5cucnV11bvvvnvPtwUAAADAwyHHg5SLi4t8fX1TlV+4cEEzZsxQdHS0GjRoIEmKjIxUYGCgtmzZopo1a2rFihXas2ePVq1aJR8fH1WpUkVjxozRkCFDNGrUKLm5uaW5zsTERCUmJpqPExISsmfjAAAAADyQcvwaqQMHDsjPz0+lSpVSx44ddezYMUlSbGysbty4oZCQELNtuXLlVLx4ccXExEiSYmJiVLFiRfn4+JhtQkNDlZCQoN27d6e7zrFjx8rT09Nc/P39s2nrAAAAADyIcjRI1ahRQ1FRUVq+fLmmTZumw4cPq06dOrp48aLi4uLk5uam/PnzOzzHx8dHcXFxkqS4uDiHEJVSn1KXnqFDh+rChQvmcvz48azdMAAAAAAPtBw9ta9p06bmvytVqqQaNWooIC
BA33zzjXLlypVt67Xb7bLb7dnWPwAAAIAHW46f2ner/Pnz65FHHtHBgwfl6+ur69ev6/z58w5t4uPjzWuqfH19U83il/I4reuuAAAAACAr3FdB6tKlSzp06JCKFCmiatWqydXVVatXrzbr9+/fr2PHjik4OFiSFBwcrF9//VWnTp0y26xcuVIeHh4KCgq65+MHAAAA8HDI0VP7Bg0apJYtWyogIEAnTpzQyJEj5ezsrPbt28vT01Pdu3fXwIED5eXlJQ8PD/Xr10/BwcGqWbOmJKlx48YKCgpS586dNW7cOMXFxWn48OHq06cPp+4BAAAAyDY5GqT++OMPtW/fXmfOnFHhwoVVu3ZtbdmyRYULF5YkTZw4UU5OTgoLC1NiYqJCQ0M1depU8/nOzs5asmSJevfureDgYOXJk0fh4eGKiIjIqU0CAAAA8BDI0SA1d+7cO9a7u7trypQpmjJlSrptAgIC9MMPP2T10AAAAAAgXffVNVIAAAAA8G9AkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsyFaRKlSqlM2fOpCo/f/68SpUq9Y8HBQAAAAD3s0wFqSNHjigpKSlVeWJiov78889/PCgAAAAAuJ+5WGm8aNEi898//vijPD09zcdJSUlavXq1SpQokWWDAwAAAID7kaUg1apVK0mSzWZTeHi4Q52rq6tKlCih8ePHZ9ngAAAAAOB+ZClIJScnS5JKliypbdu2qVChQtkyKAAAAAC4n1kKUikOHz6c1eMAAAAAgH+NTAUpSVq9erVWr16tU6dOmUeqUnzxxRf/eGAAAAAAcL/KVJAaPXq0IiIiVL16dRUpUkQ2my2rxwUAAAAA961MBanp06crKipKnTt3zurxAAAAAMB9L1P3kbp+/bpq1aqV1WMBAAAAgH+FTAWpHj16KDo6OqvHAgAAAAD/Cpk6te/atWv67LPPtGrVKlWqVEmurq4O9RMmTMiSwQEAAADA/ShTQeqXX35RlSpVJEm//fabQx0TTwAAAAB40GXq1L61a9emu6xZsyZTA3nvvfdks9nUv39/s+zatWvq06ePChYsqLx58yosLEzx8fEOzzt27JiaN2+u3Llzy9vbW4MHD9bNmzczNQYAAAAAyIhMBamstm3bNn366aeqVKmSQ/mAAQO0ePFizZs3T+vXr9eJEyf03HPPmfVJSUlq3ry5rl+/rs2bN2vmzJmKiorSiBEj7vUmAAAAAHiIZOrUvvr169/xFD4rR6UuXbqkjh076vPPP9fbb79tll+4cEEzZsxQdHS0GjRoIEmKjIxUYGCgtmzZopo1a2rFihXas2ePVq1aJR8fH1WpUkVjxozRkCFDNGrUKLm5uWVm8wAAAADgjjJ1RKpKlSqqXLmyuQQFBen69evasWOHKlasaKmvPn36qHnz5goJCXEoj42N1Y0bNxzKy5Urp+LFiysmJkaSFBMTo4oVK8rHx8dsExoaqoSEBO3evTvddSYmJiohIcFhAQAAAICMytQRqYkTJ6ZZPmrUKF26dCnD/cydO1c7duzQtm3bUtXFxcXJzc1N+fPndyj38fFRXFyc2ebWEJVSn1KXnrFjx2r06NEZHicAAAAA3CpLr5Hq1KmTvvjiiwy1PX78uF599VXNmTNH7u7uWTmMuxo6dKguXLhgLsePH7+n6wcAAADw75alQSomJibDoSg2NlanTp
1S1apV5eLiIhcXF61fv14fffSRXFxc5OPjo+vXr+v8+fMOz4uPj5evr68kydfXN9UsfimPU9qkxW63y8PDw2EBAAAAgIzK1Kl9t86cJ0mGYejkyZPavn273nrrrQz10bBhQ/36668OZd26dVO5cuU0ZMgQ+fv7y9XVVatXr1ZYWJgkaf/+/Tp27JiCg4MlScHBwXrnnXd06tQpeXt7S5JWrlwpDw8PBQUFZWbTAAAAAOCuMhWkPD09HR47OTnp0UcfVUREhBo3bpyhPvLly6cKFSo4lOXJk0cFCxY0y7t3766BAwfKy8tLHh4e6tevn4KDg1WzZk1JUuPGjRUUFKTOnTtr3LhxiouL0/Dhw9WnTx/Z7fbMbBoAAAAA3FWmglRkZGRWjyNNEydOlJOTk8LCwpSYmKjQ0FBNnTrVrHd2dtaSJUvUu3dvBQcHK0+ePAoPD1dERMQ9GR8AAACAh1OmglSK2NhY7d27V5JUvnx5PfbYY/9oMOvWrXN47O7urilTpmjKlCnpPicgIEA//PDDP1ovAAAAAFiRqSB16tQptWvXTuvWrTOnJz9//rzq16+vuXPnqnDhwlk5RgAAAAC4r2Rq1r5+/frp4sWL2r17t86ePauzZ8/qt99+U0JCgl555ZWsHiMAAAAA3FcydURq+fLlWrVqlQIDA82yoKAgTZkyJcOTTQAAAADAv1WmjkglJyfL1dU1Vbmrq6uSk5P/8aAAAAAA4H6WqSDVoEEDvfrqqzpx4oRZ9ueff2rAgAFq2LBhlg0OAAAAAO5HmQpSn3zyiRISElSiRAmVLl1apUuXVsmSJZWQkKCPP/44q8cIAAAAAPeVTF0j5e/vrx07dmjVqlXat2+fJCkwMFAhISFZOjgAAAAAuB9ZOiK1Zs0aBQUFKSEhQTabTY0aNVK/fv3Ur18/Pf744ypfvrw2btyYXWMFAAAAgPuCpSA1adIk9ezZUx4eHqnqPD099eKLL2rChAlZNjgAAAAAuB9ZClK7du1SkyZN0q1v3LixYmNj//GgAAAAAOB+ZilIxcfHpznteQoXFxedPn36Hw8KAAAAAO5nloJU0aJF9dtvv6Vb/8svv6hIkSL/eFAAAAAAcD+zFKSaNWumt956S9euXUtVd/XqVY0cOVItWrTIssEBAAAAwP3I0vTnw4cP1/z58/XII4+ob9++evTRRyVJ+/bt05QpU5SUlKRhw4Zly0ABAAAA4H5hKUj5+Pho8+bN6t27t4YOHSrDMCRJNptNoaGhmjJlinx8fLJloAAAAABwv7B8Q96AgAD98MMPOnfunA4ePCjDMFS2bFkVKFAgO8YHAAAAAPcdy0EqRYECBfT4449n5VgAAAAA4F/B0mQTAAAAAACCFAAAAABYRpACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARTkapKZNm6ZKlSrJw8NDHh4eCg4O1rJly8z6a9euqU+fPipYsKDy5s2rsLAwxcfHO/Rx7NgxNW/eXLlz55a3t7cGDx6smzdv3utNAQAAAPAQydEgVaxYMb333nuKjY3V9u3b1aBBAz3zzDPavXu3JGnAgAFavHix5s2bp/Xr1+vEiRN67rnnzOcnJSWpefPmun79ujZv3qyZM2cqKipKI0aMyKlNAgAAAPAQcMnJlbds2dLh8TvvvKNp06Zpy5YtKlasmGbMmKHo6Gg1aNBAkhQZGanAwEBt2bJFNWvW1IoVK7Rnzx6tWrVKPj4+qlKlisaMGaMhQ4Zo1K
hRcnNzy4nNAgAAAPCAu2+ukUpKStLcuXN1+fJlBQcHKzY2Vjdu3FBISIjZply5cipevLhiYmIkSTExMapYsaJ8fHzMNqGhoUpISDCPaqUlMTFRCQkJDgsAAAAAZFSOB6lff/1VefPmld1u10svvaQFCxYoKChIcXFxcnNzU/78+R3a+/j4KC4uTpIUFxfnEKJS6lPq0jN27Fh5enqai7+/f9ZuFAAAAIAHWo4HqUcffVQ7d+7U1q1b1bt3b4WHh2vPnj3Zus6hQ4fqwoUL5nL8+PFsXR8AAACAB0uOXiMlSW5ubipTpowkqVq1atq2bZsmT56stm3b6vr16zp//rzDUan4+Hj5+vpKknx9ffXzzz879Jcyq19Km7TY7XbZ7fYs3hIAAAAAD4scPyJ1u+TkZCUmJqpatWpydXXV6tWrzbr9+/fr2LFjCg4OliQFBwfr119/1alTp8w2K1eulIeHh4KCgu752AEAAAA8HHL0iNTQoUPVtGlTFS9eXBcvXlR0dLTWrVunH3/8UZ6enurevbsGDhwoLy8veXh4qF+/fgoODlbNmjUlSY0bN1ZQUJA6d+6scePGKS4uTsOHD1efPn044gQAAAAg2+RokDp16pS6dOmikydPytPTU5UqVdKPP/6oRo0aSZImTpwoJycnhYWFKTExUaGhoZo6dar5fGdnZy1ZskS9e/dWcHCw8uTJo/DwcEVEROTUJgEAAAB4CORokJoxY8Yd693d3TVlyhRNmTIl3TYBAQH64YcfsnpoAAAAAJCu++4aKQAAAAC43xGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARTkapMaOHavHH39c+fLlk7e3t1q1aqX9+/c7tLl27Zr69OmjggULKm/evAoLC1N8fLxDm2PHjql58+bKnTu3vL29NXjwYN28efNebgoAAACAh0iOBqn169erT58+2rJli1auXKkbN26ocePGunz5stlmwIABWrx4sebNm6f169frxIkTeu6558z6pKQkNW/eXNevX9fmzZs1c+ZMRUVFacSIETmxSQAAAAAeAi45ufLly5c7PI6KipK3t7diY2P11FNP6cKFC5oxY4aio6PVoEEDSVJkZKQCAwO1ZcsW1axZUytWrNCePXu0atUq+fj4qEqVKhozZoyGDBmiUaNGyc3NLSc2DQAAAMAD7L66RurChQuSJC8vL0lSbGysbty4oZCQELNNuXLlVLx4ccXExEiSYmJiVLFiRfn4+JhtQkNDlZCQoN27d6e5nsTERCUkJDgsAAAAAJBR902QSk5OVv/+/fXkk0+qQoUKkqS4uDi5ubkpf/78Dm19fHwUFxdntrk1RKXUp9SlZezYsfL09DQXf3//LN4aAAAAAA+y+yZI9enTR7/99pvmzp2b7esaOnSoLly4YC7Hjx/P9nUCAAAAeHDk6DVSKfr27aslS5Zow4YNKlasmFnu6+ur69ev6/z58w5HpeLj4+Xr62u2+fnnnx36S5nVL6XN7ex2u+x2exZvBQAAAICHRY4ekTIMQ3379tWCBQu0Zs0alSxZ0qG+WrVqcnV11erVq8
2y/fv369ixYwoODpYkBQcH69dff9WpU6fMNitXrpSHh4eCgoLuzYYAAAAAeKjk6BGpPn36KDo6Wt9//73y5ctnXtPk6empXLlyydPTU927d9fAgQPl5eUlDw8P9evXT8HBwapZs6YkqXHjxgoKClLnzp01btw4xcXFafjw4erTpw9HnQAAAABkixwNUtOmTZMk1atXz6E8MjJSXbt2lSRNnDhRTk5OCgsLU2JiokJDQzV16lSzrbOzs5YsWaLevXsrODhYefLkUXh4uCIiIu7VZgAAAAB4yORokDIM465t3N3dNWXKFE2ZMiXdNgEBAfrhhx+ycmgAAAAAkK77ZtY+AAAAAPi3IEgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLcjRIbdiwQS1btpSfn59sNpsWLlzoUG8YhkaMGKEiRYooV65cCgkJ0YEDBxzanD17Vh07dpSHh4fy58+v7t2769KlS/dwKwAAAAA8bHI0SF2+fFmVK1fWlClT0qwfN26cPvroI02fPl1bt25Vnjx5FBoaqmvXrpltOnbsqN27d2vlypVasmSJNmzYoF69et2rTQAAAADwEHLJyZU3bdpUTZs2TbPOMAxNmjRJw4cP1zPPPCNJ+vLLL+Xj46OFCxeqXbt22rt3r5YvX65t27apevXqkqSPP/5YzZo104cffig/P797ti0AAAAAHh737TVShw8fVlxcnEJCQswyT09P1ahRQzExMZKkmJgY5c+f3wxRkhQSEiInJydt3bo13b4TExOVkJDgsAAAAABARt23QSouLk6S5OPj41Du4+Nj1sXFxcnb29uh3sXFRV5eXmabtIwdO1aenp7m4u/vn8WjBwAAAPAgu2+DVHYaOnSoLly4YC7Hjx/P6SEBAAAA+Be5b4OUr6+vJCk+Pt6hPD4+3qzz9fXVqVOnHOpv3ryps2fPmm3SYrfb5eHh4bAAAAAAQEbdt0GqZMmS8vX11erVq82yhIQEbd26VcHBwZKk4OBgnT9/XrGxsWabNWvWKDk5WTVq1LjnYwYAAADwcMjRWfsuXbqkgwcPmo8PHz6snTt3ysvLS8WLF1f//v319ttvq2zZsipZsqTeeust+fn5qVWrVpKkwMBANWnSRD179tT06dN148YN9e3bV+3atWPGPgAAAADZJkeD1Pbt21W/fn3z8cCBAyVJ4eHhioqK0uuvv67Lly+rV69eOn/+vGrXrq3ly5fL3d3dfM6cOXPUt29fNWzYUE5OTgoLC9NHH310z7cFAAAAwMMjR4NUvXr1ZBhGuvU2m00RERGKiIhIt42Xl5eio6OzY3gAAAAAkKb79hopAAAAALhfEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAA
AAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFD0yQmjJlikqUKCF3d3fVqFFDP//8c04PCQAAAMAD6oEIUl9//bUGDhyokSNHaseOHapcubJCQ0N16tSpnB4aAAAAgAfQAxGkJkyYoJ49e6pbt24KCgrS9OnTlTt3bn3xxRc5PTQAAAAADyCXnB7AP3X9+nXFxsZq6NChZpmTk5NCQkIUExOT5nMSExOVmJhoPr5w4YIkKSEhIVvHmpR4NVv7Bx5U2b1v3mt8FgDW8TkAQLo3nwUp6zAM447t/vVB6q+//lJSUpJ8fHwcyn18fLRv3740nzN27FiNHj06Vbm/v3+2jBHAP+P58Us5PQQAOYzPAQDSvf0suHjxojw9PdOt/9cHqcwYOnSoBg4caD5OTk7W2bNnVbBgQdlsthwcGXJKQkKC/P39dfz4cXl4eOT0cADkAD4HAPA5AOnvI1EXL16Un5/fHdv964NUoUKF5OzsrPj4eIfy+Ph4+fr6pvkcu90uu93uUJY/f/7sGiL+RTw8PPjgBB5yfA4A4HMAdzoSleJfP9mEm5ubqlWrptWrV5tlycnJWr16tYKDg3NwZAAAAAAeVP/6I1KSNHDgQIWHh6t69ep64oknNGnSJF2+fFndunXL6aEBAAAAeAA9EEGqbdu2On36tEaMGKG4uDhVqVJFy5cvTzUBBZAeu92ukSNHpjrlE8DDg88BAHwOwAqbcbd5/QAAAAAADv7110gBAAAAwL1GkAIAAAAAiwhSAAAAAGARQQr3jSNHjshms2nnzp2Z7sNms2nhwoVZNqasdOXKFYWFhcnDw0M2m03nz5/X+fPnZbPZtG7dOkVFRaW6n9m6devMtpLSbAP8m2T1e/r2/jIruz47Ro0aJR8fH4f+q1SpolGjRqX7mXdr26z4XATud/Xq1VP//v2zrL9Ro0apSpUqWdafVYZhqFevXvLy8nLYf/Pnz6+oqKg0P7du39ez6rMN2euBmLUP91bLli1148YNLV++PFXdxo0b9dRTT2nXrl2qVKlSDozu/jVz5kxt3LhRmzdvVqFChcwbvZ08eVJeXl6qUaOGmjVrlsOjBLJGTEyMateurSZNmmjp0qU5PZwcsXfvXo0ePVoLFixQzZo1VaBAAUnS6tWrZbfblStXLp08eVKFChXK4ZEC2a9r166aOXNmqvIDBw7kwGj+Dm9VqlTRpEmTsrzv5cuXm4GpVKlS5j7+v//9T/ny5ZOzs7NOnjyZoRu+4v5GkIJl3bt3V1hYmP744w8VK1bMoS4yMlLVq1e3HKKuX7+elUPMMMMwlJSUJBeX7N8VDh06pMDAQFWoUMGh3NfX1/x3rly5sn0cwL0wY8YM9evXTzNmzNCJEyfk5+eX00O65w4dOiRJeuaZZ2Sz2czyggULmv++df8HHnRNmjRRZGSkQ1nhwoVzaDTZ59ChQypSpIhq1arlUO7t7W3+m33/wcCpfbCsRYsWKly4sKKiohzKL126pHnz5ql79+766aefVKdOHeXKlUv+/v565ZVXdPnyZbNtiRIlNGbMGHXp0kUeHh7q1atXqvUkJSXphRdeULly5XTs2DFJ0vfff6+qVavK3d1dpUqV0ujRo3Xz5k2H5/3111969tlnlTt3bpUtW1aLFi0y61IOlS9btkzVqlWT3W7XTz/9pF27dql+/frKly+fPDw8VK1aNW3fvl1S2qcITJo0SSVKlDAfd+3aVa1atdKHH36oIkWKqGDBgurTp49u3Lgh6e9fvsaPH68NGzbIZrOpXr16kv4OntWqVVPevHnl6+
urjh076tSpU5b+Hhl5TYB76dKlS/r666/Vu3dvNW/ePNVnxZ2cPn1a1atX17PPPqvExEQlJibqlVdekbe3t9zd3VW7dm1t27Yt1fNiY2NVvXp15c6dW7Vq1dL+/fsd6qdNm6bSpUvLzc1Njz76qGbNmpXuGNI6pWbnzp2y2Ww6cuSIpP8/JfHHH39UYGCg8ubNqyZNmujkyZOS/v7caNmypSTJycnJDFKbN29WSEiIChYsKE9PT9WrV087duzI8OsjSb/99puaNm2qvHnzysfHR507d9Zff/1lqQ8gJ9jtdvn6+joszs7OqdqldaptymlxKf744w+1b99eXl5eypMnj6pXr66tW7c6PGfWrFkqUaKEPD091a5dO128eFHS3/9nr1+/XpMnT5bNZjP37bRONV64cKHDDyEp3wnu1He/fv107Ngx2Ww287vC4sWL9eSTTyp//vwqWLCgWrRoYf7YklF3+26Fe48gBctcXFzUpUsXRUVF6dbbkM2bN09JSUkKDg5WkyZNFBYWpl9++UVff/21fvrpJ/Xt29ehnw8//FCVK1fWf//7X7311lsOdYmJiXr++ee1c+dObdy4UcWLF9fGjRvVpUsXvfrqq9qzZ48+/fRTRUVF6Z133nF47ujRo9WmTRv98ssvatasmTp27KizZ886tHnjjTf03nvvae/evapUqZI6duyoYsWKadu2bYqNjdUbb7whV1dXS6/L2rVrdejQIa1du1YzZ85UVFSU+aE/f/589ezZU8HBwTp58qTmz58vSbpx44beffdd/fLLL1qwYIEOHz6srl27ZnidGX1NgHvpm2++Ubly5fToo4+qU6dO+uKLL5SRWxYeP35cderUUYUKFfTtt9/Kbrfr9ddf13fffaeZM2dqx44dKlOmjEJDQ1Pt08OGDdP48eO1fft2ubi46IUXXjDrFixYoFdffVWvvfaafvvtN7344ovq1q2b1q5d+4+288qVK/rwww81a9YsbdiwQceOHdOgQYMkSYMGDTJ/eT958qQZsC5duqQXXnhBmzdvVkxMjMqUKaNmzZqZX8Lu5vz582rQoIEee+wxbd++XcuXL1d8fLzatGnzj7YF+De5dOmS6tatqz///FOLFi3Srl279Prrrys5Odlsc+jQIS1cuFBLlizRkiVLtH79er333nuSpMmTJys4OFg9e/Y0909/f/8Mr/9ufUdERKhYsWI6efKk+cPPlStXNHjwYG3fvl2rVq2Sk5OTnn32WYcx322dGfluhXvMADJh7969hiRj7dq1ZlmdOnWMTp06Gd27dzd69erl0H7jxo2Gk5OTcfXqVcMwDCMgIMBo1aqVQ5vDhw8bkoyNGzcaDRs2NGrXrm2cP3/erG/YsKHx7rvvOjxn1qxZRpEiRczHkozhw4ebjy9dumRIMpYtW2YYhmGsXbvWkGQsXLjQoZ98+fIZUVFRaW7ryJEjjcqVKzuUTZw40QgICDAfh4eHGwEBAcbNmzfNsueff95o27at+fjVV1816tatm+Y6Umzbts2QZFy8eNFhvOfOnTMMwzAiIyMNT09Ps31GXhPgXqtVq5YxadIkwzAM48aNG0ahQoXMz4r03tP79u0z/P39jVdeecVITk42DOPv/dfV1dWYM2eO2ff169cNPz8/Y9y4cQ79rVq1ymyzdOlSQ5L5eVOrVi2jZ8+eDmN8/vnnjWbNmpmPJRkLFixIc4yGYRj//e9/DUnG4cOHzXFLMg4ePGi2mTJliuHj42M+XrBggXG3/2aTkpKMfPnyGYsXL05zLCmfi//9738NwzCMMWPGGI0bN3bo4/jx44YkY//+/XdcF5CTwsPDDWdnZyNPnjzm0rp1a8MwDKNu3brGq6++ara9dR9I4enpaURGRhqGYRiffvqpkS9fPuPMmTNprmvkyJFG7ty5jYSEBLNs8ODBRo0aNczHt6/TMFL/H2sYqffjjPR9+3eEtJw+fdqQZPz666+GYaTe12//HMrIdyvcexyRQq
aUK1dOtWrV0hdffCFJOnjwoDZu3Kju3btr165dioqKUt68ec0lNDRUycnJOnz4sNlH9erV0+y7ffv2unz5slasWOFwIeauXbsUERHh0G/Kr0lXrlwx2916fVaePHnk4eGR6nS529c9cOBA9ejRQyEhIXrvvfcsH26XpPLlyzucolCkSJG7nqYXGxurli1bqnjx4sqXL5/q1q0rSeapjHeT0dcEuFf279+vn3/+We3bt5f09xHstm3basaMGek+5+rVq6pTp46ee+4581Qb6e9fYG/cuKEnn3zSbOvq6qonnnhCe/fudejj1v2+SJEikmTuf3v37nXoQ5KefPLJVH1YlTt3bpUuXdphvXfb5+Pj49WzZ0+VLVtWnp6e8vDw0KVLlyzt82vXrnXY58uVKydJmfrcAu6l+vXra+fOneby0UcfZaqfnTt36rHHHpOXl1e6bUqUKKF8+fKZjzOyf2ZUZvo+cOCA2rdvr1KlSsnDw8M85c/Kvp+R71a4t5hsApnWvXt39evXT1OmTFFkZKRKly6tunXr6tKlS3rxxRf1yiuvpHpO8eLFzX/nyZMnzX6bNWum2bNnKyYmRg0aNDDLL126pNGjR+u5555L9Rx3d3fz37efkmez2VIdOr993aNGjVKHDh20dOlSLVu2TCNHjtTcuXP17LPPysnJKdVpSSnXPt0qI+u91eXLlxUaGqrQ0FDNmTNHhQsX1rFjxxQaGprhyTcy+poA98qMGTN08+ZNh8klDMOQ3W7XJ598kuZz7Ha7QkJCtGTJEg0ePFhFixa1vN5b97+UIJbRU2Zu5+T092+Mt+73Gd3nb/+suF14eLjOnDmjyZMnKyAgQHa7XcHBwZb2+ZYtW+r9999PVZcSIIH7VZ48eVSmTJm7tktrX7p1H8zIxExW/0+WlG3/30t/z3gcEBCgzz//XH5+fkpOTlaFChUs7fsZ+W6Fe4sghUxr06aNXn31VUVHR+vLL79U7969ZbPZVLVqVe3ZsydDH5Zp6d27typUqKCnn35aS5cuNY/SVK1aVfv37890v3fzyCOP6JFHHtGAAQPUvn17RUZG6tlnn1XhwoUVFxcnwzDML2hZcU+Xffv26cyZM3rvvffMc7NTJrjIqOx+TQArbt68qS+//FLjx49X48aNHepatWqlr776yjx6cisnJyfNmjVLHTp0UP369bVu3Tr5+fmZk0Ns2rRJAQEBkv7+UrNt2zZL95wJDAzUpk2bFB4ebpZt2rRJQUFBabZPmUXs5MmT5pTlWXUfp02bNmnq1KnmrQ6OHz9uaaKIqlWr6rvvvlOJEiXuyWyjQE4oXLiweV2h9PfRnNvPPPnPf/6js2fP3vGo1J24ubkpKSkp1XovXryoy5cvmz+4ZsW+f+bMGe3fv1+ff/656tSpI+nviSOs+KffrZA9OLUPmZY3b161bdtWQ4cO1cmTJ81JEoYMGaLNmzerb9++2rlzpw4cOKDvv//e0gWR/fr109tvv60WLVqYHzYjRozQl19+qdGjR2v37t3au3ev5s6dq+HDh/+j7bh69ar69u2rdevW6ejRo9q0aZO2bdumwMBASX/PuHf69GmNGzdOhw4d0pQpU7Rs2bJ/tE7p71+Q3Nzc9PHHH+v333/XokWLNGbMGEt9ZNdrAmTGkiVLdO7cOXXv3l0VKlRwWMLCwu54ep+zs7PmzJmjypUrq0GDBoqLi1OePHnUu3dvDR48WMuXL9eePXvUs2dPXblyRd27d8/wuAYPHqyoqChNmzZNBw4c0IQJEzR//nxzYojblSlTRv7+/ho1apQOHDigpUuXavz48ZZfj7SULVtWs2bN0t69e7V161Z17NjR0m0P+vTpo7Nnz6p9+/batm2bDh06pB9//FHdunVL9aUQ+Ldq0KCBPvnkE/33v//V9u3b9dJLLzkcBWrfvr18fX3VqlUrbdq0Sb///ru+++47xcTEZHgdJUqU0NatW3XkyBH99ddfSk5OVo0aNZQ7d269+e
abOnTokKKjoy3NOpqeAgUKqGDBgvrss8908OBBrVmzRgMHDrTUR1Z8t0LWI0jhH+nevbvOnTun0NBQ81SeSpUqaf369frf//6nOnXq6LHHHtOIESMs30emf//+Gj16tJo1a6bNmzcrNDRUS5Ys0YoVK/T444+rZs2amjhxovlLdWY5OzvrzJkz6tKlix555BG1adNGTZs21ejRoyX9/Wv21KlTNWXKFFWuXFk///xzul/ArEiZQn7evHkKCgrSe++9pw8//NBSH9n1mgCZMWPGDIWEhKR5k8mwsDBt375dv/zyS7rPd3Fx0VdffaXy5curQYMGOnXqlN577z2FhYWpc+fOqlq1qg4ePKgff/zRPFKUEa1atdLkyZP14Ycfqnz58vr0008VGRlp3obgdq6urvrqq6+0b98+VapUSe+//77efvvtDK/vTmbMmKFz586patWq6ty5szm1e0b5+flp06ZNSkpKUuPGjVWxYkX1799f+fPnN09JBP7txo8fL39/f9WpU0cdOnTQoEGDlDt3brPezc1NK1askLe3t5o1a6aKFSvqvffeS3Mq9fQMGjRIzs7OCgoKMk+t9/Ly0uzZs/XDDz+oYsWK+uqrrzRq1Kh/vD1OTk6aO3euYmNjVaFCBQ0YMEAffPCBpT6y6rsVspbNuNsJ3QAAAAAAB/x8BQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAABZrmvXrrLZbHrppZdS1fXp00c2m01du3Y1y44fP64XXnhBfn5+cnNzU0BAgF599VWdOXPG4bn16tWTzWbT3LlzHconTZqkEiVKpFrX1atX5eXlpUKFCikxMVGSFBUVJZvNdsflyJEjunLlioYOHarSpUvL3d1dhQsXVt26dfX9999nqJ9u3bqlGk+5cuVkt9sVFxeXqq5evXrq379/qvKoqCjlz58/3ccAgJxBkAIAZAt/f3/NnTtXV69eNcuuXbum6OhoFS9e3Cz7/fffVb16dR04cEBfffWVDh48qOnTp2v16tUKDg7W2bNnHfp1d3fX8OHDdePGjbuO4bvvvlP58uVVrlw5LVy4UJLUtm1bnTx50lyCg4PVs2dPhzJ/f3+99NJLmj9/vj7++GPt27dPy5cvV+vWrc1wd3s/Kctbb70lNzc39ezZ02EsP/30k65evarWrVtr5syZmX1ZAQD3CZecHgAA4MFUtWpVHTp0SPPnz1fHjh0lSfPnz1fx4sVVsmRJs12fPn3k5uamFStWKFeuXJKk4sWL67HHHlPp0qU1bNgwTZs2zWzfvn17LVq0SJ9//rlefvnlO45hxowZ6tSpkwzD0IwZM9S2bVvlypXLXI8kubm5KXfu3PL19XV47qJFizR58mQ1a9ZMklSiRAlVq1bNrL+9H0lav369xo4dq2nTpqlWrVqpxtKhQwfVrVtXr776qoYMGXLX1xAAcP/iiBQAINu88MILioyMNB9/8cUXDqe8nT17Vj/++KNefvnlVKHE19dXHTt21Ndffy3DMMxyDw8PDRs2TBEREbp8+XK66z506JBiYmLUpk0btWnTRhs3btTRo0czPHZfX1/98MMPunjxYobaHz16VM8//7xefPFF9ejRw6Hu4sWLmjdvnjp16qRGjRrpwoUL2rhxY4bHAgC4/xCkAADZplOnTvrpp5909OhRHT16VJs2bVKnTp3M+gMHDsgwDAUGBqb5/MDAQJ07d06nT592KH/55Zfl7u6uCRMmpLvuL774Qk2bNlWBAgXk5eWl0NBQh1B3N5999pk2b96sggUL6vHHH9eAAQO0adOmNNteuXJFrVq1Uvny5TVp0qRU9XPnzlXZsmVVvnx5OTs7q127dpoxY0aqdlOnTlXevHkdlrSuMwMA5DyCFAAg2xQuXFjNmzdXVFSUIiMj1bx5cxUqVChVu1uPOGWE3W5XRESEPvzwQ/3111
+p6pOSkjRz5kyH0NapUydFRUUpOTk5Q+t46qmn9Pvvv2v16tVq3bq1du/erTp16mjMmDGp2nbv3l3nz5/XvHnz5OKS+qz5L774ItVY5s2bl+poV8eOHbVz506HJSIiIkPjBQDcWwQpAEC2euGFFxQVFaWZM2fqhRdecKgrU6aMbDab9u7dm+Zz9+7dqwIFCqhw4cKp6jp16qSAgAC9/fbbqep+/PFH/fnnn2rbtq1cXFzk4uKidu3a6ejRo1q9enWGx+7q6qo6depoyJAhWrFihSIiIjRmzBhdv37dbPP+++9r8eLFWrhwYZohcc+ePdqyZYtef/11cyw1a9bUlStXUs0+6OnpqTJlyjgs3t7eGR4vAODeIUgBALJVkyZNdP36dd24cUOhoaEOdQULFlSjRo00depUh9n9JCkuLk5z5sxR27ZtZbPZUvXr5ORkTuxw5MgRh7oZM2aoXbt2qY7upHdKXUYFBQXp5s2bunbtmiRp2bJlGjZsmCIjI1W5cuU0nzNjxgw99dRT2rVrl8NYBg4c+I/GAgDIWczaBwDIVs7OzuYRJ2dn51T1n3zyiWrVqqXQ0FC9/fbbKlmypHbv3q3BgweraNGieuedd9Ltu3nz5qpRo4Y+/fRT+fj4SJJOnz6txYsXa9GiRapQoYJD+y5duujZZ5/V2bNn5eXldcdx16tXT+3bt1f16tVVsGBB7dmzR2+++abq168vDw8PHThwQB06dFCPHj1Up06dVPeGcnNzU758+TRr1ixFRESkGkuPHj00YcIE7d69W+XLl7/jWG6XlJSknTt3OpTZ7fZ0rzUDAGQ9jkgBALKdh4eHPDw80qwrW7astm/frlKlSqlNmzYqXbq0evXqpfr16ysmJuaugef99983jxBJ0pdffqk8efKoYcOGqdo2bNhQuXLl0uzZs+865tDQUM2cOVONGzdWYGCg+vXrp9DQUH3zzTeSpOjoaJ0/f16ffvqpihQpkmp57rnntGjRIp05c0bPPvtsqv4DAwMVGBiYqaNSly5d0mOPPeawtGzZ0nI/AIDMsxlWr/AFAAAAgIccR6QAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACL/g9GCd5F6xYCdwAAAABJRU5ErkJggg==", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "value_counts = data['MONATSZAHL'].value_counts()\n", "plt.figure(figsize=(10, 6))\n", "sns.barplot(x=value_counts.index, y=value_counts.values)\n", "plt.xlabel('MONATSZAHL')\n", "plt.ylabel('Count')\n", "plt.title('Distribution of Unique Values in MONATSZAHL')\n", "plt.show()\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## i saw some redundant information here so i converted the months to only have the months because the year would already provide relevant insight" ] }, { "cell_type": "code", "execution_count": 128, "metadata": { "id": "oYWFkNVw6GuR" }, "outputs": [], "source": [ "def convert_to_month_name(year_month):\n", "\n", " if year_month == 'Summe':\n", " return 'Summe'\n", " month = str(year_month)[4:6]\n", "\n", "\n", " month_name = calendar.month_name[int(month)]\n", " return month_name\n", "data['MONAT'] = data['MONAT'].apply(convert_to_month_name)" ] }, { "cell_type": "code", "execution_count": 129, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 509 }, "id": "2nZgB2c28Vub", "outputId": "96251141-a163-49e9-ea10-6bfedc892cc6" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
count
MONAT
January140
March140
February140
April140
May140
September140
June140
July140
August140
November140
October140
December140
Summe114
\n", "

" ], "text/plain": [ "MONAT\n", "January 140\n", "March 140\n", "February 140\n", "April 140\n", "May 140\n", "September 140\n", "June 140\n", "July 140\n", "August 140\n", "November 140\n", "October 140\n", "December 140\n", "Summe 114\n", "Name: count, dtype: int64" ] }, "execution_count": 129, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data['MONAT'].value_counts()" ] }, { "cell_type": "code", "execution_count": 130, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 723 }, "id": "5Dh3SgWE82hi", "outputId": "5455f4dd-88ea-4626-8fc8-000cfcc0afb5" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
count
JAHR
201990
201890
201790
201690
201590
201490
201390
201290
201190
201090
200990
200890
200790
200690
200590
200490
200390
200290
200190
200084
\n", "

" ], "text/plain": [ "JAHR\n", "2019 90\n", "2018 90\n", "2017 90\n", "2016 90\n", "2015 90\n", "2014 90\n", "2013 90\n", "2012 90\n", "2011 90\n", "2010 90\n", "2009 90\n", "2008 90\n", "2007 90\n", "2006 90\n", "2005 90\n", "2004 90\n", "2003 90\n", "2002 90\n", "2001 90\n", "2000 84\n", "Name: count, dtype: int64" ] }, "execution_count": 130, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data['JAHR'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### after checking for data imbalances , now we can train the model, we first one hot all the categorical columns, another good approach according to me would be to just use cardinal encoding but my results didnt seem excessively differing so i went with one hot" ] }, { "cell_type": "code", "execution_count": 134, "metadata": { "id": "O1m3dRKbGTN9" }, "outputs": [], "source": [ "one_hot_columns = data.columns[0:4]" ] }, { "cell_type": "code", "execution_count": 136, "metadata": { "id": "poFSghx08_ig" }, "outputs": [], "source": [ "\n", "def one_hot_encode(data, one_hot_columns):\n", "\n", " data_copy = data.copy()\n", "\n", "\n", " encoder = OneHotEncoder(sparse_output=False)\n", "\n", "\n", " encoded_columns = encoder.fit_transform(data_copy[one_hot_columns])\n", "\n", "\n", " encoded_column_names = encoder.get_feature_names_out(one_hot_columns)\n", "\n", "\n", " encoded_df = pd.DataFrame(\n", " encoded_columns,\n", " columns=encoded_column_names,\n", " index=data_copy.index\n", " )\n", "\n", "\n", " result_df = pd.concat([\n", " data_copy.drop(columns=one_hot_columns),\n", " encoded_df\n", " ], axis=1)\n", "\n", " return result_df, encoder\n", "\n", "def transform_new_data(new_data, encoder, original_one_hot_columns):\n", "\n", " new_data_copy = new_data.copy()\n", "\n", "\n", " encoded_columns = encoder.transform(new_data_copy[original_one_hot_columns])\n", "\n", "\n", " encoded_column_names = encoder.get_feature_names_out(original_one_hot_columns)\n", "\n", "\n", " 
encoded_df = pd.DataFrame(\n", " encoded_columns,\n", " columns=encoded_column_names,\n", " index=new_data_copy.index\n", " )\n", "\n", "\n", " result_df = pd.concat([\n", " new_data_copy.drop(columns=original_one_hot_columns),\n", " encoded_df\n", " ], axis=1)\n", "\n", " return result_df" ] }, { "cell_type": "code", "execution_count": 139, "metadata": { "id": "3a18WFefHY3F" }, "outputs": [], "source": [ "data, encoder = one_hot_encode(data, one_hot_columns)" ] }, { "cell_type": "code", "execution_count": 93, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "n8x7Qli20z_-", "outputId": "bd6fd971-0f61-4a08-b5e1-9863b285ea71" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'BUILTIN_PREFETCH_PRESENT': True, 'CUDA_VERSION': [11, 8], 'DEBUG': False, 'GCC_VERSION': [10, 3, 1], 'MM_PREFETCH_PRESENT': True, 'NCCL_VERSION': [2, 16, 5], 'THRUST_VERSION': [1, 15, 1], 'USE_CUDA': True, 'USE_DLOPEN_NCCL': True, 'USE_FEDERATED': True, 'USE_NCCL': True, 'USE_OPENMP': True, 'USE_RMM': False, 'libxgboost': '/usr/local/lib/python3.10/dist-packages/xgboost/lib/libxgboost.so'}\n" ] } ], "source": [ "print(xgboost.build_info())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 3. finally training the model and downloading it as pkl to use in api" ] }, { "cell_type": "code", "execution_count": 142, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "EVn2xhzhzQVa", "outputId": "6218d7ed-0415-45d3-bb38-73cd009bdf5f" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fitting 3 folds for each of 243 candidates, totalling 729 fits\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.10/dist-packages/xgboost/core.py:158: UserWarning: [15:01:28] WARNING: /workspace/src/common/error_msg.cc:27: The tree method `gpu_hist` is deprecated since 2.0.0. To use GPU training, set the `device` parameter to CUDA instead.\n", "\n", " E.g. 
tree_method = \"hist\", device = \"cuda\"\n", "\n", " warnings.warn(smsg, UserWarning)\n", "/usr/local/lib/python3.10/dist-packages/xgboost/core.py:158: UserWarning: [15:01:28] WARNING: /workspace/src/learner.cc:740: \n", "Parameters: { \"predictor\" } are not used.\n", "\n", " warnings.warn(smsg, UserWarning)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Best parameters found: {'colsample_bytree': 0.8, 'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 300, 'subsample': 0.7}\n", " param_n_estimators param_learning_rate param_max_depth \\\n", "0 100 0.01 3 \n", "1 100 0.01 3 \n", "2 100 0.01 3 \n", "3 200 0.01 3 \n", "4 200 0.01 3 \n", ".. ... ... ... \n", "238 200 0.20 7 \n", "239 200 0.20 7 \n", "240 300 0.20 7 \n", "241 300 0.20 7 \n", "242 300 0.20 7 \n", "\n", " param_subsample param_colsample_bytree mean_test_score \n", "0 0.7 0.7 836731.772750 \n", "1 0.8 0.7 829332.460518 \n", "2 0.9 0.7 829277.959373 \n", "3 0.7 0.7 359457.295642 \n", "4 0.8 0.7 351281.456970 \n", ".. ... ... ... \n", "238 0.8 0.9 31653.504011 \n", "239 0.9 0.9 31929.875660 \n", "240 0.7 0.9 32102.913523 \n", "241 0.8 0.9 31576.184742 \n", "242 0.9 0.9 31894.048866 \n", "\n", "[243 rows x 6 columns]\n", "Mean Squared Error on the test set: 28483.477323930427\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.10/dist-packages/xgboost/core.py:158: UserWarning: [15:01:29] WARNING: /workspace/src/common/error_msg.cc:27: The tree method `gpu_hist` is deprecated since 2.0.0. To use GPU training, set the `device` parameter to CUDA instead.\n", "\n", " E.g. 
tree_method = \"hist\", device = \"cuda\"\n", "\n", " warnings.warn(smsg, UserWarning)\n" ] } ], "source": [ "X = data.drop(columns=['WERT'])\n", "y = data['WERT']\n", "\n", "\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", "\n", "\n", "# XGBoost >= 2.0 deprecates tree_method='gpu_hist' and ignores `predictor`;\n", "# GPU training is requested with tree_method='hist' plus device='cuda'.\n", "xgb = XGBRegressor(\n", "    tree_method='hist',\n", "    device='cuda',\n", "    random_state=42,  # subsample / colsample_bytree < 1 draw random samples; pin the seed\n", "    verbosity=2\n", ")\n", "\n", "\n", "param_grid = {\n", "    'n_estimators': [100, 200, 300],\n", "    'learning_rate': [0.01, 0.1, 0.2],\n", "    'max_depth': [3, 5, 7],\n", "    'subsample': [0.7, 0.8, 0.9],\n", "    'colsample_bytree': [0.7, 0.8, 0.9]\n", "}\n", "\n", "\n", "grid_search = GridSearchCV(\n", "    estimator=xgb,\n", "    param_grid=param_grid,\n", "    cv=3,\n", "    scoring='neg_mean_squared_error',\n", "    verbose=2,\n", "    n_jobs=-1\n", ")\n", "\n", "grid_search.fit(X_train, y_train)\n", "\n", "\n", "best_params = grid_search.best_params_\n", "print(\"Best parameters found: \", best_params)\n", "\n", "\n", "results = pd.DataFrame(grid_search.cv_results_)\n", "\n", "# The scorer is negated MSE; flip the sign so the column reads as a plain MSE.\n", "results['mean_test_score'] = -results['mean_test_score']\n", "\n", "\n", "print(results[['param_n_estimators', 'param_learning_rate', 'param_max_depth', 'param_subsample', 'param_colsample_bytree', 'mean_test_score']])\n", "\n", "\n", "# best_estimator_ is the best configuration refitted on the whole training split.\n", "best_model = grid_search.best_estimator_\n", "y_pred = best_model.predict(X_test)\n", "\n", "mse = mean_squared_error(y_test, y_pred)\n", "print(\"Mean Squared Error on the test set: \", mse)\n" ] }, { "cell_type": "code", "execution_count": 165, "metadata": { "id": "zcb7oWV0HwH5" }, "outputs": [], "source": [ "ex = pd.DataFrame({\n", "    'MONATSZAHL': ['Alkoholunfälle'],\n", "    'AUSPRAEGUNG': ['Verletzte und Getötete'],\n", "    'JAHR': [2012],\n", "    'MONAT': ['201207']\n", "})\n", "ex['MONAT'] = ex['MONAT'].apply(convert_to_month_name)\n", "new = transform_new_data(ex, encoder, one_hot_columns)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "EhSjmlIAOAGk" }, "outputs": [], "source": 
[ "# Write the tuned model and the fitted encoder to disk, one pickle file each.\n", "artifacts = {'xgb.pkl': best_model, 'encoder.pkl': encoder}\n", "for filename, artifact in artifacts.items():\n", "    with open(filename, 'wb') as handle:\n", "        pickle.dump(artifact, handle)" ] } ], "metadata": { "accelerator": "GPU", "colab": { "gpuType": "T4", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.6" } }, "nbformat": 4, "nbformat_minor": 0 }