import re
import pandas as pd

# ───────────────────────── Parser ──────────────────────────
def parse_smart_log(path_or_str, top_n=15):
    """Parse a hyper-parameter search log into a ranked DataFrame.

    Parameters
    ----------
    path_or_str : str or os.PathLike
        Either the log text itself or a path to a UTF-8 text file.  A ``str``
        is treated as log text when it contains a newline or the step marker
        ``Шаг``; any other value is opened as a file.
    top_n : int or None, default 15
        How many rows to keep after sorting; ``None`` keeps every row.

    Returns
    -------
    pandas.DataFrame
        One row per step for which both a DEV and a TEST ``MEAN`` value were
        found.  Columns are ``step``, one column per hyper-parameter named in
        the step header, ``dev``, ``test`` and ``gap``.  ``gap`` is read from
        the ``GAP = ...`` line of the step when present, otherwise computed as
        ``dev - test`` rounded to 4 decimals.  Rows are sorted by ``test`` in
        descending order and re-indexed from 0.  The frame is empty when no
        complete step is found.

    Raises
    ------
    OSError
        If ``path_or_str`` is treated as a path and cannot be opened.
    ValueError
        If a matched ``MEAN``/``GAP`` token is not a valid float (e.g. ``"."``).
    """
    # Local import so the function does not depend on an import added elsewhere.
    import ast

    dev_marker = "Результаты (DEV):"
    test_marker = "Результаты (TEST):"

    # Read either from the string that was passed in or from a file.  The
    # isinstance check lets pathlib.Path objects reach open() instead of
    # failing on the `in` test.
    if isinstance(path_or_str, str) and ('\n' in path_or_str or 'Шаг' in path_or_str):
        lines = path_or_str.splitlines()
    else:
        with open(path_or_str, encoding='utf-8') as f:
            lines = f.readlines()

    step_re = re.compile(
        r"Шаг\s+(\d+):\s*([^=]+?)=\s*\((.*?)\)"
    )
    mean_re = re.compile(r"MEAN\s*=\s*([0-9.]+)")
    gap_re = re.compile(r"GAP\s*=\s*([+-]?[0-9.]+)")

    def coerce(token):
        """Turn '0.001' into a float and '8' into an int; keep other text as str."""
        # literal_eval only accepts Python literals, so a bare word such as
        # `max` stays a string instead of becoming a builtin object, and
        # nothing from the log is ever executed.
        try:
            return ast.literal_eval(token)
        except (ValueError, SyntaxError, TypeError):
            return token

    def first_value(pattern, start, stop_markers):
        """Return the first float matched by `pattern` from line `start` on.

        The scan ends (returning None) at the next step header or at a line
        containing one of `stop_markers`, so a value missing from this step
        is never borrowed from a later section of the log.
        """
        for j in range(start, len(lines)):
            text = lines[j]
            if step_re.search(text) or any(mark in text for mark in stop_markers):
                return None
            found = pattern.search(text)
            if found:
                return float(found.group(1))
        return None

    def is_complete(record):
        # Key presence, not truthiness: a metric of exactly 0.0 is still a metric.
        return 'dev' in record and 'test' in record

    def finalize(record):
        # The GAP values written by the logs follow dev - test (for example
        # dev 0.5904, test 0.5756, gap 0.0148), so the fallback uses that sign.
        record.setdefault('gap', round(record['dev'] - record['test'], 4))
        return record

    rows, current = [], {}

    for i, raw in enumerate(lines):
        line = raw.rstrip("\n")

        # ── 1. look for a "Шаг N: …" header ──────────────
        header = step_re.search(line)
        if header:
            # the previous step is stored only if it collected both metrics
            if is_complete(current):
                rows.append(finalize(current))
            # start a new step
            current = {'step': int(header.group(1))}

            keys = [k.strip() for k in header.group(2).split('+')]
            raw_vals = re.findall(r"'[^']*'|[^,]+", header.group(3))
            vals = [v.strip().strip("'") for v in raw_vals]
            for k, v in zip(keys, vals):
                current[k] = coerce(v)

        # ── 2. DEV results header: take the MEAN that follows ──
        if dev_marker in line:
            value = first_value(mean_re, i + 1, (dev_marker, test_marker))
            if value is not None:
                current['dev'] = value

        # ── 3. TEST results header: MEAN plus optional GAP ─────
        if test_marker in line:
            value = first_value(mean_re, i + 1, (dev_marker, test_marker))
            if value is not None:
                current['test'] = value
            # GAP is only bounded by the next step header.
            value = first_value(gap_re, i + 1, ())
            if value is not None:
                current['gap'] = value

    # do not forget the last step in the log
    if is_complete(current):
        rows.append(finalize(current))

    df = pd.DataFrame(rows)
    if not df.empty:
        df = df.sort_values('test', ascending=False)
        if top_n is not None:
            df = df.head(top_n)
        df = df.reset_index(drop=True)
    return df
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratiodevtestgap
0370.000184adamplateau0.050.59040.57560.0148
1380.000184adamplateau0.100.59040.57560.0148
2470.000185adamhuggingface_cosine_with_restarts0.050.58770.57710.0106
3400.000184adamhuggingface_cosine_with_restarts0.100.58560.57510.0105
4640.0001165adamhuggingface_cosine_with_restarts0.100.58300.57050.0126
5450.000185adamplateau0.050.58140.5825-0.0011
6460.000185adamplateau0.100.58140.5825-0.0011
730.001084sgdhuggingface_cosine_with_restarts0.050.58100.57880.0022
840.001084sgdhuggingface_cosine_with_restarts0.100.58010.57700.0031
9480.000185adamhuggingface_cosine_with_restarts0.100.57980.57310.0066
10210.0010164adamplateau0.050.57970.56860.0111
11220.0010164adamplateau0.100.57970.56860.0111
12620.0001165adamplateau0.100.57950.57260.0069
13610.0001165adamplateau0.050.57950.57260.0069
14530.0001164adamplateau0.050.57850.56780.0107
\n", "
# Parse the run log, asking the parser to keep 25 rows.
df = parse_smart_log("C:/Users/Alexandr/Desktop/sampling/10.txt", top_n=25)

from IPython.display import display

# Show every column and allow 160 characters per line of text output.
pd.set_option("display.max_columns", None)
pd.set_option("display.width", 160)

# Render the first 15 rows of the parsed table.
display(df.iloc[:15])
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratiodevtestgap
0560.0001164adamhuggingface_cosine_with_restarts0.100.58910.57770.0114
130.001084sgdhuggingface_cosine_with_restarts0.050.58530.58230.0030
2530.0001164adamplateau0.050.58450.57740.0072
3540.0001164adamplateau0.100.58450.57740.0072
4300.0010165adamplateau0.100.58390.56940.0145
5290.0010165adamplateau0.050.58390.56940.0145
6620.0001165adamplateau0.100.58380.57750.0063
7610.0001165adamplateau0.050.58380.57750.0063
8380.000184adamplateau0.100.58340.5836-0.0002
9370.000184adamplateau0.050.58340.5836-0.0002
10400.000184adamhuggingface_cosine_with_restarts0.100.58220.56630.0159
11190.0010164sgdhuggingface_cosine_with_restarts0.050.58220.58030.0019
12210.0010164adamplateau0.050.58050.57400.0065
13220.0010164adamplateau0.100.58050.57400.0065
14200.0010164sgdhuggingface_cosine_with_restarts0.100.58030.57060.0097
\n", "
# Parse the run log, asking the parser to keep 25 rows.
df = parse_smart_log("C:/Users/Alexandr/Desktop/sampling/20.txt", top_n=25)

from IPython.display import display

# Show every column and allow 160 characters per line of text output.
pd.set_option("display.max_columns", None)
pd.set_option("display.width", 160)

# Render the first 15 rows of the parsed table.
display(df.iloc[:15])
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratiodevtestgap
0640.0001165adamhuggingface_cosine_with_restarts0.100.58120.57120.0100
1470.000185adamhuggingface_cosine_with_restarts0.050.58110.57100.0101
2480.000185adamhuggingface_cosine_with_restarts0.100.58000.57220.0078
3200.0010164sgdhuggingface_cosine_with_restarts0.100.57910.57240.0067
4270.0010165sgdhuggingface_cosine_with_restarts0.050.57900.57460.0044
5190.0010164sgdhuggingface_cosine_with_restarts0.050.57830.57000.0083
640.001084sgdhuggingface_cosine_with_restarts0.100.57740.56540.0120
7540.0001164adamplateau0.100.57730.56970.0075
8530.0001164adamplateau0.050.57730.56970.0075
9380.000184adamplateau0.100.57610.57370.0024
10370.000184adamplateau0.050.57610.57370.0024
1130.001084sgdhuggingface_cosine_with_restarts0.050.57590.57360.0023
12630.0001165adamhuggingface_cosine_with_restarts0.050.57570.56490.0108
13290.0010165adamplateau0.050.57540.56650.0090
14300.0010165adamplateau0.100.57540.56650.0090
\n", "
# Parse the run log, asking the parser to keep 25 rows.
df = parse_smart_log("C:/Users/Alexandr/Desktop/sampling/30.txt", top_n=25)

from IPython.display import display

# Show every column and allow 160 characters per line of text output.
pd.set_option("display.max_columns", None)
pd.set_option("display.width", 160)

# Render the first 15 rows of the parsed table.
display(df.iloc[:15])
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratiodevtestgap
0370.000184adamplateau0.050.58270.57340.0093
1380.000184adamplateau0.100.58270.57340.0093
2560.0001164adamhuggingface_cosine_with_restarts0.100.58210.55890.0232
330.001084sgdhuggingface_cosine_with_restarts0.050.58130.58010.0012
4390.000184adamhuggingface_cosine_with_restarts0.050.58100.57160.0094
5170.0010164sgdplateau0.050.58060.57570.0049
6180.0010164sgdplateau0.100.58060.57570.0049
7630.0001165adamhuggingface_cosine_with_restarts0.050.57980.57670.0031
8280.0010165sgdhuggingface_cosine_with_restarts0.100.57970.57100.0087
9190.0010164sgdhuggingface_cosine_with_restarts0.050.57950.57310.0065
1040.001084sgdhuggingface_cosine_with_restarts0.100.57910.56810.0110
11200.0010164sgdhuggingface_cosine_with_restarts0.100.57900.56820.0109
1210.001084sgdplateau0.050.57780.56740.0104
1320.001084sgdplateau0.100.57780.56740.0104
14640.0001165adamhuggingface_cosine_with_restarts0.100.57550.56700.0085
\n", "
# Parse the run log, asking the parser to keep 25 rows.
df = parse_smart_log("C:/Users/Alexandr/Desktop/sampling/40.txt", top_n=25)

from IPython.display import display

# Show every column and allow 160 characters per line of text output.
pd.set_option("display.max_columns", None)
pd.set_option("display.width", 160)

# Render the first 15 rows of the parsed table.
display(df.iloc[:15])
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratiodevtestgap
0560.0001164adamhuggingface_cosine_with_restarts0.100.59190.57730.0146
1400.000184adamhuggingface_cosine_with_restarts0.100.59110.57710.0140
2540.0001164adamplateau0.100.58790.57740.0105
3530.0001164adamplateau0.050.58790.57740.0105
4370.000184adamplateau0.050.58730.57220.0152
5380.000184adamplateau0.100.58730.57220.0152
6180.0010164sgdplateau0.100.58610.57610.0100
7170.0010164sgdplateau0.050.58610.57610.0100
8190.0010164sgdhuggingface_cosine_with_restarts0.050.58380.58260.0012
9620.0001165adamplateau0.100.58370.57320.0105
10610.0001165adamplateau0.050.58370.57320.0105
11550.0001164adamhuggingface_cosine_with_restarts0.050.58360.56920.0144
12390.000184adamhuggingface_cosine_with_restarts0.050.58340.57050.0129
1340.001084sgdhuggingface_cosine_with_restarts0.100.58260.57950.0031
1430.001084sgdhuggingface_cosine_with_restarts0.050.58250.57930.0032
\n", "
# Parse the run log, asking the parser to keep 25 rows.
df = parse_smart_log("C:/Users/Alexandr/Desktop/sampling/50.txt", top_n=25)

from IPython.display import display

# Show every column and allow 160 characters per line of text output.
pd.set_option("display.max_columns", None)
pd.set_option("display.width", 160)

# Render the first 15 rows of the parsed table.
display(df.iloc[:15])
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratioweight_decaydropoutdevtestgap
0270.000185adamplateau0.050.00.200.58500.57680.0082
1330.000185adamplateau0.100.00.200.58500.57680.0082
2320.000185adamplateau0.100.00.150.58270.57320.0095
3260.000185adamplateau0.050.00.150.58270.57320.0095
4490.0001164adamplateau0.050.00.100.58270.57090.0119
5550.0001164adamplateau0.100.00.100.58270.57090.0119
6360.000185adamplateau0.100.10.200.58200.57170.0104
7300.000185adamplateau0.050.10.200.58200.57170.0104
8290.000185adamplateau0.050.10.150.58150.57190.0096
9350.000185adamplateau0.100.10.150.58150.57190.0096
10510.0001164adamplateau0.050.00.200.58100.56940.0116
11570.0001164adamplateau0.100.00.200.58100.56940.0116
12680.0001164adamhuggingface_cosine_with_restarts0.100.00.150.58060.56680.0138
1330.000184adamplateau0.050.00.200.57970.57090.0088
1490.000184adamplateau0.100.00.200.57970.57090.0088
1580.000184adamplateau0.100.00.150.57910.56690.0123
1620.000184adamplateau0.050.00.150.57910.56690.0123
17610.0001164adamhuggingface_cosine_with_restarts0.050.00.100.57900.56640.0126
18310.000185adamplateau0.100.00.100.57880.56950.0093
19250.000185adamplateau0.050.00.100.57880.56950.0093
20770.0001165adamplateau0.050.10.150.57850.57330.0051
21830.0001165adamplateau0.100.10.150.57850.57330.0051
22500.0001164adamplateau0.050.00.150.57790.56700.0109
23560.0001164adamplateau0.100.00.150.57790.56700.0109
24620.0001164adamhuggingface_cosine_with_restarts0.050.00.150.57770.56220.0155
\n", "
" ], "text/plain": [ " step lr num_transformer_heads tr_layer_number optimizer scheduler_type warmup_ratio weight_decay dropout dev test \\\n", "0 27 0.0001 8 5 adam plateau 0.05 0.0 0.20 0.5850 0.5768 \n", "1 33 0.0001 8 5 adam plateau 0.10 0.0 0.20 0.5850 0.5768 \n", "2 32 0.0001 8 5 adam plateau 0.10 0.0 0.15 0.5827 0.5732 \n", "3 26 0.0001 8 5 adam plateau 0.05 0.0 0.15 0.5827 0.5732 \n", "4 49 0.0001 16 4 adam plateau 0.05 0.0 0.10 0.5827 0.5709 \n", "5 55 0.0001 16 4 adam plateau 0.10 0.0 0.10 0.5827 0.5709 \n", "6 36 0.0001 8 5 adam plateau 0.10 0.1 0.20 0.5820 0.5717 \n", "7 30 0.0001 8 5 adam plateau 0.05 0.1 0.20 0.5820 0.5717 \n", "8 29 0.0001 8 5 adam plateau 0.05 0.1 0.15 0.5815 0.5719 \n", "9 35 0.0001 8 5 adam plateau 0.10 0.1 0.15 0.5815 0.5719 \n", "10 51 0.0001 16 4 adam plateau 0.05 0.0 0.20 0.5810 0.5694 \n", "11 57 0.0001 16 4 adam plateau 0.10 0.0 0.20 0.5810 0.5694 \n", "12 68 0.0001 16 4 adam huggingface_cosine_with_restarts 0.10 0.0 0.15 0.5806 0.5668 \n", "13 3 0.0001 8 4 adam plateau 0.05 0.0 0.20 0.5797 0.5709 \n", "14 9 0.0001 8 4 adam plateau 0.10 0.0 0.20 0.5797 0.5709 \n", "15 8 0.0001 8 4 adam plateau 0.10 0.0 0.15 0.5791 0.5669 \n", "16 2 0.0001 8 4 adam plateau 0.05 0.0 0.15 0.5791 0.5669 \n", "17 61 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.0 0.10 0.5790 0.5664 \n", "18 31 0.0001 8 5 adam plateau 0.10 0.0 0.10 0.5788 0.5695 \n", "19 25 0.0001 8 5 adam plateau 0.05 0.0 0.10 0.5788 0.5695 \n", "20 77 0.0001 16 5 adam plateau 0.05 0.1 0.15 0.5785 0.5733 \n", "21 83 0.0001 16 5 adam plateau 0.10 0.1 0.15 0.5785 0.5733 \n", "22 50 0.0001 16 4 adam plateau 0.05 0.0 0.15 0.5779 0.5670 \n", "23 56 0.0001 16 4 adam plateau 0.10 0.0 0.15 0.5779 0.5670 \n", "24 62 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.0 0.15 0.5777 0.5622 \n", "\n", " gap \n", "0 0.0082 \n", "1 0.0082 \n", "2 0.0095 \n", "3 0.0095 \n", "4 0.0119 \n", "5 0.0119 \n", "6 0.0104 \n", "7 0.0104 \n", "8 0.0096 \n", "9 0.0096 \n", "10 0.0116 \n", 
# Parse the run log, asking the parser to keep 25 rows.
df = parse_smart_log("C:/Users/Alexandr/Desktop/sampling/60.txt", top_n=25)

from IPython.display import display

# Show every column and allow 160 characters per line of text output.
pd.set_option("display.max_columns", None)
pd.set_option("display.width", 160)

# Render the first 25 rows of the parsed table.
display(df.iloc[:25])
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratioweight_decaydropoutdevtestgap
0430.000185adamhuggingface_cosine_with_restarts0.100.00.100.59350.57710.0164
1910.0001165adamhuggingface_cosine_with_restarts0.100.00.100.59020.56800.0221
2730.0001165adamplateau0.050.00.100.59000.56750.0224
3790.0001165adamplateau0.100.00.100.59000.56750.0224
4810.0001165adamplateau0.100.00.200.58580.57200.0138
5750.0001165adamplateau0.050.00.200.58580.57200.0138
6800.0001165adamplateau0.100.00.150.58560.57140.0141
7740.0001165adamplateau0.050.00.150.58560.57140.0141
8850.0001165adamhuggingface_cosine_with_restarts0.050.00.100.58480.56370.0211
9250.000185adamplateau0.050.00.100.58440.56900.0154
10310.000185adamplateau0.100.00.100.58440.56900.0154
11870.0001165adamhuggingface_cosine_with_restarts0.050.00.200.58370.56400.0197
12930.0001165adamhuggingface_cosine_with_restarts0.100.00.200.58340.56530.0180
13860.0001165adamhuggingface_cosine_with_restarts0.050.00.150.58320.56440.0189
14920.0001165adamhuggingface_cosine_with_restarts0.100.00.150.58190.56400.0180
15390.000185adamhuggingface_cosine_with_restarts0.050.00.200.58140.57290.0085
16450.000185adamhuggingface_cosine_with_restarts0.100.00.200.58120.57510.0060
17370.000185adamhuggingface_cosine_with_restarts0.050.00.100.58070.57170.0090
18320.000185adamplateau0.100.00.150.58020.56520.0150
19260.000185adamplateau0.050.00.150.58020.56520.0150
20270.000185adamplateau0.050.00.200.57930.56330.0160
21330.000185adamplateau0.100.00.200.57930.56330.0160
2290.000184adamplateau0.100.00.200.57830.56460.0137
2330.000184adamplateau0.050.00.200.57830.56460.0137
24490.0001164adamplateau0.050.00.100.57830.56190.0164
\n", "
" ], "text/plain": [ " step lr num_transformer_heads tr_layer_number optimizer scheduler_type warmup_ratio weight_decay dropout dev test \\\n", "0 43 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.0 0.10 0.5935 0.5771 \n", "1 91 0.0001 16 5 adam huggingface_cosine_with_restarts 0.10 0.0 0.10 0.5902 0.5680 \n", "2 73 0.0001 16 5 adam plateau 0.05 0.0 0.10 0.5900 0.5675 \n", "3 79 0.0001 16 5 adam plateau 0.10 0.0 0.10 0.5900 0.5675 \n", "4 81 0.0001 16 5 adam plateau 0.10 0.0 0.20 0.5858 0.5720 \n", "5 75 0.0001 16 5 adam plateau 0.05 0.0 0.20 0.5858 0.5720 \n", "6 80 0.0001 16 5 adam plateau 0.10 0.0 0.15 0.5856 0.5714 \n", "7 74 0.0001 16 5 adam plateau 0.05 0.0 0.15 0.5856 0.5714 \n", "8 85 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.0 0.10 0.5848 0.5637 \n", "9 25 0.0001 8 5 adam plateau 0.05 0.0 0.10 0.5844 0.5690 \n", "10 31 0.0001 8 5 adam plateau 0.10 0.0 0.10 0.5844 0.5690 \n", "11 87 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.0 0.20 0.5837 0.5640 \n", "12 93 0.0001 16 5 adam huggingface_cosine_with_restarts 0.10 0.0 0.20 0.5834 0.5653 \n", "13 86 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.0 0.15 0.5832 0.5644 \n", "14 92 0.0001 16 5 adam huggingface_cosine_with_restarts 0.10 0.0 0.15 0.5819 0.5640 \n", "15 39 0.0001 8 5 adam huggingface_cosine_with_restarts 0.05 0.0 0.20 0.5814 0.5729 \n", "16 45 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.0 0.20 0.5812 0.5751 \n", "17 37 0.0001 8 5 adam huggingface_cosine_with_restarts 0.05 0.0 0.10 0.5807 0.5717 \n", "18 32 0.0001 8 5 adam plateau 0.10 0.0 0.15 0.5802 0.5652 \n", "19 26 0.0001 8 5 adam plateau 0.05 0.0 0.15 0.5802 0.5652 \n", "20 27 0.0001 8 5 adam plateau 0.05 0.0 0.20 0.5793 0.5633 \n", "21 33 0.0001 8 5 adam plateau 0.10 0.0 0.20 0.5793 0.5633 \n", "22 9 0.0001 8 4 adam plateau 0.10 0.0 0.20 0.5783 0.5646 \n", "23 3 0.0001 8 4 adam plateau 0.05 0.0 0.20 0.5783 0.5646 \n", "24 49 0.0001 16 4 adam plateau 0.05 0.0 0.10 0.5783 0.5619 \n", 
"\n", " gap \n", "0 0.0164 \n", "1 0.0221 \n", "2 0.0224 \n", "3 0.0224 \n", "4 0.0138 \n", "5 0.0138 \n", "6 0.0141 \n", "7 0.0141 \n", "8 0.0211 \n", "9 0.0154 \n", "10 0.0154 \n", "11 0.0197 \n", "12 0.0180 \n", "13 0.0189 \n", "14 0.0180 \n", "15 0.0085 \n", "16 0.0060 \n", "17 0.0090 \n", "18 0.0150 \n", "19 0.0150 \n", "20 0.0160 \n", "21 0.0160 \n", "22 0.0137 \n", "23 0.0137 \n", "24 0.0164 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/70.txt\",25)\n", "\n", "from IPython.display import display\n", "pd.set_option(\"display.max_columns\", None)\n", "pd.set_option(\"display.width\", 160)\n", "\n", "display(df.head(25))" ] }, { "cell_type": "code", "execution_count": 9, "id": "1d4db04e-ee0f-4c2c-b0d2-45edbf2128dd", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratioweight_decaydropoutdevtestgap
020.000184adamplateau0.050.00.150.57350.56750.0060
180.000184adamplateau0.100.00.150.57350.56750.0060
230.000184adamplateau0.050.00.200.57340.56410.0094
390.000184adamplateau0.100.00.200.57340.56410.0094
4750.0001165adamplateau0.050.00.200.57230.57080.0015
5810.0001165adamplateau0.100.00.200.57230.57080.0015
6200.000184adamhuggingface_cosine_with_restarts0.100.00.150.57150.56870.0028
770.000184adamplateau0.100.00.100.57120.57060.0006
810.000184adamplateau0.050.00.100.57120.57060.0006
9740.0001165adamplateau0.050.00.150.57110.56790.0032
10800.0001165adamplateau0.100.00.150.57110.56790.0032
11630.0001164adamhuggingface_cosine_with_restarts0.050.00.200.57040.56120.0092
12330.000185adamplateau0.100.00.200.57030.56590.0044
13270.000185adamplateau0.050.00.200.57030.56590.0044
14450.000185adamhuggingface_cosine_with_restarts0.100.00.200.56990.56070.0092
15390.000185adamhuggingface_cosine_with_restarts0.050.00.200.56970.56870.0010
16210.000184adamhuggingface_cosine_with_restarts0.100.00.200.56920.55400.0152
17500.0001164adamplateau0.050.00.150.56870.56380.0048
18560.0001164adamplateau0.100.00.150.56870.56380.0048
19850.0001165adamhuggingface_cosine_with_restarts0.050.00.100.56860.56700.0016
20150.000184adamhuggingface_cosine_with_restarts0.050.00.200.56850.56290.0056
21680.0001164adamhuggingface_cosine_with_restarts0.100.00.150.56760.56170.0059
22920.0001165adamhuggingface_cosine_with_restarts0.100.00.150.56740.56380.0036
23140.000184adamhuggingface_cosine_with_restarts0.050.00.150.56730.56610.0011
24510.0001164adamplateau0.050.00.200.56710.56330.0038
\n", "
" ], "text/plain": [ " step lr num_transformer_heads tr_layer_number optimizer scheduler_type warmup_ratio weight_decay dropout dev test \\\n", "0 2 0.0001 8 4 adam plateau 0.05 0.0 0.15 0.5735 0.5675 \n", "1 8 0.0001 8 4 adam plateau 0.10 0.0 0.15 0.5735 0.5675 \n", "2 3 0.0001 8 4 adam plateau 0.05 0.0 0.20 0.5734 0.5641 \n", "3 9 0.0001 8 4 adam plateau 0.10 0.0 0.20 0.5734 0.5641 \n", "4 75 0.0001 16 5 adam plateau 0.05 0.0 0.20 0.5723 0.5708 \n", "5 81 0.0001 16 5 adam plateau 0.10 0.0 0.20 0.5723 0.5708 \n", "6 20 0.0001 8 4 adam huggingface_cosine_with_restarts 0.10 0.0 0.15 0.5715 0.5687 \n", "7 7 0.0001 8 4 adam plateau 0.10 0.0 0.10 0.5712 0.5706 \n", "8 1 0.0001 8 4 adam plateau 0.05 0.0 0.10 0.5712 0.5706 \n", "9 74 0.0001 16 5 adam plateau 0.05 0.0 0.15 0.5711 0.5679 \n", "10 80 0.0001 16 5 adam plateau 0.10 0.0 0.15 0.5711 0.5679 \n", "11 63 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.0 0.20 0.5704 0.5612 \n", "12 33 0.0001 8 5 adam plateau 0.10 0.0 0.20 0.5703 0.5659 \n", "13 27 0.0001 8 5 adam plateau 0.05 0.0 0.20 0.5703 0.5659 \n", "14 45 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.0 0.20 0.5699 0.5607 \n", "15 39 0.0001 8 5 adam huggingface_cosine_with_restarts 0.05 0.0 0.20 0.5697 0.5687 \n", "16 21 0.0001 8 4 adam huggingface_cosine_with_restarts 0.10 0.0 0.20 0.5692 0.5540 \n", "17 50 0.0001 16 4 adam plateau 0.05 0.0 0.15 0.5687 0.5638 \n", "18 56 0.0001 16 4 adam plateau 0.10 0.0 0.15 0.5687 0.5638 \n", "19 85 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.0 0.10 0.5686 0.5670 \n", "20 15 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.0 0.20 0.5685 0.5629 \n", "21 68 0.0001 16 4 adam huggingface_cosine_with_restarts 0.10 0.0 0.15 0.5676 0.5617 \n", "22 92 0.0001 16 5 adam huggingface_cosine_with_restarts 0.10 0.0 0.15 0.5674 0.5638 \n", "23 14 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.0 0.15 0.5673 0.5661 \n", "24 51 0.0001 16 4 adam plateau 0.05 0.0 0.20 0.5671 0.5633 \n", "\n", " 
gap \n", "0 0.0060 \n", "1 0.0060 \n", "2 0.0094 \n", "3 0.0094 \n", "4 0.0015 \n", "5 0.0015 \n", "6 0.0028 \n", "7 0.0006 \n", "8 0.0006 \n", "9 0.0032 \n", "10 0.0032 \n", "11 0.0092 \n", "12 0.0044 \n", "13 0.0044 \n", "14 0.0092 \n", "15 0.0010 \n", "16 0.0152 \n", "17 0.0048 \n", "18 0.0048 \n", "19 0.0016 \n", "20 0.0056 \n", "21 0.0059 \n", "22 0.0036 \n", "23 0.0011 \n", "24 0.0038 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/80.txt\",25)\n", "\n", "from IPython.display import display\n", "pd.set_option(\"display.max_columns\", None)\n", "pd.set_option(\"display.width\", 160)\n", "\n", "display(df.head(25))" ] }, { "cell_type": "code", "execution_count": 10, "id": "ff64fbf5-cea1-4ee1-b7dc-d415ed7de9e1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratioweight_decaydropoutdevtestgap
0200.000184adamhuggingface_cosine_with_restarts0.100.00.150.58810.57130.0168
1570.0001164adamplateau0.100.00.200.58460.56720.0174
2510.0001164adamplateau0.050.00.200.58460.56720.0174
3140.000184adamhuggingface_cosine_with_restarts0.050.00.150.58330.56610.0172
4210.000184adamhuggingface_cosine_with_restarts0.100.00.200.58260.56990.0127
5150.000184adamhuggingface_cosine_with_restarts0.050.00.200.58190.56400.0179
6130.000184adamhuggingface_cosine_with_restarts0.050.00.100.58060.55870.0219
720.000184adamplateau0.050.00.150.58000.56860.0113
880.000184adamplateau0.100.00.150.58000.56860.0113
970.000184adamplateau0.100.00.100.57950.56930.0103
1010.000184adamplateau0.050.00.100.57950.56930.0103
11500.0001164adamplateau0.050.00.150.57870.56620.0125
12560.0001164adamplateau0.100.00.150.57870.56620.0125
13450.000185adamhuggingface_cosine_with_restarts0.100.00.200.57750.56370.0138
14550.0001164adamplateau0.100.00.100.57660.57170.0049
15490.0001164adamplateau0.050.00.100.57660.57170.0049
16610.0001164adamhuggingface_cosine_with_restarts0.050.00.100.57600.56200.0140
17810.0001165adamplateau0.100.00.200.57490.56840.0066
18750.0001165adamplateau0.050.00.200.57490.56840.0066
19310.000185adamplateau0.100.00.100.57450.56590.0086
20250.000185adamplateau0.050.00.100.57450.56590.0086
21370.000185adamhuggingface_cosine_with_restarts0.050.00.100.57450.57120.0034
22440.000185adamhuggingface_cosine_with_restarts0.100.00.150.57400.5744-0.0004
23190.000184adamhuggingface_cosine_with_restarts0.100.00.100.57360.56060.0130
24630.0001164adamhuggingface_cosine_with_restarts0.050.00.200.57340.56240.0110
\n", "
" ], "text/plain": [ " step lr num_transformer_heads tr_layer_number optimizer scheduler_type warmup_ratio weight_decay dropout dev test \\\n", "0 20 0.0001 8 4 adam huggingface_cosine_with_restarts 0.10 0.0 0.15 0.5881 0.5713 \n", "1 57 0.0001 16 4 adam plateau 0.10 0.0 0.20 0.5846 0.5672 \n", "2 51 0.0001 16 4 adam plateau 0.05 0.0 0.20 0.5846 0.5672 \n", "3 14 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.0 0.15 0.5833 0.5661 \n", "4 21 0.0001 8 4 adam huggingface_cosine_with_restarts 0.10 0.0 0.20 0.5826 0.5699 \n", "5 15 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.0 0.20 0.5819 0.5640 \n", "6 13 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.0 0.10 0.5806 0.5587 \n", "7 2 0.0001 8 4 adam plateau 0.05 0.0 0.15 0.5800 0.5686 \n", "8 8 0.0001 8 4 adam plateau 0.10 0.0 0.15 0.5800 0.5686 \n", "9 7 0.0001 8 4 adam plateau 0.10 0.0 0.10 0.5795 0.5693 \n", "10 1 0.0001 8 4 adam plateau 0.05 0.0 0.10 0.5795 0.5693 \n", "11 50 0.0001 16 4 adam plateau 0.05 0.0 0.15 0.5787 0.5662 \n", "12 56 0.0001 16 4 adam plateau 0.10 0.0 0.15 0.5787 0.5662 \n", "13 45 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.0 0.20 0.5775 0.5637 \n", "14 55 0.0001 16 4 adam plateau 0.10 0.0 0.10 0.5766 0.5717 \n", "15 49 0.0001 16 4 adam plateau 0.05 0.0 0.10 0.5766 0.5717 \n", "16 61 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.0 0.10 0.5760 0.5620 \n", "17 81 0.0001 16 5 adam plateau 0.10 0.0 0.20 0.5749 0.5684 \n", "18 75 0.0001 16 5 adam plateau 0.05 0.0 0.20 0.5749 0.5684 \n", "19 31 0.0001 8 5 adam plateau 0.10 0.0 0.10 0.5745 0.5659 \n", "20 25 0.0001 8 5 adam plateau 0.05 0.0 0.10 0.5745 0.5659 \n", "21 37 0.0001 8 5 adam huggingface_cosine_with_restarts 0.05 0.0 0.10 0.5745 0.5712 \n", "22 44 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.0 0.15 0.5740 0.5744 \n", "23 19 0.0001 8 4 adam huggingface_cosine_with_restarts 0.10 0.0 0.10 0.5736 0.5606 \n", "24 63 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.0 0.20 
0.5734 0.5624 \n", "\n", " gap \n", "0 0.0168 \n", "1 0.0174 \n", "2 0.0174 \n", "3 0.0172 \n", "4 0.0127 \n", "5 0.0179 \n", "6 0.0219 \n", "7 0.0113 \n", "8 0.0113 \n", "9 0.0103 \n", "10 0.0103 \n", "11 0.0125 \n", "12 0.0125 \n", "13 0.0138 \n", "14 0.0049 \n", "15 0.0049 \n", "16 0.0140 \n", "17 0.0066 \n", "18 0.0066 \n", "19 0.0086 \n", "20 0.0086 \n", "21 0.0034 \n", "22 -0.0004 \n", "23 0.0130 \n", "24 0.0110 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/90.txt\",25)\n", "\n", "from IPython.display import display\n", "pd.set_option(\"display.max_columns\", None)\n", "pd.set_option(\"display.width\", 160)\n", "\n", "display(df.head(25))" ] }, { "cell_type": "code", "execution_count": 11, "id": "00b62770-f38f-405c-9c5c-630d4afd7d26", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratioweight_decaydropoutdevtestgap
0490.0001164adamplateau0.050.00.100.58600.57020.0158
1550.0001164adamplateau0.100.00.100.58600.57020.0158
2560.0001164adamplateau0.100.00.150.58270.57710.0056
3500.0001164adamplateau0.050.00.150.58270.57710.0056
420.000184adamplateau0.050.00.150.58160.57810.0034
580.000184adamplateau0.100.00.150.58160.57810.0034
6510.0001164adamplateau0.050.00.200.58050.57200.0085
7570.0001164adamplateau0.100.00.200.58050.57200.0085
870.000184adamplateau0.100.00.100.57930.57150.0079
910.000184adamplateau0.050.00.100.57930.57150.0079
10790.0001165adamplateau0.100.00.100.57890.56920.0097
11730.0001165adamplateau0.050.00.100.57890.56920.0097
1230.000184adamplateau0.050.00.200.57820.57340.0048
1390.000184adamplateau0.100.00.200.57820.57340.0048
14450.000185adamhuggingface_cosine_with_restarts0.100.00.200.57800.57770.0003
15390.000185adamhuggingface_cosine_with_restarts0.050.00.200.57780.57510.0026
16630.0001164adamhuggingface_cosine_with_restarts0.050.00.200.57670.57180.0048
17620.0001164adamhuggingface_cosine_with_restarts0.050.00.150.57660.57080.0059
18740.0001165adamplateau0.050.00.150.57560.56750.0080
19800.0001165adamplateau0.100.00.150.57560.56750.0080
20870.0001165adamhuggingface_cosine_with_restarts0.050.00.200.57540.56850.0069
21930.0001165adamhuggingface_cosine_with_restarts0.100.00.200.57520.57440.0009
22670.0001164adamhuggingface_cosine_with_restarts0.100.00.100.57400.56940.0045
23850.0001165adamhuggingface_cosine_with_restarts0.050.00.100.57370.57070.0030
24140.000184adamhuggingface_cosine_with_restarts0.050.00.150.57360.55650.0171
\n", "
" ], "text/plain": [ " step lr num_transformer_heads tr_layer_number optimizer scheduler_type warmup_ratio weight_decay dropout dev test \\\n", "0 49 0.0001 16 4 adam plateau 0.05 0.0 0.10 0.5860 0.5702 \n", "1 55 0.0001 16 4 adam plateau 0.10 0.0 0.10 0.5860 0.5702 \n", "2 56 0.0001 16 4 adam plateau 0.10 0.0 0.15 0.5827 0.5771 \n", "3 50 0.0001 16 4 adam plateau 0.05 0.0 0.15 0.5827 0.5771 \n", "4 2 0.0001 8 4 adam plateau 0.05 0.0 0.15 0.5816 0.5781 \n", "5 8 0.0001 8 4 adam plateau 0.10 0.0 0.15 0.5816 0.5781 \n", "6 51 0.0001 16 4 adam plateau 0.05 0.0 0.20 0.5805 0.5720 \n", "7 57 0.0001 16 4 adam plateau 0.10 0.0 0.20 0.5805 0.5720 \n", "8 7 0.0001 8 4 adam plateau 0.10 0.0 0.10 0.5793 0.5715 \n", "9 1 0.0001 8 4 adam plateau 0.05 0.0 0.10 0.5793 0.5715 \n", "10 79 0.0001 16 5 adam plateau 0.10 0.0 0.10 0.5789 0.5692 \n", "11 73 0.0001 16 5 adam plateau 0.05 0.0 0.10 0.5789 0.5692 \n", "12 3 0.0001 8 4 adam plateau 0.05 0.0 0.20 0.5782 0.5734 \n", "13 9 0.0001 8 4 adam plateau 0.10 0.0 0.20 0.5782 0.5734 \n", "14 45 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.0 0.20 0.5780 0.5777 \n", "15 39 0.0001 8 5 adam huggingface_cosine_with_restarts 0.05 0.0 0.20 0.5778 0.5751 \n", "16 63 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.0 0.20 0.5767 0.5718 \n", "17 62 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.0 0.15 0.5766 0.5708 \n", "18 74 0.0001 16 5 adam plateau 0.05 0.0 0.15 0.5756 0.5675 \n", "19 80 0.0001 16 5 adam plateau 0.10 0.0 0.15 0.5756 0.5675 \n", "20 87 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.0 0.20 0.5754 0.5685 \n", "21 93 0.0001 16 5 adam huggingface_cosine_with_restarts 0.10 0.0 0.20 0.5752 0.5744 \n", "22 67 0.0001 16 4 adam huggingface_cosine_with_restarts 0.10 0.0 0.10 0.5740 0.5694 \n", "23 85 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.0 0.10 0.5737 0.5707 \n", "24 14 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.0 0.15 0.5736 0.5565 \n", "\n", " gap \n", "0 0.0158 
\n", "1 0.0158 \n", "2 0.0056 \n", "3 0.0056 \n", "4 0.0034 \n", "5 0.0034 \n", "6 0.0085 \n", "7 0.0085 \n", "8 0.0079 \n", "9 0.0079 \n", "10 0.0097 \n", "11 0.0097 \n", "12 0.0048 \n", "13 0.0048 \n", "14 0.0003 \n", "15 0.0026 \n", "16 0.0048 \n", "17 0.0059 \n", "18 0.0080 \n", "19 0.0080 \n", "20 0.0069 \n", "21 0.0009 \n", "22 0.0045 \n", "23 0.0030 \n", "24 0.0171 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/100.txt\",25)\n", "\n", "from IPython.display import display\n", "pd.set_option(\"display.max_columns\", None)\n", "pd.set_option(\"display.width\", 160)\n", "\n", "display(df.head(25))" ] }, { "cell_type": "code", "execution_count": null, "id": "fb9186a9-f0bc-406d-b2c3-724d7b5f9d43", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "0f9fbc1c-ab41-4dca-bbad-6a0eadd28f7f", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "75806125-04ac-4e18-968e-4632b92d1d16", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "4ac6d74a-3625-4c46-b1a1-7d70a7ef5446", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "99e64f6f-eeb4-4f1f-9bd0-b2cb230dc1da", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 12, "id": "467b04df-a408-4808-941d-5b0f2ebbf217", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratiodropoutdevtestgap
050.000184adamhuggingface_cosine_with_restarts0.050.100.57100.55750.0135
1140.000185adamhuggingface_cosine_with_restarts0.050.150.57070.56700.0037
2130.000185adamhuggingface_cosine_with_restarts0.050.100.56980.56500.0048
390.000185adamplateau0.050.100.56910.55650.0126
4110.000185adamplateau0.100.100.56910.55650.0126
5270.0001165adamplateau0.100.100.56900.56780.0011
6250.0001165adamplateau0.050.100.56900.56780.0011
770.000184adamhuggingface_cosine_with_restarts0.100.100.56710.55740.0097
8320.0001165adamhuggingface_cosine_with_restarts0.100.150.56640.56340.0031
9240.0001164adamhuggingface_cosine_with_restarts0.100.150.56630.56290.0034
10290.0001165adamhuggingface_cosine_with_restarts0.050.100.56620.55900.0072
11310.0001165adamhuggingface_cosine_with_restarts0.100.100.56610.56050.0056
12280.0001165adamplateau0.100.150.56560.5668-0.0012
13260.0001165adamplateau0.050.150.56560.5668-0.0012
14160.000185adamhuggingface_cosine_with_restarts0.100.150.56460.55520.0094
15150.000185adamhuggingface_cosine_with_restarts0.100.100.56400.55960.0044
16300.0001165adamhuggingface_cosine_with_restarts0.050.150.56380.55880.0050
17120.000185adamplateau0.100.150.56370.55340.0102
18100.000185adamplateau0.050.150.56370.55340.0102
1980.000184adamhuggingface_cosine_with_restarts0.100.150.56320.55610.0072
20210.0001164adamhuggingface_cosine_with_restarts0.050.100.56210.55220.0099
2120.000184adamplateau0.050.150.56150.55150.0100
2240.000184adamplateau0.100.150.56150.55150.0100
2360.000184adamhuggingface_cosine_with_restarts0.050.150.56130.55130.0100
2430.000184adamplateau0.100.100.56060.55250.0081
\n", "
" ], "text/plain": [ " step lr num_transformer_heads tr_layer_number optimizer scheduler_type warmup_ratio dropout dev test gap\n", "0 5 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.10 0.5710 0.5575 0.0135\n", "1 14 0.0001 8 5 adam huggingface_cosine_with_restarts 0.05 0.15 0.5707 0.5670 0.0037\n", "2 13 0.0001 8 5 adam huggingface_cosine_with_restarts 0.05 0.10 0.5698 0.5650 0.0048\n", "3 9 0.0001 8 5 adam plateau 0.05 0.10 0.5691 0.5565 0.0126\n", "4 11 0.0001 8 5 adam plateau 0.10 0.10 0.5691 0.5565 0.0126\n", "5 27 0.0001 16 5 adam plateau 0.10 0.10 0.5690 0.5678 0.0011\n", "6 25 0.0001 16 5 adam plateau 0.05 0.10 0.5690 0.5678 0.0011\n", "7 7 0.0001 8 4 adam huggingface_cosine_with_restarts 0.10 0.10 0.5671 0.5574 0.0097\n", "8 32 0.0001 16 5 adam huggingface_cosine_with_restarts 0.10 0.15 0.5664 0.5634 0.0031\n", "9 24 0.0001 16 4 adam huggingface_cosine_with_restarts 0.10 0.15 0.5663 0.5629 0.0034\n", "10 29 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.10 0.5662 0.5590 0.0072\n", "11 31 0.0001 16 5 adam huggingface_cosine_with_restarts 0.10 0.10 0.5661 0.5605 0.0056\n", "12 28 0.0001 16 5 adam plateau 0.10 0.15 0.5656 0.5668 -0.0012\n", "13 26 0.0001 16 5 adam plateau 0.05 0.15 0.5656 0.5668 -0.0012\n", "14 16 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.15 0.5646 0.5552 0.0094\n", "15 15 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.10 0.5640 0.5596 0.0044\n", "16 30 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.15 0.5638 0.5588 0.0050\n", "17 12 0.0001 8 5 adam plateau 0.10 0.15 0.5637 0.5534 0.0102\n", "18 10 0.0001 8 5 adam plateau 0.05 0.15 0.5637 0.5534 0.0102\n", "19 8 0.0001 8 4 adam huggingface_cosine_with_restarts 0.10 0.15 0.5632 0.5561 0.0072\n", "20 21 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.10 0.5621 0.5522 0.0099\n", "21 2 0.0001 8 4 adam plateau 0.05 0.15 0.5615 0.5515 0.0100\n", "22 4 0.0001 8 4 adam plateau 0.10 0.15 0.5615 0.5515 0.0100\n", "23 6 0.0001 8 4 adam 
huggingface_cosine_with_restarts 0.05 0.15 0.5613 0.5513 0.0100\n", "24 3 0.0001 8 4 adam plateau 0.10 0.10 0.5606 0.5525 0.0081" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Import the renderer and widen the pandas display limits before showing the table.\n", "from IPython.display import display\n", "\n", "pd.set_option(\"display.max_columns\", None)\n", "pd.set_option(\"display.width\", 160)\n", "\n", "# Parse the run log; top_n=25 caps how many rows parse_smart_log returns.\n", "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/bi/10.txt\", top_n=25)\n", "\n", "display(df.head(25))" ] }, { "cell_type": "code", "execution_count": 13, "id": "efba3e34-f64b-4962-9c8b-ef70294badb1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratiodropoutdevtestgap
070.000184adamhuggingface_cosine_with_restarts0.100.100.58040.56890.0116
1210.0001164adamhuggingface_cosine_with_restarts0.050.100.57910.57090.0082
2220.0001164adamhuggingface_cosine_with_restarts0.050.150.57390.56510.0088
3180.0001164adamplateau0.050.150.57370.56790.0058
4200.0001164adamplateau0.100.150.57370.56790.0058
5150.000185adamhuggingface_cosine_with_restarts0.100.100.57310.56380.0092
6130.000185adamhuggingface_cosine_with_restarts0.050.100.57310.56730.0058
750.000184adamhuggingface_cosine_with_restarts0.050.100.57300.56890.0041
880.000184adamhuggingface_cosine_with_restarts0.100.150.57240.56310.0093
9300.0001165adamhuggingface_cosine_with_restarts0.050.150.57190.57020.0017
1040.000184adamplateau0.100.150.57170.56620.0055
1120.000184adamplateau0.050.150.57170.56620.0055
12170.0001164adamplateau0.050.100.57120.56800.0032
13260.0001165adamplateau0.050.150.57120.56260.0086
14190.0001164adamplateau0.100.100.57120.56800.0032
15280.0001165adamplateau0.100.150.57120.56260.0086
1690.000185adamplateau0.050.100.57080.55330.0175
17110.000185adamplateau0.100.100.57080.55330.0175
18230.0001164adamhuggingface_cosine_with_restarts0.100.100.57060.5706-0.0001
19160.000185adamhuggingface_cosine_with_restarts0.100.150.57040.56490.0055
2060.000184adamhuggingface_cosine_with_restarts0.050.150.56970.56570.0041
21290.0001165adamhuggingface_cosine_with_restarts0.050.100.56960.56610.0035
22250.0001165adamplateau0.050.100.56950.56040.0092
23270.0001165adamplateau0.100.100.56950.56040.0092
24100.000185adamplateau0.050.150.56940.55250.0170
\n", "
" ], "text/plain": [ " step lr num_transformer_heads tr_layer_number optimizer scheduler_type warmup_ratio dropout dev test gap\n", "0 7 0.0001 8 4 adam huggingface_cosine_with_restarts 0.10 0.10 0.5804 0.5689 0.0116\n", "1 21 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.10 0.5791 0.5709 0.0082\n", "2 22 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.15 0.5739 0.5651 0.0088\n", "3 18 0.0001 16 4 adam plateau 0.05 0.15 0.5737 0.5679 0.0058\n", "4 20 0.0001 16 4 adam plateau 0.10 0.15 0.5737 0.5679 0.0058\n", "5 15 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.10 0.5731 0.5638 0.0092\n", "6 13 0.0001 8 5 adam huggingface_cosine_with_restarts 0.05 0.10 0.5731 0.5673 0.0058\n", "7 5 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.10 0.5730 0.5689 0.0041\n", "8 8 0.0001 8 4 adam huggingface_cosine_with_restarts 0.10 0.15 0.5724 0.5631 0.0093\n", "9 30 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.15 0.5719 0.5702 0.0017\n", "10 4 0.0001 8 4 adam plateau 0.10 0.15 0.5717 0.5662 0.0055\n", "11 2 0.0001 8 4 adam plateau 0.05 0.15 0.5717 0.5662 0.0055\n", "12 17 0.0001 16 4 adam plateau 0.05 0.10 0.5712 0.5680 0.0032\n", "13 26 0.0001 16 5 adam plateau 0.05 0.15 0.5712 0.5626 0.0086\n", "14 19 0.0001 16 4 adam plateau 0.10 0.10 0.5712 0.5680 0.0032\n", "15 28 0.0001 16 5 adam plateau 0.10 0.15 0.5712 0.5626 0.0086\n", "16 9 0.0001 8 5 adam plateau 0.05 0.10 0.5708 0.5533 0.0175\n", "17 11 0.0001 8 5 adam plateau 0.10 0.10 0.5708 0.5533 0.0175\n", "18 23 0.0001 16 4 adam huggingface_cosine_with_restarts 0.10 0.10 0.5706 0.5706 -0.0001\n", "19 16 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.15 0.5704 0.5649 0.0055\n", "20 6 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.15 0.5697 0.5657 0.0041\n", "21 29 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.10 0.5696 0.5661 0.0035\n", "22 25 0.0001 16 5 adam plateau 0.05 0.10 0.5695 0.5604 0.0092\n", "23 27 0.0001 16 5 adam plateau 0.10 0.10 0.5695 
0.5604 0.0092\n", "24 10 0.0001 8 5 adam plateau 0.05 0.15 0.5694 0.5525 0.0170" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Import the renderer and widen the pandas display limits before showing the table.\n", "from IPython.display import display\n", "\n", "pd.set_option(\"display.max_columns\", None)\n", "pd.set_option(\"display.width\", 160)\n", "\n", "# Parse the run log; top_n=25 caps how many rows parse_smart_log returns.\n", "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/bi/20.txt\", top_n=25)\n", "\n", "display(df.head(25))" ] }, { "cell_type": "code", "execution_count": 14, "id": "5c48bc72-cac8-4119-96ad-a1f09cfed996", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratiodropoutdevtestgap
0200.0001164adamplateau0.100.150.56960.56310.0065
1180.0001164adamplateau0.050.150.56960.56310.0065
2190.0001164adamplateau0.100.100.56540.56360.0017
3170.0001164adamplateau0.050.100.56540.56360.0017
4260.0001165adamplateau0.050.150.56190.55940.0025
5280.0001165adamplateau0.100.150.56190.55940.0025
6110.000185adamplateau0.100.100.56170.55490.0069
790.000185adamplateau0.050.100.56170.55490.0069
8150.000185adamhuggingface_cosine_with_restarts0.100.100.56090.55470.0062
9270.0001165adamplateau0.100.100.56070.55840.0023
10250.0001165adamplateau0.050.100.56070.55840.0023
11300.0001165adamhuggingface_cosine_with_restarts0.050.150.56010.5603-0.0001
12120.000185adamplateau0.100.150.55960.55510.0045
13100.000185adamplateau0.050.150.55960.55510.0045
14140.000185adamhuggingface_cosine_with_restarts0.050.150.55930.55670.0026
1510.000184adamplateau0.050.100.55890.55530.0036
1630.000184adamplateau0.100.100.55890.55530.0036
17160.000185adamhuggingface_cosine_with_restarts0.100.150.55880.55470.0041
1850.000184adamhuggingface_cosine_with_restarts0.050.100.55880.55510.0037
19130.000185adamhuggingface_cosine_with_restarts0.050.100.55860.55200.0066
20210.0001164adamhuggingface_cosine_with_restarts0.050.100.55830.5599-0.0017
21320.0001165adamhuggingface_cosine_with_restarts0.100.150.55780.5591-0.0012
2260.000184adamhuggingface_cosine_with_restarts0.050.150.55760.5588-0.0012
2380.000184adamhuggingface_cosine_with_restarts0.100.150.55740.55220.0052
24310.0001165adamhuggingface_cosine_with_restarts0.100.100.55660.55230.0043
\n", "
" ], "text/plain": [ " step lr num_transformer_heads tr_layer_number optimizer scheduler_type warmup_ratio dropout dev test gap\n", "0 20 0.0001 16 4 adam plateau 0.10 0.15 0.5696 0.5631 0.0065\n", "1 18 0.0001 16 4 adam plateau 0.05 0.15 0.5696 0.5631 0.0065\n", "2 19 0.0001 16 4 adam plateau 0.10 0.10 0.5654 0.5636 0.0017\n", "3 17 0.0001 16 4 adam plateau 0.05 0.10 0.5654 0.5636 0.0017\n", "4 26 0.0001 16 5 adam plateau 0.05 0.15 0.5619 0.5594 0.0025\n", "5 28 0.0001 16 5 adam plateau 0.10 0.15 0.5619 0.5594 0.0025\n", "6 11 0.0001 8 5 adam plateau 0.10 0.10 0.5617 0.5549 0.0069\n", "7 9 0.0001 8 5 adam plateau 0.05 0.10 0.5617 0.5549 0.0069\n", "8 15 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.10 0.5609 0.5547 0.0062\n", "9 27 0.0001 16 5 adam plateau 0.10 0.10 0.5607 0.5584 0.0023\n", "10 25 0.0001 16 5 adam plateau 0.05 0.10 0.5607 0.5584 0.0023\n", "11 30 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.15 0.5601 0.5603 -0.0001\n", "12 12 0.0001 8 5 adam plateau 0.10 0.15 0.5596 0.5551 0.0045\n", "13 10 0.0001 8 5 adam plateau 0.05 0.15 0.5596 0.5551 0.0045\n", "14 14 0.0001 8 5 adam huggingface_cosine_with_restarts 0.05 0.15 0.5593 0.5567 0.0026\n", "15 1 0.0001 8 4 adam plateau 0.05 0.10 0.5589 0.5553 0.0036\n", "16 3 0.0001 8 4 adam plateau 0.10 0.10 0.5589 0.5553 0.0036\n", "17 16 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.15 0.5588 0.5547 0.0041\n", "18 5 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.10 0.5588 0.5551 0.0037\n", "19 13 0.0001 8 5 adam huggingface_cosine_with_restarts 0.05 0.10 0.5586 0.5520 0.0066\n", "20 21 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.10 0.5583 0.5599 -0.0017\n", "21 32 0.0001 16 5 adam huggingface_cosine_with_restarts 0.10 0.15 0.5578 0.5591 -0.0012\n", "22 6 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.15 0.5576 0.5588 -0.0012\n", "23 8 0.0001 8 4 adam huggingface_cosine_with_restarts 0.10 0.15 0.5574 0.5522 0.0052\n", "24 31 0.0001 16 5 adam 
huggingface_cosine_with_restarts 0.10 0.10 0.5566 0.5523 0.0043" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Import the renderer and widen the pandas display limits before showing the table.\n", "from IPython.display import display\n", "\n", "pd.set_option(\"display.max_columns\", None)\n", "pd.set_option(\"display.width\", 160)\n", "\n", "# Parse the run log; top_n=25 caps how many rows parse_smart_log returns.\n", "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/bi/30.txt\", top_n=25)\n", "\n", "display(df.head(25))" ] }, { "cell_type": "code", "execution_count": 15, "id": "a93f7c97-3e3c-4e6f-8bcb-454480aae5f8", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratiodropoutdevtestgap
0190.0001164adamplateau0.100.100.58220.56710.0151
1170.0001164adamplateau0.050.100.58220.56710.0151
220.000184adamplateau0.050.150.57290.56430.0086
340.000184adamplateau0.100.150.57290.56430.0086
4200.0001164adamplateau0.100.150.57270.56010.0126
5180.0001164adamplateau0.050.150.57270.56010.0126
660.000184adamhuggingface_cosine_with_restarts0.050.150.56980.56260.0072
780.000184adamhuggingface_cosine_with_restarts0.100.150.56970.56230.0074
8110.000185adamplateau0.100.100.56910.56220.0069
990.000185adamplateau0.050.100.56910.56220.0069
10220.0001164adamhuggingface_cosine_with_restarts0.050.150.56810.56410.0039
11240.0001164adamhuggingface_cosine_with_restarts0.100.150.56760.56490.0027
12260.0001165adamplateau0.050.150.56750.56290.0045
13280.0001165adamplateau0.100.150.56750.56290.0045
1430.000184adamplateau0.100.100.56750.56130.0062
1510.000184adamplateau0.050.100.56750.56130.0062
1650.000184adamhuggingface_cosine_with_restarts0.050.100.56630.55750.0088
17250.0001165adamplateau0.050.100.56550.56180.0037
18270.0001165adamplateau0.100.100.56550.56180.0037
1970.000184adamhuggingface_cosine_with_restarts0.100.100.56440.56250.0019
20230.0001164adamhuggingface_cosine_with_restarts0.100.100.56410.5656-0.0015
21320.0001165adamhuggingface_cosine_with_restarts0.100.150.56340.55650.0069
22210.0001164adamhuggingface_cosine_with_restarts0.050.100.56320.5638-0.0006
23300.0001165adamhuggingface_cosine_with_restarts0.050.150.56270.56070.0020
24120.000185adamplateau0.100.150.56220.55850.0038
\n", "
" ], "text/plain": [ " step lr num_transformer_heads tr_layer_number optimizer scheduler_type warmup_ratio dropout dev test gap\n", "0 19 0.0001 16 4 adam plateau 0.10 0.10 0.5822 0.5671 0.0151\n", "1 17 0.0001 16 4 adam plateau 0.05 0.10 0.5822 0.5671 0.0151\n", "2 2 0.0001 8 4 adam plateau 0.05 0.15 0.5729 0.5643 0.0086\n", "3 4 0.0001 8 4 adam plateau 0.10 0.15 0.5729 0.5643 0.0086\n", "4 20 0.0001 16 4 adam plateau 0.10 0.15 0.5727 0.5601 0.0126\n", "5 18 0.0001 16 4 adam plateau 0.05 0.15 0.5727 0.5601 0.0126\n", "6 6 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.15 0.5698 0.5626 0.0072\n", "7 8 0.0001 8 4 adam huggingface_cosine_with_restarts 0.10 0.15 0.5697 0.5623 0.0074\n", "8 11 0.0001 8 5 adam plateau 0.10 0.10 0.5691 0.5622 0.0069\n", "9 9 0.0001 8 5 adam plateau 0.05 0.10 0.5691 0.5622 0.0069\n", "10 22 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.15 0.5681 0.5641 0.0039\n", "11 24 0.0001 16 4 adam huggingface_cosine_with_restarts 0.10 0.15 0.5676 0.5649 0.0027\n", "12 26 0.0001 16 5 adam plateau 0.05 0.15 0.5675 0.5629 0.0045\n", "13 28 0.0001 16 5 adam plateau 0.10 0.15 0.5675 0.5629 0.0045\n", "14 3 0.0001 8 4 adam plateau 0.10 0.10 0.5675 0.5613 0.0062\n", "15 1 0.0001 8 4 adam plateau 0.05 0.10 0.5675 0.5613 0.0062\n", "16 5 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.10 0.5663 0.5575 0.0088\n", "17 25 0.0001 16 5 adam plateau 0.05 0.10 0.5655 0.5618 0.0037\n", "18 27 0.0001 16 5 adam plateau 0.10 0.10 0.5655 0.5618 0.0037\n", "19 7 0.0001 8 4 adam huggingface_cosine_with_restarts 0.10 0.10 0.5644 0.5625 0.0019\n", "20 23 0.0001 16 4 adam huggingface_cosine_with_restarts 0.10 0.10 0.5641 0.5656 -0.0015\n", "21 32 0.0001 16 5 adam huggingface_cosine_with_restarts 0.10 0.15 0.5634 0.5565 0.0069\n", "22 21 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.10 0.5632 0.5638 -0.0006\n", "23 30 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.15 0.5627 0.5607 0.0020\n", "24 12 0.0001 8 5 adam plateau 0.10 
0.15 0.5622 0.5585 0.0038" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Import the renderer and widen the pandas display limits before showing the table.\n", "from IPython.display import display\n", "\n", "pd.set_option(\"display.max_columns\", None)\n", "pd.set_option(\"display.width\", 160)\n", "\n", "# Parse the run log; top_n=25 caps how many rows parse_smart_log returns.\n", "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/bi/40.txt\", top_n=25)\n", "\n", "display(df.head(25))" ] }, { "cell_type": "code", "execution_count": 16, "id": "a488eb61-89a5-4c79-82a6-3980a5621fdd", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
steplrnum_transformer_headstr_layer_numberoptimizerscheduler_typewarmup_ratiodropoutdevtestgap
0190.0001164adamplateau0.100.100.57230.56760.0047
1170.0001164adamplateau0.050.100.57230.56760.0047
2200.0001164adamplateau0.100.150.57210.56910.0031
3180.0001164adamplateau0.050.150.57210.56910.0031
4300.0001165adamhuggingface_cosine_with_restarts0.050.150.57070.56540.0053
5210.0001164adamhuggingface_cosine_with_restarts0.050.100.57010.56300.0071
6290.0001165adamhuggingface_cosine_with_restarts0.050.100.57000.56950.0005
7240.0001164adamhuggingface_cosine_with_restarts0.100.150.56950.56290.0066
8270.0001165adamplateau0.100.100.56780.5712-0.0034
9250.0001165adamplateau0.050.100.56780.5712-0.0034
10130.000185adamhuggingface_cosine_with_restarts0.050.100.56640.55710.0092
1120.000184adamplateau0.050.150.56630.56300.0033
1240.000184adamplateau0.100.150.56630.56300.0033
13160.000185adamhuggingface_cosine_with_restarts0.100.150.56560.56300.0026
14110.000185adamplateau0.100.100.56500.55840.0066
1590.000185adamplateau0.050.100.56500.55840.0066
16230.0001164adamhuggingface_cosine_with_restarts0.100.100.56480.55950.0053
1750.000184adamhuggingface_cosine_with_restarts0.050.100.56470.55860.0061
18260.0001165adamplateau0.050.150.56430.55750.0067
19280.0001165adamplateau0.100.150.56430.55750.0067
2060.000184adamhuggingface_cosine_with_restarts0.050.150.56380.56080.0031
21140.000185adamhuggingface_cosine_with_restarts0.050.150.56330.56240.0009
22100.000185adamplateau0.050.150.56320.56030.0028
23120.000185adamplateau0.100.150.56320.56030.0028
24320.0001165adamhuggingface_cosine_with_restarts0.100.150.56320.5644-0.0012
\n", "
" ], "text/plain": [ " step lr num_transformer_heads tr_layer_number optimizer scheduler_type warmup_ratio dropout dev test gap\n", "0 19 0.0001 16 4 adam plateau 0.10 0.10 0.5723 0.5676 0.0047\n", "1 17 0.0001 16 4 adam plateau 0.05 0.10 0.5723 0.5676 0.0047\n", "2 20 0.0001 16 4 adam plateau 0.10 0.15 0.5721 0.5691 0.0031\n", "3 18 0.0001 16 4 adam plateau 0.05 0.15 0.5721 0.5691 0.0031\n", "4 30 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.15 0.5707 0.5654 0.0053\n", "5 21 0.0001 16 4 adam huggingface_cosine_with_restarts 0.05 0.10 0.5701 0.5630 0.0071\n", "6 29 0.0001 16 5 adam huggingface_cosine_with_restarts 0.05 0.10 0.5700 0.5695 0.0005\n", "7 24 0.0001 16 4 adam huggingface_cosine_with_restarts 0.10 0.15 0.5695 0.5629 0.0066\n", "8 27 0.0001 16 5 adam plateau 0.10 0.10 0.5678 0.5712 -0.0034\n", "9 25 0.0001 16 5 adam plateau 0.05 0.10 0.5678 0.5712 -0.0034\n", "10 13 0.0001 8 5 adam huggingface_cosine_with_restarts 0.05 0.10 0.5664 0.5571 0.0092\n", "11 2 0.0001 8 4 adam plateau 0.05 0.15 0.5663 0.5630 0.0033\n", "12 4 0.0001 8 4 adam plateau 0.10 0.15 0.5663 0.5630 0.0033\n", "13 16 0.0001 8 5 adam huggingface_cosine_with_restarts 0.10 0.15 0.5656 0.5630 0.0026\n", "14 11 0.0001 8 5 adam plateau 0.10 0.10 0.5650 0.5584 0.0066\n", "15 9 0.0001 8 5 adam plateau 0.05 0.10 0.5650 0.5584 0.0066\n", "16 23 0.0001 16 4 adam huggingface_cosine_with_restarts 0.10 0.10 0.5648 0.5595 0.0053\n", "17 5 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.10 0.5647 0.5586 0.0061\n", "18 26 0.0001 16 5 adam plateau 0.05 0.15 0.5643 0.5575 0.0067\n", "19 28 0.0001 16 5 adam plateau 0.10 0.15 0.5643 0.5575 0.0067\n", "20 6 0.0001 8 4 adam huggingface_cosine_with_restarts 0.05 0.15 0.5638 0.5608 0.0031\n", "21 14 0.0001 8 5 adam huggingface_cosine_with_restarts 0.05 0.15 0.5633 0.5624 0.0009\n", "22 10 0.0001 8 5 adam plateau 0.05 0.15 0.5632 0.5603 0.0028\n", "23 12 0.0001 8 5 adam plateau 0.10 0.15 0.5632 0.5603 0.0028\n", "24 32 0.0001 16 5 adam 
huggingface_cosine_with_restarts 0.10 0.15 0.5632 0.5644 -0.0012" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Import the renderer and widen the pandas display limits before showing the table.\n", "from IPython.display import display\n", "\n", "pd.set_option(\"display.max_columns\", None)\n", "pd.set_option(\"display.width\", 160)\n", "\n", "# Parse the run log; top_n=25 caps how many rows parse_smart_log returns.\n", "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/bi/50.txt\", top_n=25)\n", "\n", "display(df.head(25))" ] }, { "cell_type": "code", "execution_count": 17, "id": "1e7603d5-f30b-4304-b6b6-a59daf1f5356", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stepoptimizerlrweight_decaynum_transformer_headstr_layer_numberhidden_dimout_featuresdevtestgap
021adam0.000100.001652562560.58620.58200.0043
1262lion0.000010.01852565120.58470.57280.0120
2226lion0.000010.00852565120.58470.57300.0117
3128adam0.000010.011645125120.58450.57310.0114
492adam0.000010.001645125120.58400.57680.0072
5104adam0.000010.003245125120.58340.57430.0091
65adam0.000100.00842562560.58340.57400.0094
7127adam0.000010.011645122560.58330.57590.0074
880adam0.000010.00845125120.58260.57390.0087
9139adam0.000010.013245122560.58200.57360.0085
10103adam0.000010.003245122560.58200.57650.0055
1134adam0.000100.003252565120.58150.57880.0027
12108adam0.000010.003255125120.58140.57960.0018
13245lion0.000010.003242562560.58130.57580.0055
14233lion0.000010.001642562560.58120.57230.0088
1533adam0.000100.003252562560.58110.57920.0020
16281lion0.000010.013242562560.58080.57590.0050
17230lion0.000010.001632565120.58060.57140.0092
1849adam0.000100.011632562560.58050.57290.0076
19266lion0.000010.011632565120.58030.57180.0085
20116adam0.000010.01845125120.58020.57160.0085
219adam0.000100.00852562560.58010.57620.0039
22115adam0.000010.01845122560.57990.57190.0080
2322adam0.000100.001652565120.57980.57140.0084
2479adam0.000010.00845122560.57970.57590.0038
\n", "
" ], "text/plain": [ " step optimizer lr weight_decay num_transformer_heads tr_layer_number hidden_dim out_features dev test gap\n", "0 21 adam 0.00010 0.00 16 5 256 256 0.5862 0.5820 0.0043\n", "1 262 lion 0.00001 0.01 8 5 256 512 0.5847 0.5728 0.0120\n", "2 226 lion 0.00001 0.00 8 5 256 512 0.5847 0.5730 0.0117\n", "3 128 adam 0.00001 0.01 16 4 512 512 0.5845 0.5731 0.0114\n", "4 92 adam 0.00001 0.00 16 4 512 512 0.5840 0.5768 0.0072\n", "5 104 adam 0.00001 0.00 32 4 512 512 0.5834 0.5743 0.0091\n", "6 5 adam 0.00010 0.00 8 4 256 256 0.5834 0.5740 0.0094\n", "7 127 adam 0.00001 0.01 16 4 512 256 0.5833 0.5759 0.0074\n", "8 80 adam 0.00001 0.00 8 4 512 512 0.5826 0.5739 0.0087\n", "9 139 adam 0.00001 0.01 32 4 512 256 0.5820 0.5736 0.0085\n", "10 103 adam 0.00001 0.00 32 4 512 256 0.5820 0.5765 0.0055\n", "11 34 adam 0.00010 0.00 32 5 256 512 0.5815 0.5788 0.0027\n", "12 108 adam 0.00001 0.00 32 5 512 512 0.5814 0.5796 0.0018\n", "13 245 lion 0.00001 0.00 32 4 256 256 0.5813 0.5758 0.0055\n", "14 233 lion 0.00001 0.00 16 4 256 256 0.5812 0.5723 0.0088\n", "15 33 adam 0.00010 0.00 32 5 256 256 0.5811 0.5792 0.0020\n", "16 281 lion 0.00001 0.01 32 4 256 256 0.5808 0.5759 0.0050\n", "17 230 lion 0.00001 0.00 16 3 256 512 0.5806 0.5714 0.0092\n", "18 49 adam 0.00010 0.01 16 3 256 256 0.5805 0.5729 0.0076\n", "19 266 lion 0.00001 0.01 16 3 256 512 0.5803 0.5718 0.0085\n", "20 116 adam 0.00001 0.01 8 4 512 512 0.5802 0.5716 0.0085\n", "21 9 adam 0.00010 0.00 8 5 256 256 0.5801 0.5762 0.0039\n", "22 115 adam 0.00001 0.01 8 4 512 256 0.5799 0.5719 0.0080\n", "23 22 adam 0.00010 0.00 16 5 256 512 0.5798 0.5714 0.0084\n", "24 79 adam 0.00001 0.00 8 4 512 256 0.5797 0.5759 0.0038" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/5862_адам лучший.txt\",25)\n", "\n", "from IPython.display import display\n", "pd.set_option(\"display.max_columns\", None)\n", "pd.set_option(\"display.width\", 160)\n", 
"\n", "display(df.head(25))" ] }, { "cell_type": "code", "execution_count": 18, "id": "22d74f6b-4d7f-4a1a-a98b-6754c286712d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
steplrmomentumnum_transformer_headstr_layer_numberhidden_dimout_featuresdevtestgap
050.00100.51642562560.58620.58410.0021
180.00100.9842562560.58570.56820.0175
2230.00010.91642562560.58070.57640.0043
320.00100.5842562560.58010.57070.0094
4110.00100.91642562560.57910.57320.0059
5200.00010.9842562560.57500.57380.0013
6240.00010.91652562560.56990.5715-0.0016
730.00100.5852562560.56960.5705-0.0009
8100.00100.91632562560.56880.55940.0094
9120.00100.91652562560.56720.5726-0.0054
1090.00100.9852562560.56570.5705-0.0049
11210.00010.9852562560.56520.5696-0.0045
1260.00100.51652562560.56430.5718-0.0075
1370.00100.9832562560.56120.55860.0026
1440.00100.51632562560.55800.5624-0.0044
15140.00010.5842562560.55690.5644-0.0075
16170.00010.51642562560.55670.55310.0036
17150.00010.5852562560.55520.54990.0053
1810.00100.5832562560.54960.5565-0.0069
19180.00010.51652562560.54770.5486-0.0009
20130.00010.5832562560.53610.5440-0.0078
21190.00010.9832562560.53590.5513-0.0154
22220.00010.91632562560.53390.5474-0.0135
23160.00010.51632562560.53130.5505-0.0192
\n", "
" ], "text/plain": [ " step lr momentum num_transformer_heads tr_layer_number hidden_dim out_features dev test gap\n", "0 5 0.0010 0.5 16 4 256 256 0.5862 0.5841 0.0021\n", "1 8 0.0010 0.9 8 4 256 256 0.5857 0.5682 0.0175\n", "2 23 0.0001 0.9 16 4 256 256 0.5807 0.5764 0.0043\n", "3 2 0.0010 0.5 8 4 256 256 0.5801 0.5707 0.0094\n", "4 11 0.0010 0.9 16 4 256 256 0.5791 0.5732 0.0059\n", "5 20 0.0001 0.9 8 4 256 256 0.5750 0.5738 0.0013\n", "6 24 0.0001 0.9 16 5 256 256 0.5699 0.5715 -0.0016\n", "7 3 0.0010 0.5 8 5 256 256 0.5696 0.5705 -0.0009\n", "8 10 0.0010 0.9 16 3 256 256 0.5688 0.5594 0.0094\n", "9 12 0.0010 0.9 16 5 256 256 0.5672 0.5726 -0.0054\n", "10 9 0.0010 0.9 8 5 256 256 0.5657 0.5705 -0.0049\n", "11 21 0.0001 0.9 8 5 256 256 0.5652 0.5696 -0.0045\n", "12 6 0.0010 0.5 16 5 256 256 0.5643 0.5718 -0.0075\n", "13 7 0.0010 0.9 8 3 256 256 0.5612 0.5586 0.0026\n", "14 4 0.0010 0.5 16 3 256 256 0.5580 0.5624 -0.0044\n", "15 14 0.0001 0.5 8 4 256 256 0.5569 0.5644 -0.0075\n", "16 17 0.0001 0.5 16 4 256 256 0.5567 0.5531 0.0036\n", "17 15 0.0001 0.5 8 5 256 256 0.5552 0.5499 0.0053\n", "18 1 0.0010 0.5 8 3 256 256 0.5496 0.5565 -0.0069\n", "19 18 0.0001 0.5 16 5 256 256 0.5477 0.5486 -0.0009\n", "20 13 0.0001 0.5 8 3 256 256 0.5361 0.5440 -0.0078\n", "21 19 0.0001 0.9 8 3 256 256 0.5359 0.5513 -0.0154\n", "22 22 0.0001 0.9 16 3 256 256 0.5339 0.5474 -0.0135\n", "23 16 0.0001 0.5 16 3 256 256 0.5313 0.5505 -0.0192" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/sgd_2.txt\",25)\n", "\n", "from IPython.display import display\n", "pd.set_option(\"display.max_columns\", None)\n", "pd.set_option(\"display.width\", 160)\n", "\n", "display(df.head(25))" ] }, { "cell_type": "code", "execution_count": 14, "id": "35ae96f7-7ceb-46da-9411-ceba258a98ac", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stepoptimizerlrnum_transformer_headstr_layer_numberhidden_dimout_featuresscheduler_typewarmup_ratiodevtestgap
068adam0.0000185256256plateau0.100.57680.5797-0.0029
167adam0.0000185256256plateau0.050.57680.5797-0.0029
292adam0.00001165256256plateau0.100.57420.5771-0.0028
391adam0.00001165256256plateau0.050.57420.5771-0.0028
465adam0.0000185256256huggingface_cosine_with_restarts0.050.57370.5766-0.0029
513adam0.0001084256512huggingface_cosine_with_restarts0.050.57490.5765-0.0015
634adam0.00010164256256huggingface_cosine_with_restarts0.100.57740.57600.0014
766adam0.0000185256256huggingface_cosine_with_restarts0.100.57650.57600.0005
833adam0.00010164256256huggingface_cosine_with_restarts0.050.57590.57570.0002
946adam0.00010165256512huggingface_cosine_with_restarts0.100.57840.57520.0032
1037adam0.00010164256512huggingface_cosine_with_restarts0.050.57620.57440.0018
1118adam0.0001085256256huggingface_cosine_with_restarts0.100.57300.57240.0006
1290adam0.00001165256256huggingface_cosine_with_restarts0.100.57280.57200.0008
1326adam0.00010163256256huggingface_cosine_with_restarts0.100.57810.57130.0068
1489adam0.00001165256256huggingface_cosine_with_restarts0.050.57100.5711-0.0001
1551adam0.0000183256256plateau0.050.57610.57010.0060
1652adam0.0000183256256plateau0.100.57610.57010.0060
179adam0.0001084256256huggingface_cosine_with_restarts0.050.57810.57000.0080
1819adam0.0001085256256plateau0.050.57430.56980.0045
1920adam0.0001085256256plateau0.100.57430.56980.0045
2017adam0.0001085256256huggingface_cosine_with_restarts0.050.56700.5697-0.0027
2173adam0.00001163256256huggingface_cosine_with_restarts0.050.57570.56970.0060
2244adam0.00010165256256plateau0.100.57790.56940.0085
2343adam0.00010165256256plateau0.050.57790.56940.0085
2482adam0.00001164256256huggingface_cosine_with_restarts0.100.57800.56930.0087
2581adam0.00001164256256huggingface_cosine_with_restarts0.050.57730.56920.0082
2641adam0.00010165256256huggingface_cosine_with_restarts0.050.57200.56890.0031
2716adam0.0001084256512plateau0.100.57830.56870.0096
2815adam0.0001084256512plateau0.050.57830.56870.0096
2942adam0.00010165256256huggingface_cosine_with_restarts0.100.57460.56860.0060
3085adam0.00001164256512huggingface_cosine_with_restarts0.050.57330.56850.0048
3138adam0.00010164256512huggingface_cosine_with_restarts0.100.57280.56820.0046
3294adam0.00001165256512huggingface_cosine_with_restarts0.100.57540.56760.0078
3370adam0.0000185256512huggingface_cosine_with_restarts0.100.57130.56750.0038
3486adam0.00001164256512huggingface_cosine_with_restarts0.100.57080.56750.0033
3550adam0.0000183256256huggingface_cosine_with_restarts0.100.57710.56740.0097
3630adam0.00010163256512huggingface_cosine_with_restarts0.100.57110.56740.0037
3758adam0.0000184256256huggingface_cosine_with_restarts0.100.57290.56730.0056
3848adam0.00010165256512plateau0.100.57150.56660.0049
3947adam0.00010165256512plateau0.050.57150.56660.0049
4035adam0.00010164256256plateau0.050.57670.56620.0105
4136adam0.00010164256256plateau0.100.57670.56620.0105
4210adam0.0001084256256huggingface_cosine_with_restarts0.100.57340.56610.0073
4383adam0.00001164256256plateau0.050.57420.56600.0081
4484adam0.00001164256256plateau0.100.57420.56600.0081
4557adam0.0000184256256huggingface_cosine_with_restarts0.050.57170.56580.0058
4675adam0.00001163256256plateau0.050.57170.56560.0061
4771adam0.0000185256512plateau0.050.57490.56560.0094
4872adam0.0000185256512plateau0.100.57490.56560.0094
4949adam0.0000183256256huggingface_cosine_with_restarts0.050.57860.56560.0130
\n", "
" ], "text/plain": [ " step optimizer lr num_transformer_heads tr_layer_number hidden_dim out_features scheduler_type warmup_ratio dev test \\\n", "0 68 adam 0.00001 8 5 256 256 plateau 0.10 0.5768 0.5797 \n", "1 67 adam 0.00001 8 5 256 256 plateau 0.05 0.5768 0.5797 \n", "2 92 adam 0.00001 16 5 256 256 plateau 0.10 0.5742 0.5771 \n", "3 91 adam 0.00001 16 5 256 256 plateau 0.05 0.5742 0.5771 \n", "4 65 adam 0.00001 8 5 256 256 huggingface_cosine_with_restarts 0.05 0.5737 0.5766 \n", "5 13 adam 0.00010 8 4 256 512 huggingface_cosine_with_restarts 0.05 0.5749 0.5765 \n", "6 34 adam 0.00010 16 4 256 256 huggingface_cosine_with_restarts 0.10 0.5774 0.5760 \n", "7 66 adam 0.00001 8 5 256 256 huggingface_cosine_with_restarts 0.10 0.5765 0.5760 \n", "8 33 adam 0.00010 16 4 256 256 huggingface_cosine_with_restarts 0.05 0.5759 0.5757 \n", "9 46 adam 0.00010 16 5 256 512 huggingface_cosine_with_restarts 0.10 0.5784 0.5752 \n", "10 37 adam 0.00010 16 4 256 512 huggingface_cosine_with_restarts 0.05 0.5762 0.5744 \n", "11 18 adam 0.00010 8 5 256 256 huggingface_cosine_with_restarts 0.10 0.5730 0.5724 \n", "12 90 adam 0.00001 16 5 256 256 huggingface_cosine_with_restarts 0.10 0.5728 0.5720 \n", "13 26 adam 0.00010 16 3 256 256 huggingface_cosine_with_restarts 0.10 0.5781 0.5713 \n", "14 89 adam 0.00001 16 5 256 256 huggingface_cosine_with_restarts 0.05 0.5710 0.5711 \n", "15 51 adam 0.00001 8 3 256 256 plateau 0.05 0.5761 0.5701 \n", "16 52 adam 0.00001 8 3 256 256 plateau 0.10 0.5761 0.5701 \n", "17 9 adam 0.00010 8 4 256 256 huggingface_cosine_with_restarts 0.05 0.5781 0.5700 \n", "18 19 adam 0.00010 8 5 256 256 plateau 0.05 0.5743 0.5698 \n", "19 20 adam 0.00010 8 5 256 256 plateau 0.10 0.5743 0.5698 \n", "20 17 adam 0.00010 8 5 256 256 huggingface_cosine_with_restarts 0.05 0.5670 0.5697 \n", "21 73 adam 0.00001 16 3 256 256 huggingface_cosine_with_restarts 0.05 0.5757 0.5697 \n", "22 44 adam 0.00010 16 5 256 256 plateau 0.10 0.5779 0.5694 \n", "23 43 adam 0.00010 16 5 256 
256 plateau 0.05 0.5779 0.5694 \n", "24 82 adam 0.00001 16 4 256 256 huggingface_cosine_with_restarts 0.10 0.5780 0.5693 \n", "25 81 adam 0.00001 16 4 256 256 huggingface_cosine_with_restarts 0.05 0.5773 0.5692 \n", "26 41 adam 0.00010 16 5 256 256 huggingface_cosine_with_restarts 0.05 0.5720 0.5689 \n", "27 16 adam 0.00010 8 4 256 512 plateau 0.10 0.5783 0.5687 \n", "28 15 adam 0.00010 8 4 256 512 plateau 0.05 0.5783 0.5687 \n", "29 42 adam 0.00010 16 5 256 256 huggingface_cosine_with_restarts 0.10 0.5746 0.5686 \n", "30 85 adam 0.00001 16 4 256 512 huggingface_cosine_with_restarts 0.05 0.5733 0.5685 \n", "31 38 adam 0.00010 16 4 256 512 huggingface_cosine_with_restarts 0.10 0.5728 0.5682 \n", "32 94 adam 0.00001 16 5 256 512 huggingface_cosine_with_restarts 0.10 0.5754 0.5676 \n", "33 70 adam 0.00001 8 5 256 512 huggingface_cosine_with_restarts 0.10 0.5713 0.5675 \n", "34 86 adam 0.00001 16 4 256 512 huggingface_cosine_with_restarts 0.10 0.5708 0.5675 \n", "35 50 adam 0.00001 8 3 256 256 huggingface_cosine_with_restarts 0.10 0.5771 0.5674 \n", "36 30 adam 0.00010 16 3 256 512 huggingface_cosine_with_restarts 0.10 0.5711 0.5674 \n", "37 58 adam 0.00001 8 4 256 256 huggingface_cosine_with_restarts 0.10 0.5729 0.5673 \n", "38 48 adam 0.00010 16 5 256 512 plateau 0.10 0.5715 0.5666 \n", "39 47 adam 0.00010 16 5 256 512 plateau 0.05 0.5715 0.5666 \n", "40 35 adam 0.00010 16 4 256 256 plateau 0.05 0.5767 0.5662 \n", "41 36 adam 0.00010 16 4 256 256 plateau 0.10 0.5767 0.5662 \n", "42 10 adam 0.00010 8 4 256 256 huggingface_cosine_with_restarts 0.10 0.5734 0.5661 \n", "43 83 adam 0.00001 16 4 256 256 plateau 0.05 0.5742 0.5660 \n", "44 84 adam 0.00001 16 4 256 256 plateau 0.10 0.5742 0.5660 \n", "45 57 adam 0.00001 8 4 256 256 huggingface_cosine_with_restarts 0.05 0.5717 0.5658 \n", "46 75 adam 0.00001 16 3 256 256 plateau 0.05 0.5717 0.5656 \n", "47 71 adam 0.00001 8 5 256 512 plateau 0.05 0.5749 0.5656 \n", "48 72 adam 0.00001 8 5 256 512 plateau 0.10 0.5749 0.5656 
\n", "49 49 adam 0.00001 8 3 256 256 huggingface_cosine_with_restarts 0.05 0.5786 0.5656 \n", "\n", " gap \n", "0 -0.0029 \n", "1 -0.0029 \n", "2 -0.0028 \n", "3 -0.0028 \n", "4 -0.0029 \n", "5 -0.0015 \n", "6 0.0014 \n", "7 0.0005 \n", "8 0.0002 \n", "9 0.0032 \n", "10 0.0018 \n", "11 0.0006 \n", "12 0.0008 \n", "13 0.0068 \n", "14 -0.0001 \n", "15 0.0060 \n", "16 0.0060 \n", "17 0.0080 \n", "18 0.0045 \n", "19 0.0045 \n", "20 -0.0027 \n", "21 0.0060 \n", "22 0.0085 \n", "23 0.0085 \n", "24 0.0087 \n", "25 0.0082 \n", "26 0.0031 \n", "27 0.0096 \n", "28 0.0096 \n", "29 0.0060 \n", "30 0.0048 \n", "31 0.0046 \n", "32 0.0078 \n", "33 0.0038 \n", "34 0.0033 \n", "35 0.0097 \n", "36 0.0037 \n", "37 0.0056 \n", "38 0.0049 \n", "39 0.0049 \n", "40 0.0105 \n", "41 0.0105 \n", "42 0.0073 \n", "43 0.0081 \n", "44 0.0081 \n", "45 0.0058 \n", "46 0.0061 \n", "47 0.0094 \n", "48 0.0094 \n", "49 0.0130 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/last/biformer.txt\",50)\n", "\n", "from IPython.display import display\n", "pd.set_option(\"display.max_columns\", None)\n", "pd.set_option(\"display.width\", 160)\n", "\n", "display(df.head(50))" ] }, { "cell_type": "code", "execution_count": 13, "id": "5832fe53-8308-4195-bb94-db7a86148076", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stepoptimizerlrnum_transformer_headstr_layer_numberhidden_dimout_featuresscheduler_typewarmup_ratiodevtestgap
028adam0.00010163256256plateau0.100.57500.5758-0.0008
127adam0.00010163256256plateau0.050.57500.5758-0.0008
225adam0.00010163256256huggingface_cosine_with_restarts0.050.57210.57160.0005
356adam0.0000183256512plateau0.100.57170.57050.0012
455adam0.0000183256512plateau0.050.57170.57050.0012
517adam0.0001085256256huggingface_cosine_with_restarts0.050.56830.5690-0.0006
626adam0.00010163256256huggingface_cosine_with_restarts0.100.57580.56870.0071
737adam0.00010164256512huggingface_cosine_with_restarts0.050.56740.5676-0.0002
844adam0.00010165256256plateau0.100.56810.56710.0010
943adam0.00010165256256plateau0.050.56810.56710.0010
1031adam0.00010163256512plateau0.050.56240.5659-0.0035
1132adam0.00010163256512plateau0.100.56240.5659-0.0035
122adam0.0001083256256huggingface_cosine_with_restarts0.100.56610.56560.0005
1380adam0.00001163256512plateau0.100.56890.56500.0040
1479adam0.00001163256512plateau0.050.56890.56500.0040
1553adam0.0000183256512huggingface_cosine_with_restarts0.050.56650.56490.0016
1677adam0.00001163256512huggingface_cosine_with_restarts0.050.56900.56490.0041
171adam0.0001083256256huggingface_cosine_with_restarts0.050.56390.5648-0.0009
1878adam0.00001163256512huggingface_cosine_with_restarts0.100.56560.56450.0011
1962adam0.0000184256512huggingface_cosine_with_restarts0.100.56490.56440.0005
2046adam0.00010165256512huggingface_cosine_with_restarts0.100.55960.5638-0.0042
2114adam0.0001084256512huggingface_cosine_with_restarts0.100.57220.56310.0091
2295adam0.00001165256512plateau0.050.56540.56310.0023
2396adam0.00001165256512plateau0.100.56540.56310.0023
2450adam0.0000183256256huggingface_cosine_with_restarts0.100.57020.56290.0073
2573adam0.00001163256256huggingface_cosine_with_restarts0.050.56740.56280.0046
266adam0.0001083256512huggingface_cosine_with_restarts0.100.57080.56260.0082
2761adam0.0000184256512huggingface_cosine_with_restarts0.050.56490.56240.0025
2874adam0.00001163256256huggingface_cosine_with_restarts0.100.56760.56230.0053
2991adam0.00001165256256plateau0.050.55860.5622-0.0036
3092adam0.00001165256256plateau0.100.55860.5622-0.0036
318adam0.0001083256512plateau0.100.56690.56160.0053
3238adam0.00010164256512huggingface_cosine_with_restarts0.100.56400.56160.0024
337adam0.0001083256512plateau0.050.56690.56160.0053
3454adam0.0000183256512huggingface_cosine_with_restarts0.100.56460.56150.0031
3593adam0.00001165256512huggingface_cosine_with_restarts0.050.56730.56120.0061
364adam0.0001083256256plateau0.100.56650.56100.0054
373adam0.0001083256256plateau0.050.56650.56100.0054
3849adam0.0000183256256huggingface_cosine_with_restarts0.050.57060.56090.0097
3975adam0.00001163256256plateau0.050.56560.56060.0050
4076adam0.00001163256256plateau0.100.56560.56060.0050
4142adam0.00010165256256huggingface_cosine_with_restarts0.100.56660.56030.0064
4289adam0.00001165256256huggingface_cosine_with_restarts0.050.56160.56000.0017
4310adam0.0001084256256huggingface_cosine_with_restarts0.100.56510.55970.0053
4470adam0.0000185256512huggingface_cosine_with_restarts0.100.56910.55940.0097
4534adam0.00010164256256huggingface_cosine_with_restarts0.100.56500.55890.0061
4647adam0.00010165256512plateau0.050.56130.55870.0026
4748adam0.00010165256512plateau0.100.56130.55870.0026
4839adam0.00010164256512plateau0.050.56120.55860.0026
4940adam0.00010164256512plateau0.100.56120.55860.0026
\n", "
" ], "text/plain": [ " step optimizer lr num_transformer_heads tr_layer_number hidden_dim out_features scheduler_type warmup_ratio dev test \\\n", "0 28 adam 0.00010 16 3 256 256 plateau 0.10 0.5750 0.5758 \n", "1 27 adam 0.00010 16 3 256 256 plateau 0.05 0.5750 0.5758 \n", "2 25 adam 0.00010 16 3 256 256 huggingface_cosine_with_restarts 0.05 0.5721 0.5716 \n", "3 56 adam 0.00001 8 3 256 512 plateau 0.10 0.5717 0.5705 \n", "4 55 adam 0.00001 8 3 256 512 plateau 0.05 0.5717 0.5705 \n", "5 17 adam 0.00010 8 5 256 256 huggingface_cosine_with_restarts 0.05 0.5683 0.5690 \n", "6 26 adam 0.00010 16 3 256 256 huggingface_cosine_with_restarts 0.10 0.5758 0.5687 \n", "7 37 adam 0.00010 16 4 256 512 huggingface_cosine_with_restarts 0.05 0.5674 0.5676 \n", "8 44 adam 0.00010 16 5 256 256 plateau 0.10 0.5681 0.5671 \n", "9 43 adam 0.00010 16 5 256 256 plateau 0.05 0.5681 0.5671 \n", "10 31 adam 0.00010 16 3 256 512 plateau 0.05 0.5624 0.5659 \n", "11 32 adam 0.00010 16 3 256 512 plateau 0.10 0.5624 0.5659 \n", "12 2 adam 0.00010 8 3 256 256 huggingface_cosine_with_restarts 0.10 0.5661 0.5656 \n", "13 80 adam 0.00001 16 3 256 512 plateau 0.10 0.5689 0.5650 \n", "14 79 adam 0.00001 16 3 256 512 plateau 0.05 0.5689 0.5650 \n", "15 53 adam 0.00001 8 3 256 512 huggingface_cosine_with_restarts 0.05 0.5665 0.5649 \n", "16 77 adam 0.00001 16 3 256 512 huggingface_cosine_with_restarts 0.05 0.5690 0.5649 \n", "17 1 adam 0.00010 8 3 256 256 huggingface_cosine_with_restarts 0.05 0.5639 0.5648 \n", "18 78 adam 0.00001 16 3 256 512 huggingface_cosine_with_restarts 0.10 0.5656 0.5645 \n", "19 62 adam 0.00001 8 4 256 512 huggingface_cosine_with_restarts 0.10 0.5649 0.5644 \n", "20 46 adam 0.00010 16 5 256 512 huggingface_cosine_with_restarts 0.10 0.5596 0.5638 \n", "21 14 adam 0.00010 8 4 256 512 huggingface_cosine_with_restarts 0.10 0.5722 0.5631 \n", "22 95 adam 0.00001 16 5 256 512 plateau 0.05 0.5654 0.5631 \n", "23 96 adam 0.00001 16 5 256 512 plateau 0.10 0.5654 0.5631 \n", "24 50 adam 
0.00001 8 3 256 256 huggingface_cosine_with_restarts 0.10 0.5702 0.5629 \n", "25 73 adam 0.00001 16 3 256 256 huggingface_cosine_with_restarts 0.05 0.5674 0.5628 \n", "26 6 adam 0.00010 8 3 256 512 huggingface_cosine_with_restarts 0.10 0.5708 0.5626 \n", "27 61 adam 0.00001 8 4 256 512 huggingface_cosine_with_restarts 0.05 0.5649 0.5624 \n", "28 74 adam 0.00001 16 3 256 256 huggingface_cosine_with_restarts 0.10 0.5676 0.5623 \n", "29 91 adam 0.00001 16 5 256 256 plateau 0.05 0.5586 0.5622 \n", "30 92 adam 0.00001 16 5 256 256 plateau 0.10 0.5586 0.5622 \n", "31 8 adam 0.00010 8 3 256 512 plateau 0.10 0.5669 0.5616 \n", "32 38 adam 0.00010 16 4 256 512 huggingface_cosine_with_restarts 0.10 0.5640 0.5616 \n", "33 7 adam 0.00010 8 3 256 512 plateau 0.05 0.5669 0.5616 \n", "34 54 adam 0.00001 8 3 256 512 huggingface_cosine_with_restarts 0.10 0.5646 0.5615 \n", "35 93 adam 0.00001 16 5 256 512 huggingface_cosine_with_restarts 0.05 0.5673 0.5612 \n", "36 4 adam 0.00010 8 3 256 256 plateau 0.10 0.5665 0.5610 \n", "37 3 adam 0.00010 8 3 256 256 plateau 0.05 0.5665 0.5610 \n", "38 49 adam 0.00001 8 3 256 256 huggingface_cosine_with_restarts 0.05 0.5706 0.5609 \n", "39 75 adam 0.00001 16 3 256 256 plateau 0.05 0.5656 0.5606 \n", "40 76 adam 0.00001 16 3 256 256 plateau 0.10 0.5656 0.5606 \n", "41 42 adam 0.00010 16 5 256 256 huggingface_cosine_with_restarts 0.10 0.5666 0.5603 \n", "42 89 adam 0.00001 16 5 256 256 huggingface_cosine_with_restarts 0.05 0.5616 0.5600 \n", "43 10 adam 0.00010 8 4 256 256 huggingface_cosine_with_restarts 0.10 0.5651 0.5597 \n", "44 70 adam 0.00001 8 5 256 512 huggingface_cosine_with_restarts 0.10 0.5691 0.5594 \n", "45 34 adam 0.00010 16 4 256 256 huggingface_cosine_with_restarts 0.10 0.5650 0.5589 \n", "46 47 adam 0.00010 16 5 256 512 plateau 0.05 0.5613 0.5587 \n", "47 48 adam 0.00010 16 5 256 512 plateau 0.10 0.5613 0.5587 \n", "48 39 adam 0.00010 16 4 256 512 plateau 0.05 0.5612 0.5586 \n", "49 40 adam 0.00010 16 4 256 512 plateau 0.10 0.5612 
0.5586 \n", "\n", " gap \n", "0 -0.0008 \n", "1 -0.0008 \n", "2 0.0005 \n", "3 0.0012 \n", "4 0.0012 \n", "5 -0.0006 \n", "6 0.0071 \n", "7 -0.0002 \n", "8 0.0010 \n", "9 0.0010 \n", "10 -0.0035 \n", "11 -0.0035 \n", "12 0.0005 \n", "13 0.0040 \n", "14 0.0040 \n", "15 0.0016 \n", "16 0.0041 \n", "17 -0.0009 \n", "18 0.0011 \n", "19 0.0005 \n", "20 -0.0042 \n", "21 0.0091 \n", "22 0.0023 \n", "23 0.0023 \n", "24 0.0073 \n", "25 0.0046 \n", "26 0.0082 \n", "27 0.0025 \n", "28 0.0053 \n", "29 -0.0036 \n", "30 -0.0036 \n", "31 0.0053 \n", "32 0.0024 \n", "33 0.0053 \n", "34 0.0031 \n", "35 0.0061 \n", "36 0.0054 \n", "37 0.0054 \n", "38 0.0097 \n", "39 0.0050 \n", "40 0.0050 \n", "41 0.0064 \n", "42 0.0017 \n", "43 0.0053 \n", "44 0.0097 \n", "45 0.0061 \n", "46 0.0026 \n", "47 0.0026 \n", "48 0.0026 \n", "49 0.0026 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/last/BiForm_wtb.txt\",50)\n", "\n", "from IPython.display import display\n", "pd.set_option(\"display.max_columns\", None)\n", "pd.set_option(\"display.width\", 160)\n", "\n", "display(df.head(50))" ] }, { "cell_type": "code", "execution_count": 5, "id": "0d46eb44-6c34-4cc9-8322-6eb476eb827c", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
steplrnum_transformer_headssmoothing_probabilitymodel_namedevtestgap
0450.0000180.0BiFormer0.57680.5797-0.0029
1490.0000180.2BiFormer0.57530.5779-0.0026
2470.0000180.1BiFormer0.57450.5773-0.0028
3670.00001160.0BiFormer0.57420.5771-0.0028
4250.00010160.1BiFormer0.57790.57660.0013
5270.00010160.2BiFormer0.56950.5752-0.0057
6710.00001160.2BiFormer0.57040.5745-0.0041
7690.00001160.1BiFormer0.57130.5741-0.0029
8510.0000180.3BiFormer0.57510.57350.0016
9770.00001160.5BiFormer0.56460.5722-0.0076
10550.0000180.5BiFormer0.57080.5719-0.0011
11290.00010160.3BiFormer0.56960.5710-0.0014
12730.00001160.3BiFormer0.57080.57080.0000
13110.0001080.5BiFormer0.56750.5706-0.0031
14310.00010160.4BiFormer0.57200.57050.0016
15750.00001160.4BiFormer0.56740.5702-0.0028
1630.0001080.1BiFormer0.57410.56980.0042
1710.0001080.0BiFormer0.57430.56980.0045
18230.00010160.0BiFormer0.57790.56940.0085
19530.0000180.4BiFormer0.57430.56910.0052
20500.0000180.2BiFormerWithProb0.57220.56830.0039
21720.00001160.2BiFormerWithProb0.57330.56820.0051
22520.0000180.3BiFormerWithProb0.56730.5677-0.0004
23570.0000180.6BiFormer0.56740.5677-0.0003
24260.00010160.1BiFormerWithProb0.57290.56730.0056
25740.00001160.3BiFormerWithProb0.56960.56710.0025
26240.00010160.0BiFormerWithProb0.56810.56710.0010
27790.00001160.6BiFormer0.56230.5663-0.0040
2850.0001080.2BiFormer0.56870.56590.0028
29810.00001160.7BiFormer0.55960.5655-0.0059
30280.00010160.2BiFormerWithProb0.56490.5655-0.0006
31700.00001160.1BiFormerWithProb0.56840.56380.0046
32590.0000180.7BiFormer0.56820.56360.0046
33130.0001080.6BiFormer0.55930.5634-0.0042
3440.0001080.1BiFormerWithProb0.56450.56300.0014
3590.0001080.4BiFormer0.56840.56290.0055
36170.0001080.8BiFormer0.56230.56220.0001
37680.00001160.0BiFormerWithProb0.55860.5622-0.0036
38150.0001080.7BiFormer0.56250.56160.0009
3970.0001080.3BiFormer0.56890.56080.0081
40370.00010160.7BiFormer0.55900.5607-0.0017
41100.0001080.4BiFormerWithProb0.56360.56070.0029
42330.00010160.5BiFormer0.56270.56050.0022
43610.0000180.8BiFormer0.56100.56000.0010
44760.00001160.4BiFormerWithProb0.56520.55990.0053
45190.0001080.9BiFormer0.56150.55920.0024
46630.0000180.9BiFormer0.55730.5590-0.0017
47830.00001160.8BiFormer0.55660.5585-0.0019
4880.0001080.3BiFormerWithProb0.56260.55850.0041
4960.0001080.2BiFormerWithProb0.56120.55750.0037
\n", "
" ], "text/plain": [ " step lr num_transformer_heads smoothing_probability model_name dev test gap\n", "0 45 0.00001 8 0.0 BiFormer 0.5768 0.5797 -0.0029\n", "1 49 0.00001 8 0.2 BiFormer 0.5753 0.5779 -0.0026\n", "2 47 0.00001 8 0.1 BiFormer 0.5745 0.5773 -0.0028\n", "3 67 0.00001 16 0.0 BiFormer 0.5742 0.5771 -0.0028\n", "4 25 0.00010 16 0.1 BiFormer 0.5779 0.5766 0.0013\n", "5 27 0.00010 16 0.2 BiFormer 0.5695 0.5752 -0.0057\n", "6 71 0.00001 16 0.2 BiFormer 0.5704 0.5745 -0.0041\n", "7 69 0.00001 16 0.1 BiFormer 0.5713 0.5741 -0.0029\n", "8 51 0.00001 8 0.3 BiFormer 0.5751 0.5735 0.0016\n", "9 77 0.00001 16 0.5 BiFormer 0.5646 0.5722 -0.0076\n", "10 55 0.00001 8 0.5 BiFormer 0.5708 0.5719 -0.0011\n", "11 29 0.00010 16 0.3 BiFormer 0.5696 0.5710 -0.0014\n", "12 73 0.00001 16 0.3 BiFormer 0.5708 0.5708 0.0000\n", "13 11 0.00010 8 0.5 BiFormer 0.5675 0.5706 -0.0031\n", "14 31 0.00010 16 0.4 BiFormer 0.5720 0.5705 0.0016\n", "15 75 0.00001 16 0.4 BiFormer 0.5674 0.5702 -0.0028\n", "16 3 0.00010 8 0.1 BiFormer 0.5741 0.5698 0.0042\n", "17 1 0.00010 8 0.0 BiFormer 0.5743 0.5698 0.0045\n", "18 23 0.00010 16 0.0 BiFormer 0.5779 0.5694 0.0085\n", "19 53 0.00001 8 0.4 BiFormer 0.5743 0.5691 0.0052\n", "20 50 0.00001 8 0.2 BiFormerWithProb 0.5722 0.5683 0.0039\n", "21 72 0.00001 16 0.2 BiFormerWithProb 0.5733 0.5682 0.0051\n", "22 52 0.00001 8 0.3 BiFormerWithProb 0.5673 0.5677 -0.0004\n", "23 57 0.00001 8 0.6 BiFormer 0.5674 0.5677 -0.0003\n", "24 26 0.00010 16 0.1 BiFormerWithProb 0.5729 0.5673 0.0056\n", "25 74 0.00001 16 0.3 BiFormerWithProb 0.5696 0.5671 0.0025\n", "26 24 0.00010 16 0.0 BiFormerWithProb 0.5681 0.5671 0.0010\n", "27 79 0.00001 16 0.6 BiFormer 0.5623 0.5663 -0.0040\n", "28 5 0.00010 8 0.2 BiFormer 0.5687 0.5659 0.0028\n", "29 81 0.00001 16 0.7 BiFormer 0.5596 0.5655 -0.0059\n", "30 28 0.00010 16 0.2 BiFormerWithProb 0.5649 0.5655 -0.0006\n", "31 70 0.00001 16 0.1 BiFormerWithProb 0.5684 0.5638 0.0046\n", "32 59 0.00001 8 0.7 BiFormer 0.5682 0.5636 
0.0046\n", "33 13 0.00010 8 0.6 BiFormer 0.5593 0.5634 -0.0042\n", "34 4 0.00010 8 0.1 BiFormerWithProb 0.5645 0.5630 0.0014\n", "35 9 0.00010 8 0.4 BiFormer 0.5684 0.5629 0.0055\n", "36 17 0.00010 8 0.8 BiFormer 0.5623 0.5622 0.0001\n", "37 68 0.00001 16 0.0 BiFormerWithProb 0.5586 0.5622 -0.0036\n", "38 15 0.00010 8 0.7 BiFormer 0.5625 0.5616 0.0009\n", "39 7 0.00010 8 0.3 BiFormer 0.5689 0.5608 0.0081\n", "40 37 0.00010 16 0.7 BiFormer 0.5590 0.5607 -0.0017\n", "41 10 0.00010 8 0.4 BiFormerWithProb 0.5636 0.5607 0.0029\n", "42 33 0.00010 16 0.5 BiFormer 0.5627 0.5605 0.0022\n", "43 61 0.00001 8 0.8 BiFormer 0.5610 0.5600 0.0010\n", "44 76 0.00001 16 0.4 BiFormerWithProb 0.5652 0.5599 0.0053\n", "45 19 0.00010 8 0.9 BiFormer 0.5615 0.5592 0.0024\n", "46 63 0.00001 8 0.9 BiFormer 0.5573 0.5590 -0.0017\n", "47 83 0.00001 16 0.8 BiFormer 0.5566 0.5585 -0.0019\n", "48 8 0.00010 8 0.3 BiFormerWithProb 0.5626 0.5585 0.0041\n", "49 6 0.00010 8 0.2 BiFormerWithProb 0.5612 0.5575 0.0037" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/smoothing.txt\",50)\n", "\n", "from IPython.display import display\n", "pd.set_option(\"display.max_columns\", None)\n", "pd.set_option(\"display.width\", 160)\n", "\n", "display(df.head(50))" ] }, { "cell_type": "code", "execution_count": 2, "id": "7b4e6f8c-4428-4c5c-a8ac-b1780372ee98", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
steplrhidden_dimout_featuresmamba_d_statemamba_ker_sizemamba_layer_numberscheduler_typewarmup_ratiomodel_namedevtestgap
0760.00010256512844huggingface_cosine_with_restarts0.10BiMambaWithProb0.57420.5765-0.0024
13860.00001512256843huggingface_cosine_with_restarts0.05BiMambaWithProb0.57740.57090.0065
22600.00001256256843huggingface_cosine_with_restarts0.10BiMambaWithProb0.56530.5666-0.0012
32370.000105125121644plateau0.05BiMamba0.56120.5660-0.0048
42390.000105125121644plateau0.10BiMamba0.56120.5660-0.0048
51310.00010512256843huggingface_cosine_with_restarts0.10BiMamba0.56220.5649-0.0026
61200.000102565121653plateau0.10BiMambaWithProb0.57030.56430.0060
71180.000102565121653plateau0.05BiMambaWithProb0.57030.56430.0060
840.00010256256843huggingface_cosine_with_restarts0.10BiMambaWithProb0.56760.56430.0033
91100.000102565121644plateau0.05BiMambaWithProb0.56090.5636-0.0027
101120.000102565121644plateau0.10BiMambaWithProb0.56090.5636-0.0027
111940.00010512512843huggingface_cosine_with_restarts0.05BiMambaWithProb0.56410.56320.0008
123880.00001512256843huggingface_cosine_with_restarts0.10BiMambaWithProb0.57010.56300.0071
131500.00010512256853plateau0.05BiMambaWithProb0.56240.5629-0.0006
141520.00010512256853plateau0.10BiMambaWithProb0.56240.5629-0.0006
153380.00001256512853huggingface_cosine_with_restarts0.05BiMambaWithProb0.56200.5626-0.0006
163400.00001256512853huggingface_cosine_with_restarts0.10BiMambaWithProb0.56450.56260.0019
174020.00001512256853huggingface_cosine_with_restarts0.05BiMambaWithProb0.56100.5622-0.0011
181460.00010512256853huggingface_cosine_with_restarts0.05BiMambaWithProb0.56470.56130.0034
192580.00001256256843huggingface_cosine_with_restarts0.05BiMambaWithProb0.56340.56120.0022
202160.00010512512853plateau0.10BiMambaWithProb0.56430.56080.0035
212140.00010512512853plateau0.05BiMambaWithProb0.56430.56080.0035
224520.00001512512843huggingface_cosine_with_restarts0.10BiMambaWithProb0.56300.56080.0022
234040.00001512256853huggingface_cosine_with_restarts0.10BiMambaWithProb0.56140.56070.0007
241060.000102565121644huggingface_cosine_with_restarts0.05BiMambaWithProb0.56460.56030.0043
254500.00001512512843huggingface_cosine_with_restarts0.05BiMambaWithProb0.56680.56020.0066
261990.00010512512843plateau0.10BiMamba0.55910.5596-0.0004
272260.000105125121643huggingface_cosine_with_restarts0.05BiMambaWithProb0.56540.55960.0058
281970.00010512512843plateau0.05BiMamba0.55910.5596-0.0004
29250.00010256256854huggingface_cosine_with_restarts0.05BiMamba0.55690.5591-0.0022
301960.00010512512843huggingface_cosine_with_restarts0.10BiMambaWithProb0.56110.55900.0022
312120.00010512512853huggingface_cosine_with_restarts0.10BiMambaWithProb0.57000.55900.0109
323850.00001512256843huggingface_cosine_with_restarts0.05BiMamba0.55960.55860.0010
33460.000102562561644plateau0.05BiMambaWithProb0.56380.55810.0057
34480.000102562561644plateau0.10BiMambaWithProb0.56380.55810.0057
352420.000105125121653huggingface_cosine_with_restarts0.05BiMambaWithProb0.56110.55770.0034
361660.000105122561643plateau0.05BiMambaWithProb0.55960.55770.0020
371680.000105122561643plateau0.10BiMambaWithProb0.55960.55770.0020
381620.000105122561643huggingface_cosine_with_restarts0.05BiMambaWithProb0.56210.55740.0047
392660.00001256256844huggingface_cosine_with_restarts0.05BiMambaWithProb0.55430.5573-0.0030
401240.000102565121654huggingface_cosine_with_restarts0.10BiMambaWithProb0.55510.5573-0.0022
41530.000102562561653plateau0.05BiMamba0.55770.55720.0005
42550.000102562561653plateau0.10BiMamba0.55770.55720.0005
431160.000102565121653huggingface_cosine_with_restarts0.10BiMambaWithProb0.55820.55670.0015
443320.00001256512844huggingface_cosine_with_restarts0.10BiMambaWithProb0.55540.5567-0.0013
4520.00010256256843huggingface_cosine_with_restarts0.05BiMambaWithProb0.55280.5566-0.0037
461300.00010512256843huggingface_cosine_with_restarts0.05BiMambaWithProb0.56360.55650.0071
473930.00001512256844huggingface_cosine_with_restarts0.05BiMamba0.55920.55610.0032
481320.00010512256843huggingface_cosine_with_restarts0.10BiMambaWithProb0.56300.55590.0071
493150.000012562561654huggingface_cosine_with_restarts0.10BiMamba0.55630.55580.0005
\n", "
" ], "text/plain": [ " step lr hidden_dim out_features mamba_d_state mamba_ker_size mamba_layer_number scheduler_type warmup_ratio \\\n", "0 76 0.00010 256 512 8 4 4 huggingface_cosine_with_restarts 0.10 \n", "1 386 0.00001 512 256 8 4 3 huggingface_cosine_with_restarts 0.05 \n", "2 260 0.00001 256 256 8 4 3 huggingface_cosine_with_restarts 0.10 \n", "3 237 0.00010 512 512 16 4 4 plateau 0.05 \n", "4 239 0.00010 512 512 16 4 4 plateau 0.10 \n", "5 131 0.00010 512 256 8 4 3 huggingface_cosine_with_restarts 0.10 \n", "6 120 0.00010 256 512 16 5 3 plateau 0.10 \n", "7 118 0.00010 256 512 16 5 3 plateau 0.05 \n", "8 4 0.00010 256 256 8 4 3 huggingface_cosine_with_restarts 0.10 \n", "9 110 0.00010 256 512 16 4 4 plateau 0.05 \n", "10 112 0.00010 256 512 16 4 4 plateau 0.10 \n", "11 194 0.00010 512 512 8 4 3 huggingface_cosine_with_restarts 0.05 \n", "12 388 0.00001 512 256 8 4 3 huggingface_cosine_with_restarts 0.10 \n", "13 150 0.00010 512 256 8 5 3 plateau 0.05 \n", "14 152 0.00010 512 256 8 5 3 plateau 0.10 \n", "15 338 0.00001 256 512 8 5 3 huggingface_cosine_with_restarts 0.05 \n", "16 340 0.00001 256 512 8 5 3 huggingface_cosine_with_restarts 0.10 \n", "17 402 0.00001 512 256 8 5 3 huggingface_cosine_with_restarts 0.05 \n", "18 146 0.00010 512 256 8 5 3 huggingface_cosine_with_restarts 0.05 \n", "19 258 0.00001 256 256 8 4 3 huggingface_cosine_with_restarts 0.05 \n", "20 216 0.00010 512 512 8 5 3 plateau 0.10 \n", "21 214 0.00010 512 512 8 5 3 plateau 0.05 \n", "22 452 0.00001 512 512 8 4 3 huggingface_cosine_with_restarts 0.10 \n", "23 404 0.00001 512 256 8 5 3 huggingface_cosine_with_restarts 0.10 \n", "24 106 0.00010 256 512 16 4 4 huggingface_cosine_with_restarts 0.05 \n", "25 450 0.00001 512 512 8 4 3 huggingface_cosine_with_restarts 0.05 \n", "26 199 0.00010 512 512 8 4 3 plateau 0.10 \n", "27 226 0.00010 512 512 16 4 3 huggingface_cosine_with_restarts 0.05 \n", "28 197 0.00010 512 512 8 4 3 plateau 0.05 \n", "29 25 0.00010 256 256 8 5 4 
huggingface_cosine_with_restarts 0.05 \n", "30 196 0.00010 512 512 8 4 3 huggingface_cosine_with_restarts 0.10 \n", "31 212 0.00010 512 512 8 5 3 huggingface_cosine_with_restarts 0.10 \n", "32 385 0.00001 512 256 8 4 3 huggingface_cosine_with_restarts 0.05 \n", "33 46 0.00010 256 256 16 4 4 plateau 0.05 \n", "34 48 0.00010 256 256 16 4 4 plateau 0.10 \n", "35 242 0.00010 512 512 16 5 3 huggingface_cosine_with_restarts 0.05 \n", "36 166 0.00010 512 256 16 4 3 plateau 0.05 \n", "37 168 0.00010 512 256 16 4 3 plateau 0.10 \n", "38 162 0.00010 512 256 16 4 3 huggingface_cosine_with_restarts 0.05 \n", "39 266 0.00001 256 256 8 4 4 huggingface_cosine_with_restarts 0.05 \n", "40 124 0.00010 256 512 16 5 4 huggingface_cosine_with_restarts 0.10 \n", "41 53 0.00010 256 256 16 5 3 plateau 0.05 \n", "42 55 0.00010 256 256 16 5 3 plateau 0.10 \n", "43 116 0.00010 256 512 16 5 3 huggingface_cosine_with_restarts 0.10 \n", "44 332 0.00001 256 512 8 4 4 huggingface_cosine_with_restarts 0.10 \n", "45 2 0.00010 256 256 8 4 3 huggingface_cosine_with_restarts 0.05 \n", "46 130 0.00010 512 256 8 4 3 huggingface_cosine_with_restarts 0.05 \n", "47 393 0.00001 512 256 8 4 4 huggingface_cosine_with_restarts 0.05 \n", "48 132 0.00010 512 256 8 4 3 huggingface_cosine_with_restarts 0.10 \n", "49 315 0.00001 256 256 16 5 4 huggingface_cosine_with_restarts 0.10 \n", "\n", " model_name dev test gap \n", "0 BiMambaWithProb 0.5742 0.5765 -0.0024 \n", "1 BiMambaWithProb 0.5774 0.5709 0.0065 \n", "2 BiMambaWithProb 0.5653 0.5666 -0.0012 \n", "3 BiMamba 0.5612 0.5660 -0.0048 \n", "4 BiMamba 0.5612 0.5660 -0.0048 \n", "5 BiMamba 0.5622 0.5649 -0.0026 \n", "6 BiMambaWithProb 0.5703 0.5643 0.0060 \n", "7 BiMambaWithProb 0.5703 0.5643 0.0060 \n", "8 BiMambaWithProb 0.5676 0.5643 0.0033 \n", "9 BiMambaWithProb 0.5609 0.5636 -0.0027 \n", "10 BiMambaWithProb 0.5609 0.5636 -0.0027 \n", "11 BiMambaWithProb 0.5641 0.5632 0.0008 \n", "12 BiMambaWithProb 0.5701 0.5630 0.0071 \n", "13 BiMambaWithProb 0.5624 0.5629 
-0.0006 \n", "14 BiMambaWithProb 0.5624 0.5629 -0.0006 \n", "15 BiMambaWithProb 0.5620 0.5626 -0.0006 \n", "16 BiMambaWithProb 0.5645 0.5626 0.0019 \n", "17 BiMambaWithProb 0.5610 0.5622 -0.0011 \n", "18 BiMambaWithProb 0.5647 0.5613 0.0034 \n", "19 BiMambaWithProb 0.5634 0.5612 0.0022 \n", "20 BiMambaWithProb 0.5643 0.5608 0.0035 \n", "21 BiMambaWithProb 0.5643 0.5608 0.0035 \n", "22 BiMambaWithProb 0.5630 0.5608 0.0022 \n", "23 BiMambaWithProb 0.5614 0.5607 0.0007 \n", "24 BiMambaWithProb 0.5646 0.5603 0.0043 \n", "25 BiMambaWithProb 0.5668 0.5602 0.0066 \n", "26 BiMamba 0.5591 0.5596 -0.0004 \n", "27 BiMambaWithProb 0.5654 0.5596 0.0058 \n", "28 BiMamba 0.5591 0.5596 -0.0004 \n", "29 BiMamba 0.5569 0.5591 -0.0022 \n", "30 BiMambaWithProb 0.5611 0.5590 0.0022 \n", "31 BiMambaWithProb 0.5700 0.5590 0.0109 \n", "32 BiMamba 0.5596 0.5586 0.0010 \n", "33 BiMambaWithProb 0.5638 0.5581 0.0057 \n", "34 BiMambaWithProb 0.5638 0.5581 0.0057 \n", "35 BiMambaWithProb 0.5611 0.5577 0.0034 \n", "36 BiMambaWithProb 0.5596 0.5577 0.0020 \n", "37 BiMambaWithProb 0.5596 0.5577 0.0020 \n", "38 BiMambaWithProb 0.5621 0.5574 0.0047 \n", "39 BiMambaWithProb 0.5543 0.5573 -0.0030 \n", "40 BiMambaWithProb 0.5551 0.5573 -0.0022 \n", "41 BiMamba 0.5577 0.5572 0.0005 \n", "42 BiMamba 0.5577 0.5572 0.0005 \n", "43 BiMambaWithProb 0.5582 0.5567 0.0015 \n", "44 BiMambaWithProb 0.5554 0.5567 -0.0013 \n", "45 BiMambaWithProb 0.5528 0.5566 -0.0037 \n", "46 BiMambaWithProb 0.5636 0.5565 0.0071 \n", "47 BiMamba 0.5592 0.5561 0.0032 \n", "48 BiMambaWithProb 0.5630 0.5559 0.0071 \n", "49 BiMamba 0.5563 0.5558 0.0005 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/last/mambas.txt\",50)\n", "\n", "from IPython.display import display\n", "pd.set_option(\"display.max_columns\", None)\n", "pd.set_option(\"display.width\", 160)\n", "\n", "display(df.head(50))" ] }, { "cell_type": "code", "execution_count": 8, "id": 
"45434e0f-2af9-491a-bfc9-54cc3a91fca9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
steplrnum_transformer_headstr_layer_numberhidden_dim_gateddevtestgap
0130.000101641280.54180.5439-0.0022
1110.000101632560.54190.5430-0.0010
2160.000101651280.54320.54260.0005
3120.000101635120.55210.54070.0114
470.00010851280.54450.53950.0050
510.00010831280.53700.5394-0.0025
640.00010841280.54260.53680.0058
750.00010842560.54370.53680.0070
820.00010832560.53460.5366-0.0020
9100.000101631280.53220.5347-0.0025
1090.00010855120.54380.53430.0094
1180.00010852560.54190.53430.0076
12150.000101645120.53460.53190.0027
1360.00010845120.53380.53010.0037
14180.000101655120.52870.52850.0002
1530.00010835120.53540.52780.0076
16140.000101642560.53350.52720.0063
17170.000101652560.52160.51370.0080
18250.00001851280.51410.50380.0104
19290.000011632560.49450.4995-0.0049
20280.000011631280.50150.49640.0051
21220.00001841280.49930.49520.0042
22210.00001835120.49020.48310.0071
23340.000011651280.48210.47950.0026
24190.00001831280.48940.47930.0101
25200.00001832560.47560.4779-0.0023
26240.00001845120.47670.4777-0.0010
27300.000011635120.48620.47680.0094
28260.00001852560.48150.47450.0069
29230.00001842560.47410.47390.0003
30310.000011641280.47150.47050.0011
31360.000011655120.46690.46270.0042
32320.000011642560.46100.46060.0003
33330.000011645120.45300.45290.0001
34270.00001855120.45690.44770.0092
35350.000011652560.44480.44220.0025
\n", "
" ], "text/plain": [ " step lr num_transformer_heads tr_layer_number hidden_dim_gated dev test gap\n", "0 13 0.00010 16 4 128 0.5418 0.5439 -0.0022\n", "1 11 0.00010 16 3 256 0.5419 0.5430 -0.0010\n", "2 16 0.00010 16 5 128 0.5432 0.5426 0.0005\n", "3 12 0.00010 16 3 512 0.5521 0.5407 0.0114\n", "4 7 0.00010 8 5 128 0.5445 0.5395 0.0050\n", "5 1 0.00010 8 3 128 0.5370 0.5394 -0.0025\n", "6 4 0.00010 8 4 128 0.5426 0.5368 0.0058\n", "7 5 0.00010 8 4 256 0.5437 0.5368 0.0070\n", "8 2 0.00010 8 3 256 0.5346 0.5366 -0.0020\n", "9 10 0.00010 16 3 128 0.5322 0.5347 -0.0025\n", "10 9 0.00010 8 5 512 0.5438 0.5343 0.0094\n", "11 8 0.00010 8 5 256 0.5419 0.5343 0.0076\n", "12 15 0.00010 16 4 512 0.5346 0.5319 0.0027\n", "13 6 0.00010 8 4 512 0.5338 0.5301 0.0037\n", "14 18 0.00010 16 5 512 0.5287 0.5285 0.0002\n", "15 3 0.00010 8 3 512 0.5354 0.5278 0.0076\n", "16 14 0.00010 16 4 256 0.5335 0.5272 0.0063\n", "17 17 0.00010 16 5 256 0.5216 0.5137 0.0080\n", "18 25 0.00001 8 5 128 0.5141 0.5038 0.0104\n", "19 29 0.00001 16 3 256 0.4945 0.4995 -0.0049\n", "20 28 0.00001 16 3 128 0.5015 0.4964 0.0051\n", "21 22 0.00001 8 4 128 0.4993 0.4952 0.0042\n", "22 21 0.00001 8 3 512 0.4902 0.4831 0.0071\n", "23 34 0.00001 16 5 128 0.4821 0.4795 0.0026\n", "24 19 0.00001 8 3 128 0.4894 0.4793 0.0101\n", "25 20 0.00001 8 3 256 0.4756 0.4779 -0.0023\n", "26 24 0.00001 8 4 512 0.4767 0.4777 -0.0010\n", "27 30 0.00001 16 3 512 0.4862 0.4768 0.0094\n", "28 26 0.00001 8 5 256 0.4815 0.4745 0.0069\n", "29 23 0.00001 8 4 256 0.4741 0.4739 0.0003\n", "30 31 0.00001 16 4 128 0.4715 0.4705 0.0011\n", "31 36 0.00001 16 5 512 0.4669 0.4627 0.0042\n", "32 32 0.00001 16 4 256 0.4610 0.4606 0.0003\n", "33 33 0.00001 16 4 512 0.4530 0.4529 0.0001\n", "34 27 0.00001 8 5 512 0.4569 0.4477 0.0092\n", "35 35 0.00001 16 5 256 0.4448 0.4422 0.0025" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/last/bigated.txt\",50)\n", "\n", 
"from IPython.display import display\n", "pd.set_option(\"display.max_columns\", None)\n", "pd.set_option(\"display.width\", 160)\n", "\n", "display(df.head(50))" ] }, { "cell_type": "code", "execution_count": 9, "id": "041a82f6-a5db-4033-8e89-f4ecc24479f1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
steplrnum_transformer_headstr_layer_numbernum_graph_headsdevtestgap
0350.000018580.57980.57740.0023
1110.000108580.57440.57010.0042
2450.0000116520.56930.5699-0.0005
3250.000018320.57260.56950.0031
4260.000018340.57370.56880.0049
5470.0000116580.57720.56880.0084
6230.0001016580.56870.56750.0012
790.000108520.56880.56690.0020
8170.0001016420.56850.56650.0020
9310.000018480.56770.56530.0024
10430.0000116480.56540.56460.0008
11140.0001016340.56210.5644-0.0022
12420.0000116440.57300.56400.0090
13280.0000183160.56570.56380.0019
1450.000108420.57220.56350.0086
15300.000018440.57290.56340.0095
16330.000018520.56250.56220.0004
17370.0000116320.56920.56220.0070
18460.0000116540.57300.56150.0114
19380.0000116340.56600.56110.0049
2080.0001084160.56470.56090.0038
2130.000108380.56850.56060.0079
22400.00001163160.56640.56050.0059
23440.00001164160.56690.56030.0066
24150.0001016380.56350.55960.0040
25100.000108540.57060.55960.0110
26290.000018420.56610.55910.0069
2760.000108440.56250.55890.0036
28200.00010164160.56420.55860.0056
29130.0001016320.56310.55840.0047
30210.0001016520.56100.55810.0028
3110.000108320.55850.55800.0005
32320.0000184160.56240.55790.0045
3370.000108480.56310.55770.0054
34410.0000116420.56610.55750.0086
35160.00010163160.56810.55750.0106
36270.000018380.55530.5563-0.0010
37220.0001016540.56290.55470.0082
38240.00010165160.55710.55450.0027
39190.0001016480.55900.55370.0053
40180.0001016440.56450.55370.0108
41390.0000116380.55420.55350.0006
42120.0001085160.55970.55340.0063
43480.00001165160.55420.55330.0009
44360.0000185160.55930.55300.0063
4520.000108340.56400.55260.0114
4640.0001083160.55560.55080.0048
47340.000018540.56530.55080.0145
\n", "
" ], "text/plain": [ " step lr num_transformer_heads tr_layer_number num_graph_heads dev test gap\n", "0 35 0.00001 8 5 8 0.5798 0.5774 0.0023\n", "1 11 0.00010 8 5 8 0.5744 0.5701 0.0042\n", "2 45 0.00001 16 5 2 0.5693 0.5699 -0.0005\n", "3 25 0.00001 8 3 2 0.5726 0.5695 0.0031\n", "4 26 0.00001 8 3 4 0.5737 0.5688 0.0049\n", "5 47 0.00001 16 5 8 0.5772 0.5688 0.0084\n", "6 23 0.00010 16 5 8 0.5687 0.5675 0.0012\n", "7 9 0.00010 8 5 2 0.5688 0.5669 0.0020\n", "8 17 0.00010 16 4 2 0.5685 0.5665 0.0020\n", "9 31 0.00001 8 4 8 0.5677 0.5653 0.0024\n", "10 43 0.00001 16 4 8 0.5654 0.5646 0.0008\n", "11 14 0.00010 16 3 4 0.5621 0.5644 -0.0022\n", "12 42 0.00001 16 4 4 0.5730 0.5640 0.0090\n", "13 28 0.00001 8 3 16 0.5657 0.5638 0.0019\n", "14 5 0.00010 8 4 2 0.5722 0.5635 0.0086\n", "15 30 0.00001 8 4 4 0.5729 0.5634 0.0095\n", "16 33 0.00001 8 5 2 0.5625 0.5622 0.0004\n", "17 37 0.00001 16 3 2 0.5692 0.5622 0.0070\n", "18 46 0.00001 16 5 4 0.5730 0.5615 0.0114\n", "19 38 0.00001 16 3 4 0.5660 0.5611 0.0049\n", "20 8 0.00010 8 4 16 0.5647 0.5609 0.0038\n", "21 3 0.00010 8 3 8 0.5685 0.5606 0.0079\n", "22 40 0.00001 16 3 16 0.5664 0.5605 0.0059\n", "23 44 0.00001 16 4 16 0.5669 0.5603 0.0066\n", "24 15 0.00010 16 3 8 0.5635 0.5596 0.0040\n", "25 10 0.00010 8 5 4 0.5706 0.5596 0.0110\n", "26 29 0.00001 8 4 2 0.5661 0.5591 0.0069\n", "27 6 0.00010 8 4 4 0.5625 0.5589 0.0036\n", "28 20 0.00010 16 4 16 0.5642 0.5586 0.0056\n", "29 13 0.00010 16 3 2 0.5631 0.5584 0.0047\n", "30 21 0.00010 16 5 2 0.5610 0.5581 0.0028\n", "31 1 0.00010 8 3 2 0.5585 0.5580 0.0005\n", "32 32 0.00001 8 4 16 0.5624 0.5579 0.0045\n", "33 7 0.00010 8 4 8 0.5631 0.5577 0.0054\n", "34 41 0.00001 16 4 2 0.5661 0.5575 0.0086\n", "35 16 0.00010 16 3 16 0.5681 0.5575 0.0106\n", "36 27 0.00001 8 3 8 0.5553 0.5563 -0.0010\n", "37 22 0.00010 16 5 4 0.5629 0.5547 0.0082\n", "38 24 0.00010 16 5 16 0.5571 0.5545 0.0027\n", "39 19 0.00010 16 4 8 0.5590 0.5537 0.0053\n", "40 18 0.00010 16 4 4 0.5645 0.5537 
0.0108\n", "41 39 0.00001 16 3 8 0.5542 0.5535 0.0006\n", "42 12 0.00010 8 5 16 0.5597 0.5534 0.0063\n", "43 48 0.00001 16 5 16 0.5542 0.5533 0.0009\n", "44 36 0.00001 8 5 16 0.5593 0.5530 0.0063\n", "45 2 0.00010 8 3 4 0.5640 0.5526 0.0114\n", "46 4 0.00010 8 3 16 0.5556 0.5508 0.0048\n", "47 34 0.00001 8 5 4 0.5653 0.5508 0.0145" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/last/bigraph.txt\",50)\n", "\n", "from IPython.display import display\n", "pd.set_option(\"display.max_columns\", None)\n", "pd.set_option(\"display.width\", 160)\n", "\n", "display(df.head(50))" ] }, { "cell_type": "code", "execution_count": 11, "id": "42ebb696-9e6d-4ba1-acd4-b8802ecf0fac", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
steplrnum_transformer_headstr_layer_numbernum_graph_headshidden_dim_gateddevtestgap
0500.000018582560.58050.57560.0048
1240.0001016485120.57230.57000.0024
2670.0000116581280.56670.5687-0.0019
3120.000108545120.56450.5685-0.0040
4320.0001016582560.56580.5678-0.0020
5640.0000116541280.56620.5676-0.0014
6680.0000116582560.57790.56690.0110
7550.0000116441280.57060.56690.0037
8490.000018581280.56420.5659-0.0018
9310.0001016581280.57500.56540.0095
10230.0001016482560.57160.56510.0065
11220.0001016481280.57430.56500.0093
12530.0000185162560.57320.56330.0099
13260.00010164162560.56360.56300.0006
1460.000108485120.56240.5626-0.0002
15100.000108541280.56550.56250.0029
16700.00001165161280.56180.56160.0002
17360.00010165165120.57530.56100.0143
18270.00010164165120.55870.5609-0.0022
19460.000018541280.56600.56080.0052
20190.0001016441280.56160.56040.0012
21440.0000184162560.56860.56000.0086
22350.00010165162560.56430.55980.0045
23130.000108581280.56210.55960.0025
24710.00001165162560.56670.55890.0078
25690.0000116585120.56380.55860.0052
2670.0001084161280.56260.55790.0047
27300.0001016545120.56190.55780.0041
28510.000018585120.56270.55770.0051
29520.0000185161280.55810.55740.0007
30370.000018441280.56080.55720.0036
3140.000108481280.56590.55710.0088
32140.000108582560.56340.55710.0064
33380.000018442560.56110.55690.0043
34570.0000116445120.56260.55690.0058
35180.0001085165120.55550.5568-0.0013
36620.00001164162560.56020.55650.0038
37160.0001085161280.56320.55640.0069
38590.0000116482560.56950.55630.0133
39470.000018542560.56430.55570.0086
4020.000108442560.55890.55550.0034
41280.0001016541280.56390.55550.0085
42430.0000184161280.56590.55540.0105
43580.0000116481280.56300.55540.0077
4410.000108441280.56330.55530.0080
45650.0000116542560.55840.55520.0032
46720.00001165165120.55920.55510.0041
47330.0001016585120.56170.55490.0067
48420.000018485120.56150.55460.0069
49450.0000184165120.56380.55460.0092
\n", "
" ], "text/plain": [ " step lr num_transformer_heads tr_layer_number num_graph_heads hidden_dim_gated dev test gap\n", "0 50 0.00001 8 5 8 256 0.5805 0.5756 0.0048\n", "1 24 0.00010 16 4 8 512 0.5723 0.5700 0.0024\n", "2 67 0.00001 16 5 8 128 0.5667 0.5687 -0.0019\n", "3 12 0.00010 8 5 4 512 0.5645 0.5685 -0.0040\n", "4 32 0.00010 16 5 8 256 0.5658 0.5678 -0.0020\n", "5 64 0.00001 16 5 4 128 0.5662 0.5676 -0.0014\n", "6 68 0.00001 16 5 8 256 0.5779 0.5669 0.0110\n", "7 55 0.00001 16 4 4 128 0.5706 0.5669 0.0037\n", "8 49 0.00001 8 5 8 128 0.5642 0.5659 -0.0018\n", "9 31 0.00010 16 5 8 128 0.5750 0.5654 0.0095\n", "10 23 0.00010 16 4 8 256 0.5716 0.5651 0.0065\n", "11 22 0.00010 16 4 8 128 0.5743 0.5650 0.0093\n", "12 53 0.00001 8 5 16 256 0.5732 0.5633 0.0099\n", "13 26 0.00010 16 4 16 256 0.5636 0.5630 0.0006\n", "14 6 0.00010 8 4 8 512 0.5624 0.5626 -0.0002\n", "15 10 0.00010 8 5 4 128 0.5655 0.5625 0.0029\n", "16 70 0.00001 16 5 16 128 0.5618 0.5616 0.0002\n", "17 36 0.00010 16 5 16 512 0.5753 0.5610 0.0143\n", "18 27 0.00010 16 4 16 512 0.5587 0.5609 -0.0022\n", "19 46 0.00001 8 5 4 128 0.5660 0.5608 0.0052\n", "20 19 0.00010 16 4 4 128 0.5616 0.5604 0.0012\n", "21 44 0.00001 8 4 16 256 0.5686 0.5600 0.0086\n", "22 35 0.00010 16 5 16 256 0.5643 0.5598 0.0045\n", "23 13 0.00010 8 5 8 128 0.5621 0.5596 0.0025\n", "24 71 0.00001 16 5 16 256 0.5667 0.5589 0.0078\n", "25 69 0.00001 16 5 8 512 0.5638 0.5586 0.0052\n", "26 7 0.00010 8 4 16 128 0.5626 0.5579 0.0047\n", "27 30 0.00010 16 5 4 512 0.5619 0.5578 0.0041\n", "28 51 0.00001 8 5 8 512 0.5627 0.5577 0.0051\n", "29 52 0.00001 8 5 16 128 0.5581 0.5574 0.0007\n", "30 37 0.00001 8 4 4 128 0.5608 0.5572 0.0036\n", "31 4 0.00010 8 4 8 128 0.5659 0.5571 0.0088\n", "32 14 0.00010 8 5 8 256 0.5634 0.5571 0.0064\n", "33 38 0.00001 8 4 4 256 0.5611 0.5569 0.0043\n", "34 57 0.00001 16 4 4 512 0.5626 0.5569 0.0058\n", "35 18 0.00010 8 5 16 512 0.5555 0.5568 -0.0013\n", "36 62 0.00001 16 4 16 256 0.5602 0.5565 0.0038\n", "37 
16 0.00010 8 5 16 128 0.5632 0.5564 0.0069\n", "38 59 0.00001 16 4 8 256 0.5695 0.5563 0.0133\n", "39 47 0.00001 8 5 4 256 0.5643 0.5557 0.0086\n", "40 2 0.00010 8 4 4 256 0.5589 0.5555 0.0034\n", "41 28 0.00010 16 5 4 128 0.5639 0.5555 0.0085\n", "42 43 0.00001 8 4 16 128 0.5659 0.5554 0.0105\n", "43 58 0.00001 16 4 8 128 0.5630 0.5554 0.0077\n", "44 1 0.00010 8 4 4 128 0.5633 0.5553 0.0080\n", "45 65 0.00001 16 5 4 256 0.5584 0.5552 0.0032\n", "46 72 0.00001 16 5 16 512 0.5592 0.5551 0.0041\n", "47 33 0.00010 16 5 8 512 0.5617 0.5549 0.0067\n", "48 42 0.00001 8 4 8 512 0.5615 0.5546 0.0069\n", "49 45 0.00001 8 4 16 512 0.5638 0.5546 0.0092" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/last/bigatedgraph.txt\",50)\n", "\n", "from IPython.display import display\n", "pd.set_option(\"display.max_columns\", None)\n", "pd.set_option(\"display.width\", 160)\n", "\n", "display(df.head(50))" ] }, { "cell_type": "code", "execution_count": 15, "id": "dfae755b-6a58-4c26-8c83-4884038b321e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
steplrsmoothing_probabilitydevtestgap
0120.000010.00.57680.5797-0.0029
1140.000010.20.57530.5779-0.0026
2130.000010.10.57450.5773-0.0028
3150.000010.30.57510.57350.0016
4170.000010.50.57080.5719-0.0011
560.000100.50.56750.5706-0.0031
610.000100.00.57430.56980.0045
720.000100.10.57410.56980.0042
8160.000010.40.57430.56910.0052
9180.000010.60.56740.5677-0.0003
1030.000100.20.56870.56590.0028
11190.000010.70.56820.56360.0046
1270.000100.60.55930.5634-0.0042
1350.000100.40.56840.56290.0055
1490.000100.80.56230.56220.0001
1580.000100.70.56250.56160.0009
1640.000100.30.56890.56080.0081
17200.000010.80.56100.56000.0010
18100.000100.90.56150.55920.0024
19210.000010.90.55730.5590-0.0017
20220.000011.00.55580.5574-0.0016
21110.000101.00.55340.5564-0.0029
\n", "
" ], "text/plain": [ " step lr smoothing_probability dev test gap\n", "0 12 0.00001 0.0 0.5768 0.5797 -0.0029\n", "1 14 0.00001 0.2 0.5753 0.5779 -0.0026\n", "2 13 0.00001 0.1 0.5745 0.5773 -0.0028\n", "3 15 0.00001 0.3 0.5751 0.5735 0.0016\n", "4 17 0.00001 0.5 0.5708 0.5719 -0.0011\n", "5 6 0.00010 0.5 0.5675 0.5706 -0.0031\n", "6 1 0.00010 0.0 0.5743 0.5698 0.0045\n", "7 2 0.00010 0.1 0.5741 0.5698 0.0042\n", "8 16 0.00001 0.4 0.5743 0.5691 0.0052\n", "9 18 0.00001 0.6 0.5674 0.5677 -0.0003\n", "10 3 0.00010 0.2 0.5687 0.5659 0.0028\n", "11 19 0.00001 0.7 0.5682 0.5636 0.0046\n", "12 7 0.00010 0.6 0.5593 0.5634 -0.0042\n", "13 5 0.00010 0.4 0.5684 0.5629 0.0055\n", "14 9 0.00010 0.8 0.5623 0.5622 0.0001\n", "15 8 0.00010 0.7 0.5625 0.5616 0.0009\n", "16 4 0.00010 0.3 0.5689 0.5608 0.0081\n", "17 20 0.00001 0.8 0.5610 0.5600 0.0010\n", "18 10 0.00010 0.9 0.5615 0.5592 0.0024\n", "19 21 0.00001 0.9 0.5573 0.5590 -0.0017\n", "20 22 0.00001 1.0 0.5558 0.5574 -0.0016\n", "21 11 0.00010 1.0 0.5534 0.5564 -0.0029" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/last/smothing/phi.txt\",50)\n", "\n", "from IPython.display import display\n", "pd.set_option(\"display.max_columns\", None)\n", "pd.set_option(\"display.width\", 160)\n", "\n", "display(df.head(50))" ] }, { "cell_type": "code", "execution_count": 17, "id": "f4db3748-6be0-4ee6-abd1-c0e734efc8c8", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
steplrsmoothing_probabilitydevtestgap
0140.000010.20.58020.5808-0.0007
1120.000010.00.57680.5797-0.0029
2150.000010.30.57890.57820.0007
3160.000010.40.57590.5768-0.0008
4130.000010.10.57970.57620.0035
5170.000010.50.57450.5760-0.0015
6180.000010.60.56770.5710-0.0033
710.000100.00.57430.56980.0045
820.000100.10.56900.5696-0.0006
9190.000010.70.56440.5694-0.0050
10200.000010.80.56460.5690-0.0044
11210.000010.90.56510.5683-0.0032
1240.000100.30.57180.56620.0056
1330.000100.20.56560.56470.0009
14220.000011.00.55590.5610-0.0051
1550.000100.40.56500.56080.0042
1680.000100.70.56600.55950.0065
1790.000100.80.56360.55890.0047
1870.000100.60.55650.5587-0.0022
1960.000100.50.55960.55850.0011
20100.000100.90.55810.55610.0020
21110.000101.00.54270.5463-0.0036
\n", "
" ], "text/plain": [ " step lr smoothing_probability dev test gap\n", "0 14 0.00001 0.2 0.5802 0.5808 -0.0007\n", "1 12 0.00001 0.0 0.5768 0.5797 -0.0029\n", "2 15 0.00001 0.3 0.5789 0.5782 0.0007\n", "3 16 0.00001 0.4 0.5759 0.5768 -0.0008\n", "4 13 0.00001 0.1 0.5797 0.5762 0.0035\n", "5 17 0.00001 0.5 0.5745 0.5760 -0.0015\n", "6 18 0.00001 0.6 0.5677 0.5710 -0.0033\n", "7 1 0.00010 0.0 0.5743 0.5698 0.0045\n", "8 2 0.00010 0.1 0.5690 0.5696 -0.0006\n", "9 19 0.00001 0.7 0.5644 0.5694 -0.0050\n", "10 20 0.00001 0.8 0.5646 0.5690 -0.0044\n", "11 21 0.00001 0.9 0.5651 0.5683 -0.0032\n", "12 4 0.00010 0.3 0.5718 0.5662 0.0056\n", "13 3 0.00010 0.2 0.5656 0.5647 0.0009\n", "14 22 0.00001 1.0 0.5559 0.5610 -0.0051\n", "15 5 0.00010 0.4 0.5650 0.5608 0.0042\n", "16 8 0.00010 0.7 0.5660 0.5595 0.0065\n", "17 9 0.00010 0.8 0.5636 0.5589 0.0047\n", "18 7 0.00010 0.6 0.5565 0.5587 -0.0022\n", "19 6 0.00010 0.5 0.5596 0.5585 0.0011\n", "20 10 0.00010 0.9 0.5581 0.5561 0.0020\n", "21 11 0.00010 1.0 0.5427 0.5463 -0.0036" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/last/smothing/qwen.txt\",50)\n", "\n", "from IPython.display import display\n", "pd.set_option(\"display.max_columns\", None)\n", "pd.set_option(\"display.width\", 160)\n", "\n", "display(df.head(50))" ] }, { "cell_type": "code", "execution_count": 18, "id": "952affd8-2fea-481c-b8cc-3f53418b5472", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
steplrnum_transformer_headstr_layer_numbernum_graph_headsdevtestgap
030.0001016540.56320.56300.0002
110.000108540.56540.56160.0039
250.000018540.56560.55880.0068
360.000018580.56880.55800.0108
470.0000116540.56310.55740.0057
520.000108580.56190.55640.0054
680.0000116580.56530.55590.0094
740.0001016580.56310.55390.0092
\n", "
" ], "text/plain": [ " step lr num_transformer_heads tr_layer_number num_graph_heads dev test gap\n", "0 3 0.00010 16 5 4 0.5632 0.5630 0.0002\n", "1 1 0.00010 8 5 4 0.5654 0.5616 0.0039\n", "2 5 0.00001 8 5 4 0.5656 0.5588 0.0068\n", "3 6 0.00001 8 5 8 0.5688 0.5580 0.0108\n", "4 7 0.00001 16 5 4 0.5631 0.5574 0.0057\n", "5 2 0.00010 8 5 8 0.5619 0.5564 0.0054\n", "6 8 0.00001 16 5 8 0.5653 0.5559 0.0094\n", "7 4 0.00010 16 5 8 0.5631 0.5539 0.0092" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df = parse_smart_log(\"C:/Users/Alexandr/Desktop/sampling/last/BiGraphFormerWithProb.txt\",50)\n", "\n", "from IPython.display import display\n", "pd.set_option(\"display.max_columns\", None)\n", "pd.set_option(\"display.width\", 160)\n", "\n", "display(df.head(50))" ] }, { "cell_type": "code", "execution_count": 19, "id": "c210111f-e1e1-4276-8e47-249bda79b189", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
steplrnum_transformer_headstr_layer_numbernum_graph_headshidden_dim_gateddevtestgap
030.0001016582560.56700.56560.0015
120.000108585120.57060.56160.0091
240.0001016585120.55600.5573-0.0013
380.0000116585120.55690.55290.0040
410.000108582560.55720.55140.0059
560.000018585120.55200.54740.0046
650.000018582560.55500.54720.0078
770.0000116582560.54880.54180.0071
\n", "
# Results table for the BiGatedGraphFormerWithProb hyper-parameter search log.
from IPython.display import display

# Parse the training log; the second positional argument is top_n,
# i.e. the maximum number of rows parse_smart_log keeps.
df = parse_smart_log(
    "C:/Users/Alexandr/Desktop/sampling/last/BiGatedGraphFormerWithProb.txt",
    50,
)

# Show every column and allow 160 characters per output line so the
# hyper-parameter columns are not elided or wrapped.
pd.set_option("display.max_columns", None)
pd.set_option("display.width", 160)

display(df.head(50))
"PretrainedAudioEmbeddingExtractor.__init__() got an unexpected keyword argument 'model_name'", "output_type": "error", "traceback": [ "\u001b[31m---------------------------------------------------------------------------\u001b[39m", "\u001b[31mTypeError\u001b[39m Traceback (most recent call last)", "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[21]\u001b[39m\u001b[32m, line 32\u001b[39m\n\u001b[32m 29\u001b[39m DEVICE = \u001b[33m\"\u001b[39m\u001b[33mcuda\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m torch.cuda.is_available() \u001b[38;5;28;01melse\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mcpu\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 30\u001b[39m SAMPLE_RATE = \u001b[32m16000\u001b[39m\n\u001b[32m---> \u001b[39m\u001b[32m32\u001b[39m audio_feat = \u001b[43mPretrainedAudioEmbeddingExtractor\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 33\u001b[39m \u001b[43m \u001b[49m\u001b[43mmodel_name\u001b[49m\u001b[43m=\u001b[49m\u001b[43mAUDIO_MODEL\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 34\u001b[39m \u001b[43m \u001b[49m\u001b[43mcheckpoint\u001b[49m\u001b[43m=\u001b[49m\u001b[43mAUDIO_CKPT\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 35\u001b[39m \u001b[43m \u001b[49m\u001b[43mdevice\u001b[49m\u001b[43m=\u001b[49m\u001b[43mDEVICE\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 36\u001b[39m \u001b[43m)\u001b[49m\n\u001b[32m 38\u001b[39m text_feat = PretrainedTextEmbeddingExtractor(\n\u001b[32m 39\u001b[39m model_name=TEXT_MODEL,\n\u001b[32m 40\u001b[39m checkpoint=TEXT_CKPT,\n\u001b[32m 41\u001b[39m device=DEVICE,\n\u001b[32m 42\u001b[39m )\n\u001b[32m 44\u001b[39m \u001b[38;5;66;03m# ---------- 4. 
# ======================================================================
# Sanity check of the synthetic MELD-S corpus for "broken" embeddings
# ======================================================================
#
# For every row of the synthetic CSV this script loads the wav file, runs the
# audio and text extractors, and verifies that the embedding sizes (and the
# size of the concatenated feature vector) match the sizes measured on dummy
# inputs.  Rows that fail any check are collected and written to a CSV.
#
# NOTE(review): the last recorded run of this cell stopped with
#   TypeError: PretrainedAudioEmbeddingExtractor.__init__() got an
#   unexpected keyword argument 'model_name'
# The constructor signatures live in data_loading.feature_extractor and are
# not visible from this notebook, so the keyword names in step 3 are left
# unchanged -- confirm them against that module before re-running.

# ---------- 1. Imports and basic settings ----------
import os, logging, traceback
import torch, torchaudio
import pandas as pd
from tqdm.auto import tqdm

# --- if the project lives in another directory, add it to sys.path ---
# import sys; sys.path.append(r"C:\Prgrm\ESWA_2025")

from data_loading.feature_extractor import (
    PretrainedAudioEmbeddingExtractor,
    PretrainedTextEmbeddingExtractor,
)

# ---------- 2. Paths (taken from the project's config.toml) ----------
synthetic_path = r"E:/MELD_S"
synth_csv_path = os.path.join(synthetic_path, "meld_s_train_labels.csv")
synth_wav_dir = os.path.join(synthetic_path, "wavs")

# ---------- 3. Build the extractors the same way the main project does ----------
AUDIO_MODEL = "audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim"
AUDIO_CKPT = "best_audio_model_2.pt"  # path relative to the working directory
TEXT_MODEL = "jinaai/jina-embeddings-v3"
TEXT_CKPT = "best_text_model.pth"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
SAMPLE_RATE = 16000

audio_feat = PretrainedAudioEmbeddingExtractor(
    model_name=AUDIO_MODEL,
    checkpoint=AUDIO_CKPT,
    device=DEVICE,
)

text_feat = PretrainedTextEmbeddingExtractor(
    model_name=TEXT_MODEL,
    checkpoint=TEXT_CKPT,
    device=DEVICE,
)

# ---------- 4. Measure the actual embedding sizes on dummy inputs ----------
with torch.no_grad():
    dummy_wav = torch.zeros(1, SAMPLE_RATE)  # one second of silence
    _, a_emb = audio_feat.extract(dummy_wav[0], SAMPLE_RATE)
    AUDIO_DIM = a_emb[0].shape[-1]

    _, t_emb = text_feat.extract("hello world")
    TEXT_DIM = t_emb[0].shape[-1]

# number of logits each classifier is expected to output
NUM_EMOTIONS = 7  # ["anger", "disgust", ...] -- as in the config
PRED_DIM = NUM_EMOTIONS

EXPECTED_ALL = AUDIO_DIM + TEXT_DIM + 2 * PRED_DIM
print(f"AUDIO_DIM = {AUDIO_DIM}, TEXT_DIM = {TEXT_DIM}, "
      f"TOTAL EXPECTED = {EXPECTED_ALL}")

# ---------- 5. Read the synthetic-corpus CSV ----------
df = pd.read_csv(synth_csv_path)
print(f"Всего строк в CSV: {len(df)}")

bad_rows, good_cnt = [], 0

# ---------- 6. Walk over the records ----------
for i, row in tqdm(df.iterrows(), total=len(df)):
    video_name = row["video_name"]
    wav_path = os.path.join(synth_wav_dir, f"{video_name}.wav")
    # An empty CSV cell is read by pandas as a float NaN, so txt is not
    # guaranteed to be a string here.
    txt = row.get("text", "")

    reason = None
    try:
        # Only inference is performed: disable autograd for the per-row
        # work, matching the dimension probe in step 4.
        with torch.no_grad():
            # 6.1 Does the wav file exist?
            if not os.path.exists(wav_path):
                reason = "file_missing"

            # 6.2 Audio embedding (first channel only, resampled if needed)
            if reason is None:
                wf, sr = torchaudio.load(wav_path)
                if sr != SAMPLE_RATE:
                    wf = torchaudio.transforms.Resample(sr, SAMPLE_RATE)(wf)
                a_pred, a_emb = audio_feat.extract(wf[0], SAMPLE_RATE)
                a_emb = a_emb[0]
                if a_emb.shape[-1] != AUDIO_DIM:
                    reason = f"audio_dim_{a_emb.shape[-1]}"

            # 6.3 Text embedding
            if reason is None:
                t_pred, t_emb = text_feat.extract(txt)
                t_emb = t_emb[0]
                if t_emb.shape[-1] != TEXT_DIM:
                    reason = f"text_dim_{t_emb.shape[-1]}"

            # 6.4 Size of the full concatenated feature vector
            if reason is None:
                full_vec = torch.cat([a_emb, t_emb, a_pred[0], t_pred[0]], dim=-1)
                if full_vec.shape[-1] != EXPECTED_ALL:
                    reason = f"concat_dim_{full_vec.shape[-1]}"

    except Exception as e:
        # Any failure marks the row as bad instead of stopping the scan.
        reason = "exception_" + e.__class__.__name__
        logging.error(f"{video_name}: {traceback.format_exc(limit=2)}")

    # 6.5 Record the outcome
    if reason:
        bad_rows.append({
            "idx": i,
            "video_name": video_name,
            "reason": reason,
            "wav_path": wav_path,
            # This runs outside the try block: guard against a non-string
            # (NaN) text so one empty cell cannot abort the whole scan.
            "text_len": len(txt) if isinstance(txt, str) else 0,
        })
    else:
        good_cnt += 1

# ---------- 7. Summary ----------
print(f"\n✅ GOOD : {good_cnt}")
print(f"❌ BAD : {len(bad_rows)}")

# Explicit columns keep the table (and the CSV header) well-formed even when
# no bad rows were found.
bad_df = pd.DataFrame(
    bad_rows,
    columns=["idx", "video_name", "reason", "wav_path", "text_len"],
)
display(bad_df)

# ---------- 8. (Optional) save the list of bad files ----------
out_csv = os.path.join(synthetic_path, "bad_synth_meld.csv")
bad_df.to_csv(out_csv, index=False)
print(f"\nСписок «битых» примеров сохранён в: {out_csv}")