Update README.md
Browse files
    	
        README.md
    CHANGED
    
    | 
         @@ -62,14 +62,14 @@ Debugged vibecoder dataset 
     | 
|
| 62 | 
         | 
| 63 | 
         
             
            ### 📊 Model Evaluation Results
         
     | 
| 64 | 
         | 
| 65 | 
         
            -
            | Tasks                    |Version 
     | 
| 66 | 
         
            -
             
     | 
| 67 | 
         
            -
            | gsm8k_cot 
     | 
| 68 | 
         
            -
            | humaneval 
     | 
| 69 | 
         
            -
            | mmlu_college_biology 
     | 
| 70 | 
         
            -
            | mmlu_HS_computer_science 
     | 
| 71 | 
         
            -
            | computer_security 
     | 
| 72 | 
         
            -
            | college_computer_science 
     | 
| 73 | 
         | 
| 74 | 
         
             
            ---
         
     | 
| 75 | 
         | 
| 
         | 
|
| 62 | 
         | 
| 63 | 
         
             
            ### 📊 Model Evaluation Results
         
     | 
| 64 | 
         | 
| 65 | 
         
            +
            | Tasks                    | Version | n-shot | Metric         | VibeCoder-20b-0.02-D*_RL | gpt-oss-20b | Qwen 3 235B |
         
     | 
| 66 | 
         
            +
            |---------------------------|----------|--------|----------------|---------------------------|-------------|--------------|
         
     | 
| 67 | 
         
            +
            | gsm8k_cot                 | 3        | 3      | exact_match ↑  | 0.8452 (+0.7667)          | 0.78        | 0.82         |
         
     | 
| 68 | 
         
            +
            | humaneval                 | 1        | 0      | exact_match ↑  | 0.933 (+0.8)              | 0.73        | 0.92         |
         
     | 
| 69 | 
         
            +
            | mmlu_college_biology      | 1        | 0      | exact_match ↑  | 1.000 (—)                 | —           | —            |
         
     | 
| 70 | 
         
            +
            | mmlu_HS_computer_science  | 1        | 0      | exact_match ↑  | 1.000 (+0.9)              | —           | —            |
         
     | 
| 71 | 
         
            +
            | computer_security         | 1        | 2      | acc ↑          | 0.8528 (+0.700)           | —           | —            |
         
     | 
| 72 | 
         
            +
            | college_computer_science  | 1        | 2      | acc ↑          | 0.8528 (+0.700)           | —           | —            |
         
     | 
| 73 | 
         | 
| 74 | 
         
             
            ---
         
     | 
| 75 | 
         |