mlopez6132 committed
Commit 9d2bb4c · verified · 1 Parent(s): 4f7f312

Upload app.py with huggingface_hub

Files changed (1):
  1. app.py +196 -0
app.py ADDED
@@ -0,0 +1,196 @@
"""
Hugging Face Space App for Free H200 Training
This app runs nano-coder training on HF's free H200 GPU (4 minutes daily)
"""

import os
import subprocess
import time
import gradio as gr
from datetime import datetime, timedelta

# Configuration
MAX_TRAINING_TIME = 3.5 * 60  # 3.5 minutes to be safe
TRAINING_SCRIPT = "hf_free_training.py"
DATA_PREP_SCRIPT = "prepare_code_dataset.py"

def check_daily_limit():
    """Check if we've used today's free H200 time."""
    today = datetime.now().date()
    limit_file = f"daily_limit_{today}.txt"

    if os.path.exists(limit_file):
        with open(limit_file, 'r') as f:
            last_run = f.read().strip()
            if last_run == str(today):
                return False, "Daily H200 limit reached. Try again tomorrow!"

    return True, "Ready to train!"

def mark_daily_usage():
    """Mark that we've used today's free time."""
    today = datetime.now().date()
    limit_file = f"daily_limit_{today}.txt"

    with open(limit_file, 'w') as f:
        f.write(str(today))

def run_training():
    """Run the free H200 training."""

    # Check daily limit
    can_run, message = check_daily_limit()
    if not can_run:
        return message

    try:
        # Mark usage
        mark_daily_usage()

        # Prepare dataset if not already done
        if not os.path.exists("data/python-codes-25k/train.bin"):
            print("Preparing dataset...")
            subprocess.run(["python", DATA_PREP_SCRIPT], check=True)

        # Run training
        print("Starting free H200 training...")
        start_time = time.time()

        # Run training with timeout
        process = subprocess.Popen(
            ["python", TRAINING_SCRIPT],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True
        )

        output_lines = []
        while True:
            elapsed = time.time() - start_time
            if elapsed > MAX_TRAINING_TIME:
                process.terminate()
                output_lines.append(f"\n⏰ Time limit reached ({elapsed/60:.1f} minutes)")
                break

            line = process.stdout.readline()
            if not line and process.poll() is not None:
                break

            if line:
                output_lines.append(line.strip())
                print(line.strip())

        # Wait for process to finish
        process.wait()

        # Check if training completed successfully
        if process.returncode == 0:
            result = "✅ Training completed successfully!\n\n" + "\n".join(output_lines[-20:])  # Last 20 lines
        else:
            result = "❌ Training failed or was interrupted.\n\n" + "\n".join(output_lines[-20:])

        return result

    except Exception as e:
        return f"❌ Error during training: {str(e)}"

def check_model_status():
    """Check if trained model exists."""
    model_path = "out-nano-coder-free/ckpt.pt"
    if os.path.exists(model_path):
        # Get file size
        size = os.path.getsize(model_path) / (1024 * 1024)  # MB
        return f"✅ Model found! Size: {size:.1f} MB"
    else:
        return "❌ No trained model found. Run training first."

def generate_sample_code(prompt, max_tokens=100, temperature=0.8):
    """Generate code using the trained model."""
    if not os.path.exists("out-nano-coder-free/ckpt.pt"):
        return "❌ No trained model found. Please run training first."

    try:
        # Import and run sampling
        from sample_nano_coder import load_model, load_vocab, generate_code

        model, checkpoint = load_model()
        stoi, itos = load_vocab()

        # Generate code
        completion = generate_code(model, stoi, itos, prompt, max_tokens, temperature, 200)

        return f"Generated code:\n\n{completion}"

    except Exception as e:
        return f"❌ Error generating code: {str(e)}"

# Create Gradio interface
with gr.Blocks(title="Nano-Coder Free H200 Training") as demo:
    gr.Markdown("# 🚀 Nano-Coder Free H200 Training")
    gr.Markdown("Train a nanoGPT model for Python code generation using Hugging Face's free H200 GPU (4 minutes daily)")

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🎯 Training Control")
            train_button = gr.Button("🚀 Start Free H200 Training", variant="primary")
            status_text = gr.Textbox(label="Training Status", lines=10, interactive=False)

        with gr.Column():
            gr.Markdown("### 📊 Model Status")
            model_status_button = gr.Button("🔍 Check Model Status")
            model_status_text = gr.Textbox(label="Model Status", lines=2, interactive=False)

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🎨 Code Generation")
            code_prompt = gr.Textbox(
                label="Code Prompt",
                placeholder="def fibonacci(n):\n    ",
                lines=3
            )
            with gr.Row():
                max_tokens = gr.Slider(50, 500, 100, label="Max Tokens")
                temperature = gr.Slider(0.1, 2.0, 0.8, label="Temperature")
            generate_button = gr.Button("✨ Generate Code")
            generated_code = gr.Textbox(label="Generated Code", lines=10, interactive=False)

    # Event handlers
    train_button.click(
        fn=run_training,
        outputs=status_text
    )

    model_status_button.click(
        fn=check_model_status,
        outputs=model_status_text
    )

    generate_button.click(
        fn=generate_sample_code,
        inputs=[code_prompt, max_tokens, temperature],
        outputs=generated_code
    )

    gr.Markdown("""
    ### 📋 Instructions

    1. **Daily Limit**: You get 4 minutes of free H200 GPU time per day
    2. **Training**: Click "Start Free H200 Training" to begin
    3. **Model**: Check model status after training
    4. **Generation**: Use the trained model to generate Python code

    ### ⚙️ Model Configuration (Free Tier)
    - **Layers**: 6 (reduced from 12)
    - **Heads**: 6 (reduced from 12)
    - **Embedding**: 384 (reduced from 768)
    - **Context**: 512 tokens
    - **Parameters**: ~15M (vs 124M full model)

    ### 💡 Tips
    - Training automatically stops at 3.5 minutes to be safe
    - Model checkpoints are saved to HF Hub
    - Use shorter prompts for better results
    """)

if __name__ == "__main__":
    demo.launch()
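
For a quick check outside the Gradio UI, the same sampling helpers that generate_sample_code imports can be called directly once a checkpoint exists. A minimal sketch, assuming the repo's sample_nano_coder module and out-nano-coder-free/ckpt.pt are present in the Space's working directory:

import os

from sample_nano_coder import load_model, load_vocab, generate_code

if os.path.exists("out-nano-coder-free/ckpt.pt"):
    model, checkpoint = load_model()
    stoi, itos = load_vocab()
    # Same positional arguments app.py passes: prompt, max_tokens, temperature, and 200.
    print(generate_code(model, stoi, itos, "def fibonacci(n):\n    ", 100, 0.8, 200))
else:
    print("No checkpoint found; run training from the Space UI first.")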