ghostai1 commited on
Commit
06685cd
·
verified ·
1 Parent(s): 1f59f51

Upload 8 files

Browse files
Files changed (2) hide show
  1. public/publicapi.py +683 -751
  2. public/styles.css +54 -61
public/publicapi.py CHANGED
@@ -1,6 +1,11 @@
 
1
  #!/usr/bin/env python3
2
  # -*- coding: utf-8 -*-
3
 
 
 
 
 
4
  import os
5
  import sys
6
  import gc
@@ -9,55 +14,39 @@ import json
9
  import time
10
  import mmap
11
  import math
12
- import tempfile
13
  import random
14
  import logging
15
  import warnings
16
  import traceback
17
  import subprocess
18
- import configparser
19
- from typing import Optional, Tuple, Dict, Any, List
20
-
21
  import numpy as np
22
- import torch
23
  import torchaudio
24
  import gradio as gr
25
  import gradio_client.utils
 
 
26
  from pydub import AudioSegment
27
  from datetime import datetime
28
  from pathlib import Path
 
29
  from torch.cuda.amp import autocast
 
30
 
31
- from fastapi import FastAPI, HTTPException, Query
32
  from fastapi.middleware.cors import CORSMiddleware
33
  from pydantic import BaseModel
34
  import uvicorn
35
- import threading
36
 
37
  from colorama import init as colorama_init, Fore, Style
38
 
39
- # ======================================================================================
40
- # RELEASE / PATHS
41
- # ======================================================================================
42
-
43
- RELEASE = "v1.7.0"
44
- APP_TITLE = f"GhostAI Music Generator • {RELEASE}"
45
-
46
- BASE_DIR = Path(__file__).parent.resolve()
47
- LOG_DIR = BASE_DIR / "logs"
48
- MP3_DIR = BASE_DIR / "mp3"
49
- CSS_FILE = BASE_DIR / "styles.css"
50
- PROMPTS_FILE = BASE_DIR / "prompts.ini"
51
- EXAMPLE_MD = BASE_DIR / "example.md"
52
- SETTINGS_FILE = BASE_DIR / "settings.json"
53
-
54
- LOG_DIR.mkdir(parents=True, exist_ok=True)
55
- MP3_DIR.mkdir(parents=True, exist_ok=True)
56
 
57
  # ======================================================================================
58
- # PATCHES & RUNTIME SETUP
59
  # ======================================================================================
60
 
 
61
  _original_get_type = gradio_client.utils.get_type
62
  def _patched_get_type(schema):
63
  if isinstance(schema, bool):
@@ -65,88 +54,55 @@ def _patched_get_type(schema):
65
  return _original_get_type(schema)
66
  gradio_client.utils.get_type = _patched_get_type
67
 
 
68
  warnings.filterwarnings("ignore")
 
 
69
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
70
  torch.backends.cudnn.benchmark = False
71
  torch.backends.cudnn.deterministic = True
72
 
73
- # ======================================================================================
74
- # LOGGING (SINGLE FILE, MAX 5MB, AUTO-TRIM)
75
- # ======================================================================================
76
-
77
- colorama_init(autoreset=True)
78
-
79
- LOG_FILE = LOG_DIR / "musicgen.log"
80
- MAX_LOG_BYTES = 5 * 1024 * 1024 # 5 MB
81
-
82
- class TrimmingFileHandler(logging.FileHandler):
83
- def emit(self, record):
84
- try:
85
- super().emit(record)
86
- self._trim_if_needed()
87
- except Exception:
88
- pass
89
-
90
- def _trim_if_needed(self):
91
- try:
92
- if self.stream:
93
- self.stream.flush()
94
- size = LOG_FILE.stat().st_size if LOG_FILE.exists() else 0
95
- if size <= MAX_LOG_BYTES:
96
- return
97
- keep = int(1.5 * 1024 * 1024)
98
- with open(LOG_FILE, "rb") as f:
99
- if size > keep:
100
- f.seek(-keep, 2)
101
- tail = f.read()
102
- else:
103
- tail = f.read()
104
- with open(LOG_FILE, "wb") as f:
105
- f.write(b"[log trimmed]\n")
106
- f.write(tail)
107
- except Exception:
108
- pass
109
-
110
- class ColorFormatter(logging.Formatter):
111
- COLORS = {
112
- "DEBUG": Fore.BLUE,
113
- "INFO": Fore.GREEN,
114
- "WARNING": Fore.YELLOW,
115
- "ERROR": Fore.RED,
116
- "CRITICAL": Fore.RED + Style.BRIGHT,
117
- }
118
- def format(self, record):
119
- levelname = record.levelname
120
- color = self.COLORS.get(levelname, "")
121
- reset = Style.RESET_ALL
122
- record.levelname = f"{color}{levelname}{reset}"
123
- return super().format(record)
124
-
125
- console_handler = logging.StreamHandler(sys.stdout)
126
- console_handler.setLevel(logging.DEBUG)
127
- console_handler.setFormatter(ColorFormatter("%(asctime)s [%(levelname)s] %(message)s"))
128
-
129
- file_handler = TrimmingFileHandler(LOG_FILE, mode="a", encoding="utf-8", delay=False)
130
- file_handler.setLevel(logging.DEBUG)
131
- file_handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(message)s"))
132
 
133
- logging.basicConfig(level=logging.DEBUG, handlers=[console_handler, file_handler])
 
134
  logger = logging.getLogger("ghostai-musicgen")
135
- logger.info(f"Starting GhostAI Music Generator {RELEASE}")
 
 
 
 
 
 
136
 
137
- # ======================================================================================
138
- # DEVICE
139
- # ======================================================================================
140
 
 
141
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
142
  if DEVICE != "cuda":
 
143
  logger.error("CUDA is required. Exiting.")
144
  sys.exit(1)
145
- logger.info(f"GPU: {torch.cuda.get_device_name(0)}")
146
- logger.info("Precision: fp16 model, fp32 CPU audio ops")
 
 
 
 
 
 
 
 
147
 
148
  # ======================================================================================
149
- # SETTINGS PERSISTENCE
150
  # ======================================================================================
151
 
152
  DEFAULT_SETTINGS: Dict[str, Any] = {
@@ -170,28 +126,26 @@ DEFAULT_SETTINGS: Dict[str, Any] = {
170
  "instrumental_prompt": ""
171
  }
172
 
173
- def load_settings_from_file() -> Dict[str, Any]:
174
- try:
175
- if SETTINGS_FILE.exists():
176
- with open(SETTINGS_FILE, "r", encoding="utf-8") as f:
177
- data = json.load(f)
178
  for k, v in DEFAULT_SETTINGS.items():
179
  data.setdefault(k, v)
180
- logger.info(f"Loaded settings from {SETTINGS_FILE}")
181
  return data
182
- except Exception as e:
183
- logger.error(f"Failed reading {SETTINGS_FILE}: {e}")
184
  return DEFAULT_SETTINGS.copy()
185
 
186
- def save_settings_to_file(settings: Dict[str, Any]) -> None:
187
  try:
188
- with open(SETTINGS_FILE, "w", encoding="utf-8") as f:
189
- json.dump(settings, f, indent=2)
190
- logger.info(f"Saved settings to {SETTINGS_FILE}")
191
  except Exception as e:
192
- logger.error(f"Failed saving {SETTINGS_FILE}: {e}")
193
 
194
- CURRENT_SETTINGS = load_settings_from_file()
195
 
196
  # ======================================================================================
197
  # VRAM / DISK / MEMORY
@@ -214,21 +168,20 @@ def clean_memory() -> Optional[float]:
214
  def check_vram():
215
  try:
216
  r = subprocess.run(
217
- ['nvidia-smi', '--query-gpu=memory.used,memory.total', '--format=csv'],
218
  capture_output=True, text=True
219
  )
220
  lines = r.stdout.splitlines()
221
  if len(lines) > 1:
222
- used_mb, total_mb = map(int, re.findall(r'\d+', lines[1]))
223
  free_mb = total_mb - used_mb
224
  logger.info(f"VRAM: used {used_mb} MiB | free {free_mb} MiB | total {total_mb} MiB")
225
  if free_mb < 5000:
226
- logger.warning(f"Low free VRAM ({free_mb} MiB). Running processes:")
227
  procs = subprocess.run(
228
- ['nvidia-smi', '--query-compute-apps=pid,used_memory', '--format=csv'],
229
  capture_output=True, text=True
230
  )
231
- logger.info(f"\n{procs.stdout}")
232
  return free_mb
233
  except Exception as e:
234
  logger.error(f"check_vram failed: {e}")
@@ -249,229 +202,207 @@ def check_disk_space(path=".") -> bool:
249
  # AUDIO UTILS (CPU)
250
  # ======================================================================================
251
 
252
- def ensure_stereo(audio_segment: AudioSegment, sample_rate=48000, sample_width=2) -> AudioSegment:
253
  try:
254
- if audio_segment.channels != 2:
255
- audio_segment = audio_segment.set_channels(2)
256
- if audio_segment.frame_rate != sample_rate:
257
- audio_segment = audio_segment.set_frame_rate(sample_rate)
258
- return audio_segment
259
  except Exception as e:
260
  logger.error(f"ensure_stereo failed: {e}")
261
- return audio_segment
262
 
263
- def calculate_rms(segment: AudioSegment) -> float:
264
  try:
265
- samples = np.array(segment.get_array_of_samples(), dtype=np.float32)
266
  return float(np.sqrt(np.mean(samples**2)))
267
- except Exception as e:
268
- logger.error(f"calculate_rms failed: {e}")
269
  return 0.0
270
 
271
- def hard_limit(audio_segment: AudioSegment, limit_db=-3.0, sample_rate=48000) -> AudioSegment:
272
  try:
273
- audio_segment = ensure_stereo(audio_segment, sample_rate, audio_segment.sample_width)
274
- limit = 10 ** (limit_db / 20.0) * (2**23 if audio_segment.sample_width == 3 else 32767)
275
- samples = np.array(audio_segment.get_array_of_samples(), dtype=np.float32)
276
- samples = np.clip(samples, -limit, limit).astype(np.int32 if audio_segment.sample_width == 3 else np.int16)
277
  if len(samples) % 2 != 0:
278
  samples = samples[:-1]
279
  return AudioSegment(
280
  samples.tobytes(),
281
  frame_rate=sample_rate,
282
- sample_width=audio_segment.sample_width,
283
  channels=2
284
  )
285
  except Exception as e:
286
  logger.error(f"hard_limit failed: {e}")
287
- return audio_segment
288
 
289
- def rms_normalize(segment: AudioSegment, target_rms_db=-23.0, peak_limit_db=-3.0, sample_rate=48000) -> AudioSegment:
290
  try:
291
- segment = ensure_stereo(segment, sample_rate, segment.sample_width)
292
- target_rms = 10 ** (target_rms_db / 20) * (2**23 if segment.sample_width == 3 else 32767)
293
- current_rms = calculate_rms(segment)
294
- if current_rms > 0:
295
- gain_factor = target_rms / current_rms
296
- segment = segment.apply_gain(20 * np.log10(max(gain_factor, 1e-6)))
297
- segment = hard_limit(segment, limit_db=peak_limit_db, sample_rate=sample_rate)
298
- return segment
299
  except Exception as e:
300
  logger.error(f"rms_normalize failed: {e}")
301
- return segment
302
 
303
- def balance_stereo(audio_segment: AudioSegment, noise_threshold=-40, sample_rate=48000) -> AudioSegment:
304
  try:
305
- audio_segment = ensure_stereo(audio_segment, sample_rate, audio_segment.sample_width)
306
- samples = np.array(audio_segment.get_array_of_samples(), dtype=np.float32)
307
- if audio_segment.channels != 2:
308
- return audio_segment
309
- stereo = samples.reshape(-1, 2)
310
  db = 20 * np.log10(np.abs(stereo) + 1e-10)
311
  mask = db > noise_threshold
312
  stereo = stereo * mask
313
- left = stereo[:, 0]
314
- right = stereo[:, 1]
315
  l_rms = np.sqrt(np.mean(left[left != 0] ** 2)) if np.any(left != 0) else 0
316
  r_rms = np.sqrt(np.mean(right[right != 0] ** 2)) if np.any(right != 0) else 0
317
  if l_rms > 0 and r_rms > 0:
318
  avg = (l_rms + r_rms) / 2
319
  stereo[:, 0] *= (avg / l_rms)
320
  stereo[:, 1] *= (avg / r_rms)
321
- out = stereo.flatten().astype(np.int32 if audio_segment.sample_width == 3 else np.int16)
322
  if len(out) % 2 != 0:
323
  out = out[:-1]
324
- return AudioSegment(
325
- out.tobytes(),
326
- frame_rate=sample_rate,
327
- sample_width=audio_segment.sample_width,
328
- channels=2
329
- )
330
  except Exception as e:
331
  logger.error(f"balance_stereo failed: {e}")
332
- return audio_segment
333
 
334
- def apply_noise_gate(audio_segment: AudioSegment, threshold_db=-80, sample_rate=48000) -> AudioSegment:
335
  try:
336
- audio_segment = ensure_stereo(audio_segment, sample_rate, audio_segment.sample_width)
337
- samples = np.array(audio_segment.get_array_of_samples(), dtype=np.float32)
338
- if audio_segment.channels != 2:
339
- return audio_segment
340
- stereo = samples.reshape(-1, 2)
341
  for _ in range(2):
342
  db = 20 * np.log10(np.abs(stereo) + 1e-10)
343
- mask = db > threshold_db
344
- stereo = stereo * mask
345
- out = stereo.flatten().astype(np.int32 if audio_segment.sample_width == 3 else np.int16)
346
  if len(out) % 2 != 0:
347
  out = out[:-1]
348
- return AudioSegment(
349
- out.tobytes(),
350
- frame_rate=sample_rate,
351
- sample_width=audio_segment.sample_width,
352
- channels=2
353
- )
354
  except Exception as e:
355
  logger.error(f"apply_noise_gate failed: {e}")
356
- return audio_segment
357
 
358
- def apply_eq(segment: AudioSegment, sample_rate=48000) -> AudioSegment:
359
  try:
360
- segment = ensure_stereo(segment, sample_rate, segment.sample_width)
361
- segment = segment.high_pass_filter(20)
362
- segment = segment.low_pass_filter(8000)
363
- segment = segment - 3
364
- segment = segment - 3
365
- segment = segment - 10
366
- return segment
367
  except Exception as e:
368
  logger.error(f"apply_eq failed: {e}")
369
- return segment
370
 
371
- def apply_fade(segment: AudioSegment, fade_in_duration=500, fade_out_duration=800) -> AudioSegment:
372
  try:
373
- segment = ensure_stereo(segment, segment.frame_rate, segment.sample_width)
374
- segment = segment.fade_in(fade_in_duration).fade_out(fade_out_duration)
375
- return segment
376
  except Exception as e:
377
  logger.error(f"apply_fade failed: {e}")
378
- return segment
379
 
380
  # ======================================================================================
381
- # PROMPTS.INI LOADING / VARIABLE PROMPT BUILDER
382
  # ======================================================================================
383
 
384
- def _csv(v: str) -> List[str]:
385
- if not v or v.strip().lower() == "none":
386
- return []
387
- return [x.strip() for x in v.split(",") if x.strip()]
388
-
389
- def load_profiles_from_ini(prompts_file: Path) -> Dict[str, Dict[str, Any]]:
390
- if not prompts_file.exists():
391
- raise FileNotFoundError(f"Required prompts file missing: {prompts_file}")
392
- cfg = configparser.ConfigParser()
393
- cfg.read(prompts_file, encoding="utf-8")
394
- profiles: Dict[str, Dict[str, Any]] = {}
395
- for sect in cfg.sections():
396
- s = cfg[sect]
397
- profiles[sect] = {
398
- "label": s.get("label", sect.replace("_", " ").title()),
399
- "bpm_min": s.getint("bpm_min", 100),
400
- "bpm_max": s.getint("bpm_max", 140),
401
- "drum_beat": _csv(s.get("drum_beat", "none")),
402
- "synthesizer": _csv(s.get("synthesizer", "none")),
403
- "rhythmic_steps": _csv(s.get("rhythmic_steps", "steady steps")),
404
- "bass_style": _csv(s.get("bass_style", "melodic bass")),
405
- "guitar_style": _csv(s.get("guitar_style", "clean")),
406
- "mood": _csv(s.get("mood", "energetic")),
407
- "structure": _csv(s.get("structure", "intro,verse,chorus,outro")),
408
- "api_name": s.get("api_name", f"/set_{sect}_prompt"),
409
- "prompt_template": s.get(
410
- "prompt_template",
411
- "Instrumental track {guitar}{bass}{drum}{synth}{rhythm}, {mood} {section} at {bpm} BPM."
412
- ),
413
- }
414
- if not profiles:
415
- raise RuntimeError("No profiles found in prompts.ini")
416
- return profiles
417
-
418
- def rand_choice(lst: List[str], fallback: str = "") -> str:
419
- if not lst:
420
- return fallback
421
- return random.choice(lst)
422
-
423
- def assemble_prompt(profiles: Dict[str, Dict[str, Any]], style_key: str, bpm_hint: int, chunk_idx: int) -> str:
424
- prof = profiles.get(style_key)
425
- if not prof:
426
- return "Instrumental track, energetic, intro at 120 BPM."
427
- bpm_min, bpm_max = prof["bpm_min"], prof["bpm_max"]
428
- bpm = bpm_hint if bpm_hint != 120 else random.randint(bpm_min, bpm_max)
429
- drum = rand_choice(prof["drum_beat"])
430
- synth = rand_choice(prof["synthesizer"])
431
- rhythm = rand_choice(prof["rhythmic_steps"])
432
- bass = rand_choice(prof["bass_style"])
433
- guitar = rand_choice(prof["guitar_style"])
434
- mood = rand_choice(prof["mood"], "dynamic")
435
-
436
- struct = prof["structure"] or ["intro", "verse", "chorus", "outro"]
437
- if chunk_idx <= 1:
438
- section = struct[0] if struct else "intro"
439
- else:
440
- section = rand_choice(struct[1:]) if len(struct) > 1 else "chorus"
441
-
442
- def fmt(val, suffix=""):
443
- if not val or val == "none":
444
  return ""
445
- return f", {val}{suffix}"
446
-
447
- template = prof["prompt_template"]
448
- prompt = template.format(
449
- bpm=bpm,
450
- drum=fmt(drum, " drums"),
451
- synth=fmt(synth),
452
- rhythm=fmt(rhythm),
453
- bass=fmt(bass + " bass" if bass and "bass" not in bass else bass),
454
- guitar=fmt(guitar + " guitar" if guitar and "guitar" not in guitar else guitar),
455
- mood=mood,
456
- section=section,
457
- )
458
- return prompt
459
-
460
- # ======================================================================================
461
- # PRESETS
462
- # ======================================================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
463
 
464
- PRESETS = {
465
- "default": {"cfg_scale": 5.8, "top_k": 250, "top_p": 0.95, "temperature": 0.90},
466
- "rock": {"cfg_scale": 5.8, "top_k": 250, "top_p": 0.95, "temperature": 0.90},
467
- "techno": {"cfg_scale": 5.2, "top_k": 300, "top_p": 0.96, "temperature": 0.95},
468
- "grunge": {"cfg_scale": 6.2, "top_k": 220, "top_p": 0.94, "temperature": 0.90},
469
- "indie": {"cfg_scale": 5.5, "top_k": 240, "top_p": 0.95, "temperature": 0.92},
470
- "funk_rock": {"cfg_scale": 5.8, "top_k": 260, "top_p": 0.96, "temperature": 0.94},
471
- }
472
 
473
  # ======================================================================================
474
- # MODEL LOAD
475
  # ======================================================================================
476
 
477
  try:
@@ -481,8 +412,8 @@ except Exception as e:
481
  raise
482
 
483
  def load_model():
484
- free_vram = check_vram()
485
- if free_vram is not None and free_vram < 5000:
486
  logger.warning("Low free VRAM; consider closing other apps.")
487
  clean_memory()
488
  local_model_path = str(BASE_DIR / "models" / "musicgen-large")
@@ -499,17 +430,16 @@ def load_model():
499
  musicgen_model = load_model()
500
 
501
  # ======================================================================================
502
- # GENERATION PIPELINE
503
  # ======================================================================================
504
 
505
  def _export_torch_to_segment(audio_tensor: torch.Tensor, sample_rate: int, bit_depth_int: int) -> Optional[AudioSegment]:
506
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
507
- tmp_path = tmp.name
508
  try:
509
- torchaudio.save(tmp_path, audio_tensor, sample_rate, bits_per_sample=bit_depth_int)
510
- with open(tmp_path, "rb") as f:
511
  mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
512
- seg = AudioSegment.from_wav(tmp_path)
513
  mm.close()
514
  return seg
515
  except Exception as e:
@@ -518,30 +448,28 @@ def _export_torch_to_segment(audio_tensor: torch.Tensor, sample_rate: int, bit_d
518
  return None
519
  finally:
520
  try:
521
- os.remove(tmp_path)
 
522
  except OSError:
523
  pass
524
 
525
- def _crossfade_segments(seg_a: AudioSegment, seg_b: AudioSegment, overlap_ms: int, sample_rate: int, bit_depth_int: int) -> AudioSegment:
526
  try:
527
- seg_a = ensure_stereo(seg_a, sample_rate, seg_a.sample_width)
528
- seg_b = ensure_stereo(seg_b, sample_rate, seg_b.sample_width)
529
  if overlap_ms <= 0 or len(seg_a) < overlap_ms or len(seg_b) < overlap_ms:
530
  return seg_a + seg_b
531
-
532
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as prev_wav, \
533
- tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as curr_wav:
534
- prev_path, curr_path = prev_wav.name, curr_wav.name
535
-
536
  try:
537
- seg_a[-overlap_ms:].export(prev_path, format="wav")
538
- seg_b[:overlap_ms].export(curr_path, format="wav")
539
- a_audio, sr_a = torchaudio.load(prev_path)
540
- b_audio, sr_b = torchaudio.load(curr_path)
541
- if sr_a != sample_rate:
542
- a_audio = torchaudio.functional.resample(a_audio, sr_a, sample_rate, lowpass_filter_width=64)
543
- if sr_b != sample_rate:
544
- b_audio = torchaudio.functional.resample(b_audio, sr_b, sample_rate, lowpass_filter_width=64)
545
  n = min(a_audio.shape[1], b_audio.shape[1])
546
  n = n - (n % 2)
547
  if n <= 0:
@@ -551,29 +479,29 @@ def _crossfade_segments(seg_a: AudioSegment, seg_b: AudioSegment, overlap_ms: in
551
  hann = torch.hann_window(n, periodic=False)
552
  fade_in = hann
553
  fade_out = hann.flip(0)
554
- blended = (a * fade_out + b * fade_in).to(torch.float32)
555
- blended = torch.clamp(blended, -1.0, 1.0)
556
-
557
  scale = (2**23 if bit_depth_int == 24 else 32767)
558
  blended_i = (blended * scale).to(torch.int32 if bit_depth_int == 24 else torch.int16)
559
-
560
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_x:
561
- temp_x = tmp_x.name
562
- torchaudio.save(temp_x, blended_i, sample_rate, bits_per_sample=bit_depth_int)
563
- blended_seg = AudioSegment.from_wav(temp_x)
564
- blended_seg = ensure_stereo(blended_seg, sample_rate, blended_seg.sample_width)
565
-
566
- result = seg_a[:-overlap_ms] + blended_seg + seg_b[overlap_ms:]
 
 
567
  return result
568
  finally:
569
- for p in [prev_path, curr_path, locals().get("temp_x", None)]:
570
  try:
571
- if p and os.path.exists(p):
572
  os.remove(p)
573
  except OSError:
574
  pass
575
  except Exception as e:
576
- logger.error(f"_crossfade_segments failed: {e}")
577
  return seg_a + seg_b
578
 
579
  def generate_music(
@@ -597,171 +525,160 @@ def generate_music(
597
  output_sample_rate: str,
598
  bit_depth: str
599
  ) -> Tuple[Optional[str], str, str]:
600
- global musicgen_model
601
 
602
- if not instrumental_prompt or not instrumental_prompt.strip():
603
- return None, "⚠️ Please enter a valid instrumental prompt!", vram_status_text
604
 
 
605
  try:
606
- if preset != "default":
607
- p = PRESETS.get(preset, PRESETS["default"])
608
- cfg_scale, top_k, top_p, temperature = p["cfg_scale"], p["top_k"], p["top_p"], p["temperature"]
609
- logger.info(f"Preset '{preset}' applied: cfg={cfg_scale} top_k={top_k} top_p={top_p} temp={temperature}")
610
-
611
- try:
612
- output_sr_int = int(output_sample_rate)
613
- except:
614
- return None, "❌ Invalid output sampling rate; choose 22050/44100/48000", vram_status_text
615
- try:
616
- bit_depth_int = int(bit_depth)
617
- sample_width = 3 if bit_depth_int == 24 else 2
618
- except:
619
- return None, "❌ Invalid bit depth; choose 16 or 24", vram_status_text
620
-
621
- if not check_disk_space():
622
- return None, "⚠️ Low disk space (<1GB).", vram_status_text
623
-
624
- CHUNK_SEC = 30
625
- total_duration = max(30, min(int(total_duration), 120))
626
- num_chunks = math.ceil(total_duration / CHUNK_SEC)
627
-
628
- PROCESS_SR = 48000
629
- OVERLAP_SEC = 0.20
630
-
631
- seed = random.randint(0, 2**31 - 1)
632
- random.seed(seed)
633
- torch.manual_seed(seed)
634
- np.random.seed(seed)
635
- torch.cuda.manual_seed_all(seed)
636
-
637
- musicgen_model.set_generation_params(
638
- duration=CHUNK_SEC,
639
- use_sampling=True,
640
- top_k=int(top_k),
641
- top_p=float(top_p),
642
- temperature=float(temperature),
643
- cfg_coef=float(cfg_scale),
644
- two_step_cfg=False,
645
- )
646
-
647
- vram_status_text = f"Start VRAM: {torch.cuda.memory_allocated() / 1024**2:.2f} MB"
648
-
649
- segments: List[AudioSegment] = []
650
- start_time = time.time()
651
 
652
- for idx in range(num_chunks):
653
- chunk_idx = idx + 1
654
- dur = CHUNK_SEC if (idx < num_chunks - 1) else (total_duration - CHUNK_SEC * (num_chunks - 1) or CHUNK_SEC)
655
- logger.info(f"Generating chunk {chunk_idx}/{num_chunks} ({dur}s)")
656
 
657
- prompt_text = instrumental_prompt # UI sends fully-assembled prompt (or manual text)
 
 
 
658
 
659
- try:
660
- with torch.no_grad():
661
- with autocast(dtype=torch.float16):
662
- clean_memory()
663
- if idx == 0:
664
- audio = musicgen_model.generate([prompt_text], progress=True)[0].cpu()
665
- else:
666
- prev_seg = segments[-1]
667
- prev_seg = apply_noise_gate(prev_seg, threshold_db=-80, sample_rate=PROCESS_SR)
668
- prev_seg = balance_stereo(prev_seg, noise_threshold=-40, sample_rate=PROCESS_SR)
669
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_prev:
670
- temp_prev = tmp_prev.name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
671
  try:
672
- prev_seg.export(temp_prev, format="wav")
673
- prev_audio, prev_sr = torchaudio.load(temp_prev)
674
- if prev_sr != PROCESS_SR:
675
- prev_audio = torchaudio.functional.resample(prev_audio, prev_sr, PROCESS_SR, lowpass_filter_width=64)
676
- if prev_audio.shape[0] != 2:
677
- prev_audio = prev_audio.repeat(2, 1)[:, :prev_audio.shape[1]]
678
- prev_audio = prev_audio.to(DEVICE)
679
- tail = prev_audio[:, -int(PROCESS_SR * OVERLAP_SEC):]
680
-
681
- audio = musicgen_model.generate_continuation(
682
- prompt=tail,
683
- prompt_sample_rate=PROCESS_SR,
684
- descriptions=[prompt_text],
685
- progress=True
686
- )[0].cpu()
687
- del prev_audio, tail
688
- finally:
689
- try:
690
- if os.path.exists(temp_prev):
691
- os.remove(temp_prev)
692
- except OSError:
693
- pass
694
- clean_memory()
695
- except Exception as e:
696
- logger.error(f"Chunk {chunk_idx} generation failed: {e}")
697
- logger.error(traceback.format_exc())
698
- return None, f"❌ Failed to generate chunk {chunk_idx}: {e}", vram_status_text
699
 
700
- try:
701
- if audio.shape[0] != 2:
702
- audio = audio.repeat(2, 1)[:, :audio.shape[1]]
703
- audio = audio.to(dtype=torch.float32)
704
- audio = torchaudio.functional.resample(audio, 32000, PROCESS_SR, lowpass_filter_width=64)
705
- seg = _export_torch_to_segment(audio, PROCESS_SR, bit_depth_int)
706
- if seg is None:
707
- return None, f"❌ Failed to convert audio for chunk {chunk_idx}", vram_status_text
708
- seg = ensure_stereo(seg, PROCESS_SR, sample_width)
709
- seg = seg - 15
710
- seg = apply_noise_gate(seg, threshold_db=-80, sample_rate=PROCESS_SR)
711
- seg = balance_stereo(seg, noise_threshold=-40, sample_rate=PROCESS_SR)
712
- seg = rms_normalize(seg, target_rms_db=target_volume, peak_limit_db=-3.0, sample_rate=PROCESS_SR)
713
- seg = apply_eq(seg, sample_rate=PROCESS_SR)
714
- seg = seg[:dur * 1000]
715
- segments.append(seg)
716
- del audio
717
- clean_memory()
718
- vram_status_text = f"VRAM after chunk {chunk_idx}: {torch.cuda.memory_allocated() / 1024**2:.2f} MB"
719
- except Exception as e:
720
- logger.error(f"Post-processing failed (chunk {chunk_idx}): {e}")
721
- logger.error(traceback.format_exc())
722
- return None, f"❌ Failed to process chunk {chunk_idx}: {e}", vram_status_text
723
-
724
- if not segments:
725
- return None, "❌ No audio generated.", vram_status_text
726
-
727
- logger.info("Combining chunks...")
728
- final_seg = segments[0]
729
- overlap_ms = int(0.20 * 1000)
730
- for i in range(1, len(segments)):
731
- final_seg = _crossfade_segments(final_seg, segments[i], overlap_ms, PROCESS_SR, bit_depth_int)
732
-
733
- final_seg = final_seg[:total_duration * 1000]
734
-
735
- final_seg = apply_noise_gate(final_seg, threshold_db=-80, sample_rate=PROCESS_SR)
736
- final_seg = balance_stereo(final_seg, noise_threshold=-40, sample_rate=PROCESS_SR)
737
- final_seg = rms_normalize(final_seg, target_rms_db=target_volume, peak_limit_db=-3.0, sample_rate=PROCESS_SR)
738
- final_seg = apply_eq(final_seg, sample_rate=PROCESS_SR)
739
- final_seg = apply_fade(final_seg, 500, 800)
740
- final_seg = final_seg - 10
741
- final_seg = final_seg.set_frame_rate(output_sr_int)
742
-
743
- mp3_path = MP3_DIR / f"ghostai_music_{int(time.time())}.mp3"
744
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
745
  clean_memory()
746
- final_seg.export(str(mp3_path), format="mp3", bitrate=bitrate, tags={"title": "GhostAI Instrumental", "artist": "GhostAI"})
747
  except Exception as e:
748
- logger.error(f"MP3 export failed ({bitrate}): {e}")
749
- fb = MP3_DIR / f"ghostai_music_fallback_{int(time.time())}.mp3"
750
- try:
751
- final_seg.export(str(fb), format="mp3", bitrate="128k")
752
- mp3_path = fb
753
- except Exception as ee:
754
- return None, f"❌ Failed to export MP3: {ee}", vram_status_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
755
 
756
- elapsed = time.time() - start_time
757
- vram_status_text = f"Final VRAM: {torch.cuda.memory_allocated() / 1024**2:.2f} MB"
758
- logger.info(f"Done in {elapsed:.2f}s -> {mp3_path}")
759
- return str(mp3_path), "✅ Done! 30s chunking unified seamlessly. Check output loudness/quality.", vram_status_text
760
 
761
- except Exception as e:
762
- logger.error(f"Generation failed: {e}")
763
- logger.error(traceback.format_exc())
764
- return None, f"❌ Generation failed: {e}", vram_status_text
765
  finally:
766
  clean_memory()
767
 
@@ -775,7 +692,7 @@ def clear_inputs():
775
  )
776
 
777
  # ======================================================================================
778
- # SERVER STATUS (BUSY/IDLE) & RENDER API & STYLE PROMPT API
779
  # ======================================================================================
780
 
781
  BUSY_LOCK = threading.Lock()
@@ -833,13 +750,9 @@ class RenderRequest(BaseModel):
833
  output_sample_rate: Optional[str] = None
834
  bit_depth: Optional[str] = None
835
 
836
- class SettingsUpdate(BaseModel):
837
- settings: Dict[str, Any]
838
-
839
  fastapp = FastAPI(title=f"GhostAI Music Server {RELEASE}", version=RELEASE)
840
  fastapp.add_middleware(
841
- CORSMiddleware,
842
- allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"],
843
  )
844
 
845
  @fastapp.get("/health")
@@ -848,55 +761,53 @@ def health():
848
 
849
  @fastapp.get("/status")
850
  def status():
851
- busy = is_busy()
852
- return {
853
- "busy": busy,
854
- "job_id": CURRENT_JOB["id"],
855
- "since": CURRENT_JOB["start"],
856
- "elapsed": job_elapsed(),
857
- "lockfile": os.path.exists(BUSY_FILE),
858
- "release": RELEASE
859
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
860
 
861
  @fastapp.get("/config")
862
  def get_config():
863
  return {"defaults": CURRENT_SETTINGS, "release": RELEASE}
864
 
865
  @fastapp.post("/settings")
866
- def set_settings(payload: SettingsUpdate):
867
  try:
868
  s = CURRENT_SETTINGS.copy()
869
- s.update(payload.settings or {})
870
- save_settings_to_file(s)
871
  for k, v in s.items():
872
  CURRENT_SETTINGS[k] = v
873
  return {"ok": True, "saved": s}
874
  except Exception as e:
875
  raise HTTPException(status_code=400, detail=str(e))
876
 
877
- def register_style_endpoints(app: FastAPI, profiles: Dict[str, Dict[str, Any]]):
878
- for key, prof in profiles.items():
879
- route = prof.get("api_name") or f"/set_{key}_prompt"
880
- async def style_endpoint(style_key=key):
881
- return {"style": style_key, "prompt": assemble_prompt(profiles, style_key, 120, 1), "release": RELEASE}
882
- app.add_api_route(route, style_endpoint, methods=["GET"])
883
-
884
- @fastapp.get("/styles")
885
- def list_styles():
886
- return {
887
- "styles": [
888
- {"key": k, "label": v["label"], "api_name": v["api_name"]}
889
- for k, v in PROFILES.items()
890
- ],
891
- "release": RELEASE
892
- }
893
-
894
- @fastapp.get("/prompt")
895
- def get_prompt(style: str = Query(...), bpm: int = Query(120), chunk: int = Query(1)):
896
- if style not in PROFILES:
897
- raise HTTPException(status_code=404, detail=f"Unknown style '{style}'")
898
- return {"style": style, "prompt": assemble_prompt(PROFILES, style, bpm, chunk), "release": RELEASE}
899
-
900
  @fastapp.post("/render")
901
  def render(req: RenderRequest):
902
  if is_busy():
@@ -938,276 +849,297 @@ def render(req: RenderRequest):
938
  def _start_fastapi():
939
  uvicorn.run(fastapp, host="0.0.0.0", port=8555, log_level="info")
940
 
941
- # Load profiles from prompts.ini (required) and register endpoints
942
- try:
943
- PROFILES = load_profiles_from_ini(PROMPTS_FILE)
944
- except Exception as e:
945
- logger.error(f"Failed to load {PROMPTS_FILE}: {e}")
946
- sys.exit(1)
947
- register_style_endpoints(fastapp, PROFILES)
948
-
949
  api_thread = threading.Thread(target=_start_fastapi, daemon=True)
950
  api_thread.start()
951
- logger.info(f"FastAPI server started on http://0.0.0.0:8555 ({RELEASE})")
952
 
953
  # ======================================================================================
954
- # GRADIO UI (TABS + ACCESSIBLE THEME + 4→5-COLUMN GRID FOR BAND BUTTONS)
955
  # ======================================================================================
956
 
957
- def read_css_text() -> str:
958
  try:
959
  return CSS_FILE.read_text(encoding="utf-8")
960
  except Exception as e:
961
- logger.warning(f"styles.css not found or unreadable: {e}")
962
- return "" # no fallback CSS hard-coded
963
 
964
- def read_example_md() -> str:
965
  try:
966
- return EXAMPLE_MD.read_text(encoding="utf-8")
967
- except Exception as e:
968
- logger.warning(f"example.md not found or unreadable: {e}")
969
- return "## Info\nProvide an `example.md` to populate this tab."
970
 
971
- def ui_prompt_from_style(style_key, bpm, *_):
972
- return assemble_prompt(PROFILES, style_key, int(bpm), 1)
973
-
974
- def get_latest_log() -> str:
975
- try:
976
- return LOG_FILE.read_text(encoding="utf-8") if LOG_FILE.exists() else "No log file yet."
977
- except Exception as e:
978
- return f"Error reading log: {e}"
979
-
980
- def set_bitrate_128(): return "128k"
981
- def set_bitrate_192(): return "192k"
982
- def set_bitrate_320(): return "320k"
983
- def set_sample_rate_22050(): return "22050"
984
- def set_sample_rate_44100(): return "44100"
985
- def set_sample_rate_48000(): return "48000"
986
- def set_bit_depth_16(): return "16"
987
- def set_bit_depth_24(): return "24"
988
-
989
- CSS = read_css_text()
990
  loaded = CURRENT_SETTINGS
991
 
992
- logger.info(f"Building Gradio UI {RELEASE} ...")
993
- with gr.Blocks(css=CSS, analytics_enabled=False, title=APP_TITLE, theme=gr.themes.Soft()) as demo:
994
- with gr.TabItem(f"Generator {RELEASE}", id="tab-generator"):
995
- gr.Markdown(f"""
996
- <div class="header" role="banner" aria-label="{APP_TITLE}">
997
- <div class="logo" aria-hidden="true">👻</div>
998
- <h1>{APP_TITLE}</h1>
999
- <p>30/60/90/120s chunking · seamless joins · API + style endpoints</p>
1000
- </div>
1001
- """)
1002
-
1003
- with gr.Column(elem_classes="input-container"):
1004
- gr.Markdown("### Prompt")
1005
- instrumental_prompt = gr.Textbox(
1006
- label="Instrumental Prompt",
1007
- placeholder="Type your instrumental prompt or click a style button",
1008
- lines=4,
1009
- value=loaded.get("instrumental_prompt", ""),
1010
- )
1011
-
1012
- gr.Markdown("#### Band / Style (auto grid: 4 per row, 5 on wide screens)")
1013
- style_buttons = {}
1014
- with gr.Group(elem_id="genre-grid"):
1015
- # Put all buttons as direct children (no rows) so CSS grid works cleanly
1016
- for key in PROFILES.keys():
1017
- style_buttons[key] = gr.Button(PROFILES[key]["label"], elem_classes=["style-btn"])
1018
-
1019
- with gr.Column(elem_classes="settings-container"):
1020
- gr.Markdown("### Settings")
1021
- with gr.Group(elem_classes="group-container"):
1022
- cfg_scale = gr.Slider(1.0, 10.0, step=0.1, value=float(loaded.get("cfg_scale", DEFAULT_SETTINGS["cfg_scale"])), label="CFG Scale")
1023
- top_k = gr.Slider(10, 500, step=10, value=int(loaded.get("top_k", DEFAULT_SETTINGS["top_k"])), label="Top-K")
1024
- top_p = gr.Slider(0.0, 1.0, step=0.01, value=float(loaded.get("top_p", DEFAULT_SETTINGS["top_p"])), label="Top-P")
1025
- temperature = gr.Slider(0.1, 2.0, step=0.01, value=float(loaded.get("temperature", DEFAULT_SETTINGS["temperature"])), label="Temperature")
1026
- total_duration = gr.Dropdown(choices=[30, 60, 90, 120], value=int(loaded.get("total_duration", 60)), label="Song Length (seconds)")
1027
- bpm = gr.Slider(60, 180, step=1, value=int(loaded.get("bpm", 120)), label="Tempo (BPM)")
1028
- drum_beat = gr.Dropdown(choices=["none", "standard rock", "funk groove", "techno kick", "jazz swing", "orchestral percussion", "tympani"], value=str(loaded.get("drum_beat", "none")), label="Drum Beat")
1029
- synthesizer = gr.Dropdown(choices=["none", "analog synth", "digital pad", "arpeggiated synth"], value=str(loaded.get("synthesizer", "none")), label="Synthesizer")
1030
- rhythmic_steps = gr.Dropdown(choices=["none", "syncopated steps", "steady steps", "complex steps", "martial march", "triplet swells", "staccato ostinato"], value=str(loaded.get("rhythmic_steps", "none")), label="Rhythmic Steps")
1031
- bass_style = gr.Dropdown(choices=["none", "slap bass", "deep bass", "melodic bass", "low brass", "cellos", "double basses"], value=str(loaded.get("bass_style", "none")), label="Bass / Low End")
1032
- guitar_style = gr.Dropdown(choices=["none", "distorted", "clean", "jangle", "downpicked", "thrash riffing"], value=str(loaded.get("guitar_style", "none")), label="Guitar Style")
1033
- target_volume = gr.Slider(-30.0, -20.0, step=0.5, value=float(loaded.get("target_volume", -23.0)), label="Target Loudness (dBFS RMS)")
1034
- preset = gr.Dropdown(choices=["default", "rock", "techno", "grunge", "indie", "funk_rock"], value=str(loaded.get("preset", "default")), label="Preset")
1035
- max_steps = gr.Dropdown(choices=[1000, 1200, 1300, 1500], value=int(loaded.get("max_steps", 1500)), label="Max Steps (per chunk hint)")
1036
-
1037
- bitrate_state = gr.State(value=str(loaded.get("bitrate", "192k")))
1038
- sample_rate_state = gr.State(value=str(loaded.get("output_sample_rate", "48000")))
1039
- bit_depth_state = gr.State(value=str(loaded.get("bit_depth", "16")))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1040
 
1041
  with gr.Row():
1042
- bitrate_128_btn = gr.Button("Bitrate 128k")
1043
- bitrate_192_btn = gr.Button("Bitrate 192k")
1044
- bitrate_320_btn = gr.Button("Bitrate 320k")
1045
- with gr.Row():
1046
- sample_rate_22050_btn = gr.Button("SR 22.05k")
1047
- sample_rate_44100_btn = gr.Button("SR 44.1k")
1048
- sample_rate_48000_btn = gr.Button("SR 48k")
1049
- with gr.Row():
1050
- bit_depth_16_btn = gr.Button("16-bit")
1051
- bit_depth_24_btn = gr.Button("24-bit")
1052
-
1053
- with gr.Row():
1054
- gen_btn = gr.Button("Generate Music 🚀")
1055
- clr_btn = gr.Button("Clear 🧹")
1056
- save_btn = gr.Button("Save Settings 💾")
1057
- load_btn = gr.Button("Load Settings 📂")
1058
- reset_btn = gr.Button("Reset Defaults ♻️")
1059
-
1060
- with gr.Column(elem_classes="output-container"):
1061
- gr.Markdown("### Output")
1062
- out_audio = gr.Audio(label="Generated Track", type="filepath")
1063
- status_box = gr.Textbox(label="Status", interactive=False)
1064
- vram_box = gr.Textbox(label="VRAM Usage", interactive=False, value="")
1065
-
1066
- with gr.Column(elem_classes="logs-container"):
1067
- gr.Markdown("### Logs")
1068
- log_output = gr.Textbox(label="Last Log File", lines=16, interactive=False)
1069
- log_btn = gr.Button("View Last Log")
1070
-
1071
- # Wire style buttons -> prompt textbox
1072
- for key, btn in style_buttons.items():
1073
- btn.click(
1074
- ui_prompt_from_style,
1075
- inputs=[gr.State(key), bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style],
1076
- outputs=instrumental_prompt
1077
- )
1078
-
1079
- # Quick sets
1080
- bitrate_128_btn.click(set_bitrate_128, outputs=bitrate_state)
1081
- bitrate_192_btn.click(set_bitrate_192, outputs=bitrate_state)
1082
- bitrate_320_btn.click(set_bitrate_320, outputs=bitrate_state)
1083
- sample_rate_22050_btn.click(set_sample_rate_22050, outputs=sample_rate_state)
1084
- sample_rate_44100_btn.click(set_sample_rate_44100, outputs=sample_rate_state)
1085
- sample_rate_48000_btn.click(set_sample_rate_48000, outputs=sample_rate_state)
1086
- bit_depth_16_btn.click(set_bit_depth_16, outputs=bit_depth_state)
1087
- bit_depth_24_btn.click(set_bit_depth_24, outputs=bit_depth_state)
1088
-
1089
- # Generate
1090
- gen_btn.click(
1091
- generate_music,
1092
- inputs=[
1093
- instrumental_prompt, cfg_scale, top_k, top_p, temperature, total_duration, bpm,
1094
- drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, target_volume,
1095
- preset, max_steps, vram_box, bitrate_state, sample_rate_state, bit_depth_state
1096
- ],
1097
- outputs=[out_audio, status_box, vram_box]
1098
  )
1099
 
1100
- # Clear
1101
- clr_btn.click(
1102
- clear_inputs, outputs=[
1103
- instrumental_prompt, cfg_scale, top_k, top_p, temperature, total_duration, bpm,
1104
- drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, target_volume,
1105
- preset, max_steps, bitrate_state, sample_rate_state, bit_depth_state
1106
- ]
1107
- )
 
 
 
 
 
 
 
 
 
 
 
 
1108
 
1109
- # Save / Load / Reset
1110
- def _save_action(
1111
- instrumental_prompt_v, cfg_v, top_k_v, top_p_v, temp_v, dur_v, bpm_v,
1112
- drum_v, synth_v, steps_v, bass_v, guitar_v, vol_v, preset_v, maxsteps_v, br_v, sr_v, bd_v
1113
- ):
1114
- s = {
1115
- "instrumental_prompt": instrumental_prompt_v,
1116
- "cfg_scale": float(cfg_v),
1117
- "top_k": int(top_k_v),
1118
- "top_p": float(top_p_v),
1119
- "temperature": float(temp_v),
1120
- "total_duration": int(dur_v),
1121
- "bpm": int(bpm_v),
1122
- "drum_beat": str(drum_v),
1123
- "synthesizer": str(synth_v),
1124
- "rhythmic_steps": str(steps_v),
1125
- "bass_style": str(bass_v),
1126
- "guitar_style": str(guitar_v),
1127
- "target_volume": float(vol_v),
1128
- "preset": str(preset_v),
1129
- "max_steps": int(maxsteps_v),
1130
- "bitrate": str(br_v),
1131
- "output_sample_rate": str(sr_v),
1132
- "bit_depth": str(bd_v)
1133
- }
1134
- save_settings_to_file(s)
1135
- for k, v in s.items():
1136
- CURRENT_SETTINGS[k] = v
1137
- return "✅ Settings saved."
1138
-
1139
- def _load_action():
1140
- s = load_settings_from_file()
1141
- for k, v in s.items():
1142
- CURRENT_SETTINGS[k] = v
1143
- return (
1144
- s["instrumental_prompt"], s["cfg_scale"], s["top_k"], s["top_p"], s["temperature"],
1145
- s["total_duration"], s["bpm"], s["drum_beat"], s["synthesizer"], s["rhythmic_steps"],
1146
- s["bass_style"], s["guitar_style"], s["target_volume"], s["preset"], s["max_steps"],
1147
- s["bitrate"], s["output_sample_rate"], s["bit_depth"],
1148
- "✅ Settings loaded."
1149
- )
1150
-
1151
- def _reset_action():
1152
- s = DEFAULT_SETTINGS.copy()
1153
- save_settings_to_file(s)
1154
- for k, v in s.items():
1155
- CURRENT_SETTINGS[k] = v
1156
- return (
1157
- s["instrumental_prompt"], s["cfg_scale"], s["top_k"], s["top_p"], s["temperature"],
1158
- s["total_duration"], s["bpm"], s["drum_beat"], s["synthesizer"], s["rhythmic_steps"],
1159
- s["bass_style"], s["guitar_style"], s["target_volume"], s["preset"], s["max_steps"],
1160
- s["bitrate"], s["output_sample_rate"], s["bit_depth"],
1161
- "✅ Defaults restored."
1162
- )
1163
-
1164
- save_btn.click(
1165
- _save_action,
1166
- inputs=[
1167
- instrumental_prompt, cfg_scale, top_k, top_p, temperature, total_duration, bpm,
1168
- drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, target_volume,
1169
- preset, max_steps, bitrate_state, sample_rate_state, bit_depth_state
1170
- ],
1171
- outputs=status_box
1172
- )
1173
 
1174
- load_btn.click(
1175
- _load_action,
1176
- outputs=[
1177
- instrumental_prompt, cfg_scale, top_k, top_p, temperature, total_duration, bpm,
1178
- drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, target_volume,
1179
- preset, max_steps, bitrate_state, sample_rate_state, bit_depth_state, status_box
1180
- ]
1181
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1182
 
1183
- reset_btn.click(
1184
- _reset_action,
1185
- outputs=[
1186
- instrumental_prompt, cfg_scale, top_k, top_p, temperature, total_duration, bpm,
1187
- drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, target_volume,
1188
- preset, max_steps, bitrate_state, sample_rate_state, bit_depth_state, status_box
1189
- ]
 
 
 
1190
  )
1191
 
1192
- log_btn.click(get_latest_log, outputs=log_output)
 
 
 
 
 
 
 
 
 
 
 
1193
 
1194
- with gr.TabItem("Info", id="tab-info"):
1195
- gr.Markdown(read_example_md())
 
 
 
 
 
 
 
1196
 
1197
- # ======================================================================================
1198
- # LAUNCH GRADIO
1199
- # ======================================================================================
 
 
 
 
 
1200
 
1201
- logger.info(f"Launching Gradio UI at http://0.0.0.0:9999 ({RELEASE}) ...")
1202
- try:
1203
- demo.launch(
1204
- server_name="0.0.0.0",
1205
- server_port=9999,
1206
- share=False,
1207
- inbrowser=False,
1208
- show_error=True
1209
  )
1210
- except Exception as e:
1211
- logger.error(f"Failed to launch Gradio UI: {e}")
1212
- logger.error(traceback.format_exc())
1213
- sys.exit(1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
  #!/usr/bin/env python3
3
  # -*- coding: utf-8 -*-
4
 
5
+ # GhostAI Music Generator — Release v1.3.0
6
+ # Gradio UI + FastAPI server, externalized styles (CSS), prompts (INI), and examples (MD).
7
+ # Saves MP3s to ./mp3, single rotating log (max 5MB) in ./logs, colorized console.
8
+
9
  import os
10
  import sys
11
  import gc
 
14
  import time
15
  import mmap
16
  import math
17
+ import torch
18
  import random
19
  import logging
20
  import warnings
21
  import traceback
22
  import subprocess
 
 
 
23
  import numpy as np
 
24
  import torchaudio
25
  import gradio as gr
26
  import gradio_client.utils
27
+ import threading
28
+ import configparser
29
  from pydub import AudioSegment
30
  from datetime import datetime
31
  from pathlib import Path
32
+ from typing import Optional, Tuple, Dict, Any, List
33
  from torch.cuda.amp import autocast
34
+ from logging.handlers import RotatingFileHandler
35
 
36
+ from fastapi import FastAPI, HTTPException
37
  from fastapi.middleware.cors import CORSMiddleware
38
  from pydantic import BaseModel
39
  import uvicorn
 
40
 
41
  from colorama import init as colorama_init, Fore, Style
42
 
43
+ RELEASE = "v1.3.0"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  # ======================================================================================
46
+ # PATCHES & RUNTIME
47
  # ======================================================================================
48
 
49
+ # Gradio bool schema patch
50
  _original_get_type = gradio_client.utils.get_type
51
  def _patched_get_type(schema):
52
  if isinstance(schema, bool):
 
54
  return _original_get_type(schema)
55
  gradio_client.utils.get_type = _patched_get_type
56
 
57
+ # Warnings
58
  warnings.filterwarnings("ignore")
59
+
60
+ # CUDA allocator
61
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
62
  torch.backends.cudnn.benchmark = False
63
  torch.backends.cudnn.deterministic = True
64
 
65
+ # Folders
66
+ BASE_DIR = Path(__file__).parent.resolve()
67
+ LOG_DIR = BASE_DIR / "logs"
68
+ MP3_DIR = BASE_DIR / "mp3"
69
+ LOG_DIR.mkdir(parents=True, exist_ok=True)
70
+ MP3_DIR.mkdir(parents=True, exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
+ # Logging (single rotating file, max 5MB)
73
+ LOG_FILE = LOG_DIR / "ghostai_musicgen.log"
74
  logger = logging.getLogger("ghostai-musicgen")
75
+ logger.setLevel(logging.DEBUG)
76
+ file_handler = RotatingFileHandler(LOG_FILE, maxBytes=5 * 1024 * 1024, backupCount=0, encoding="utf-8")
77
+ file_handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(message)s"))
78
+ console_handler = logging.StreamHandler(sys.stdout)
79
+ console_handler.setFormatter(logging.Formatter("%(message)s"))
80
+ logger.addHandler(file_handler)
81
+ logger.addHandler(console_handler)
82
 
83
+ # Color console banner
84
+ colorama_init()
85
+ print(f"{Fore.CYAN}GhostAI Music Generator {Fore.MAGENTA}{RELEASE}{Fore.RESET} — {Fore.GREEN}Booting...{Fore.RESET}")
86
 
87
+ # Device
88
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
89
  if DEVICE != "cuda":
90
+ print(f"{Fore.RED}CUDA not available. Exiting.{Fore.RESET}")
91
  logger.error("CUDA is required. Exiting.")
92
  sys.exit(1)
93
+
94
+ gpu_name = torch.cuda.get_device_name(0)
95
+ print(f"{Fore.YELLOW}GPU:{Fore.RESET} {gpu_name}")
96
+ print(f"{Fore.YELLOW}Precision:{Fore.RESET} fp16 (model) / fp32 (CPU audio ops)")
97
+
98
+ # External assets
99
+ CSS_FILE = BASE_DIR / "styles.css"
100
+ PROMPTS_INI = BASE_DIR / "prompts.ini"
101
+ EXAMPLES_MD = BASE_DIR / "examples.md"
102
+ SETTINGS_FILE = BASE_DIR / "settings.json"
103
 
104
  # ======================================================================================
105
+ # SETTINGS (PERSISTED)
106
  # ======================================================================================
107
 
108
  DEFAULT_SETTINGS: Dict[str, Any] = {
 
126
  "instrumental_prompt": ""
127
  }
128
 
129
def load_settings() -> Dict[str, Any]:
    """Load persisted settings from SETTINGS_FILE, falling back to defaults.

    Returns:
        The parsed settings dict with every key of DEFAULT_SETTINGS present
        (missing keys are filled in from the defaults), or a copy of
        DEFAULT_SETTINGS when the file is absent, unreadable, not valid JSON,
        or does not contain a JSON object.
    """
    try:
        # Explicit encoding: do not depend on the platform default when the
        # file was hand-edited or written on another machine.
        data = json.loads(SETTINGS_FILE.read_text(encoding="utf-8"))
    except FileNotFoundError:
        # A missing file is the normal first-run case; not worth an error log.
        return DEFAULT_SETTINGS.copy()
    except (OSError, ValueError) as e:
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        logger.error(f"Settings read failed: {e}")
        return DEFAULT_SETTINGS.copy()

    if not isinstance(data, dict):
        logger.error("Settings read failed: top-level JSON value is not an object")
        return DEFAULT_SETTINGS.copy()

    for k, v in DEFAULT_SETTINGS.items():
        data.setdefault(k, v)
    logger.info("Settings loaded.")
    return data
140
 
141
def save_settings(s: Dict[str, Any]) -> None:
    """Persist settings `s` to SETTINGS_FILE as indented JSON (best effort).

    The JSON is written to a sibling temporary file and then moved over
    SETTINGS_FILE with os.replace, so an interrupted write cannot leave a
    truncated settings file behind. Failures are logged, never raised.
    """
    tmp_path = SETTINGS_FILE.with_name(SETTINGS_FILE.name + ".tmp")
    try:
        tmp_path.write_text(json.dumps(s, indent=2), encoding="utf-8")
        os.replace(tmp_path, SETTINGS_FILE)
        logger.info("Settings saved.")
    except Exception as e:
        # Deliberately broad: saving is best effort and also has to survive
        # non-serializable values (TypeError from json.dumps).
        logger.error(f"Settings write failed: {e}")
        try:
            tmp_path.unlink()
        except OSError:
            pass
147
 
148
+ CURRENT_SETTINGS = load_settings()
149
 
150
  # ======================================================================================
151
  # VRAM / DISK / MEMORY
 
168
  def check_vram():
169
  try:
170
  r = subprocess.run(
171
+ ["nvidia-smi", "--query-gpu=memory.used,memory.total", "--format=csv"],
172
  capture_output=True, text=True
173
  )
174
  lines = r.stdout.splitlines()
175
  if len(lines) > 1:
176
+ used_mb, total_mb = map(int, re.findall(r"\d+", lines[1]))
177
  free_mb = total_mb - used_mb
178
  logger.info(f"VRAM: used {used_mb} MiB | free {free_mb} MiB | total {total_mb} MiB")
179
  if free_mb < 5000:
 
180
  procs = subprocess.run(
181
+ ["nvidia-smi", "--query-compute-apps=pid,used_memory", "--format=csv"],
182
  capture_output=True, text=True
183
  )
184
+ logger.info(f"GPU processes:\n{procs.stdout}")
185
  return free_mb
186
  except Exception as e:
187
  logger.error(f"check_vram failed: {e}")
 
202
  # AUDIO UTILS (CPU)
203
  # ======================================================================================
204
 
205
def ensure_stereo(seg: AudioSegment, sample_rate=48000, sample_width=2) -> AudioSegment:
    """Return `seg` converted to 2 channels at `sample_rate`.

    `sample_width` is accepted but not used by this function. If a conversion
    raises, the error is logged and the segment as converted so far is
    returned.
    """
    conversions = (
        ("channels", 2, "set_channels"),
        ("frame_rate", sample_rate, "set_frame_rate"),
    )
    try:
        for attr, wanted, setter in conversions:
            if getattr(seg, attr) != wanted:
                seg = getattr(seg, setter)(wanted)
    except Exception as e:
        logger.error(f"ensure_stereo failed: {e}")
    return seg
215
 
216
def calculate_rms(seg: AudioSegment) -> float:
    """Return the RMS of the segment's raw (interleaved) sample values.

    The result is in raw sample units, not dBFS. Returns 0.0 for an empty
    segment or when the samples cannot be read, so callers can use
    `rms > 0` as a "has signal" test.
    """
    try:
        # float64 keeps the squares of wide samples exact enough to average.
        samples = np.asarray(seg.get_array_of_samples(), dtype=np.float64)
        if samples.size == 0:
            # np.mean of an empty array is NaN, not 0.
            return 0.0
        return float(np.sqrt(np.mean(np.square(samples))))
    except Exception:
        return 0.0
222
 
223
def hard_limit(seg: AudioSegment, limit_db=-3.0, sample_rate=48000) -> AudioSegment:
    """Clip every sample of `seg` to +/- `limit_db` relative to full scale.

    Args:
        seg: Input audio; converted to stereo at `sample_rate` first.
        limit_db: Ceiling in dB relative to the integer full scale of the
            segment's sample width.
        sample_rate: Frame rate of the returned segment.

    Returns:
        A new stereo segment with the same sample width, or the (stereo)
        input unchanged if the width is unsupported or anything fails.
    """
    try:
        seg = ensure_stereo(seg, sample_rate, seg.sample_width)
        # The integer dtype must match the declared sample width, otherwise
        # the byte string handed to AudioSegment has the wrong length per
        # sample. NOTE(review): pydub is expected to widen 24-bit input to
        # 4-byte samples, so width 3 should not occur here; confirm.
        dtype = {1: np.int8, 2: np.int16, 4: np.int32}.get(seg.sample_width)
        if dtype is None:
            logger.error(f"hard_limit failed: unsupported sample width {seg.sample_width}")
            return seg
        full_scale = float(np.iinfo(dtype).max)
        # Never let the ceiling exceed what the integer type can hold.
        limit = min(10 ** (limit_db / 20.0) * full_scale, full_scale)
        samples = np.asarray(seg.get_array_of_samples(), dtype=np.float64)
        samples = np.clip(samples, -limit, limit).astype(dtype)
        if len(samples) % 2 != 0:
            # Keep whole stereo frames only.
            samples = samples[:-1]
        return AudioSegment(
            samples.tobytes(),
            frame_rate=sample_rate,
            sample_width=seg.sample_width,
            channels=2
        )
    except Exception as e:
        logger.error(f"hard_limit failed: {e}")
        return seg
240
 
241
def rms_normalize(seg: AudioSegment, target_rms_db=-23.0, peak_limit_db=-3.0, sample_rate=48000) -> AudioSegment:
    """Apply gain so the segment's RMS hits `target_rms_db`, then hard-limit peaks.

    Args:
        seg: Input audio; converted to stereo at `sample_rate` first.
        target_rms_db: Desired RMS in dB relative to the integer full scale
            of the segment's sample width.
        peak_limit_db: Ceiling passed on to hard_limit.
        sample_rate: Frame rate used for the stereo conversion and limiter.

    Returns:
        The normalized, limited segment; on error, the segment as it stood
        when the error occurred.
    """
    try:
        seg = ensure_stereo(seg, sample_rate, seg.sample_width)
        # Derive full scale from the actual sample width (32767 for 16-bit)
        # instead of assuming 16-bit for every width other than 3.
        full_scale = float(2 ** (8 * seg.sample_width - 1) - 1)
        target_rms = 10 ** (target_rms_db / 20) * full_scale
        current = calculate_rms(seg)
        if current > 0:
            gain = target_rms / current
            # Floor the ratio so log10 never sees zero.
            seg = seg.apply_gain(20 * np.log10(max(gain, 1e-6)))
        return hard_limit(seg, peak_limit_db, sample_rate)
    except Exception as e:
        logger.error(f"rms_normalize failed: {e}")
        return seg
253
 
254
def balance_stereo(seg: AudioSegment, noise_threshold=-40, sample_rate=48000) -> AudioSegment:
    """Equalize left/right loudness by scaling both channels to their mean RMS.

    Args:
        seg: Input audio; converted to stereo at `sample_rate` first.
        noise_threshold: Samples whose 20*log10(|value|) is not above this
            are zeroed before the RMS is measured.
        sample_rate: Frame rate of the returned segment.

    Returns:
        A new stereo segment with the same sample width, or the input
        unchanged if it is not stereo, has an unsupported width, or an
        error occurs.
    """
    try:
        seg = ensure_stereo(seg, sample_rate, seg.sample_width)
        if seg.channels != 2:
            return seg
        # The integer dtype must match the declared sample width so the byte
        # string given to AudioSegment has the right size per sample.
        dtype = {1: np.int8, 2: np.int16, 4: np.int32}.get(seg.sample_width)
        if dtype is None:
            logger.error(f"balance_stereo failed: unsupported sample width {seg.sample_width}")
            return seg
        bounds = np.iinfo(dtype)

        stereo = np.asarray(seg.get_array_of_samples(), dtype=np.float64).reshape(-1, 2)
        # NOTE(review): the level is computed on raw sample values, not on
        # values normalized to full scale, so with the default threshold any
        # non-zero integer sample passes. Kept as-is to preserve the sound;
        # confirm whether a dBFS threshold was intended.
        db = 20 * np.log10(np.abs(stereo) + 1e-10)
        stereo = stereo * (db > noise_threshold)

        left, right = stereo[:, 0], stereo[:, 1]
        l_rms = np.sqrt(np.mean(left[left != 0] ** 2)) if np.any(left != 0) else 0
        r_rms = np.sqrt(np.mean(right[right != 0] ** 2)) if np.any(right != 0) else 0
        if l_rms > 0 and r_rms > 0:
            avg = (l_rms + r_rms) / 2
            stereo[:, 0] *= (avg / l_rms)
            stereo[:, 1] *= (avg / r_rms)

        # Boosting the quieter channel can push peaks past the integer range;
        # clip first so the cast saturates instead of wrapping around.
        out = np.clip(stereo, bounds.min, bounds.max).flatten().astype(dtype)
        return AudioSegment(out.tobytes(), frame_rate=sample_rate, sample_width=seg.sample_width, channels=2)
    except Exception as e:
        logger.error(f"balance_stereo failed: {e}")
        return seg
278
 
279
def apply_noise_gate(seg: AudioSegment, threshold_db=-80, sample_rate=48000) -> AudioSegment:
    """Zero every sample whose 20*log10(|value|) is not above `threshold_db`.

    Args:
        seg: Input audio; converted to stereo at `sample_rate` first.
        threshold_db: Per-sample gate threshold (see the note in the body).
        sample_rate: Frame rate of the returned segment.

    Returns:
        A new stereo segment with the same sample width, or the input
        unchanged if it is not stereo, has an unsupported width, or an
        error occurs.
    """
    try:
        seg = ensure_stereo(seg, sample_rate, seg.sample_width)
        if seg.channels != 2:
            return seg
        # The integer dtype must match the declared sample width so the byte
        # string given to AudioSegment has the right size per sample.
        dtype = {1: np.int8, 2: np.int16, 4: np.int32}.get(seg.sample_width)
        if dtype is None:
            logger.error(f"apply_noise_gate failed: unsupported sample width {seg.sample_width}")
            return seg

        stereo = np.asarray(seg.get_array_of_samples(), dtype=np.float64).reshape(-1, 2)
        # NOTE(review): the level is computed on raw sample values, not on
        # values normalized to full scale, so with the default threshold any
        # non-zero integer sample passes; confirm the intended reference.
        # One pass is enough: gating is idempotent, a second identical pass
        # cannot change the result.
        db = 20 * np.log10(np.abs(stereo) + 1e-10)
        stereo = stereo * (db > threshold_db)

        out = stereo.flatten().astype(dtype)
        return AudioSegment(out.tobytes(), frame_rate=sample_rate, sample_width=seg.sample_width, channels=2)
    except Exception as e:
        logger.error(f"apply_noise_gate failed: {e}")
        return seg
296
 
297
def apply_eq(seg: AudioSegment, sample_rate=48000) -> AudioSegment:
    """Band-limit the segment and lower its level.

    Converts to stereo at `sample_rate`, applies a 20 Hz high-pass and an
    8000 Hz low-pass filter, then subtracts 3, 3 and 10 dB in three separate
    gain steps. If any step raises, the error is logged and the segment as
    processed so far is returned.
    """
    try:
        seg = ensure_stereo(seg, sample_rate, seg.sample_width)
        seg = seg.high_pass_filter(20)
        seg = seg.low_pass_filter(8000)
        # Kept as three distinct gain operations, exactly like the original
        # sequence, rather than one combined cut.
        for cut_db in (3, 3, 10):
            seg = seg - cut_db
    except Exception as e:
        logger.error(f"apply_eq failed: {e}")
    return seg
309
 
310
def apply_fade(seg: AudioSegment, fade_in=500, fade_out=800) -> AudioSegment:
    """Return `seg` (made stereo at its own frame rate) with a fade-in and fade-out.

    `fade_in` and `fade_out` are passed straight to the segment's fade
    methods. On failure the error is logged and the un-faded segment is
    returned.
    """
    try:
        seg = ensure_stereo(seg, seg.frame_rate, seg.sample_width)
        with_intro = seg.fade_in(fade_in)
        faded = with_intro.fade_out(fade_out)
    except Exception as e:
        logger.error(f"apply_fade failed: {e}")
        return seg
    return faded
317
 
318
  # ======================================================================================
319
+ # PROMPTS (FROM INI)
320
  # ======================================================================================
321
 
322
class StylesConfig:
    """Style definitions read from an INI file and reloaded when the file changes.

    Each INI section is one style. Its options are kept as strings, except
    the comma-separated fields listed in _LIST_FIELDS, which are split into
    lists. The file's mtime is checked before every lookup, so edits are
    picked up without a restart.
    """

    # INI options whose values are comma-separated lists.
    _LIST_FIELDS = ("drum_beat", "synthesizer", "rhythmic_steps", "bass_style", "guitar_style", "variations")

    def __init__(self, path: Path):
        """Remember `path` and load it immediately."""
        self.path = path
        self.cfg = configparser.ConfigParser()
        self.mtime = 0.0
        self.styles: Dict[str, Dict[str, Any]] = {}
        self._load()

    def _load(self):
        """(Re)read the INI file into self.styles.

        A missing file clears all styles. A file that cannot be parsed is
        logged and the previously loaded styles are kept, so a bad edit does
        not take the running service down.
        """
        if not self.path.exists():
            logger.error(f"prompts.ini not found: {self.path}")
            self.cfg = configparser.ConfigParser()
            self.styles = {}
            self.mtime = 0.0
            return

        # Always parse into a fresh parser: ConfigParser.read() merges into
        # existing state, so reusing one parser would keep sections and
        # options that were deleted from the file.
        cfg = configparser.ConfigParser()
        try:
            mtime = self.path.stat().st_mtime
            cfg.read(self.path, encoding="utf-8")
            styles: Dict[str, Dict[str, Any]] = {}
            for sec in cfg.sections():
                d: Dict[str, Any] = dict(cfg.items(sec))
                for key in self._LIST_FIELDS:
                    if key in d:
                        d[key] = [part.strip() for part in d[key].split(",") if part.strip()]
                styles[sec] = d
        except OSError as e:
            logger.error(f"prompts.ini load failed: {e}")
            return
        except (configparser.Error, UnicodeDecodeError) as e:
            logger.error(f"prompts.ini load failed: {e}")
            # Remember this mtime so the broken file is not re-parsed (and
            # re-logged) on every call until it is edited again.
            self.mtime = mtime
            return

        self.cfg = cfg
        self.styles = styles
        self.mtime = mtime
        logger.info(f"Loaded {len(self.styles)} styles from prompts.ini")

    def maybe_reload(self):
        """Reload the INI file if its modification time changed; no-op if it is gone."""
        try:
            mt = self.path.stat().st_mtime
        except OSError:
            # File missing or unreadable right now: keep what is loaded.
            return
        if mt != self.mtime:
            self._load()

    def list_styles(self) -> List[str]:
        """Return the names of all currently loaded styles (INI section names)."""
        self.maybe_reload()
        return list(self.styles.keys())

    def build_prompt(self, style: str, bpm: int, chunk_num: int = 1,
                     drum_beat="none", synthesizer="none", rhythmic_steps="none",
                     bass_style="none", guitar_style="none") -> str:
        """Render the prompt template of `style`.

        Args:
            style: Section name; an unknown style yields "".
            bpm: Tempo. The value 120 is treated as "unset" and replaced by a
                random tempo between the style's bpm_min and bpm_max.
            chunk_num: 1 draws the variation from the first half of the
                style's variations; any other value draws from all of them.
            drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style:
                Explicit choices. "none" or empty means "pick one at random
                from the style's list for that field, if it has one".

        Returns:
            The formatted prompt with runs of whitespace collapsed. Fields
            that end up as "none" are rendered as empty strings.
        """
        self.maybe_reload()
        s = self.styles.get(style)
        if s is None:
            return ""

        # Tolerate a reversed range in the INI; randint() requires lo <= hi.
        bpm_lo, bpm_hi = sorted((int(s.get("bpm_min", "100")), int(s.get("bpm_max", "140"))))
        final_bpm = bpm if bpm != 120 else random.randint(bpm_lo, bpm_hi)

        def pick(field_name: str, incoming: str) -> str:
            """Resolve one field to display text ("" when nothing applies)."""
            if incoming and incoming != "none":
                return incoming
            vals = s.get(field_name, [])
            chosen = random.choice(vals) if vals else "none"
            # "none" is a sentinel, never prompt text. This is applied to
            # every field alike, including the drum field.
            return "" if chosen == "none" else chosen

        d = pick("drum_beat", drum_beat)
        syn = pick("synthesizer", synthesizer)
        r = pick("rhythmic_steps", rhythmic_steps)
        b = pick("bass_style", bass_style)
        g = pick("guitar_style", guitar_style)

        var_list = s.get("variations", [])
        var = ""
        if var_list:
            # Prefer different variations across chunks.
            if chunk_num == 1:
                var = random.choice(var_list[: max(1, len(var_list) // 2)])
            else:
                var = random.choice(var_list)

        tpl = s.get("prompt_template",
                    "Instrumental track at {bpm} BPM {variation}.")
        prompt = tpl.format(
            bpm=final_bpm,
            drum=d,
            synth=syn,
            rhythm=r,
            bass=b,
            guitar=g,
            variation=var
        )
        return re.sub(r"\s{2,}", " ", prompt).strip()
401
 
402
+ STYLES = StylesConfig(PROMPTS_INI)
 
 
 
 
 
 
 
403
 
404
  # ======================================================================================
405
+ # MODEL
406
  # ======================================================================================
407
 
408
  try:
 
412
  raise
413
 
414
  def load_model():
415
+ free = check_vram()
416
+ if free is not None and free < 5000:
417
  logger.warning("Low free VRAM; consider closing other apps.")
418
  clean_memory()
419
  local_model_path = str(BASE_DIR / "models" / "musicgen-large")
 
430
  musicgen_model = load_model()
431
 
432
  # ======================================================================================
433
+ # GENERATION (30s CHUNKS, 60s READY)
434
  # ======================================================================================
435
 
436
  def _export_torch_to_segment(audio_tensor: torch.Tensor, sample_rate: int, bit_depth_int: int) -> Optional[AudioSegment]:
437
+ tmp = f"temp_audio_{int(time.time()*1000)}.wav"
 
438
  try:
439
+ torchaudio.save(tmp, audio_tensor, sample_rate, bits_per_sample=bit_depth_int)
440
+ with open(tmp, "rb") as f:
441
  mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
442
+ seg = AudioSegment.from_wav(tmp)
443
  mm.close()
444
  return seg
445
  except Exception as e:
 
448
  return None
449
  finally:
450
  try:
451
+ if os.path.exists(tmp):
452
+ os.remove(tmp)
453
  except OSError:
454
  pass
455
 
456
+ def _crossfade(seg_a: AudioSegment, seg_b: AudioSegment, overlap_ms: int, sr: int, bit_depth_int: int) -> AudioSegment:
457
  try:
458
+ seg_a = ensure_stereo(seg_a, sr, seg_a.sample_width)
459
+ seg_b = ensure_stereo(seg_b, sr, seg_b.sample_width)
460
  if overlap_ms <= 0 or len(seg_a) < overlap_ms or len(seg_b) < overlap_ms:
461
  return seg_a + seg_b
462
+ prev_wav = f"tmp_prev_{int(time.time()*1000)}.wav"
463
+ curr_wav = f"tmp_curr_{int(time.time()*1000)}.wav"
 
 
 
464
  try:
465
+ seg_a[-overlap_ms:].export(prev_wav, format="wav")
466
+ seg_b[:overlap_ms].export(curr_wav, format="wav")
467
+ a_audio, sra = torchaudio.load(prev_wav)
468
+ b_audio, srb = torchaudio.load(curr_wav)
469
+ if sra != sr:
470
+ a_audio = torchaudio.functional.resample(a_audio, sra, sr, lowpass_filter_width=64)
471
+ if srb != sr:
472
+ b_audio = torchaudio.functional.resample(b_audio, srb, sr, lowpass_filter_width=64)
473
  n = min(a_audio.shape[1], b_audio.shape[1])
474
  n = n - (n % 2)
475
  if n <= 0:
 
479
  hann = torch.hann_window(n, periodic=False)
480
  fade_in = hann
481
  fade_out = hann.flip(0)
482
+ blended = (a * fade_out + b * fade_in).to(torch.float32).clamp(-1.0, 1.0)
 
 
483
  scale = (2**23 if bit_depth_int == 24 else 32767)
484
  blended_i = (blended * scale).to(torch.int32 if bit_depth_int == 24 else torch.int16)
485
+ tmpx = f"tmp_cross_{int(time.time()*1000)}.wav"
486
+ torchaudio.save(tmpx, blended_i, sr, bits_per_sample=bit_depth_int)
487
+ blend_seg = AudioSegment.from_wav(tmpx)
488
+ blend_seg = ensure_stereo(blend_seg, sr, blend_seg.sample_width)
489
+ result = seg_a[:-overlap_ms] + blend_seg + seg_b[overlap_ms:]
490
+ try:
491
+ if os.path.exists(tmpx):
492
+ os.remove(tmpx)
493
+ except OSError:
494
+ pass
495
  return result
496
  finally:
497
+ for p in [prev_wav, curr_wav]:
498
  try:
499
+ if os.path.exists(p):
500
  os.remove(p)
501
  except OSError:
502
  pass
503
  except Exception as e:
504
+ logger.error(f"_crossfade failed: {e}")
505
  return seg_a + seg_b
506
 
507
  def generate_music(
 
525
  output_sample_rate: str,
526
  bit_depth: str
527
  ) -> Tuple[Optional[str], str, str]:
 
528
 
529
+ if not instrumental_prompt.strip():
530
+ return None, "⚠️ Enter a prompt.", vram_status_text
531
 
532
+ # Validate I/O
533
  try:
534
+ out_sr = int(output_sample_rate)
535
+ except:
536
+ return None, "❌ Invalid sample rate.", vram_status_text
537
+ try:
538
+ bd = int(bit_depth)
539
+ sample_width = 3 if bd == 24 else 2
540
+ except:
541
+ return None, "❌ Invalid bit depth.", vram_status_text
542
+ if not check_disk_space():
543
+ return None, "⚠️ Low disk space (<1GB).", vram_status_text
544
+
545
+ # Preset (optional)
546
+ # (kept simple; user can override via UI)
547
+ CHUNK_SEC = 30
548
+ total_duration = max(30, min(int(total_duration), 120))
549
+ num_chunks = math.ceil(total_duration / CHUNK_SEC)
550
+
551
+ PROCESS_SR = 48000
552
+ OVERLAP_SEC = 0.20
553
+ seed = random.randint(0, 2**31 - 1)
554
+ random.seed(seed)
555
+ torch.manual_seed(seed)
556
+ np.random.seed(seed)
557
+ torch.cuda.manual_seed_all(seed)
558
+
559
+ musicgen_model.set_generation_params(
560
+ duration=CHUNK_SEC,
561
+ use_sampling=True,
562
+ top_k=int(top_k),
563
+ top_p=float(top_p),
564
+ temperature=float(temperature),
565
+ cfg_coef=float(cfg_scale),
566
+ two_step_cfg=False,
567
+ )
 
 
 
 
 
 
 
 
 
 
 
568
 
569
+ vram_status_text = f"Start VRAM: {torch.cuda.memory_allocated() / 1024**2:.2f} MB"
570
+ segments: List[AudioSegment] = []
571
+ start_time = time.time()
 
572
 
573
+ for idx in range(num_chunks):
574
+ chunk_idx = idx + 1
575
+ dur = CHUNK_SEC if (idx < num_chunks - 1) else (total_duration - CHUNK_SEC * (num_chunks - 1) or CHUNK_SEC)
576
+ logger.info(f"Generating chunk {chunk_idx}/{num_chunks} ({dur}s)")
577
 
578
+ try:
579
+ with torch.no_grad():
580
+ with autocast(dtype=torch.float16):
581
+ clean_memory()
582
+ if idx == 0:
583
+ audio = musicgen_model.generate([instrumental_prompt], progress=True)[0].cpu()
584
+ else:
585
+ prev_seg = segments[-1]
586
+ prev_seg = apply_noise_gate(prev_seg, threshold_db=-80, sample_rate=PROCESS_SR)
587
+ prev_seg = balance_stereo(prev_seg, noise_threshold=-40, sample_rate=PROCESS_SR)
588
+ tmp_prev = f"prev_{int(time.time()*1000)}.wav"
589
+ try:
590
+ prev_seg.export(tmp_prev, format="wav")
591
+ prev_audio, prev_sr = torchaudio.load(tmp_prev)
592
+ if prev_sr != PROCESS_SR:
593
+ prev_audio = torchaudio.functional.resample(prev_audio, prev_sr, PROCESS_SR, lowpass_filter_width=64)
594
+ if prev_audio.shape[0] != 2:
595
+ prev_audio = prev_audio.repeat(2, 1)[:, :prev_audio.shape[1]]
596
+ prev_audio = prev_audio.to(DEVICE)
597
+ tail = prev_audio[:, -int(PROCESS_SR * OVERLAP_SEC):]
598
+ audio = musicgen_model.generate_continuation(
599
+ prompt=tail,
600
+ prompt_sample_rate=PROCESS_SR,
601
+ descriptions=[instrumental_prompt],
602
+ progress=True
603
+ )[0].cpu()
604
+ del prev_audio, tail
605
+ finally:
606
  try:
607
+ if os.path.exists(tmp_prev):
608
+ os.remove(tmp_prev)
609
+ except OSError:
610
+ pass
611
+ clean_memory()
612
+ except Exception as e:
613
+ logger.error(f"Chunk {chunk_idx} generation failed: {e}")
614
+ logger.error(traceback.format_exc())
615
+ return None, f"❌ Generate failed at chunk {chunk_idx}.", vram_status_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
616
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
617
  try:
618
+ if audio.shape[0] != 2:
619
+ audio = audio.repeat(2, 1)[:, :audio.shape[1]]
620
+ audio = audio.to(dtype=torch.float32)
621
+ audio = torchaudio.functional.resample(audio, 32000, PROCESS_SR, lowpass_filter_width=64)
622
+ seg = _export_torch_to_segment(audio, PROCESS_SR, bd)
623
+ if seg is None:
624
+ return None, f"❌ Convert failed chunk {chunk_idx}.", vram_status_text
625
+ seg = ensure_stereo(seg, PROCESS_SR, sample_width)
626
+ seg = seg - 15
627
+ seg = apply_noise_gate(seg, threshold_db=-80, sample_rate=PROCESS_SR)
628
+ seg = balance_stereo(seg, noise_threshold=-40, sample_rate=PROCESS_SR)
629
+ seg = rms_normalize(seg, target_rms_db=target_volume, peak_limit_db=-3.0, sample_rate=PROCESS_SR)
630
+ seg = apply_eq(seg, sample_rate=PROCESS_SR)
631
+ seg = seg[:dur * 1000]
632
+ segments.append(seg)
633
+ del audio
634
  clean_memory()
635
+ vram_status_text = f"VRAM after chunk {chunk_idx}: {torch.cuda.memory_allocated() / 1024**2:.2f} MB"
636
  except Exception as e:
637
+ logger.error(f"Post-process failed chunk {chunk_idx}: {e}")
638
+ logger.error(traceback.format_exc())
639
+ return None, f"❌ Post-process failed chunk {chunk_idx}.", vram_status_text
640
+
641
+ if not segments:
642
+ return None, "❌ No audio generated.", vram_status_text
643
+
644
+ logger.info("Combining chunks...")
645
+ final_seg = segments[0]
646
+ overlap_ms = int(OVERLAP_SEC * 1000)
647
+ for i in range(1, len(segments)):
648
+ final_seg = _crossfade(final_seg, segments[i], overlap_ms, PROCESS_SR, bd)
649
+
650
+ final_seg = final_seg[:total_duration * 1000]
651
+ final_seg = apply_noise_gate(final_seg, threshold_db=-80, sample_rate=PROCESS_SR)
652
+ final_seg = balance_stereo(final_seg, noise_threshold=-40, sample_rate=PROCESS_SR)
653
+ final_seg = rms_normalize(final_seg, target_rms_db=target_volume, peak_limit_db=-3.0, sample_rate=PROCESS_SR)
654
+ final_seg = apply_eq(final_seg, sample_rate=PROCESS_SR)
655
+ final_seg = apply_fade(final_seg, 500, 800)
656
+ final_seg = final_seg - 10
657
+ final_seg = final_seg.set_frame_rate(out_sr)
658
+
659
+ fname = f"ghostai_{int(time.time())}.mp3"
660
+ mp3_path = str(MP3_DIR / fname)
661
+ try:
662
+ clean_memory()
663
+ final_seg.export(mp3_path, format="mp3", bitrate=bitrate,
664
+ tags={"title": "GhostAI Instrumental", "artist": "GhostAI"})
665
+ except Exception as e:
666
+ logger.error(f"MP3 export failed: {e}")
667
+ fb = str(MP3_DIR / f"ghostai_fb_{int(time.time())}.mp3")
668
+ try:
669
+ final_seg.export(fb, format="mp3", bitrate="128k")
670
+ mp3_path = fb
671
+ except Exception as ee:
672
+ return None, f"❌ Export failed: {ee}", vram_status_text
673
 
674
+ elapsed = time.time() - start_time
675
+ vram_status_text = f"Final VRAM: {torch.cuda.memory_allocated() / 1024**2:.2f} MB"
676
+ logger.info(f"Done in {elapsed:.2f}s -> {mp3_path}")
677
+ return mp3_path, "✅ Generated.", vram_status_text
678
 
679
def generate_music_wrapper(*args):
    """Call ``generate_music`` with ``args`` and always run ``clean_memory`` afterwards.

    The cleanup runs whether generation returns normally or raises; an
    exception raised by ``generate_music`` still propagates to the caller.
    """
    try:
        outcome = generate_music(*args)
    finally:
        clean_memory()
    return outcome
684
 
 
692
  )
693
 
694
  # ======================================================================================
695
+ # SERVER STATUS & API
696
  # ======================================================================================
697
 
698
  BUSY_LOCK = threading.Lock()
 
750
  output_sample_rate: Optional[str] = None
751
  bit_depth: Optional[str] = None
752
 
 
 
 
753
  fastapp = FastAPI(title=f"GhostAI Music Server {RELEASE}", version=RELEASE)
754
  fastapp.add_middleware(
755
+ CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"]
 
756
  )
757
 
758
  @fastapp.get("/health")
 
761
 
762
  @fastapp.get("/status")
763
  def status():
764
+ return {"busy": is_busy(), "job_id": CURRENT_JOB["id"], "since": CURRENT_JOB["start"], "elapsed": job_elapsed()}
765
+
766
@fastapp.get("/styles")
def styles():
    """Return whatever ``STYLES.list_styles()`` reports, under the ``styles`` key."""
    available = STYLES.list_styles()
    return {"styles": available}
769
+
770
@fastapp.get("/prompt/{style}")
def prompt(style: str, bpm: int = 120, chunk: int = 1,
           drum_beat: str = "none", synthesizer: str = "none", rhythmic_steps: str = "none",
           bass_style: str = "none", guitar_style: str = "none"):
    """Build the prompt text for ``style`` from the given query options.

    Responds with HTTP 404 ("Style not found") when ``STYLES.build_prompt``
    returns an empty or falsy value.
    """
    options = (bpm, chunk, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style)
    txt = STYLES.build_prompt(style, *options)
    if txt:
        return {"style": style, "prompt": txt}
    raise HTTPException(status_code=404, detail="Style not found")
778
+
779
+ # Back-compat endpoints declared in prompts.ini (e.g., /set_classical_star_wars_prompt)
780
for sec, cfg in STYLES.styles.items():
    # Styles without a usable ``api_name`` get no back-compat endpoint.
    api_name = str(cfg.get("api_name") or "").strip()
    if not api_name:
        continue

    # Routes are matched against request paths, which always begin with "/";
    # normalise names written without it so the endpoint stays reachable.
    route = api_name if api_name.startswith("/") else f"/{api_name}"

    def make_route(sname, path=route):
        # ``path`` is bound as a default argument so the handler is registered
        # under the route of its own iteration rather than whatever the
        # module-level ``route`` variable holds when this function runs.
        @fastapp.get(path)
        def _(bpm: int = 120, chunk: int = 1,
              drum_beat: str = "none", synthesizer: str = "none", rhythmic_steps: str = "none",
              bass_style: str = "none", guitar_style: str = "none"):
            txt = STYLES.build_prompt(sname, bpm, chunk, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style)
            if not txt:
                raise HTTPException(status_code=404, detail="Style not found")
            return {"style": sname, "prompt": txt}

    make_route(sec)
794
 
795
  @fastapp.get("/config")
796
  def get_config():
797
  return {"defaults": CURRENT_SETTINGS, "release": RELEASE}
798
 
799
  @fastapp.post("/settings")
800
+ def set_settings(payload: Dict[str, Any]):
801
  try:
802
  s = CURRENT_SETTINGS.copy()
803
+ s.update(payload or {})
804
+ save_settings(s)
805
  for k, v in s.items():
806
  CURRENT_SETTINGS[k] = v
807
  return {"ok": True, "saved": s}
808
  except Exception as e:
809
  raise HTTPException(status_code=400, detail=str(e))
810
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
811
  @fastapp.post("/render")
812
  def render(req: RenderRequest):
813
  if is_busy():
 
849
  def _start_fastapi():
850
  uvicorn.run(fastapp, host="0.0.0.0", port=8555, log_level="info")
851
 
 
 
 
 
 
 
 
 
852
  api_thread = threading.Thread(target=_start_fastapi, daemon=True)
853
  api_thread.start()
854
+ logger.info(f"FastAPI server started on http://0.0.0.0:8555 [{RELEASE}]")
855
 
856
  # ======================================================================================
857
+ # GRADIO UI
858
  # ======================================================================================
859
 
860
def read_css() -> str:
    """Return the text of ``CSS_FILE``, or an empty string if reading fails.

    A read failure is logged and otherwise ignored.
    """
    css_text = ""
    try:
        css_text = CSS_FILE.read_text(encoding="utf-8")
    except Exception as e:
        logger.error(f"Failed to read CSS: {e}")
    return css_text
866
 
867
def read_examples() -> str:
    """Return the text of ``EXAMPLES_MD``, or a placeholder document if it cannot be read."""
    try:
        content = EXAMPLES_MD.read_text(encoding="utf-8")
    except Exception:
        # Any failure falls back to a short hint instead of an error.
        content = "# GhostAI Examples\n\n_Provide examples.md next to app.py_"
    return content
 
872
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
873
  loaded = CURRENT_SETTINGS
874
 
875
+ with gr.Blocks(css=read_css(), analytics_enabled=False, title=f"GhostAI Music Generator {RELEASE}") as demo:
876
+ with gr.Tabs():
877
+ with gr.Tab(f"🎛️ Generator {RELEASE}"):
878
+ gr.Markdown(f"""
879
+ <div class="ga-header" role="banner" aria-label="GhostAI Music Generator">
880
+ <div class="logo">👻</div>
881
+ <h1>GhostAI Music Generator</h1>
882
+ <p>Unified 30s chunking · 60s ready · API & status</p>
883
+ </div>
884
+ """)
885
+
886
+ # PROMPT
887
+ with gr.Group(elem_classes="ga-section"):
888
+ gr.Markdown("### Prompt")
889
+ instrumental_prompt = gr.Textbox(
890
+ label="Instrumental Prompt",
891
+ placeholder="Type a prompt or click a style button below",
892
+ lines=4,
893
+ value=loaded.get("instrumental_prompt", "")
894
+ )
895
+
896
+ # BAND GRID (fixed rows of 4 per row)
897
+ with gr.Group(elem_classes="ga-section"):
898
+ gr.Markdown("### Band / Style (grid 4 per row)")
899
+ # helper to create a row of 4 buttons
900
def row_of_buttons(entries):
    """Create one equal-height ``gr.Row`` of secondary buttons.

    ``entries`` is an iterable of ``(key, label)`` pairs; the result is a list
    of ``(key, button)`` pairs in the same order.
    """
    with gr.Row(equal_height=True):
        return [
            (key, gr.Button(label, variant="secondary", scale=1, min_width=0))
            for key, label in entries
        ]
907
+
908
+ # rows
909
+ row1 = row_of_buttons([
910
+ ("metallica", "Metallica (Thrash) 🎸"),
911
+ ("nirvana", "Nirvana (Grunge) 🎤"),
912
+ ("pearl_jam", "Pearl Jam (Grunge) 🦪"),
913
+ ("soundgarden", "Soundgarden (Grunge/Alt Metal) 🌑"),
914
+ ])
915
+ row2 = row_of_buttons([
916
+ ("foo_fighters", "Foo Fighters (Alt Rock) 🤘"),
917
+ ("rhcp", "Red Hot Chili Peppers (Funk Rock) 🌶️"),
918
+ ("smashing_pumpkins", "Smashing Pumpkins (Alt) 🎃"),
919
+ ("radiohead", "Radiohead (Experimental) 🧠"),
920
+ ])
921
+ row3 = row_of_buttons([
922
+ ("alternative_rock", "Alternative Rock (Pixies) 🎵"),
923
+ ("post_punk", "Post-Punk (Joy Division) 🖤"),
924
+ ("indie_rock", "Indie Rock (Arctic Monkeys) 🎤"),
925
+ ("funk_rock", "Funk Rock (RATM) 🕺"),
926
+ ])
927
+ row4 = row_of_buttons([
928
+ ("detroit_techno", "Detroit Techno 🎛️"),
929
+ ("deep_house", "Deep House 🏠"),
930
+ ("classical_star_wars", "Classical (Star Wars Suite) ✨"),
931
+ ("foo_pad", "—") # spacer to keep 4 columns
932
+ ])
933
+
934
+ # SETTINGS
935
+ with gr.Group(elem_classes="ga-section"):
936
+ gr.Markdown("### Settings")
937
+ with gr.Group():
938
+ with gr.Row():
939
+ cfg_scale = gr.Slider(1.0, 10.0, step=0.1, value=float(loaded.get("cfg_scale", DEFAULT_SETTINGS["cfg_scale"])), label="CFG Scale")
940
+ top_k = gr.Slider(10, 500, step=10, value=int(loaded.get("top_k", DEFAULT_SETTINGS["top_k"])), label="Top-K")
941
+ top_p = gr.Slider(0.0, 1.0, step=0.01, value=float(loaded.get("top_p", DEFAULT_SETTINGS["top_p"])), label="Top-P")
942
+ temperature = gr.Slider(0.1, 2.0, step=0.01, value=float(loaded.get("temperature", DEFAULT_SETTINGS["temperature"])), label="Temperature")
943
+ with gr.Row():
944
+ total_duration = gr.Dropdown(choices=[30, 60, 90, 120], value=int(loaded.get("total_duration", 60)), label="Song Length (seconds)")
945
+ bpm = gr.Slider(60, 180, step=1, value=int(loaded.get("bpm", 120)), label="Tempo (BPM)")
946
+ target_volume = gr.Slider(-30.0, -20.0, step=0.5, value=float(loaded.get("target_volume", -23.0)), label="Target Loudness (dBFS RMS)")
947
+ preset = gr.Dropdown(choices=["default", "rock", "techno", "grunge", "indie", "funk_rock"], value=str(loaded.get("preset", "default")), label="Preset")
948
+ with gr.Row():
949
+ drum_beat = gr.Dropdown(choices=["none", "standard rock", "funk groove", "techno kick", "jazz swing"], value=str(loaded.get("drum_beat", "none")), label="Drum Beat")
950
+ synthesizer = gr.Dropdown(choices=["none", "analog synth", "digital pad", "arpeggiated synth"], value=str(loaded.get("synthesizer", "none")), label="Synthesizer")
951
+ rhythmic_steps = gr.Dropdown(choices=["none", "syncopated steps", "steady steps", "complex steps"], value=str(loaded.get("rhythmic_steps", "none")), label="Rhythmic Steps")
952
+ with gr.Row():
953
+ bass_style = gr.Dropdown(choices=["none", "slap bass", "deep bass", "melodic bass"], value=str(loaded.get("bass_style", "none")), label="Bass Style")
954
+ guitar_style = gr.Dropdown(choices=["none", "distorted", "clean", "jangle"], value=str(loaded.get("guitar_style", "none")), label="Guitar Style")
955
+ max_steps = gr.Dropdown(choices=[1000, 1200, 1300, 1500], value=int(loaded.get("max_steps", 1500)), label="Max Steps (hint)")
956
+
957
+ bitrate_state = gr.State(value=str(loaded.get("bitrate", "192k")))
958
+ sample_rate_state = gr.State(value=str(loaded.get("output_sample_rate", "48000")))
959
+ bit_depth_state = gr.State(value=str(loaded.get("bit_depth", "16")))
960
+
961
+ with gr.Row():
962
+ bitrate_128_btn = gr.Button("Bitrate 128k", variant="secondary")
963
+ bitrate_192_btn = gr.Button("Bitrate 192k", variant="secondary")
964
+ bitrate_320_btn = gr.Button("Bitrate 320k", variant="secondary")
965
+ sample_rate_22050_btn = gr.Button("SR 22.05k", variant="secondary")
966
+ sample_rate_44100_btn = gr.Button("SR 44.1k", variant="secondary")
967
+ sample_rate_48000_btn = gr.Button("SR 48k", variant="secondary")
968
+ bit_depth_16_btn = gr.Button("16-bit", variant="secondary")
969
+ bit_depth_24_btn = gr.Button("24-bit", variant="secondary")
970
 
971
  with gr.Row():
972
+ gen_btn = gr.Button("Generate 🎶", variant="primary")
973
+ clr_btn = gr.Button("Clear 🧹", variant="secondary")
974
+ save_btn = gr.Button("Save Settings 💾", variant="secondary")
975
+ load_btn = gr.Button("Load Settings 📂", variant="secondary")
976
+ reset_btn = gr.Button("Reset Defaults ♻️", variant="secondary")
977
+
978
+ # OUTPUT
979
+ with gr.Group(elem_classes="ga-section"):
980
+ gr.Markdown("### Output")
981
+ out_audio = gr.Audio(label="Generated Track", type="filepath")
982
+ status_box = gr.Textbox(label="Status", interactive=False)
983
+ vram_box = gr.Textbox(label="VRAM", interactive=False, value="")
984
+
985
+ # LOGS
986
+ with gr.Group(elem_classes="ga-section"):
987
+ gr.Markdown("### Logs")
988
+ log_output = gr.Textbox(label="Current Log (rotating ≤ 5MB)", lines=14, interactive=False)
989
+ log_btn = gr.Button("View Log 📋", variant="secondary")
990
+
991
+ with gr.Tab("📚 Info & Examples"):
992
+ md_box = gr.Markdown(read_examples())
993
+ refresh_md = gr.Button("Refresh Examples.md", variant="secondary")
994
+ refresh_md.click(lambda: read_examples(), outputs=md_box)
995
+
996
+ # Band button wiring (from prompts.ini)
997
def set_prompt_from_style(style_key, bpm_v, drum_v, synth_v, steps_v, bass_v, guitar_v):
    """Build the prompt for ``style_key`` (chunk 1) from the current UI values.

    Falls back to a "<style_key>: update prompts.ini" hint when
    ``STYLES.build_prompt`` returns nothing.
    """
    tempo = int(bpm_v)
    extras = [str(value) for value in (drum_v, synth_v, steps_v, bass_v, guitar_v)]
    txt = STYLES.build_prompt(style_key, tempo, 1, *extras)
    if txt:
        return txt
    return f"{style_key}: update prompts.ini"
1000
+
1001
+ for key, btn in row1 + row2 + row3 + row4:
1002
+ if key == "foo_pad":
1003
+ continue
1004
+ btn.click(
1005
+ set_prompt_from_style,
1006
+ inputs=[gr.State(key), bpm, drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style],
1007
+ outputs=instrumental_prompt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1008
  )
1009
 
1010
+ # Quick-sets
1011
+ bitrate_128_btn.click(lambda: "128k", outputs=bitrate_state)
1012
+ bitrate_192_btn.click(lambda: "192k", outputs=bitrate_state)
1013
+ bitrate_320_btn.click(lambda: "320k", outputs=bitrate_state)
1014
+ sample_rate_22050_btn.click(lambda: "22050", outputs=sample_rate_state)
1015
+ sample_rate_44100_btn.click(lambda: "44100", outputs=sample_rate_state)
1016
+ sample_rate_48000_btn.click(lambda: "48000", outputs=sample_rate_state)
1017
+ bit_depth_16_btn.click(lambda: "16", outputs=bit_depth_state)
1018
+ bit_depth_24_btn.click(lambda: "24", outputs=bit_depth_state)
1019
+
1020
+ # Generate
1021
+ gen_btn.click(
1022
+ generate_music_wrapper,
1023
+ inputs=[
1024
+ instrumental_prompt, cfg_scale, top_k, top_p, temperature, total_duration, bpm,
1025
+ drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, target_volume,
1026
+ preset, max_steps, vram_box, bitrate_state, sample_rate_state, bit_depth_state
1027
+ ],
1028
+ outputs=[out_audio, status_box, vram_box]
1029
+ )
1030
 
1031
+ # Clear
1032
+ clr_btn.click(
1033
+ clear_inputs, outputs=[
1034
+ instrumental_prompt, cfg_scale, top_k, top_p, temperature, total_duration, bpm,
1035
+ drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, target_volume,
1036
+ preset, max_steps, bitrate_state, sample_rate_state, bit_depth_state
1037
+ ]
1038
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1039
 
1040
+ # Save/Load/Reset
1041
def _save_action(
    instrumental_prompt_v, cfg_v, top_k_v, top_p_v, temp_v, dur_v, bpm_v,
    drum_v, synth_v, steps_v, bass_v, guitar_v, vol_v, preset_v, maxsteps_v, br_v, sr_v, bd_v
):
    """Coerce the UI values to their stored types, persist them and apply them.

    Returns the status message shown in the status box.
    """
    snapshot = dict(
        instrumental_prompt=instrumental_prompt_v,
        cfg_scale=float(cfg_v),
        top_k=int(top_k_v),
        top_p=float(top_p_v),
        temperature=float(temp_v),
        total_duration=int(dur_v),
        bpm=int(bpm_v),
        drum_beat=str(drum_v),
        synthesizer=str(synth_v),
        rhythmic_steps=str(steps_v),
        bass_style=str(bass_v),
        guitar_style=str(guitar_v),
        target_volume=float(vol_v),
        preset=str(preset_v),
        max_steps=int(maxsteps_v),
        bitrate=str(br_v),
        output_sample_rate=str(sr_v),
        bit_depth=str(bd_v),
    )
    save_settings(snapshot)
    # Keep the in-memory settings in step with what was just written.
    for name, value in snapshot.items():
        CURRENT_SETTINGS[name] = value
    return "✅ Settings saved."
1069
 
1070
def _load_action():
    """Reload settings via ``load_settings`` and return them in UI output order.

    The returned tuple holds the 18 setting values followed by the status
    message; a missing key raises ``KeyError``.
    """
    stored = load_settings()
    for name, value in stored.items():
        CURRENT_SETTINGS[name] = value
    # Order matches the ``outputs`` list wired to the load button.
    field_order = (
        "instrumental_prompt", "cfg_scale", "top_k", "top_p", "temperature",
        "total_duration", "bpm", "drum_beat", "synthesizer", "rhythmic_steps",
        "bass_style", "guitar_style", "target_volume", "preset", "max_steps",
        "bitrate", "output_sample_rate", "bit_depth",
    )
    return tuple(stored[name] for name in field_order) + ("✅ Settings loaded.",)
1081
 
1082
def _reset_action():
    """Persist and apply a copy of ``DEFAULT_SETTINGS``, returning it in UI output order.

    The returned tuple holds the 18 setting values followed by the status
    message; a missing key raises ``KeyError``.
    """
    defaults = DEFAULT_SETTINGS.copy()
    save_settings(defaults)
    for name, value in defaults.items():
        CURRENT_SETTINGS[name] = value
    # Order matches the ``outputs`` list wired to the reset button.
    field_order = (
        "instrumental_prompt", "cfg_scale", "top_k", "top_p", "temperature",
        "total_duration", "bpm", "drum_beat", "synthesizer", "rhythmic_steps",
        "bass_style", "guitar_style", "target_volume", "preset", "max_steps",
        "bitrate", "output_sample_rate", "bit_depth",
    )
    return tuple(defaults[name] for name in field_order) + ("✅ Defaults restored.",)
1094
 
1095
+ save_btn.click(
1096
+ _save_action,
1097
+ inputs=[
1098
+ instrumental_prompt, cfg_scale, top_k, top_p, temperature, total_duration, bpm,
1099
+ drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, target_volume,
1100
+ preset, max_steps, bitrate_state, sample_rate_state, bit_depth_state
1101
+ ],
1102
+ outputs=status_box
1103
+ )
1104
 
1105
+ load_btn.click(
1106
+ _load_action,
1107
+ outputs=[
1108
+ instrumental_prompt, cfg_scale, top_k, top_p, temperature, total_duration, bpm,
1109
+ drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, target_volume,
1110
+ preset, max_steps, bitrate_state, sample_rate_state, bit_depth_state, status_box
1111
+ ]
1112
+ )
1113
 
1114
+ reset_btn.click(
1115
+ _reset_action,
1116
+ outputs=[
1117
+ instrumental_prompt, cfg_scale, top_k, top_p, temperature, total_duration, bpm,
1118
+ drum_beat, synthesizer, rhythmic_steps, bass_style, guitar_style, target_volume,
1119
+ preset, max_steps, bitrate_state, sample_rate_state, bit_depth_state, status_box
1120
+ ]
 
1121
  )
1122
+
1123
+ # Logs
1124
+ def _get_log():
1125
+ try:
1126
+ return LOG_FILE.read_text(encoding="utf-8")[-40000:]
1127
+ except Exception as e:
1128
+ return f"Log read error: {e}"
1129
+
1130
+ log_btn.click(_get_log, outputs=log_output)
1131
+
1132
if __name__ == "__main__":
    # Script entry point: announce the UI address, then launch Gradio.
    print(f"{Fore.CYAN}Launching Gradio UI http://0.0.0.0:9999 [{RELEASE}]{Fore.RESET}")
    launch_options = dict(
        server_name="0.0.0.0",
        server_port=9999,
        share=False,
        inbrowser=False,
        show_error=True,
    )
    try:
        demo.launch(**launch_options)
    except Exception as e:
        # A failed launch is logged with its traceback and ends the process.
        logger.error(f"Gradio launch failed: {e}")
        logger.error(traceback.format_exc())
        sys.exit(1)
public/styles.css CHANGED
@@ -1,79 +1,72 @@
1
- /* =========================
2
- FILE: styles.css
3
- ========================= */
4
- :root {
5
- color-scheme: dark;
6
- --bg:#0B0B0D;
7
- --panel:#101114;
8
- --elev:#15161B;
9
 
10
- --text:#F3F4F6;
11
- --muted:#9CA3AF;
12
-
13
- /* Accents use a professional triad rather than heavy blue */
14
- --accent:#6EE7B7; /* mint (primary) */
15
- --accent2:#FDE047; /* warm yellow (secondary) */
16
- --accent3:#60A5FA; /* soft blue (tertiary) */
17
- --focus:#22D3EE; /* cyan outline */
 
 
18
  }
19
 
20
- body, .gradio-container { background: var(--bg) !important; color: var(--text) !important; }
21
- * { color: var(--text) !important; }
22
- .wrap, .block, .tabs, .panel, .form { background: transparent !important; }
23
 
24
- .header {
25
- text-align:center; padding: 14px 16px;
26
- border-bottom: 2px solid var(--accent);
27
- background: var(--panel);
 
 
 
28
  }
29
- .header h1 { font-size: 28px; margin: 6px 0 0 0; }
30
- .header .logo { font-size: 44px; }
31
- .small { font-size: 12px; color: var(--muted) !important; }
32
 
33
- .group {
34
- border:1px solid #23242A; border-radius: 12px;
35
- padding: 14px; margin-bottom: 14px; background: var(--elev);
 
 
 
 
 
36
  }
 
 
37
 
38
- label, p, span, h2, h3, h4 { color: var(--text) !important; }
39
 
40
- input, textarea, select {
41
- background: #0F1115 !important; color: var(--text) !important;
42
- border:1px solid #252833 !important; border-radius: 10px !important;
43
  }
 
 
44
 
45
- button {
46
- background: #1F2937 !important; color: var(--text) !important;
47
- border: 1px solid #303644 !important; border-radius: 10px !important;
48
- padding: 8px 12px !important; font-weight: 700 !important;
49
- transition: border-color .15s ease, transform .05s ease;
50
  }
51
- button:hover { background: #222D3D !important; border-color: var(--accent3) !important; }
52
- button:active { transform: translateY(1px); }
53
- button:focus { outline: 3px solid var(--focus) !important; }
54
- .slider > input { accent-color: var(--accent3) !important; }
55
 
56
- /* Compact grid for band/style buttons only */
57
  #genre-grid {
58
  display: grid;
59
- grid-template-columns: repeat(5, minmax(140px, 1fr));
60
- gap: 8px;
61
- padding: 8px;
62
- border: 1px solid #23242A;
63
- border-radius: 12px;
64
- background: var(--elev);
65
- max-height: 320px;
66
- overflow: auto;
67
  }
68
- @media (max-width: 1200px) {
69
- #genre-grid { grid-template-columns: repeat(4, minmax(140px, 1fr)); }
 
 
 
 
70
  }
71
- #genre-grid > * { margin: 0 !important; }
72
- #genre-grid button {
73
- padding: 6px 10px !important;
74
- font-size: 0.9rem !important;
75
- line-height: 1.15 !important;
76
- border-radius: 10px !important;
77
- border-color: #2B3140 !important;
78
  }
79
- #genre-grid button:hover { border-color: var(--accent2) !important; }
 
 
 
 
1
+ :root { color-scheme: dark; }
 
 
 
 
 
 
 
2
 
3
+ body, .gradio-container {
4
+ background: #0E1014 !important;
5
+ color: #EAECEF !important;
6
+ --ghost-surface: #0F1420;
7
+ --ghost-border: #243049;
8
+ --ghost-ink: #EAECEF;
9
+ --ghost-accent: #10B981; /* emerald */
10
+ --ghost-accent-2: #8B5CF6; /* violet */
11
+ --ghost-accent-3: #F59E0B; /* amber */
12
+ --ghost-accent-4: #38BDF8; /* sky */
13
  }
14
 
15
+ * { color: var(--ghost-ink) !important; }
 
 
16
 
17
+ label, p, span, h1, h2, h3, h4, h5, h6 { color: var(--ghost-ink) !important; }
18
+
19
+ input, textarea, select {
20
+ background: #151922 !important;
21
+ color: var(--ghost-ink) !important;
22
+ border: 1px solid #2A3142 !important;
23
+ border-radius: 10px !important;
24
  }
 
 
 
25
 
26
+ button.gr-button {
27
+ background: linear-gradient(180deg, #1E3A8A, #1D4ED8) !important;
28
+ color: #FFFFFF !important;
29
+ border: 1px solid #2947A3 !important;
30
+ border-radius: 12px !important;
31
+ padding: 10px 16px !important;
32
+ font-weight: 700 !important;
33
+ transition: transform 0.06s ease-out, background 0.2s ease;
34
  }
35
+ button.gr-button:hover { transform: translateY(-1px); background: linear-gradient(180deg, #1F49A8, #2563EB) !important; }
36
+ button.gr-button:focus { outline: 3px solid var(--ghost-accent) !important; }
37
 
38
+ .slider > input { accent-color: var(--ghost-accent-3) !important; }
39
 
40
/* Page banner. The generator UI emits its banner markup with the class
   "ga-header"; ".header" is kept so older markup is still styled. */
.header, .ga-header {
  text-align: center; padding: 12px 16px;
  border-bottom: 2px solid var(--ghost-accent);
}
.header h1, .ga-header h1 { font-size: 28px; margin: 6px 0 0 0; }
.header .logo, .ga-header .logo { font-size: 44px; }
46
 
47
/* Section panels. The generator UI tags its groups with "ga-section";
   the older container class names are kept for backward compatibility. */
.input-container, .settings-container, .output-container, .logs-container, .ga-section {
  max-width: 1200px; margin: 16px auto; padding: 16px;
  background: var(--ghost-surface);
  border: 1px solid var(--ghost-border); border-radius: 12px;
}
 
 
 
 
52
 
53
+ /* Responsive grid for style buttons */
54
  #genre-grid {
55
  display: grid;
56
+ grid-template-columns: repeat(auto-fit, minmax(120px, 1fr));
57
+ gap: 12px;
 
 
 
 
 
 
58
  }
59
+
60
+ /* style buttons make them compact and colorful */
61
+ #genre-grid .style-btn.gr-button {
62
+ min-height: 44px;
63
+ background: linear-gradient(180deg, #0EA5E9, #0369A1) !important; /* blue-ish */
64
+ border-color: #0C4A6E !important;
65
  }
66
+ #genre-grid .style-btn.gr-button:hover {
67
+ background: linear-gradient(180deg, #22D3EE, #0EA5E9) !important;
 
 
 
 
 
68
  }
69
+
70
+ .group-container { border: 1px solid var(--ghost-border); border-radius: 12px; padding: 16px; }
71
+
72
+ .gradio-container .prose a { color: var(--ghost-accent-2) !important; text-decoration: underline; }