monatolmats committed on
Commit
79d8187
·
verified ·
1 Parent(s): 90101f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -19,7 +19,7 @@ if DEVICE.type == "cpu":
19
  torch.set_num_threads(min(2, os.cpu_count() or 2))
20
 
21
  MAX_LEN = 160_000 # 10 s @16 kHz
22
- NUM_DOMAINS = 3 # ennustame alati 3 domääni põhjal
23
  # ---------------------------------------------------------------------------
24
 
25
  # ---------- laadime mudeli korra -------------------------------------------
@@ -50,7 +50,7 @@ def compute_spec(wav: torch.Tensor) -> torch.Tensor:
50
  return torch.stack(views)
51
 
52
  def single_predict(audio_path: str) -> float:
53
- """MOS ühe faili kohta – keskmistab 3 domääni."""
54
  wav, sr = torchaudio.load(audio_path)
55
  if sr != 16_000:
56
  wav = torchaudio.transforms.Resample(sr, 16_000)(wav)[0]
@@ -98,7 +98,7 @@ def batch_predict(csv_file, wav_zip):
98
  zf.extractall(tdir)
99
 
100
  df = pd.read_csv(csv_file.name)
101
- col = "faili_uus_nimi" if "faili_uus_nimi" in df.columns else "audio"
102
  outs, errors = [], []
103
 
104
  for rel in df[col]:
@@ -126,20 +126,20 @@ with gr.Blocks(title="UTMOS-v2 MOS-hinnang (3 domeeni)") as demo:
126
  gr.Markdown(
127
  """
128
  # UTMOS-v2 (eesti kõne)
129
- Ennustab objektiivse MOS-i **kolme treeningu-domääni** keskmisena.
130
  """
131
  )
132
 
133
  # Üksik klipp -----------------------------------------------------------
134
- with gr.Tab("Üksik klipp"):
135
  audio = gr.Audio(type="filepath", label="WAV (16 kHz või muu)")
136
  out_mos = gr.Number(label="Ennustatud MOS")
137
  gr.Button("Hinda").click(single_predict, inputs=audio, outputs=out_mos)
138
 
139
  # Partii ---------------------------------------------------------------
140
  with gr.Tab("Partii (CSV + ZIP)"):
141
- csv_in = gr.File(file_types=[".csv"], label="CSV (audio|faili_uus_nimi)")
142
- zip_in = gr.File(file_types=[".zip"], label="ZIP kõigi WAV-idega")
143
  df_out = gr.Dataframe(label="Tulemused")
144
  file_dl = gr.File(label="Lae CSV ennustustega")
145
  gr.Button("Start").click(batch_predict,
 
19
  torch.set_num_threads(min(2, os.cpu_count() or 2))
20
 
21
  MAX_LEN = 160_000 # 10 s @16 kHz
22
+ NUM_DOMAINS = 3 # ennustame alati 3 domeeni põhjal
23
  # ---------------------------------------------------------------------------
24
 
25
  # ---------- laadime mudeli korra -------------------------------------------
 
50
  return torch.stack(views)
51
 
52
  def single_predict(audio_path: str) -> float:
53
+ """MOS ühe faili kohta – keskmistab 3 domeeni."""
54
  wav, sr = torchaudio.load(audio_path)
55
  if sr != 16_000:
56
  wav = torchaudio.transforms.Resample(sr, 16_000)(wav)[0]
 
98
  zf.extractall(tdir)
99
 
100
  df = pd.read_csv(csv_file.name)
101
+ col = "faili_nimi"
102
  outs, errors = [], []
103
 
104
  for rel in df[col]:
 
126
  gr.Markdown(
127
  """
128
  # UTMOS-v2 (eesti kõne)
129
+ Ennustab objektiivse MOS-i **kolme treeningu-domeeni** keskmisena.
130
  """
131
  )
132
 
133
  # Üksik klipp -----------------------------------------------------------
134
+ with gr.Tab("1 WAV fail"):
135
  audio = gr.Audio(type="filepath", label="WAV (16 kHz või muu)")
136
  out_mos = gr.Number(label="Ennustatud MOS")
137
  gr.Button("Hinda").click(single_predict, inputs=audio, outputs=out_mos)
138
 
139
  # Partii ---------------------------------------------------------------
140
  with gr.Tab("Partii (CSV + ZIP)"):
141
+ csv_in = gr.File(file_types=[".csv"], label="CSV (veerud: faili_nimi")
142
+ zip_in = gr.File(file_types=[".zip"], label="ZIP (WAV failid")
143
  df_out = gr.Dataframe(label="Tulemused")
144
  file_dl = gr.File(label="Lae CSV ennustustega")
145
  gr.Button("Start").click(batch_predict,