MH0386 committed on
Commit
4172492
·
verified ·
1 Parent(s): 61d0be7

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. pyproject.toml +1 -1
  2. src/vocalizr/__main__.py +1 -0
  3. src/vocalizr/model.py +97 -99
  4. uv.lock +1 -1
pyproject.toml CHANGED
@@ -5,7 +5,7 @@ description = "Voice Generator part of the Chatacter Backend"
5
  readme = "README.md"
6
  requires-python = ">=3.13, <3.14"
7
  dependencies = [
8
- "gradio[mcp]>=5.32.0",
9
  "kokoro>=0.9.4",
10
  "soundfile>=0.13.1",
11
  "pip>=25.1.1",
 
5
  readme = "README.md"
6
  requires-python = ">=3.13, <3.14"
7
  dependencies = [
8
+ "gradio[mcp]>=5.35.0",
9
  "kokoro>=0.9.4",
10
  "soundfile>=0.13.1",
11
  "pip>=25.1.1",
src/vocalizr/__main__.py CHANGED
@@ -15,6 +15,7 @@ def main() -> None:
15
  show_api=True,
16
  enable_monitoring=True,
17
  show_error=True,
 
18
  )
19
 
20
 
 
15
  show_api=True,
16
  enable_monitoring=True,
17
  show_error=True,
18
+ pwa=True,
19
  )
20
 
21
 
src/vocalizr/model.py CHANGED
@@ -1,99 +1,97 @@
1
- from typing import Any, Generator, Literal
2
-
3
- from gradio import Error
4
- from kokoro import KPipeline
5
- from loguru import logger
6
- from numpy import float32
7
- from numpy.typing import NDArray
8
- from soundfile import write
9
- from torch import zeros
10
-
11
- from vocalizr import AUDIO_FILE_PATH, PIPELINE
12
-
13
-
14
- @logger.catch
15
- def save_file_wav(audio: NDArray[float32]) -> None:
16
- """
17
- Saves an audio array to a WAV file using the specified sampling rate. If the saving
18
- operation fails, it logs the exception and raises a RuntimeError.
19
-
20
- :param audio: The audio data to be saved. Must be a NumPy array of data type
21
- float32, representing the audio signal to be written to the file.
22
- :type audio: NDArray[float32]
23
-
24
- :return: This function does not return a value.
25
- :rtype: None
26
- """
27
- try:
28
- logger.info(f"Saving audio to {AUDIO_FILE_PATH}")
29
- write(file=AUDIO_FILE_PATH, data=audio, samplerate=24000)
30
- except Exception as e:
31
- logger.exception(f"Failed to save audio to {AUDIO_FILE_PATH}: {e}")
32
- raise RuntimeError(f"Failed to save audio to {AUDIO_FILE_PATH}: {e}") from e
33
-
34
-
35
- @logger.catch
36
- def generate_audio_for_text(
37
- text: str,
38
- voice: str = "af_heart",
39
- speed: float = 1.0,
40
- save_file: bool = False,
41
- debug: bool = False,
42
- char_limit: int = -1,
43
- ) -> Generator[tuple[Literal[24000], NDArray[float32]], Any, None]:
44
- """
45
- Generates audio from the provided text using the specified voice and speed.
46
- It allows saving the generated audio to a file if required. The function
47
- yields tuples containing the audio sampling rate and the audio data as a
48
- NumPy array.
49
-
50
- :param text: The input text to generate audio for. If CHAR_LIMIT is set to a
51
- positive value, the text will be truncated to fit that limit.
52
- :type text: str
53
-
54
- :param voice: The voice profile to use for audio generation.
55
- Defaults to "af_heart".
56
- :type voice: str
57
-
58
- :param speed: The speed modifier for audio generation. Defaults to 1.0.
59
- :type speed: float
60
-
61
- :param save_file: Whether to save the generated audio to a file. Defaults
62
- to False.
63
- :type save_file: bool
64
-
65
- :param debug: Whether to enable debug mode. Defaults to False.
66
- :type debug: bool
67
-
68
- :param char_limit: The maximum number of characters to include in the input
69
- :type char_limit: int
70
-
71
- :return: A generator that yields tuples, where the first element is the
72
- fixed sampling rate of 24,000 Hz, and the second element is a NumPy
73
- array representing the generated audio data.
74
- :rtype: Generator[tuple[Literal[24000], NDArray[float32]], Any, None]
75
- """
76
- if not text:
77
- logger.exception("No text provided")
78
- elif len(text) < 4:
79
- logger.exception(f"Text too short: {text} with length {len(text)}")
80
- text = text if char_limit == -1 else text.strip()[:char_limit]
81
- generator: Generator[KPipeline.Result, None, None] = PIPELINE(
82
- text=text, voice=voice, speed=speed
83
- )
84
- first = True
85
- for _, _, audio in generator:
86
- if audio is None or isinstance(audio, str):
87
- logger.exception(f"Unexpected type (audio): {type(audio)}")
88
- raise Error(message=f"Unexpected type (audio): {type(audio)}")
89
- if debug:
90
- logger.info(f"Generating audio for '{text}'")
91
- audio_np: NDArray[float32] = audio.numpy()
92
- if save_file:
93
- if debug:
94
- logger.info(f"Saving audio file at {AUDIO_FILE_PATH}")
95
- save_file_wav(audio=audio_np)
96
- yield 24000, audio_np
97
- if first:
98
- first = False
99
- yield 24000, zeros(1).numpy()
 
1
+ from typing import Any, Generator, Literal
2
+
3
+ from gradio import Error
4
+ from kokoro import KPipeline
5
+ from loguru import logger
6
+ from numpy import dtype, float32, ndarray
7
+ from soundfile import write
8
+ from torch import zeros
9
+
10
+ from vocalizr import AUDIO_FILE_PATH, PIPELINE
11
+
12
+
13
+ @logger.catch
14
+ def save_file_wav(audio: ndarray[tuple[float32], dtype[float32]]) -> None:
15
+ """
16
+ Saves an audio array to a WAV file using the specified sampling rate. If the saving
17
+ operation fails, it logs the exception and raises a RuntimeError.
18
+
19
+ :param ndarray[tuple[float32],dtype[float32]] audio: The audio data to be saved.
20
+ Must be a NumPy array of data type float32, representing the audio signal
21
+ to be written to the file.
22
+
23
+ :return: This function does not return a value.
24
+ :rtype: None
25
+ """
26
+ try:
27
+ logger.info(f"Saving audio to {AUDIO_FILE_PATH}")
28
+ write(file=AUDIO_FILE_PATH, data=audio, samplerate=24000)
29
+ except Exception as e:
30
+ logger.exception(f"Failed to save audio to {AUDIO_FILE_PATH}: {e}")
31
+ raise RuntimeError(f"Failed to save audio to {AUDIO_FILE_PATH}: {e}") from e
32
+
33
+
34
+ @logger.catch
35
+ def generate_audio_for_text(
36
+ text: str,
37
+ voice: str = "af_heart",
38
+ speed: float = 1.0,
39
+ save_file: bool = False,
40
+ debug: bool = False,
41
+ char_limit: int = -1,
42
+ ) -> Generator[
43
+ tuple[Literal[24000], ndarray[tuple[float32], dtype[float32]]]
44
+ | tuple[int, ndarray],
45
+ Any,
46
+ None,
47
+ ]:
48
+ """
49
+ Generates audio from the provided text using the specified voice and speed.
50
+ It allows saving the generated audio to a file if required. The function
51
+ yields tuples containing the audio sampling rate and the audio data as a
52
+ NumPy array.
53
+
54
+ :param str text: The input text to generate audio for. If CHAR_LIMIT is set to a
55
+ positive value, the text will be truncated to fit that limit.
56
+
57
+ :param str voice: The voice profile to use for audio generation.
58
+ Defaults to "af_heart".
59
+
60
+ :param float speed: The speed modifier for audio generation. Defaults to 1.0.
61
+
62
+ :param bool save_file: Whether to save the generated audio to a file. Defaults
63
+ to False.
64
+
65
+ :param bool debug: Whether to enable debug mode. Defaults to False.
66
+
67
+ :param int char_limit: The maximum number of characters to include in the input
68
+
69
+ :return: A generator that yields tuples, where the first element is the
70
+ fixed sampling rate of 24,000 Hz, and the second element is a NumPy
71
+ array representing the generated audio data.
72
+ :rtype: Generator[tuple[Literal[24000], NDArray[float32]], Any, None]
73
+ """
74
+ if not text:
75
+ logger.exception("No text provided")
76
+ elif len(text) < 4:
77
+ logger.exception(f"Text too short: {text} with length {len(text)}")
78
+ text = text if char_limit == -1 else text.strip()[:char_limit]
79
+ generator: Generator[KPipeline.Result, None, None] = PIPELINE(
80
+ text=text, voice=voice, speed=speed
81
+ )
82
+ first = True
83
+ for _, _, audio in generator:
84
+ if audio is None or isinstance(audio, str):
85
+ logger.exception(f"Unexpected type (audio): {type(audio)}")
86
+ raise Error(message=f"Unexpected type (audio): {type(audio)}")
87
+ if debug:
88
+ logger.info(f"Generating audio for '{text}'")
89
+ audio_np: ndarray[tuple[float32], dtype[float32]] = audio.numpy()
90
+ if save_file:
91
+ if debug:
92
+ logger.info(f"Saving audio file at {AUDIO_FILE_PATH}")
93
+ save_file_wav(audio=audio_np)
94
+ yield 24000, audio_np
95
+ if first:
96
+ first = False
97
+ yield 24000, zeros(1).numpy()
 
 
uv.lock CHANGED
@@ -1894,7 +1894,7 @@ dev = [
1894
 
1895
  [package.metadata]
1896
  requires-dist = [
1897
- { name = "gradio", extras = ["mcp"], specifier = ">=5.32.0" },
1898
  { name = "kokoro", specifier = ">=0.9.4" },
1899
  { name = "pip", specifier = ">=25.1.1" },
1900
  { name = "soundfile", specifier = ">=0.13.1" },
 
1894
 
1895
  [package.metadata]
1896
  requires-dist = [
1897
+ { name = "gradio", extras = ["mcp"], specifier = ">=5.35.0" },
1898
  { name = "kokoro", specifier = ">=0.9.4" },
1899
  { name = "pip", specifier = ">=25.1.1" },
1900
  { name = "soundfile", specifier = ">=0.13.1" },