Spaces:

AlphaSphereDotAI
/

Vocalizr

Running

App Files Files Community

MH0386 commited on 13 days ago

Commit

4172492

verified ·

1 Parent(s): 61d0be7

Upload folder using huggingface_hub

Browse files

Files changed (4) hide show

pyproject.toml +1 -1
src/vocalizr/__main__.py +1 -0
src/vocalizr/model.py +97 -99
uv.lock +1 -1

pyproject.toml CHANGED Viewed

@@ -5,7 +5,7 @@ description = "Voice Generator part of the Chatacter Backend"
 readme = "README.md"
 requires-python = ">=3.13, <3.14"
 dependencies = [
-  "gradio[mcp]>=5.32.0",
   "kokoro>=0.9.4",
   "soundfile>=0.13.1",
   "pip>=25.1.1",

 readme = "README.md"
 requires-python = ">=3.13, <3.14"
 dependencies = [
+  "gradio[mcp]>=5.35.0",
   "kokoro>=0.9.4",
   "soundfile>=0.13.1",
   "pip>=25.1.1",

src/vocalizr/__main__.py CHANGED Viewed

@@ -15,6 +15,7 @@ def main() -> None:
         show_api=True,
         enable_monitoring=True,
         show_error=True,
     )

         show_api=True,
         enable_monitoring=True,
         show_error=True,
+        pwa=True,
     )

src/vocalizr/model.py CHANGED Viewed

@@ -1,99 +1,97 @@
-from typing import Any, Generator, Literal
-from gradio import Error
-from kokoro import KPipeline
-from loguru import logger
-from numpy import float32
-from numpy.typing import NDArray
-from soundfile import write
-from torch import zeros
-from vocalizr import AUDIO_FILE_PATH, PIPELINE
-@logger.catch
-def save_file_wav(audio: NDArray[float32]) -> None:
-    """
-    Saves an audio array to a WAV file using the specified sampling rate. If the saving
-    operation fails, it logs the exception and raises a RuntimeError.
-    :param audio: The audio data to be saved. Must be a NumPy array of data type
-        float32, representing the audio signal to be written to the file.
-    :type audio: NDArray[float32]
-    :return: This function does not return a value.
-    :rtype: None
-    """
-    try:
-        logger.info(f"Saving audio to {AUDIO_FILE_PATH}")
-        write(file=AUDIO_FILE_PATH, data=audio, samplerate=24000)
-    except Exception as e:
-        logger.exception(f"Failed to save audio to {AUDIO_FILE_PATH}: {e}")
-        raise RuntimeError(f"Failed to save audio to {AUDIO_FILE_PATH}: {e}") from e
-@logger.catch
-def generate_audio_for_text(
-    text: str,
-    voice: str = "af_heart",
-    speed: float = 1.0,
-    save_file: bool = False,
-    debug: bool = False,
-    char_limit: int = -1,
-) -> Generator[tuple[Literal[24000], NDArray[float32]], Any, None]:
-    """
-    Generates audio from the provided text using the specified voice and speed.
-    It allows saving the generated audio to a file if required. The function
-    yields tuples containing the audio sampling rate and the audio data as a
-    NumPy array.
-    :param text: The input text to generate audio for. If CHAR_LIMIT is set to a
-        positive value, the text will be truncated to fit that limit.
-    :type text: str
-    :param voice: The voice profile to use for audio generation.
-        Defaults to "af_heart".
-    :type voice: str
-    :param speed: The speed modifier for audio generation. Defaults to 1.0.
-    :type speed: float
-    :param save_file: Whether to save the generated audio to a file. Defaults
-        to False.
-    :type save_file: bool
-    :param debug: Whether to enable debug mode. Defaults to False.
-    :type debug: bool
-    :param char_limit: The maximum number of characters to include in the input
-    :type char_limit: int
-    :return: A generator that yields tuples, where the first element is the
-        fixed sampling rate of 24,000 Hz, and the second element is a NumPy
-        array representing the generated audio data.
-    :rtype: Generator[tuple[Literal[24000], NDArray[float32]], Any, None]
-    """
-    if not text:
-        logger.exception("No text provided")
-    elif len(text) < 4:
-        logger.exception(f"Text too short: {text} with length {len(text)}")
-    text = text if char_limit == -1 else text.strip()[:char_limit]
-    generator: Generator[KPipeline.Result, None, None] = PIPELINE(
-        text=text, voice=voice, speed=speed
-    )
-    first = True
-    for _, _, audio in generator:
-        if audio is None or isinstance(audio, str):
-            logger.exception(f"Unexpected type (audio): {type(audio)}")
-            raise Error(message=f"Unexpected type (audio): {type(audio)}")
-        if debug:
-            logger.info(f"Generating audio for '{text}'")
-        audio_np: NDArray[float32] = audio.numpy()
-        if save_file:
-            if debug:
-                logger.info(f"Saving audio file at {AUDIO_FILE_PATH}")
-            save_file_wav(audio=audio_np)
-        yield 24000, audio_np
-        if first:
-            first = False
-            yield 24000, zeros(1).numpy()

+from typing import Any, Generator, Literal
+from gradio import Error
+from kokoro import KPipeline
+from loguru import logger
+from numpy import dtype, float32, ndarray
+from soundfile import write
+from torch import zeros
+from vocalizr import AUDIO_FILE_PATH, PIPELINE
+@logger.catch
+def save_file_wav(audio: ndarray[tuple[float32], dtype[float32]]) -> None:
+    """
+    Saves an audio array to a WAV file using the specified sampling rate. If the saving
+    operation fails, it logs the exception and raises a RuntimeError.
+    :param ndarray[tuple[float32],dtype[float32]] audio: The audio data to be saved.
+        Must be a NumPy array of data type float32, representing the audio signal
+        to be written to the file.
+    :return: This function does not return a value.
+    :rtype: None
+    """
+    try:
+        logger.info(f"Saving audio to {AUDIO_FILE_PATH}")
+        write(file=AUDIO_FILE_PATH, data=audio, samplerate=24000)
+    except Exception as e:
+        logger.exception(f"Failed to save audio to {AUDIO_FILE_PATH}: {e}")
+        raise RuntimeError(f"Failed to save audio to {AUDIO_FILE_PATH}: {e}") from e
+@logger.catch
+def generate_audio_for_text(
+    text: str,
+    voice: str = "af_heart",
+    speed: float = 1.0,
+    save_file: bool = False,
+    debug: bool = False,
+    char_limit: int = -1,
+) -> Generator[
+    tuple[Literal[24000], ndarray[tuple[float32], dtype[float32]]]
+    | tuple[int, ndarray],
+    Any,
+    None,
+]:
+    """
+    Generates audio from the provided text using the specified voice and speed.
+    It allows saving the generated audio to a file if required. The function
+    yields tuples containing the audio sampling rate and the audio data as a
+    NumPy array.
+    :param str text: The input text to generate audio for. If CHAR_LIMIT is set to a
+        positive value, the text will be truncated to fit that limit.
+    :param str voice: The voice profile to use for audio generation.
+        Defaults to "af_heart".
+    :param float speed: The speed modifier for audio generation. Defaults to 1.0.
+    :param bool save_file: Whether to save the generated audio to a file. Defaults
+        to False.
+    :param bool debug: Whether to enable debug mode. Defaults to False.
+    :param int char_limit: The maximum number of characters to include in the input
+    :return: A generator that yields tuples, where the first element is the
+        fixed sampling rate of 24,000 Hz, and the second element is a NumPy
+        array representing the generated audio data.
+    :rtype: Generator[tuple[Literal[24000], NDArray[float32]], Any, None]
+    """
+    if not text:
+        logger.exception("No text provided")
+    elif len(text) < 4:
+        logger.exception(f"Text too short: {text} with length {len(text)}")
+    text = text if char_limit == -1 else text.strip()[:char_limit]
+    generator: Generator[KPipeline.Result, None, None] = PIPELINE(
+        text=text, voice=voice, speed=speed
+    )
+    first = True
+    for _, _, audio in generator:
+        if audio is None or isinstance(audio, str):
+            logger.exception(f"Unexpected type (audio): {type(audio)}")
+            raise Error(message=f"Unexpected type (audio): {type(audio)}")
+        if debug:
+            logger.info(f"Generating audio for '{text}'")
+        audio_np: ndarray[tuple[float32], dtype[float32]] = audio.numpy()
+        if save_file:
+            if debug:
+                logger.info(f"Saving audio file at {AUDIO_FILE_PATH}")
+            save_file_wav(audio=audio_np)
+        yield 24000, audio_np
+        if first:
+            first = False
+            yield 24000, zeros(1).numpy()

uv.lock CHANGED Viewed

@@ -1894,7 +1894,7 @@ dev = [
 [package.metadata]
 requires-dist = [
-    { name = "gradio", extras = ["mcp"], specifier = ">=5.32.0" },
     { name = "kokoro", specifier = ">=0.9.4" },
     { name = "pip", specifier = ">=25.1.1" },
     { name = "soundfile", specifier = ">=0.13.1" },

 [package.metadata]
 requires-dist = [
+    { name = "gradio", extras = ["mcp"], specifier = ">=5.35.0" },
     { name = "kokoro", specifier = ">=0.9.4" },
     { name = "pip", specifier = ">=25.1.1" },
     { name = "soundfile", specifier = ">=0.13.1" },