H5N1AIDS committed on
Commit
e484e70
·
verified ·
1 Parent(s): 8cf3c1e

Upload 10 files

Browse files
.gitattributes CHANGED
@@ -45,3 +45,6 @@ Transcribe_and_Translate_Subtitles/LLM/Gemma/Tokenizer/tokenizer.json filter=lfs
45
  Transcribe_and_Translate_Subtitles/LLM/Gemma/12B/5c2befb3-2b4e-11f0-846e-ec8e77077d52 filter=lfs diff=lfs merge=lfs -text
46
  Transcribe_and_Translate_Subtitles/VAD/HumAware/HumAwareVAD.jit filter=lfs diff=lfs merge=lfs -text
47
  Transcribe_and_Translate_Subtitles/VAD/NVIDIA_Frame_VAD_Multilingual_MarbleNet/frame_vad_multilingual_marblenet_v2.0.nemo filter=lfs diff=lfs merge=lfs -text
 
 
 
 
45
  Transcribe_and_Translate_Subtitles/LLM/Gemma/12B/5c2befb3-2b4e-11f0-846e-ec8e77077d52 filter=lfs diff=lfs merge=lfs -text
46
  Transcribe_and_Translate_Subtitles/VAD/HumAware/HumAwareVAD.jit filter=lfs diff=lfs merge=lfs -text
47
  Transcribe_and_Translate_Subtitles/VAD/NVIDIA_Frame_VAD_Multilingual_MarbleNet/frame_vad_multilingual_marblenet_v2.0.nemo filter=lfs diff=lfs merge=lfs -text
48
+ Transcribe_and_Translate_Subtitles/VAD/TEN/lib/Linux/x64/libten_vad.so filter=lfs diff=lfs merge=lfs -text
49
+ Transcribe_and_Translate_Subtitles/VAD/TEN/lib/macOS/ten_vad.framework/Versions/A/ten_vad filter=lfs diff=lfs merge=lfs -text
50
+ Transcribe_and_Translate_Subtitles/VAD/TEN/lib/Windows/x64/ten_vad.dll filter=lfs diff=lfs merge=lfs -text
Transcribe_and_Translate_Subtitles/VAD/NVIDIA_Frame_VAD_Multilingual_MarbleNet/F16/NVIDIA_MarbleNet.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccbd7a6ad40d64299eb6502d3b672c332b061dddc7a8152716f9cb31410fa3f4
3
+ size 766700
Transcribe_and_Translate_Subtitles/VAD/NVIDIA_Frame_VAD_Multilingual_MarbleNet/F32/NVIDIA_MarbleNet.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cc079144772762b81fed23ab538ab1948761ed3d58aed9f580b274641dffa18
3
+ size 1515526
Transcribe_and_Translate_Subtitles/VAD/TEN/include/ten_vad.h ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #ifndef TEN_VAD_H
2
+ #define TEN_VAD_H
3
+
4
+ #if defined(__APPLE__) || defined(__ANDROID__) || defined(__linux__)
5
+ #define TENVAD_API __attribute__((visibility("default")))
6
+ #elif defined(_WIN32) || defined(__CYGWIN__)
7
+ #ifdef TENVAD_EXPORTS
8
+ #define TENVAD_API __declspec(dllexport)
9
+ #else
10
+ #define TENVAD_API __declspec(dllimport)
11
+ #endif
12
+ #else
13
+ #define TENVAD_API
14
+ #endif
15
+
16
+ #include <stddef.h> /* size_t */
17
+ #include <stdint.h> /* int16_t */
18
+
19
+ #ifdef __cplusplus
20
+ extern "C"
21
+ {
22
+ #endif
23
+
24
+ /**
25
+ * @typedef ten_vad_handle_t
26
+ * @brief Opaque handle for ten_vad instance.
27
+ */
28
+ typedef void *ten_vad_handle_t;
29
+
30
+ /**
31
+ * @brief Create and initialize a ten_vad instance.
32
+ *
33
+ * @param[out] handle Pointer to receive the vad handle.
34
+ * @param[in] hop_size The number of samples between the start points of
35
+ * two consecutive analysis frames. (e.g., 256).
36
+ * @param[in] threshold VAD detection threshold in the range [0.0, 1.0].
37
+ * This threshold is used to determine voice activity by comparing with the output probability.
38
+ * When probability >= threshold, voice is detected.
39
+ * @return 0 on success, or -1 if an error occurs.
40
+ */
41
+ TENVAD_API int ten_vad_create(ten_vad_handle_t *handle, size_t hop_size,
42
+ float threshold);
43
+
44
+ /**
45
+ * @brief Process one audio frame for voice activity detection.
46
+ * Must call ten_vad_create() before calling this, and ten_vad_destroy() when done.
47
+ *
48
+ * @param[in] handle Valid VAD handle returned by ten_vad_create().
49
+ * @param[in] audio_data Pointer to an array of int16_t samples,
50
+ * buffer length must equal the hop size specified at ten_vad_create.
51
+ * @param[in] audio_data_length Length of the audio_data buffer; must be equal to hop_size.
52
+ * @param[out] out_probability Pointer to a float (size 1) that receives the
53
+ * voice activity probability in the range [0.0, 1.0], where higher values indicate higher confidence in voice presence.
54
+ * @param[out] out_flag Pointer to an int (size 1) that receives the
55
+ * binary voice activity decision: 0: no voice, 1: voice detected.
56
+ * This flag is set to 1 when out_probability >= threshold, and 0 otherwise.
57
+ * @return 0 on success, or -1 if an error occurs.
58
+ */
59
+ TENVAD_API int ten_vad_process(ten_vad_handle_t handle, const int16_t *audio_data, size_t audio_data_length,
60
+ float *out_probability, int *out_flag);
61
+
62
+ /**
63
+ * @brief Destroy a ten_vad instance and release its resources.
64
+ *
65
+ * @param[in,out] handle Pointer to the ten_vad handle; set to NULL on return.
66
+ * @return 0 on success, or -1 if an error occurs.
67
+ */
68
+ TENVAD_API int ten_vad_destroy(ten_vad_handle_t *handle);
69
+
70
+ /**
71
+ * @brief Get the ten_vad library version string.
72
+ *
73
+ * @return The version string (e.g., "1.0.0").
74
+ */
75
+ TENVAD_API const char *ten_vad_get_version(void);
76
+
77
+ #ifdef __cplusplus
78
+ }
79
+ #endif
80
+
81
+ #endif /* TEN_VAD_H */
Transcribe_and_Translate_Subtitles/VAD/TEN/include/ten_vad.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ctypes import c_int, c_int32, c_float, c_size_t, CDLL, c_void_p, POINTER
2
+ import numpy as np
3
+ import os
4
+
5
+
6
+ class TenVad:
7
+ def __init__(self, hop_size: int = 256, threshold: float = 0.5, lib_path: str = r".lib/Linux/x64/libten_vad.so"):
8
+ self.hop_size = hop_size
9
+ self.threshold = threshold
10
+ if os.path.exists(
11
+ lib_path
12
+ ):
13
+ self.vad_library = CDLL(
14
+ lib_path
15
+ )
16
+ else:
17
+ self.vad_library = CDLL(
18
+ lib_path
19
+ )
20
+ self.vad_handler = c_void_p(0)
21
+ self.out_probability = c_float()
22
+ self.out_flags = c_int32()
23
+
24
+ self.vad_library.ten_vad_create.argtypes = [
25
+ POINTER(c_void_p),
26
+ c_size_t,
27
+ c_float,
28
+ ]
29
+ self.vad_library.ten_vad_create.restype = c_int
30
+
31
+ self.vad_library.ten_vad_destroy.argtypes = [POINTER(c_void_p)]
32
+ self.vad_library.ten_vad_destroy.restype = c_int
33
+
34
+ self.vad_library.ten_vad_process.argtypes = [
35
+ c_void_p,
36
+ c_void_p,
37
+ c_size_t,
38
+ POINTER(c_float),
39
+ POINTER(c_int32),
40
+ ]
41
+ self.vad_library.ten_vad_process.restype = c_int
42
+ self.create_and_init_handler()
43
+
44
+ def create_and_init_handler(self):
45
+ assert (
46
+ self.vad_library.ten_vad_create(
47
+ POINTER(c_void_p)(self.vad_handler),
48
+ c_size_t(self.hop_size),
49
+ c_float(self.threshold),
50
+ )
51
+ == 0
52
+ ), "[TEN VAD]: create handler failure!"
53
+
54
+ def __del__(self):
55
+ assert (
56
+ self.vad_library.ten_vad_destroy(
57
+ POINTER(c_void_p)(self.vad_handler)
58
+ )
59
+ == 0
60
+ ), "[TEN VAD]: destroy handler failure!"
61
+
62
+ def get_input_data(self, audio_data: np.ndarray):
63
+ audio_data = np.squeeze(audio_data)
64
+ assert (
65
+ len(audio_data.shape) == 1
66
+ and audio_data.shape[0] == self.hop_size
67
+ ), "[TEN VAD]: audio data shape should be [%d]" % (
68
+ self.hop_size
69
+ )
70
+ assert (
71
+ type(audio_data[0]) == np.int16
72
+ ), "[TEN VAD]: audio data type error, must be int16"
73
+ data_pointer = audio_data.__array_interface__["data"][0]
74
+ return c_void_p(data_pointer)
75
+
76
+ def process(self, audio_data: np.ndarray):
77
+ input_pointer = self.get_input_data(audio_data)
78
+ self.vad_library.ten_vad_process(
79
+ self.vad_handler,
80
+ input_pointer,
81
+ c_size_t(self.hop_size),
82
+ POINTER(c_float)(self.out_probability),
83
+ POINTER(c_int32)(self.out_flags),
84
+ )
85
+ return self.out_probability.value, self.out_flags.value
Transcribe_and_Translate_Subtitles/VAD/TEN/lib/Linux/x64/libten_vad.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5abfe6bf6e9a4fcea6b440240f0a9a0f431ab5006e48a4e16465ebe681ffd90f
3
+ size 313240
Transcribe_and_Translate_Subtitles/VAD/TEN/lib/Windows/x64/ten_vad.dll ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38937f5604fa93a7941db7b9326992b792fa3731ebf9353973b3234457c6064b
3
+ size 510464
Transcribe_and_Translate_Subtitles/VAD/TEN/lib/Windows/x64/ten_vad.lib ADDED
Binary file (2.35 kB). View file
 
Transcribe_and_Translate_Subtitles/VAD/TEN/lib/macOS/ten_vad.framework/Versions/A/Headers/ten_vad.h ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*
2
+ * @file ten_vad.h
3
+ * @brief Ten Voice Activity Detection (ten_vad) C API
4
+ * Version: 1.0.0
5
+ *
6
+ * Provides functions to create, process, and destroy a VAD instance.
7
+ */
8
+ #ifndef TEN_VAD_H
9
+ #define TEN_VAD_H
10
+
11
+ #if defined(__APPLE__) || defined(__ANDROID__) || defined(__linux__)
12
+ #define TENVAD_API __attribute__((visibility("default")))
13
+ #elif defined(_WIN32) || defined(__CYGWIN__)
14
+ /**
15
+ * @def TENVAD_API
16
+ * @brief Export/import macro for ten_vad shared library symbols.
17
+ */
18
+ #ifdef TENVAD_EXPORTS
19
+ #define TENVAD_API __declspec(dllexport)
20
+ #else
21
+ #define TENVAD_API __declspec(dllimport)
22
+ #endif
23
+ #else
24
+ #define TENVAD_API
25
+ #endif
26
+
27
+ #include <stddef.h> /* size_t */
28
+ #include <stdint.h> /* int16_t */
29
+
30
+ #ifdef __cplusplus
31
+ extern "C"
32
+ {
33
+ #endif
34
+
35
+ /**
36
+ * @typedef ten_vad_handle_t
37
+ * @brief Opaque handle for ten_vad instance.
38
+ */
39
+ typedef void *ten_vad_handle_t;
40
+
41
+ /**
42
+ * @brief Create and initialize a ten_vad instance.
43
+ *
44
+ * @param[out] handle Pointer to receive the vad handle.
45
+ * @param[in] hop_size The number of samples between the start points of
46
+ * two consecutive analysis frames. (e.g., 256).
47
+ * @param[in] threshold VAD detection threshold in the range [0.0, 1.0]
48
+ * (default: 0.5).
49
+ * @return 0 on success, or -1 if an error occurs.
50
+ */
51
+ TENVAD_API int ten_vad_create(ten_vad_handle_t *handle, size_t hop_size,
52
+ float threshold);
53
+
54
+ /**
55
+ * @brief Process one audio frame for voice activity detection.
56
+ * Must call ten_vad_create() before calling this, and ten_vad_destroy() when done.
57
+ *
58
+ * @param[in] handle Valid VAD handle returned by ten_vad_create().
59
+ * @param[in] audio_data Pointer to an array of int16_t samples,
60
+ * buffer length must equal the hop size specified at ten_vad_create.
61
+ * @param[in] audio_data_length Length of the audio_data buffer; must be equal to hop_size.
62
+ * @param[out] out_probability Pointer to a float (size 1) that receives the
63
+ * voice activity probability in the range [0.0, 1.0].
64
+ * @param[out] out_flag Pointer to an int (size 1) that receives the
65
+ * detection result: 0 = no voice, 1 = voice detected.
66
+ * @return 0 on success, or -1 if an error occurs.
67
+ */
68
+ TENVAD_API int ten_vad_process(ten_vad_handle_t handle, const int16_t *audio_data, size_t audio_data_length,
69
+ float *out_probability, int *out_flag);
70
+
71
+ /**
72
+ * @brief Destroy a ten_vad instance and release its resources.
73
+ *
74
+ * @param[in,out] handle Pointer to the ten_vad handle; set to NULL on return.
75
+ * @return 0 on success, or -1 if an error occurs.
76
+ */
77
+ TENVAD_API int ten_vad_destroy(ten_vad_handle_t *handle);
78
+
79
+ /**
80
+ * @brief Get the ten_vad library version string.
81
+ *
82
+ * @return The version string (e.g., "1.0.0").
83
+ */
84
+ TENVAD_API const char *ten_vad_get_version(void);
85
+
86
+ #ifdef __cplusplus
87
+ }
88
+ #endif
89
+
90
+ #endif /* TEN_VAD_H */
Transcribe_and_Translate_Subtitles/VAD/TEN/lib/macOS/ten_vad.framework/Versions/A/Resources/Info.plist ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3
+ <plist version="1.0">
4
+ <dict>
5
+ <key>BuildMachineOSBuild</key>
6
+ <string>23D60</string>
7
+ <key>CFBundleDevelopmentRegion</key>
8
+ <string>English</string>
9
+ <key>CFBundleExecutable</key>
10
+ <string>ten_vad</string>
11
+ <key>CFBundleIdentifier</key>
12
+ <string>com.yourcompany.ten_vad</string>
13
+ <key>CFBundleInfoDictionaryVersion</key>
14
+ <string>6.0</string>
15
+ <key>CFBundlePackageType</key>
16
+ <string>FMWK</string>
17
+ <key>CFBundleSignature</key>
18
+ <string>????</string>
19
+ <key>CFBundleSupportedPlatforms</key>
20
+ <array>
21
+ <string>MacOSX</string>
22
+ </array>
23
+ <key>CSResourcesFileMapped</key>
24
+ <true/>
25
+ <key>DTCompiler</key>
26
+ <string>com.apple.compilers.llvm.clang.1_0</string>
27
+ <key>DTPlatformBuild</key>
28
+ <string></string>
29
+ <key>DTPlatformName</key>
30
+ <string>macosx</string>
31
+ <key>DTPlatformVersion</key>
32
+ <string>14.2</string>
33
+ <key>DTSDKBuild</key>
34
+ <string>23C53</string>
35
+ <key>DTSDKName</key>
36
+ <string>macosx14.2</string>
37
+ <key>DTXcode</key>
38
+ <string>1520</string>
39
+ <key>DTXcodeBuild</key>
40
+ <string>15C500b</string>
41
+ <key>LSMinimumSystemVersion</key>
42
+ <string>10.10</string>
43
+ </dict>
44
+ </plist>
Transcribe_and_Translate_Subtitles/VAD/TEN/lib/macOS/ten_vad.framework/Versions/A/ten_vad ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81b2de13710670bb94fef315ab50fedc903a21c04c4290c6c2ac28d8b42e715a
3
+ size 744600