priteshmistry committed on
Commit
4d81aed
·
verified ·
1 Parent(s): 41ffd84

Upload 17 files

Browse files
tools/convert_to_gif.sh ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ # Help function
4
# Print the usage text for this script to stdout.
show_help() {
    cat <<EOF
Usage: $0 <input_video>

This script converts a video file to a GIF with the following properties:
 - Frame rate: 10 fps
 - Resolution: 1728x1028 (with padding if needed)

Example:
 $0 input.mov

The output file will be named 'output.gif'.
EOF
}
16
+
17
# Show the usage text and exit successfully when any help flag is given.
if [[ "$1" == "-h" || "$1" == "--h" || "$1" == "-help" || "$1" == "--help" ]]; then
    show_help
    exit 0
fi

# Exactly one positional argument (the input video) is expected.
if [ "$#" -ne 1 ]; then
    echo "Error: Missing input file." >&2
    echo "Use --help for usage information." >&2
    exit 1
fi

# Input video file and fixed output name
input_file="$1"
output_file="output.gif"

# Fail early with a clear message instead of letting ffmpeg report it.
if [ ! -f "$input_file" ]; then
    echo "Error: Input file not found: $input_file" >&2
    exit 1
fi

# Conversion parameters
fps=10
width=1728
height=1028

# Convert the video to GIF: resample to $fps, scale to fit inside
# ${width}x${height} keeping the aspect ratio, then pad to the exact size
# with the picture centred.
if ! ffmpeg -i "$input_file" -vf "fps=$fps,scale=${width}:${height}:force_original_aspect_ratio=decrease,pad=${width}:${height}:(ow-iw)/2:(oh-ih)/2" "$output_file"; then
    # Only claim success when ffmpeg actually succeeded.
    echo "Error: ffmpeg failed to convert '$input_file'." >&2
    exit 1
fi

echo "Conversion complete! Output file: $output_file"
43
+
tools/generate_ebooks.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import subprocess
4
+
5
+ from iso639 import languages
6
+ from deep_translator import GoogleTranslator
7
+ from tqdm import tqdm
8
+
9
+ #parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
10
+ #sys.path.append(parent_dir)
11
+
12
+ # Your language mapping dictionary from lang.py
13
+ from lib.lang import language_mapping
14
+
15
# Environment passed to the ebook-convert subprocess (UTF-8 I/O forced).
env = os.environ.copy()
os.environ['COQUI_TOS_AGREED'] = '1'
env["PYTHONIOENCODING"] = "utf-8"
env["LANG"] = "en_US.UTF-8"

# Base text to be translated
base_text = "This is a test from the result of text file to audiobook conversion."

# Output directory (relative to the current working directory)
output_dir = "../ebooks/tests"
os.makedirs(output_dir, exist_ok=True)

# Path to your base cover image (adjust the path accordingly)
base_cover_image = "../ebooks/tests/__cover.jpg"

# List to keep track of languages that failed
failed_languages = []

# Loop over languages with a progress bar
for lang_code, lang_info in tqdm(language_mapping.items(), desc="Processing languages"):
    try:
        # Prefer the ISO 639-1 code when one exists; otherwise keep the
        # mapping's own code.
        lang_iso = lang_code
        language_array = languages.get(part3=lang_code)
        if language_array and language_array.part1:
            lang_iso = language_array.part1
        if lang_iso == "zh":
            lang_iso = "zh-CN"

        # Translate the text
        translated_text = GoogleTranslator(source='en', target=lang_iso).translate(base_text)
        print(f"\nTranslated text for {lang_info['name']} ({lang_iso}): {translated_text}")

        # Write the translated text to a txt file
        txt_filepath = os.path.join(output_dir, f"test_{lang_code}.txt")
        with open(txt_filepath, 'w', encoding='utf-8') as f:
            f.write(translated_text)

        # Prepare the ebook-convert command
        azw3_filepath = os.path.join(output_dir, f"test_{lang_code}.azw3")
        command = [
            "ebook-convert",
            txt_filepath,
            azw3_filepath,
            "--cover", base_cover_image,
            "--title", f"Ebook {lang_info['name']} Test",
            "--authors", "Dev Team",
            "--language", lang_iso,
            "--input-encoding", "utf-8",
        ]

        # check=True turns a non-zero exit status into CalledProcessError so
        # a failed conversion is recorded below instead of being reported as
        # a generated ebook.
        subprocess.run(command, env=env, text=True, encoding="utf-8", check=True)
        print(f"Ebook generated for {lang_info['name']} at {azw3_filepath}\n")

    except Exception as e:
        # Best-effort batch: record the failure with its real cause and
        # carry on with the next language.
        print(f"Error: language {lang_code} could not be processed: {e}")
        failed_languages.append(lang_code)
        continue

# After processing all languages, output the list of languages that failed
if failed_languages:
    print("\nThe following languages could not be processed:")
    for lang_code in failed_languages:
        lang_name = language_mapping[lang_code]['name']
        print(f"- {lang_name} ({lang_code})")
tools/gpu_notebook_test.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
# Report the installed torch version and every CUDA device it can see.
cuda_ok = torch.cuda.is_available()
print("Torch version:", torch.__version__)
print("CUDA Available:", cuda_ok)

if not cuda_ok:
    print("No GPU detected.")
else:
    gpu_total = torch.cuda.device_count()
    print("Number of GPUs:", gpu_total)
    for index in range(gpu_total):
        print(f"GPU {index}: {torch.cuda.get_device_name(index)}")
tools/gpu_test.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
def gpu_test():
    """Report PyTorch CUDA support and run a small matmul on the first GPU.

    Prints whether CUDA is available, the number and names of the visible
    GPUs, and the outcome of a 1000x1000 matrix multiplication on
    ``cuda:0``. Returns ``None`` in every case; when CUDA is unavailable
    the function returns right after saying so.
    """
    print("Checking PyTorch GPU support...")

    # Check if CUDA is available
    if not torch.cuda.is_available():
        print("CUDA is NOT available. Running on CPU.")
        return

    # Get number of GPUs
    num_gpus = torch.cuda.device_count()
    print(f"CUDA is available. Number of GPUs: {num_gpus}")

    # List all GPUs
    for i in range(num_gpus):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

    # Run a small test on the first GPU
    device = torch.device("cuda:0")
    print(f"Running a small test on {torch.cuda.get_device_name(0)}...")

    # Test computation
    try:
        x = torch.rand(1000, 1000, device=device)
        torch.mm(x, x)
        # CUDA work is queued asynchronously; wait for it so that a failing
        # kernel raises here, inside the try, rather than later.
        torch.cuda.synchronize(device)
        print("GPU computation successful!")
    except Exception as e:
        print(f"GPU computation failed: {e}")
30
+
31
+ if __name__ == "__main__":
32
+ gpu_test()
tools/m4b_chapter_extractor.py ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ M4B Chapter Extractor
4
+
5
+ A command-line tool to extract chapters from M4B audiobook files
6
+ and save them as individual MP3 files.
7
+
8
+ Requirements:
9
+ - ffmpeg installed and accessible in PATH
10
+ - Python 3.6+
11
+
12
+ Usage:
13
+ python m4b_chapter_extractor.py input.m4b -o output_folder
14
+ """
15
+
16
+ import argparse
17
+ import os
18
+ import sys
19
+ import subprocess
20
+ import json
21
+ import re
22
+ from pathlib import Path
23
+ from typing import List, Dict, Optional
24
+
25
+
26
class M4BChapterExtractor:
    """Split an M4B/M4A audiobook into one MP3 file per chapter.

    Chapter boundaries are read with ``ffprobe``; each chapter is then
    transcoded with ``ffmpeg`` (libmp3lame) into ``output_dir``.
    """

    def __init__(self, input_file: str, output_dir: str, quality: str = "192k"):
        """Validate the input path and create the output directory.

        Args:
            input_file: Path to the ``.m4b`` / ``.m4a`` file.
            output_dir: Directory that receives the MP3 files (created,
                including parents, when missing).
            quality: Audio bitrate handed to ffmpeg's ``-ab`` option.

        Raises:
            FileNotFoundError: ``input_file`` does not exist.
            ValueError: ``input_file`` is not an M4B or M4A file.
        """
        self.input_file = Path(input_file)
        self.output_dir = Path(output_dir)
        self.quality = quality

        # Validate input file
        if not self.input_file.exists():
            raise FileNotFoundError(f"Input file not found: {input_file}")

        if self.input_file.suffix.lower() not in ('.m4b', '.m4a'):
            raise ValueError("Input file must be an M4B or M4A file")

        # Create output directory
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def check_ffmpeg(self) -> bool:
        """Return True when both ``ffmpeg`` and ``ffprobe`` can be run.

        ``ffprobe`` is checked too because ``get_chapters`` depends on it.
        """
        for tool in ('ffmpeg', 'ffprobe'):
            try:
                result = subprocess.run([tool, '-version'],
                                        capture_output=True, text=True)
            except FileNotFoundError:
                return False
            if result.returncode != 0:
                return False
        return True

    def get_chapters(self) -> List[Dict]:
        """Return the chapter list reported by ``ffprobe -show_chapters``.

        Returns an empty list (after printing the reason) when ffprobe
        exits with an error or its output is not valid JSON.
        """
        cmd = [
            'ffprobe',
            '-v', 'quiet',
            '-print_format', 'json',
            '-show_chapters',
            str(self.input_file)
        ]

        try:
            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
            data = json.loads(result.stdout)
            return data.get('chapters', [])
        except subprocess.CalledProcessError as e:
            print(f"Error getting chapters: {e}")
            return []
        except json.JSONDecodeError as e:
            print(f"Error parsing chapter data: {e}")
            return []

    def sanitize_filename(self, filename: str) -> str:
        """Return ``filename`` made safe for use as a file name.

        Removes ``< > : " / \\ | ? *``, collapses whitespace runs to one
        space, trims to at most 200 characters and falls back to
        ``"Chapter"`` when nothing is left.
        """
        filename = re.sub(r'[<>:"/\\|?*]', '', filename)
        filename = re.sub(r'\s+', ' ', filename).strip()

        # Ensure filename isn't too long (limit to 200 characters)
        if len(filename) > 200:
            filename = filename[:200].strip()

        return filename or "Chapter"

    def format_time(self, seconds: float) -> str:
        """Convert seconds to ``HH:MM:SS.mmm``.

        The value is rounded to whole milliseconds first so the carry
        propagates into minutes/hours (59.9996 s -> ``00:01:00.000``, never
        ``00:00:60.000``).
        """
        total_ms = int(round(seconds * 1000))
        hours, remainder = divmod(total_ms, 3_600_000)
        minutes, remainder = divmod(remainder, 60_000)
        secs, millis = divmod(remainder, 1000)
        return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"

    def extract_chapter(self, chapter: Dict, chapter_num: int, total_chapters: int) -> bool:
        """Transcode a single chapter to an MP3 file in ``output_dir``.

        Args:
            chapter: One entry of ``get_chapters()``; must provide
                ``start_time`` and ``end_time`` and may provide
                ``tags.title``.
            chapter_num: 1-based chapter index, used for the file name
                prefix and the ``track`` tag.
            total_chapters: Total chapter count, used for the ``track`` tag.

        Returns:
            True when ffmpeg exits with status 0, False otherwise.
        """
        # Get chapter title
        title = chapter.get('tags', {}).get('title', f"Chapter {chapter_num:02d}")
        title = self.sanitize_filename(title)

        # Create output filename
        output_filename = f"{chapter_num:02d} - {title}.mp3"
        output_path = self.output_dir / output_filename

        # Get start and end times
        start_time = float(chapter['start_time'])
        end_time = float(chapter['end_time'])
        duration = end_time - start_time

        print(f"Extracting [{chapter_num}/{total_chapters}]: {title}")
        print(f" Duration: {self.format_time(duration)}")

        # -ss is placed before -i so ffmpeg seeks in the input instead of
        # decoding and discarding everything that precedes the chapter.
        cmd = [
            'ffmpeg',
            '-ss', str(start_time),
            '-i', str(self.input_file),
            '-t', str(duration),
            '-acodec', 'libmp3lame',
            '-ab', self.quality,
            '-map_metadata', '0',
            '-id3v2_version', '3',
            '-metadata', f'title={title}',
            '-metadata', f'track={chapter_num}/{total_chapters}',
            '-y',  # Overwrite output file
            str(output_path)
        ]

        try:
            result = subprocess.run(cmd, capture_output=True, text=True)
            if result.returncode == 0:
                print(f" ✓ Saved: {output_filename}")
                return True
            else:
                print(f" ✗ Error extracting chapter: {result.stderr}")
                return False
        except Exception as e:
            print(f" ✗ Exception during extraction: {e}")
            return False

    def extract_all_chapters(self) -> bool:
        """Extract every chapter; return True only if all of them succeeded.

        Returns False without extracting anything when the FFmpeg tools are
        unavailable or the file has no chapters.
        """
        print(f"Processing: {self.input_file.name}")
        print(f"Output directory: {self.output_dir}")

        # Check if FFmpeg is available
        if not self.check_ffmpeg():
            print("Error: FFmpeg not found. Please install FFmpeg and ensure it's in your PATH.")
            return False

        # Get chapters
        chapters = self.get_chapters()
        if not chapters:
            print("No chapters found in the M4B file.")
            return False

        print(f"Found {len(chapters)} chapters")
        print("-" * 50)

        # Extract each chapter
        success_count = 0
        for i, chapter in enumerate(chapters, 1):
            if self.extract_chapter(chapter, i, len(chapters)):
                success_count += 1
            print()

        # Summary
        print("-" * 50)
        print(f"Extraction complete: {success_count}/{len(chapters)} chapters extracted successfully")

        if success_count == len(chapters):
            print("All chapters extracted successfully!")
            return True
        else:
            print(f"Warning: {len(chapters) - success_count} chapters failed to extract")
            return False
173
+
174
+
175
def main():
    """Command-line entry point.

    Parses the arguments, runs the extraction and exits with status 0 when
    every chapter was extracted, 1 otherwise (including on any error or a
    keyboard interrupt).
    """
    cli = argparse.ArgumentParser(
        description="Extract chapters from M4B audiobook files as individual MP3 files",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python m4b_chapter_extractor.py audiobook.m4b -o chapters/
python m4b_chapter_extractor.py audiobook.m4b -o output/ -q 128k
python m4b_chapter_extractor.py audiobook.m4b -o output/ --quality 256k

Requirements:
- FFmpeg must be installed and accessible in PATH
- Input file must be M4B or M4A format
"""
    )
    cli.add_argument('input_file', help='Path to the input M4B audiobook file')
    cli.add_argument('-o', '--output', required=True,
                     help='Output directory for extracted MP3 chapters')
    cli.add_argument('-q', '--quality', default='192k',
                     help='MP3 audio quality/bitrate (default: 192k). Examples: 128k, 192k, 256k, 320k')
    # Accepted on the command line; not read anywhere in this function.
    cli.add_argument('-v', '--verbose', action='store_true',
                     help='Enable verbose output')

    options = cli.parse_args()

    try:
        extractor = M4BChapterExtractor(
            input_file=options.input_file,
            output_dir=options.output,
            quality=options.quality,
        )
        all_extracted = extractor.extract_all_chapters()
        exit_status = 0 if all_extracted else 1
        sys.exit(exit_status)
    except (FileNotFoundError, ValueError) as problem:
        # Both are raised by the extractor's input validation.
        print(f"Error: {problem}")
        sys.exit(1)
    except KeyboardInterrupt:
        print("\nOperation cancelled by user")
        sys.exit(1)
    except Exception as problem:
        print(f"Unexpected error: {problem}")
        sys.exit(1)
242
+
243
+
244
+ if __name__ == "__main__":
245
+ main()
tools/normalize_wav_file.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import subprocess
4
+ import argparse
5
+ import shutil
6
+ import torch
7
+
8
def demucs_voice(wav_file, output_dir, models_dir):
    """Isolate the vocal stem of ``wav_file`` with the ``demucs`` CLI.

    Args:
        wav_file: Path of the audio file to separate.
        output_dir: Directory passed to demucs' ``--out`` option.
        models_dir: Directory used as the torch hub dir / ``TORCH_HOME``.

    Returns:
        Path of the produced ``vocals.wav``
        (``<output_dir>/htdemucs/<input base name>/vocals.wav``).

    Raises:
        RuntimeError: demucs could not be started, exited with an error, or
            did not produce the expected file.
    """
    try:
        # Set TORCH_HOME for demucs
        torch.hub.set_dir(models_dir)
        os.environ['TORCH_HOME'] = models_dir
        demucs_app = shutil.which('demucs')
        if not demucs_app:
            # Not on PATH: fall back to the bundled environment's script.
            demucs_app = os.path.join('..', 'python_env', 'Scripts', 'demucs')
        # Run demucs subprocess
        cmd = [
            demucs_app,
            "--verbose",
            "--two-stems=vocals",
            "--out", output_dir,
            wav_file
        ]

        print(f"🔄 Running: {' '.join(cmd)}")
        subprocess.run(cmd, check=True)

        # demucs writes each track into a sub-folder named after the input
        # file, below the model-name folder.
        base_name = os.path.splitext(os.path.basename(wav_file))[0]
        demucs_output_path = os.path.join(output_dir, "htdemucs", base_name, "vocals.wav")
        if not os.path.exists(demucs_output_path):
            raise FileNotFoundError(f"Expected output not found: {demucs_output_path}")
        print(f"✅ Voice track saved to: {demucs_output_path}")
        return demucs_output_path

    except subprocess.CalledProcessError as e:
        raise RuntimeError(
            f"demucs failed with exit code {e.returncode}.\n"
            f"stdout: {getattr(e, 'output', 'N/A')}\n"
            f"stderr: {getattr(e, 'stderr', 'N/A')}"
        ) from e
    except FileNotFoundError as e:
        raise RuntimeError(f"FileNotFoundError: {e}") from e
    except Exception as e:
        raise RuntimeError(f"Unexpected error: {e}") from e
47
+
48
def normalize_audio_file(input_file, output_file):
    """Denoise, compress, loudness-normalize and equalize one audio file.

    Runs ffmpeg with a fixed filter chain (gate, FFT denoise, compressor,
    loudnorm, five equalizer bands, high-pass) and writes the result,
    resampled to 24 kHz, to ``output_file`` (overwritten if present).
    ffmpeg's output is echoed line by line.

    Args:
        input_file: Path of the audio file to process.
        output_file: Path of the file to create.

    Returns:
        True when ffmpeg succeeded and produced a non-empty file, False
        otherwise (the reason is printed).
    """
    # NOTE: the demucs vocal-isolation step is currently disabled here.
    ffmpeg_app = shutil.which('ffmpeg')
    if ffmpeg_app is None:
        print('normalize_audio() error: ffmpeg not found in PATH.')
        return False

    filter_complex = (
        'agate=threshold=-25dB:ratio=1.4:attack=10:release=250,'
        'afftdn=nf=-70,'
        'acompressor=threshold=-20dB:ratio=2:attack=80:release=200:makeup=1dB,'
        'loudnorm=I=-14:TP=-3:LRA=7:linear=true,'
        'equalizer=f=150:t=q:w=2:g=1,'
        'equalizer=f=250:t=q:w=2:g=-3,'
        'equalizer=f=3000:t=q:w=2:g=2,'
        'equalizer=f=5500:t=q:w=2:g=-4,'
        'equalizer=f=9000:t=q:w=2:g=-2,'
        'highpass=f=63[audio]'
    )
    ffmpeg_cmd = [
        ffmpeg_app, '-hide_banner', '-nostats', '-i', input_file,
        '-filter_complex', filter_complex,
        '-map', '[audio]',
        '-ar', '24000',
        '-y', output_file
    ]
    try:
        # The context manager closes the pipe and reaps the process.
        with subprocess.Popen(
            ffmpeg_cmd,
            env={},
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            encoding='utf-8',
            errors='replace'
        ) as process:
            for line in process.stdout:
                print(line, end='')  # Print each line of stdout
            process.wait()
    except OSError as e:
        print(f"Error processing file {input_file}: {e}")
        return False

    if process.returncode != 0:
        print(f'normalize_audio(): process.returncode: {process.returncode}')
        return False
    if not os.path.exists(output_file) or os.path.getsize(output_file) == 0:
        print(f'normalize_audio() error: {output_file} was not created or is empty.')
        return False
    print(f"File denoised and normalized!: {output_file}")
    return True
96
+
97
+ if __name__ == "__main__":
98
+ if len(sys.argv) != 3:
99
+ print(f"Usage: python {os.path.basename(__file__)} <input_file> <output_file>")
100
+ sys.exit(1)
101
+ input_file = os.path.abspath(sys.argv[1])
102
+ output_file = os.path.abspath(sys.argv[2])
103
+ normalize_audio_file(input_file, output_file)
tools/normalize_wav_folder.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import subprocess
4
+ import argparse
5
+ import torch
6
+ import shutil
7
+
8
def demucs_voice(wav_file, output_dir, models_dir):
    """Isolate the vocal stem of ``wav_file`` with the ``demucs`` CLI.

    Args:
        wav_file: Path of the audio file to separate.
        output_dir: Directory passed to demucs' ``--out`` option.
        models_dir: Directory used as the torch hub dir / ``TORCH_HOME``.

    Returns:
        Path of the produced ``vocals.wav``
        (``<output_dir>/htdemucs/<input base name>/vocals.wav``).

    Raises:
        RuntimeError: demucs could not be started, exited with an error, or
            did not produce the expected file.
    """
    try:
        # Set TORCH_HOME for demucs
        torch.hub.set_dir(models_dir)
        os.environ['TORCH_HOME'] = models_dir
        # Prefer a demucs found on PATH; otherwise use the bundled
        # environment's script, which only resolves from the tools folder.
        demucs_app = shutil.which('demucs')
        if not demucs_app:
            demucs_app = os.path.join('..', 'python_env', 'bin', 'demucs')
        # Run demucs subprocess
        cmd = [
            demucs_app,
            "--verbose",
            "--two-stems=vocals",
            "--out", output_dir,
            wav_file
        ]
        print(f"🔄 Running: {' '.join(cmd)}")
        subprocess.run(cmd, check=True)
        # Output folder name is based on input filename
        base_name = os.path.splitext(os.path.basename(wav_file))[0]
        demucs_output_path = os.path.join(output_dir, "htdemucs", base_name, "vocals.wav")
        if not os.path.exists(demucs_output_path):
            raise FileNotFoundError(f"Expected output not found: {demucs_output_path}")
        print(f"✅ Voice track saved to: {demucs_output_path}")
        return demucs_output_path
    except subprocess.CalledProcessError as e:
        raise RuntimeError(
            f"demucs failed with exit code {e.returncode}.\n"
            f"stdout: {getattr(e, 'output', 'N/A')}\n"
            f"stderr: {getattr(e, 'stderr', 'N/A')}"
        ) from e
    except FileNotFoundError as e:
        raise RuntimeError(f"FileNotFoundError: {e}") from e
    except Exception as e:
        raise RuntimeError(f"Unexpected error: {e}") from e
41
+
42
def normalize_audio_folder(folder_path):
    """Normalize every ``.wav`` file below ``folder_path`` in place.

    Each file is run through ffmpeg with a fixed filter chain (gate, FFT
    denoise, compressor, loudnorm, five equalizer bands, high-pass) and
    resampled to 24 kHz. The result is written to a temporary file in the
    same directory and only replaces the original once ffmpeg succeeded and
    produced a non-empty file.

    On the first failure inside a directory the reason is printed, the
    temporary file is removed and the remaining files of that directory are
    skipped; the walk then continues with the next directory.

    Args:
        folder_path: Root directory to walk recursively.

    Returns:
        True when every ``.wav`` file found was processed successfully
        (also when none was found), False otherwise.
    """
    temp_name = 'temp_output.wav'  # Temporary file to avoid overwriting during processing
    ffmpeg_app = shutil.which('ffmpeg')
    filter_complex = (
        'agate=threshold=-25dB:ratio=1.4:attack=10:release=250,'
        'afftdn=nf=-70,'
        'acompressor=threshold=-20dB:ratio=2:attack=80:release=200:makeup=1dB,'
        'loudnorm=I=-14:TP=-3:LRA=7:linear=true,'
        'equalizer=f=150:t=q:w=2:g=1,'
        'equalizer=f=250:t=q:w=2:g=-3,'
        'equalizer=f=3000:t=q:w=2:g=2,'
        'equalizer=f=5500:t=q:w=2:g=-4,'
        'equalizer=f=9000:t=q:w=2:g=-2,'
        'highpass=f=63[audio]'
    )
    all_ok = True
    for root, _dirs, files in os.walk(folder_path):
        for file in files:
            # Skip non-WAV files and any temp file left by an earlier run
            # (it would otherwise be both ffmpeg's input and its output).
            if not file.lower().endswith('.wav') or file == temp_name:
                continue
            if ffmpeg_app is None:
                print('normalize_audio() error: ffmpeg not found in PATH.')
                return False
            input_file = os.path.join(root, file)
            # NOTE: the demucs vocal-isolation step is currently disabled here.
            process_file = os.path.join(root, temp_name)
            ffmpeg_cmd = [
                ffmpeg_app, '-hide_banner', '-nostats', '-i', input_file,
                '-filter_complex', filter_complex,
                '-map', '[audio]',
                '-ar', '24000',
                '-y', process_file
            ]
            error = None
            try:
                # The context manager closes the pipe and reaps the process.
                with subprocess.Popen(
                    ffmpeg_cmd,
                    env={},
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT,
                    text=True,
                    encoding='utf-8',
                    errors='replace'
                ) as process:
                    for line in process.stdout:
                        print(line, end='')  # Print each line of stdout
                    process.wait()
                if process.returncode != 0:
                    error = f'normalize_audio(): process.returncode: {process.returncode}'
                elif not os.path.exists(process_file) or os.path.getsize(process_file) == 0:
                    error = f'normalize_audio() error: {process_file} was not created or is empty.'
                else:
                    os.replace(process_file, input_file)
                    print(f"File processed and replaced: {input_file}")
            except OSError as e:
                error = f"Error processing file {input_file}: {e}"
            if error is not None:
                print(error)
                all_ok = False
                # Do not leave a partial temp file next to the originals.
                if os.path.exists(process_file):
                    os.remove(process_file)
                break
    return all_ok
100
+
101
+ if __name__ == "__main__":
102
+ if len(sys.argv) != 2:
103
+ print(f"Usage: python {os.path.basename(__file__)} <folder_path>")
104
+ sys.exit(1)
105
+ folder_path = os.path.abspath(sys.argv[1])
106
+ normalize_audio_folder(folder_path)
tools/npz_to_wav.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import platform
3
+ import argparse
4
+
5
# Working directories, resolved against the current working directory.
tmp_dir = os.path.abspath(os.path.join('..', 'tmp'))
models_dir = os.path.abspath(os.path.join('..', 'models'))
tts_dir = os.path.join(models_dir, 'tts')

# Process-wide settings; these are applied before the torch / bark imports
# that follow.
os.environ.update({
    'PYTHONUTF8': '1',
    'PYTHONIOENCODING': 'utf-8',
    'COQUI_TOS_AGREED': '1',
    'CALIBRE_NO_NATIVE_FILEDIALOGS': '1',
    'DO_NOT_TRACK': 'true',
    'CALIBRE_TEMP_DIR': tmp_dir,
    'CALIBRE_CACHE_DIRECTORY': tmp_dir,
    'HUGGINGFACE_HUB_CACHE': tts_dir,
    'HF_HOME': tts_dir,
    'HF_DATASETS_CACHE': tts_dir,
    'BARK_CACHE_DIR': tts_dir,
    'TTS_CACHE': tts_dir,
    'TORCH_HOME': tts_dir,
    'TTS_HOME': models_dir,
    'XDG_CACHE_HOME': models_dir,
    'ARGOS_TRANSLATE_PACKAGE_PATH': os.path.join(models_dir, 'argostranslate'),
    'HF_TOKEN_PATH': os.path.join(os.path.expanduser('~'), '.huggingface_token'),
    'TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD': '1',
    'PYTORCH_ENABLE_MPS_FALLBACK': '1',
    'SUNO_OFFLOAD_CPU': 'False',  # BARK option: False needs A GPU
    'SUNO_USE_SMALL_MODELS': 'False',  # BARK option: False needs a GPU with VRAM > 4GB
})
if platform.system() == 'Windows':
    os.environ['ESPEAK_DATA_PATH'] = os.path.expandvars(r"%USERPROFILE%\scoop\apps\espeak-ng\current\eSpeak NG\espeak-ng-data")
33
+
34
+ import torch
35
+ import torchaudio
36
+ import numpy as np
37
+ from pathlib import Path
38
+ from bark import SAMPLE_RATE, preload_models
39
+ from bark.generation import codec_decode
40
+
41
def npz_to_wav(npz_path, output_path):
    """Decode the ``fine_prompt`` array of a Bark ``.npz`` file to a WAV file.

    Args:
        npz_path: Path of the ``.npz`` file; it must contain a
            ``fine_prompt`` entry.
        output_path: Path of the WAV file to write (Bark's ``SAMPLE_RATE``).

    Raises:
        KeyError: the archive has no ``fine_prompt`` entry.
    """
    preload_models()
    # np.load keeps the archive open until it is closed; the context manager
    # releases the file handle once the array has been read.
    with np.load(npz_path) as data:
        fine_prompt = data["fine_prompt"]
    audio_array = codec_decode(fine_prompt)
    # unsqueeze(0) adds a leading dimension before handing the tensor to
    # torchaudio.save.
    audio_tensor = torch.tensor(audio_array).unsqueeze(0)
    torchaudio.save(output_path, audio_tensor, SAMPLE_RATE)
    print(f"✅ Saved: {output_path}")
49
+
50
def process_all_npz_in_folder(folder_path):
    """Convert every ``.npz`` file below ``folder_path`` (recursively).

    Each WAV file is written next to its source, with the same name and a
    ``.wav`` suffix.
    """
    preload_models()
    root_dir = Path(folder_path)
    for source in root_dir.rglob("*.npz"):
        target = source.with_suffix(".wav")
        npz_to_wav(str(source), str(target))
55
+
56
+ if __name__ == "__main__":
57
+ parser = argparse.ArgumentParser(description="Process all NPZ files in a folder.")
58
+ parser.add_argument("--folder_path", type=str, required=True, help="Path to the folder containing NPZ files")
59
+ args = parser.parse_args()
60
+ folder_path = os.path.abspath(args.folder_path)
61
+ process_all_npz_in_folder(folder_path)
tools/trim_silences.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pydub import AudioSegment
3
+ from pydub.silence import detect_nonsilent
4
+ import sys
5
+
6
+
7
def trim_silence(audio_path, silence_thresh=-70, min_silence_len=1000):
    """Load an audio file and cut the silence before and after its content.

    Args:
        audio_path: Path of the audio file to load.
        silence_thresh: Threshold forwarded to ``detect_nonsilent``.
        min_silence_len: Minimum silence length forwarded to
            ``detect_nonsilent``.

    Returns:
        The slice running from the start of the first non-silent range to
        the end of the last one, or ``None`` when no non-silent range was
        detected.
    """
    segment = AudioSegment.from_file(audio_path)
    voiced = detect_nonsilent(
        segment,
        min_silence_len=min_silence_len,
        silence_thresh=silence_thresh,
    )
    if voiced:
        first_start = voiced[0][0]
        last_end = voiced[-1][1]
        return segment[first_start:last_end]
    return None  # No nonsilent segment found
18
+
19
+
20
def process_folder(folder_path):
    """Trim leading/trailing silence of every WAV file below ``folder_path``.

    Files are matched on a case-insensitive ``.wav`` extension and are
    overwritten in place with the trimmed audio. A warning is printed, and
    the file is left untouched, when ``trim_silence`` returns nothing to
    export.
    """
    for root, _, files in os.walk(folder_path):
        for file in files:
            # Case-insensitive so that e.g. "TAKE1.WAV" is handled too.
            if not file.lower().endswith(".wav"):
                continue
            wav_path = os.path.join(root, file)
            print(f"Trimming: {wav_path}")
            trimmed = trim_silence(wav_path)
            if trimmed:
                trimmed.export(wav_path, format="wav")
            else:
                print(f"Warning: only silence found in {wav_path}")
31
+
32
+
33
+ if __name__ == "__main__":
34
+ if len(sys.argv) != 2:
35
+ print("Usage: python script.py <folder_path>")
36
+ sys.exit(1)
37
+
38
+ target_folder = sys.argv[1]
39
+ process_folder(target_folder)
tools/wav_to_npz.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # NOTE: to run this script you must move it to the root of ebook2audiobook
2
+
3
+ import os
4
+
5
# Model directories. They must exist before the assignments below; the
# names were previously read before anything had defined them, which raised
# NameError as soon as the script started.
# NOTE(review): assumes the layout <project root>/models/tts and that the
# script is run from the project root (see the note at the top of the file).
# `from lib import *` further down may rebind these names - confirm that
# both agree.
models_dir = os.path.abspath('models')
tts_dir = os.path.join(models_dir, 'tts')

# Process-wide settings; these are applied before the torch / TTS imports
# that follow.
os.environ['PYTHONUTF8'] = '1'
os.environ['PYTHONIOENCODING'] = 'utf-8'
os.environ['COQUI_TOS_AGREED'] = '1'
os.environ['DO_NOT_TRACK'] = 'true'
os.environ['HUGGINGFACE_HUB_CACHE'] = tts_dir
os.environ['HF_HOME'] = tts_dir
os.environ['TRANSFORMERS_CACHE'] = tts_dir
os.environ['HF_DATASETS_CACHE'] = tts_dir
os.environ['BARK_CACHE_DIR'] = tts_dir
os.environ['TTS_CACHE'] = tts_dir
os.environ['TORCH_HOME'] = tts_dir
os.environ['TTS_HOME'] = models_dir
os.environ['XDG_CACHE_HOME'] = models_dir
os.environ['HF_TOKEN_PATH'] = os.path.join(os.path.expanduser('~'), '.huggingface_token')
os.environ['TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD'] = '1'
os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'
os.environ['SUNO_OFFLOAD_CPU'] = 'False'
os.environ['SUNO_USE_SMALL_MODELS'] = 'False'
24
+
25
+ import argparse
26
+ import hashlib
27
+ import numpy as np
28
+ import regex as re
29
+ import shutil
30
+ import soundfile as sf
31
+ import subprocess
32
+ import tempfile
33
+ import torch
34
+ import torchaudio
35
+ import threading
36
+ import uuid
37
+
38
+ from iso639 import languages
39
+ from huggingface_hub import hf_hub_download
40
+ from pathlib import Path
41
+ from scipy.io import wavfile as wav
42
+ from scipy.signal import find_peaks
43
+ from TTS.tts.configs.bark_config import BarkConfig
44
+ from TTS.tts.models.bark import Bark
45
+
46
+ from lib import *
47
+
48
+ import logging
49
+ logging.basicConfig(level=logging.DEBUG)
50
+
51
+
52
+ torch.hub.set_dir(models_dir)
53
+
54
+ loaded_tts = {}
55
+
56
def load_checkpoint(**kwargs):
    """Create a Bark TTS engine, load its checkpoint and cache it.

    Keyword Args:
        key: Key under which the engine and its config are stored in
            ``loaded_tts``.
        tts_engine: Engine name, used only in the "Loaded!" message.
        device: Target device; ``'cuda'`` calls ``tts.cuda()``, anything
            else is passed to ``tts.to()``.
        checkpoint_dir: Directory holding the Bark checkpoint files.

    Returns:
        The loaded engine on success, ``False`` when the engine could not
        be created or any step raised (the reason is printed).
    """
    try:
        key = kwargs.get('key')
        tts_engine = kwargs.get('tts_engine')
        device = kwargs.get('device')
        checkpoint_dir = kwargs.get('checkpoint_dir')
        config = BarkConfig()
        config.CACHE_DIR = tts_dir
        config.USE_SMALLER_MODELS = os.environ.get('SUNO_USE_SMALL_MODELS', '').lower() == 'true'
        tts = Bark.init_from_config(config)
        tts.load_checkpoint(
            config,
            checkpoint_dir=checkpoint_dir,
            eval=True
        )
        if not tts:
            print('TTS engine could not be created!')
            return False
        if device == 'cuda':
            tts.cuda()
        else:
            tts.to(device)
        loaded_tts[key] = {"engine": tts, "config": config}
        print(f'{tts_engine} Loaded!')
        return tts
    except Exception as e:
        # Report the cause; it used to be built and then silently dropped.
        print(f'_load_checkpoint() error: {e}')
    return False
86
+
87
def wav_to_npz(bark_dir, wav_dir):
    """Run Bark synthesis once for every ``<xx>_*.wav`` voice in ``wav_dir``.

    The Bark model files are fetched with ``hf_hub_download`` and the
    engine is loaded on CPU. For each WAV file whose name starts with a
    two-letter lowercase language code followed by ``_``, the default text
    of that language (``voices_dir/<iso3>/default.txt``) is synthesized
    with the file's base name as ``speaker_id`` and ``bark_dir`` as
    ``voice_dirs``. The generated audio is discarded.
    NOTE(review): the ``.npz`` announced in the log message is presumably
    written by ``tts.synthesize`` as a side effect - confirm.

    Any exception is caught, printed, and ends the run.

    Args:
        bark_dir: Directory passed to Bark as ``voice_dirs``.
        wav_dir: Directory walked recursively for ``.wav`` files.
    """
    try:
        engine_name = TTS_ENGINES['BARK']
        tts_internal_key = "TTS_ENGINES['BARK']-internal"
        internal = models[engine_name]['internal']
        hf_repo = internal['repo']
        hf_sub = internal['sub']
        text_model_path = hf_hub_download(repo_id=hf_repo, filename=f"{hf_sub}{models[engine_name]['internal']['files'][0]}", cache_dir=tts_dir)
        # The coarse and fine models are downloaded too; only the text
        # model's location is needed to derive the checkpoint directory.
        hf_hub_download(repo_id=hf_repo, filename=f"{hf_sub}{models[engine_name]['internal']['files'][1]}", cache_dir=tts_dir)
        hf_hub_download(repo_id=hf_repo, filename=f"{hf_sub}{models[engine_name]['internal']['files'][2]}", cache_dir=tts_dir)
        checkpoint_dir = os.path.dirname(text_model_path)
        tts = load_checkpoint(tts_engine=engine_name, key=tts_internal_key, checkpoint_dir=checkpoint_dir, device='cpu')
        if not tts:
            print('tts bark not loaded')
            return
        engine_defaults = default_engine_settings[engine_name]
        fine_tuned_params = {
            "text_temp": engine_defaults['text_temp'],
            "waveform_temp": engine_defaults['waveform_temp']
        }
        for root, dirs, files in os.walk(wav_dir):
            for file in files:
                if not file.lower().endswith('.wav'):
                    continue
                match = re.match(r"^([a-z]{2})_", file)
                if not match:
                    continue
                speaker = os.path.splitext(file)[0]
                npz_file = f'{speaker}.npz'
                iso1_lang = match.group(1)
                lang_array = languages.get(part1=iso1_lang)
                if not lang_array:
                    continue
                iso3_lang = lang_array.part3
                default_text_file = os.path.join(voices_dir, iso3_lang, 'default.txt')
                default_text = Path(default_text_file).read_text(encoding="utf-8")
                with torch.no_grad():
                    # Fixed seed, set again before every synthesis call.
                    torch.manual_seed(67878789)
                    audio_data = tts.synthesize(
                        default_text,
                        loaded_tts[tts_internal_key]['config'],
                        speaker_id=speaker,
                        voice_dirs=bark_dir,
                        silent=True,
                        **fine_tuned_params
                    )
                del audio_data
                print(f"Saved NPZ file: {npz_file}")
    except Exception as e:
        print(f'wav_to_npz() error: {e}')
132
+
133
+ if __name__ == "__main__":
134
+ parser = argparse.ArgumentParser(description="Convert WAV files to Bark NPZ format.")
135
+ parser.add_argument("--bark_dir", type=str, required=True, help="Path to the Bark asset directory")
136
+ parser.add_argument("--wav_dir", type=str, required=True, help="Path to the output WAV directory")
137
+ args = parser.parse_args()
138
+ bark_dir = os.path.abspath(args.bark_dir)
139
+ wav_dir = os.path.abspath(args.wav_dir)
140
+ wav_to_npz(bark_dir, wav_dir)
141
+
tools/workflow-testing/long_test.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Alright here you go just a long stream of words flowing endlessly without any stops or breaks just moving forward like a river that never ends carrying thoughts and ideas without pause or hesitation like a mind racing through the night trying to grasp onto something solid but finding only more thoughts and more words and more movement like a dream that never quite settles into focus always shifting always changing always just beyond reach like running through a field with the wind in your hair feeling free and untethered by the rules of language or structure or anything at all just existing in the purest form of expression where nothing has to make perfect sense and yet it all still feels like it does in some strange and beautiful way like the way music can make you feel something without ever needing words at all just rhythm and motion and the way it carries you forward into something bigger than yourself into something vast and infinite and endless.
tools/workflow-testing/test1.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ This is test file number 1.
tools/workflow-testing/test2.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ This is test file number 2.
tools/workflow-testing/test3.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ This is test file number 3.
tools/workflow-testing/test4.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ This is test file number 4.
tools/workflow-testing/test5.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ This is test file number 5.
tools/workflow-testing/urd-script_davanagari-test.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ यह टेस्ट फ़ाइल नंबर १ है।