diff --git "a/run.ipynb" "b/run.ipynb"
--- "a/run.ipynb"
+++ "b/run.ipynb"
@@ -71,7 +71,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "./reference_audio/vn_2.wav\n"
+ "./reference_audio/vn_3.wav\n"
]
},
{
@@ -79,7 +79,7 @@
"text/html": [
"\n",
" \n",
" "
@@ -103,7 +103,7 @@
"text/html": [
"\n",
" \n",
" "
@@ -119,18 +119,18 @@
"source": [
"speakers = {\n",
" \"id_1\": {\n",
- " \"path\": \"./reference_audio/vn_2.wav\", #Ref audio path\n",
+ " \"path\": \"./reference_audio/vn_3.wav\", #Ref audio path\n",
" \"lang\": \"vi\", #Default language\n",
- " \"speed\": 1.2, #Speaking speed\n",
+ " \"speed\": 1.1, #Speaking speed\n",
" },\n",
" \"id_2\": {\n",
" \"path\": \"./reference_audio/vn_4.wav\",\n",
" \"lang\": \"vi\",\n",
- " \"speed\": 1.2,\n",
+ " \"speed\": 1.1,\n",
" },\n",
"}\n",
"for id in speakers:\n",
- " max_samples = 24000*30 #max 30 seconds ref audio\n",
+ " max_samples = 24000*20 #max 20 seconds ref audio\n",
" print(speakers[id]['path'])\n",
" wave, sr = librosa.load(speakers[id]['path'], sr=24000)\n",
" audio, index = librosa.effects.trim(wave, top_db=30)\n",
@@ -192,7 +192,7 @@
"avg_style = True #BOOL Split the ref audio and calculate the avg styles.\n",
"stabilize = True #BOOL Stabilize speaking speed.\n",
"denoise = 0.6 #FLOAT Adjust the strength of the denoiser. Value range is [0, 1]\n",
- "n_merge = 16 #INT Avoid short sentences by merging when a sentence has fewer than n words"
+ "n_merge = 20 #INT Avoid short sentences by merging when a sentence has fewer than n words"
]
},
{
@@ -205,7 +205,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Computing the style for: ./reference_audio/vn_2.wav\n",
+ "Computing the style for: ./reference_audio/vn_3.wav\n",
"Computing the style for: ./reference_audio/vn_4.wav\n",
"Generating Audio...\n",
"Synthesized:\n"
@@ -216,7 +216,7 @@
"text/html": [
"\n",
" \n",
" "