diff --git "a/run.ipynb" "b/run.ipynb" --- "a/run.ipynb" +++ "b/run.ipynb" @@ -71,7 +71,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "./reference_audio/vn_2.wav\n" + "./reference_audio/vn_3.wav\n" ] }, { @@ -79,7 +79,7 @@ "text/html": [ "\n", " \n", " " @@ -103,7 +103,7 @@ "text/html": [ "\n", " \n", " " @@ -119,18 +119,18 @@ "source": [ "speakers = {\n", " \"id_1\": {\n", - " \"path\": \"./reference_audio/vn_2.wav\", #Ref audio path\n", + " \"path\": \"./reference_audio/vn_3.wav\", #Ref audio path\n", " \"lang\": \"vi\", #Default language\n", - " \"speed\": 1.2, #Speaking speed\n", + " \"speed\": 1.1, #Speaking speed\n", " },\n", " \"id_2\": {\n", " \"path\": \"./reference_audio/vn_4.wav\",\n", " \"lang\": \"vi\",\n", - " \"speed\": 1.2,\n", + " \"speed\": 1.1,\n", " },\n", "}\n", "for id in speakers:\n", - " max_samples = 24000*30 #max 30 seconds ref audio\n", + " max_samples = 24000*20 #max 20 seconds ref audio\n", " print(speakers[id]['path'])\n", " wave, sr = librosa.load(speakers[id]['path'], sr=24000)\n", " audio, index = librosa.effects.trim(wave, top_db=30)\n", @@ -192,7 +192,7 @@ "avg_style = True #BOOL Split the ref audio and calculate the avg styles.\n", "stabilize = True #BOOL Stabilize speaking speed.\n", "denoise = 0.6 #FLOAT Adjust the strength of the denoiser. Value range is [0, 1]\n", - "n_merge = 16 #INT Avoid short sentences by merging when a sentence has fewer than n words" + "n_merge = 20 #INT Avoid short sentences by merging when a sentence has fewer than n words" ] }, { @@ -205,7 +205,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Computing the style for: ./reference_audio/vn_2.wav\n", + "Computing the style for: ./reference_audio/vn_3.wav\n", "Computing the style for: ./reference_audio/vn_4.wav\n", "Generating Audio...\n", "Synthesized:\n" @@ -216,7 +216,7 @@ "text/html": [ "\n", " \n", " "