|
> Training Environment: |
|
| > Backend: Torch |
|
| > Mixed precision: False |
|
| > Precision: float32 |
|
| > Current device: 0 |
|
| > Num. of GPUs: 1 |
|
| > Num. of CPUs: 64 |
|
| > Num. of Torch Threads: 1 |
|
| > Torch seed: 1 |
|
| > Torch CUDNN: True |
|
| > Torch CUDNN deterministic: False |
|
| > Torch CUDNN benchmark: False |
|
| > Torch TF32 MatMul: False |
|
> Start Tensorboard: tensorboard --logdir=/workspace/run/training/GPT_XTTS_v2.0_LJSpeech_FT-April-23-2024_12+18PM-0000000 |
|
|
|
> Model has 518442047 parameters |
|
|
|
[4m[1m > EPOCH: 0/1000[0m |
|
--> /workspace/run/training/GPT_XTTS_v2.0_LJSpeech_FT-April-23-2024_12+18PM-0000000 |
|
|
|
[1m > TRAINING (2024-04-23 12:18:40) [0m |
|
|
|
[1m --> TIME: 2024-04-23 12:18:42 -- STEP: 0/1695 -- GLOBAL_STEP: 0[0m |
|
| > loss_text_ce: 0.042592838406562805 (0.042592838406562805) |
|
| > loss_mel_ce: 3.744250535964966 (3.744250535964966) |
|
| > loss: 0.04508147016167641 (0.04508147016167641) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.3181 (0.3181343078613281) |
|
| > loader_time: 1.1535 (1.153491735458374) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:18:50 -- STEP: 50/1695 -- GLOBAL_STEP: 50[0m |
|
| > loss_text_ce: 0.043245986104011536 (0.045777649357914924) |
|
| > loss_mel_ce: 4.0826735496521 (3.678379626274109) |
|
| > loss: 0.04911808669567108 (0.044335206523537646) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.1173 (0.10748531341552735) |
|
| > loader_time: 0.0038 (0.012436685562133789) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:19:00 -- STEP: 100/1695 -- GLOBAL_STEP: 100[0m |
|
| > loss_text_ce: 0.04654935747385025 (0.04617325332015751) |
|
| > loss_mel_ce: 3.7310783863067627 (3.6352836871147156) |
|
| > loss: 0.044971760362386703 (0.04382686924189331) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.1229 (0.1165578818321228) |
|
| > loader_time: 0.0044 (0.010995228290557862) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:19:10 -- STEP: 150/1695 -- GLOBAL_STEP: 150[0m |
|
| > loss_text_ce: 0.04864665865898132 (0.04633487790822981) |
|
| > loss_mel_ce: 3.695878267288208 (3.5984654172261554) |
|
| > loss: 0.04457768052816391 (0.04339048052827519) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.0968 (0.12075453917185465) |
|
| > loader_time: 0.0068 (0.009986537297566734) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:19:21 -- STEP: 200/1695 -- GLOBAL_STEP: 200[0m |
|
| > loss_text_ce: 0.04507960379123688 (0.04615468136966227) |
|
| > loss_mel_ce: 3.4362077713012695 (3.5497735607624055) |
|
| > loss: 0.041443899273872375 (0.042808670215308674) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.1431 (0.12541004419326782) |
|
| > loader_time: 0.004 (0.009364948272705077) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:19:33 -- STEP: 250/1695 -- GLOBAL_STEP: 250[0m |
|
| > loss_text_ce: 0.044978540390729904 (0.04600780452787875) |
|
| > loss_mel_ce: 3.3835601806640625 (3.5098479528427124) |
|
| > loss: 0.040815938264131546 (0.04233161683380605) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.1506 (0.12965419387817378) |
|
| > loader_time: 0.0043 (0.008812045097351074) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:19:45 -- STEP: 300/1695 -- GLOBAL_STEP: 300[0m |
|
| > loss_text_ce: 0.04761254042387009 (0.046099709086120134) |
|
| > loss_mel_ce: 3.859790325164795 (3.4856272101402284) |
|
| > loss: 0.04651670157909393 (0.04204436879605055) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.107 (0.13299476464589427) |
|
| > loader_time: 0.0045 (0.008340648015340164) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:19:57 -- STEP: 350/1695 -- GLOBAL_STEP: 350[0m |
|
| > loss_text_ce: 0.041058849543333054 (0.04608927173273904) |
|
| > loss_mel_ce: 3.2493679523468018 (3.4584123958860125) |
|
| > loss: 0.039171747863292694 (0.04172025864677771) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.1586 (0.1357990046909877) |
|
| > loader_time: 0.0092 (0.007999198096139085) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:20:09 -- STEP: 400/1695 -- GLOBAL_STEP: 400[0m |
|
| > loss_text_ce: 0.0439525842666626 (0.04606584513559937) |
|
| > loss_mel_ce: 3.5535271167755127 (3.4283770048618316) |
|
| > loss: 0.04282714053988457 (0.04136241558939219) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.1314 (0.13862687826156628) |
|
| > loader_time: 0.0039 (0.007809545397758481) |
|
|
|
|
|
> CHECKPOINT : /workspace/run/training/GPT_XTTS_v2.0_LJSpeech_FT-April-23-2024_12+18PM-0000000/checkpoint_400.pth |
|
|
|
[1m --> TIME: 2024-04-23 12:20:24 -- STEP: 450/1695 -- GLOBAL_STEP: 450[0m |
|
| > loss_text_ce: 0.05098263919353485 (0.04611284121870995) |
|
| > loss_mel_ce: 2.9446003437042236 (3.401099606090122) |
|
| > loss: 0.03566170483827591 (0.04103824413898918) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.1581 (0.1393417421976725) |
|
| > loader_time: 0.0041 (0.0075671084721883105) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:20:36 -- STEP: 500/1695 -- GLOBAL_STEP: 500[0m |
|
| > loss_text_ce: 0.03936528041958809 (0.04605886636674404) |
|
| > loss_mel_ce: 3.534381628036499 (3.3785955691337586) |
|
| > loss: 0.04254460707306862 (0.04076969639584422) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.1362 (0.1412919845581054) |
|
| > loader_time: 0.0044 (0.007305326461791989) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:20:49 -- STEP: 550/1695 -- GLOBAL_STEP: 550[0m |
|
| > loss_text_ce: 0.043622393161058426 (0.04607608911666003) |
|
| > loss_mel_ce: 3.36867618560791 (3.351200197826734) |
|
| > loss: 0.04062260314822197 (0.04044376604597676) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.1491 (0.1432263898849487) |
|
| > loader_time: 0.0044 (0.007147459983825681) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:21:01 -- STEP: 600/1695 -- GLOBAL_STEP: 600[0m |
|
| > loss_text_ce: 0.04180557280778885 (0.04603437863911191) |
|
| > loss_mel_ce: 3.1069161891937256 (3.328243460655214) |
|
| > loss: 0.0374847836792469 (0.04016997500322759) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.1583 (0.14447109142939243) |
|
| > loader_time: 0.0047 (0.006965583960215248) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:21:14 -- STEP: 650/1695 -- GLOBAL_STEP: 650[0m |
|
| > loss_text_ce: 0.04896671324968338 (0.04602042846381666) |
|
| > loss_mel_ce: 3.0476784706115723 (3.3038424359835123) |
|
| > loss: 0.03686482459306717 (0.03987932055042337) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.1219 (0.14626641933734613) |
|
| > loader_time: 0.0047 (0.006803958232586197) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:21:27 -- STEP: 700/1695 -- GLOBAL_STEP: 700[0m |
|
| > loss_text_ce: 0.04512707144021988 (0.046030817106366195) |
|
| > loss_mel_ce: 3.066598892211914 (3.2816116438593195) |
|
| > loss: 0.037044357508420944 (0.03961479195526668) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.1502 (0.14775025640215206) |
|
| > loader_time: 0.0044 (0.006717268739427837) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:21:40 -- STEP: 750/1695 -- GLOBAL_STEP: 750[0m |
|
| > loss_text_ce: 0.04244884476065636 (0.04599520656466488) |
|
| > loss_mel_ce: 2.8379921913146973 (3.264411670366924) |
|
| > loss: 0.034290965646505356 (0.03940960643688838) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.218 (0.14881795597076428) |
|
| > loader_time: 0.0049 (0.006605740865071612) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:21:53 -- STEP: 800/1695 -- GLOBAL_STEP: 800[0m |
|
| > loss_text_ce: 0.04257930815219879 (0.04597263523377482) |
|
| > loss_mel_ce: 2.8074073791503906 (3.2470336309075365) |
|
| > loss: 0.033928416669368744 (0.03920245631132278) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.151 (0.14975822657346743) |
|
| > loader_time: 0.0045 (0.006505406498908994) |
|
|
|
|
|
> CHECKPOINT : /workspace/run/training/GPT_XTTS_v2.0_LJSpeech_FT-April-23-2024_12+18PM-0000000/checkpoint_800.pth |
|
|
|
[1m --> TIME: 2024-04-23 12:22:08 -- STEP: 850/1695 -- GLOBAL_STEP: 850[0m |
|
| > loss_text_ce: 0.046279508620500565 (0.04595626743400801) |
|
| > loss_mel_ce: 2.9114205837249756 (3.232562539998224) |
|
| > loss: 0.03521071374416351 (0.0390299865691101) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.1686 (0.14989260813769187) |
|
| > loader_time: 0.0041 (0.006382525107439824) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:22:21 -- STEP: 900/1695 -- GLOBAL_STEP: 900[0m |
|
| > loss_text_ce: 0.04815426096320152 (0.045925861448049575) |
|
| > loss_mel_ce: 2.881121873855591 (3.21540697336197) |
|
| > loss: 0.03487233817577362 (0.03882539166758459) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.1067 (0.15040262672636256) |
|
| > loader_time: 0.0163 (0.006299734380510116) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:22:33 -- STEP: 950/1695 -- GLOBAL_STEP: 950[0m |
|
| > loss_text_ce: 0.046194590628147125 (0.045895876723684795) |
|
| > loss_mel_ce: 2.452665328979492 (3.2002050801327364) |
|
| > loss: 0.029748331755399704 (0.03864405976706431) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.1483 (0.1510076773794075) |
|
| > loader_time: 0.0041 (0.006192966511375024) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:22:46 -- STEP: 1000/1695 -- GLOBAL_STEP: 1000[0m |
|
| > loss_text_ce: 0.04607674479484558 (0.04585176565870645) |
|
| > loss_mel_ce: 2.9387059211730957 (3.187430265903474) |
|
| > loss: 0.035533126443624496 (0.03849145351536573) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.1648 (0.15175995016098034) |
|
| > loader_time: 0.0044 (0.006122385978698729) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:22:59 -- STEP: 1050/1695 -- GLOBAL_STEP: 1050[0m |
|
| > loss_text_ce: 0.0466134138405323 (0.045852795899623947) |
|
| > loss_mel_ce: 2.9738194942474365 (3.172598667598907) |
|
| > loss: 0.03595753759145737 (0.03831489915826492) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.1396 (0.15251214708600735) |
|
| > loader_time: 0.0049 (0.00607424667903355) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:23:12 -- STEP: 1100/1695 -- GLOBAL_STEP: 1100[0m |
|
| > loss_text_ce: 0.04659873992204666 (0.04585135899822824) |
|
| > loss_mel_ce: 2.4221293926239014 (3.1576039728251373) |
|
| > loss: 0.029389619827270508 (0.038136373775249206) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.186 (0.15299982179294946) |
|
| > loader_time: 0.0048 (0.006017334894700482) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:23:25 -- STEP: 1150/1695 -- GLOBAL_STEP: 1150[0m |
|
| > loss_text_ce: 0.043769825249910355 (0.045824583464342636) |
|
| > loss_mel_ce: 2.859921455383301 (3.1463320172351352) |
|
| > loss: 0.034567754715681076 (0.03800186507079915) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.1919 (0.15354946157206664) |
|
| > loader_time: 0.0045 (0.005964664376300311) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:23:38 -- STEP: 1200/1695 -- GLOBAL_STEP: 1200[0m |
|
| > loss_text_ce: 0.04848972707986832 (0.0457837945688516) |
|
| > loss_mel_ce: 2.9194998741149902 (3.13719070851803) |
|
| > loss: 0.035333212465047836 (0.037892554394590376) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.2642 (0.15420349061489103) |
|
| > loader_time: 0.0046 (0.005914180874824522) |
|
|
|
|
|
> CHECKPOINT : /workspace/run/training/GPT_XTTS_v2.0_LJSpeech_FT-April-23-2024_12+18PM-0000000/checkpoint_1200.pth |
|
|
|
[1m --> TIME: 2024-04-23 12:23:54 -- STEP: 1250/1695 -- GLOBAL_STEP: 1250[0m |
|
| > loss_text_ce: 0.044037092477083206 (0.04573154278397562) |
|
| > loss_mel_ce: 2.6508209705352783 (3.125970713424685) |
|
| > loss: 0.03208164498209953 (0.03775836098492144) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.1472 (0.15428158359527583) |
|
| > loader_time: 0.0044 (0.0058847253799438485) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:24:07 -- STEP: 1300/1695 -- GLOBAL_STEP: 1300[0m |
|
| > loss_text_ce: 0.04510482773184776 (0.04571826306959757) |
|
| > loss_mel_ce: 3.4077906608581543 (3.1158635647480315) |
|
| > loss: 0.04110589995980263 (0.03763787967797653) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.1634 (0.1545918438984798) |
|
| > loader_time: 0.0047 (0.00584103455910316) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:24:19 -- STEP: 1350/1695 -- GLOBAL_STEP: 1350[0m |
|
| > loss_text_ce: 0.0476665161550045 (0.0456638012136574) |
|
| > loss_mel_ce: 2.8584489822387695 (3.1040570794211506) |
|
| > loss: 0.03459661453962326 (0.03749667791994631) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.1331 (0.15490423820636887) |
|
| > loader_time: 0.0044 (0.0058183479309082044) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:24:32 -- STEP: 1400/1695 -- GLOBAL_STEP: 1400[0m |
|
| > loss_text_ce: 0.04452496021986008 (0.04561551003051656) |
|
| > loss_mel_ce: 3.234622001647949 (3.0916232017108385) |
|
| > loss: 0.03903746232390404 (0.03734808066327656) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.1215 (0.15542454413005272) |
|
| > loader_time: 0.0045 (0.005781678301947453) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:24:45 -- STEP: 1450/1695 -- GLOBAL_STEP: 1450[0m |
|
| > loss_text_ce: 0.042180027812719345 (0.04556434659608479) |
|
| > loss_mel_ce: 2.699432134628296 (3.080473143150068) |
|
| > loss: 0.03263824060559273 (0.037214732784135576) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.167 (0.15565427286871544) |
|
| > loader_time: 0.0044 (0.005737697502662392) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:24:57 -- STEP: 1500/1695 -- GLOBAL_STEP: 1500[0m |
|
| > loss_text_ce: 0.04820888489484787 (0.04552951066195965) |
|
| > loss_mel_ce: 2.6011390686035156 (3.0704109377861033) |
|
| > loss: 0.031539857387542725 (0.037094529901941585) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.1594 (0.15576313861211125) |
|
| > loader_time: 0.0044 (0.005703491051991777) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:25:11 -- STEP: 1550/1695 -- GLOBAL_STEP: 1550[0m |
|
| > loss_text_ce: 0.045843496918678284 (0.04549794487655163) |
|
| > loss_mel_ce: 2.6503143310546875 (3.059678205059422) |
|
| > loss: 0.032097119837999344 (0.036966383488428164) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.1592 (0.15621070800289008) |
|
| > loader_time: 0.0046 (0.005665828643306605) |
|
|
|
|
|
[1m --> TIME: 2024-04-23 12:25:24 -- STEP: 1600/1695 -- GLOBAL_STEP: 1600[0m |
|
| > loss_text_ce: 0.04320811480283737 (0.045465721643995496) |
|
| > loss_mel_ce: 2.5281929969787598 (3.049819415509702) |
|
| > loss: 0.0306119192391634 (0.03684863331844095) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.1849 (0.15655839025974236) |
|
| > loader_time: 0.0047 (0.005632958114147183) |
|
|
|
|
|
> CHECKPOINT : /workspace/run/training/GPT_XTTS_v2.0_LJSpeech_FT-April-23-2024_12+18PM-0000000/checkpoint_1600.pth |
|
|
|
[1m --> TIME: 2024-04-23 12:25:39 -- STEP: 1650/1695 -- GLOBAL_STEP: 1650[0m |
|
| > loss_text_ce: 0.04909869655966759 (0.04544659794957349) |
|
| > loss_mel_ce: 2.6178195476531982 (3.039313706195717) |
|
| > loss: 0.03174902871251106 (0.036723337676940526) |
|
| > current_lr: 4e-06 |
|
| > step_time: 0.1669 (0.1564438345938015) |
|
| > loader_time: 0.0042 (0.005605537674643773) |
|
|
|
! Run is kept in /workspace/run/training/GPT_XTTS_v2.0_LJSpeech_FT-April-23-2024_12+18PM-0000000 |
|
|