Add GPU support when available; use proportional fading.
processAudio.py (+11 -8)
@@ -17,7 +17,7 @@ from src.utils import bold
 logger = logging.getLogger(__name__)
 
 SEGMENT_DURATION_SEC = 5
-
+SEGMENT_OVERLAP_RATIO = 0.25
 SERIALIZE_KEY_STATE = 'state'
 
 def _load_model(checkpoint_file="models/FM_Radio_SR.th",model_name="aero"):
@@ -30,16 +30,18 @@ def _load_model(checkpoint_file="models/FM_Radio_SR.th",model_name="aero"):
 
     return model
 
-def crossfade_and_blend(out_clip, in_clip):
-    fade_out = torchaudio.transforms.Fade(0,…
-    fade_in = torchaudio.transforms.Fade(…
+def crossfade_and_blend(out_clip, in_clip, segment_overlap_samples):
+    fade_out = torchaudio.transforms.Fade(0,segment_overlap_samples)
+    fade_in = torchaudio.transforms.Fade(segment_overlap_samples, 0)
     return fade_out(out_clip) + fade_in(in_clip)
 
 def upscaleAudio(lr_sig, checkpoint_file: str, sr=44100, hr_sr=44100, model_name="aero", progress=Progress()):
 
     model = _load_model(checkpoint_file,model_name)
     device = torch.device('cpu')
-
+    if torch.cuda.is_available():
+        device = torch.device('cuda')
+        model.cuda()
 
     logger.info(f'lr wav shape: {lr_sig.shape}')
 
@@ -55,7 +57,8 @@ def upscaleAudio(lr_sig, checkpoint_file: str, sr=44100, hr_sr=44100, model_name
 
     pr_chunks = []
 
-    lr_segment_overlap_samples = int(…
+    lr_segment_overlap_samples = int(sr*SEGMENT_OVERLAP_RATIO)
+    hr_segment_overlap_samples = int(hr_sr*SEGMENT_OVERLAP_RATIO)
 
     model.eval()
     pred_start = time.time()
@@ -68,8 +71,8 @@ def upscaleAudio(lr_sig, checkpoint_file: str, sr=44100, hr_sr=44100, model_name
         if previous_chunk is not None:
             combined_chunk = torch.cat((previous_chunk[...,-lr_segment_overlap_samples:], lr_chunk), 1)
             pr_combined_chunk = model(combined_chunk.unsqueeze(0).to(device)).squeeze(0)
-            pr_chunk = pr_combined_chunk[...,…
-            pr_chunks[-1][...,-…
+            pr_chunk = pr_combined_chunk[...,hr_segment_overlap_samples:]
+            pr_chunks[-1][...,-hr_segment_overlap_samples:] = crossfade_and_blend(pr_chunks[-1][...,-hr_segment_overlap_samples:], pr_combined_chunk.cpu()[...,:hr_segment_overlap_samples], hr_segment_overlap_samples )
         else:
             pr_chunk = model(lr_chunk.unsqueeze(0).to(device)).squeeze(0)
         logger.info(f'lr chunk {i} shape: {lr_chunk.shape}')
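The rewritten crossfade_and_blend takes the overlap length as a parameter instead of relying on a module-level constant. With torchaudio's default linear fade shape, the fade-out and fade-in ramps are complementary (they sum to 1 at every sample), so a signal that agrees on both sides of the seam passes through the blend unchanged. A minimal standalone sketch; the check values (0.25 s at 44.1 kHz) are illustrative, not from the commit:

```python
# Standalone sketch of the patched crossfade_and_blend.
import torch
import torchaudio

def crossfade_and_blend(out_clip, in_clip, segment_overlap_samples):
    # Fade(fade_in_len, fade_out_len): ramp the outgoing clip down to 0
    # and the incoming clip up from 0, then sum the two.
    fade_out = torchaudio.transforms.Fade(0, segment_overlap_samples)
    fade_in = torchaudio.transforms.Fade(segment_overlap_samples, 0)
    return fade_out(out_clip) + fade_in(in_clip)

overlap = int(44100 * 0.25)             # 0.25 s of overlap at 44.1 kHz
tail = torch.ones(1, overlap)           # end of the previous predicted chunk
head = torch.ones(1, overlap)           # start of the next predicted chunk
blended = crossfade_and_blend(tail, head, overlap)
# Linear ramps sum to 1, so a constant signal is preserved across the seam.
assert torch.allclose(blended, torch.ones(1, overlap))
```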
|
|
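The device handling follows the usual eager-PyTorch pattern: pick the device once, move the model's weights, then ship each input chunk to the device and pull each prediction back to host memory. A small sketch of that round trip; the helper name pick_device is mine, not an identifier from processAudio.py:

```python
# Hypothetical helper mirroring the patch's device selection.
import torch

def pick_device(model):
    device = torch.device('cpu')
    if torch.cuda.is_available():  # fall back to CPU when no GPU is present
        device = torch.device('cuda')
        model.cuda()               # move the weights in place, as the patch does
    return device

# Per-chunk round trip, as in upscaleAudio: inputs go to the device for
# inference and predictions come back to the CPU for blending.
model = torch.nn.Identity()
device = pick_device(model)
chunk = torch.randn(1, 44100)
pred = model(chunk.unsqueeze(0).to(device)).squeeze(0).cpu()
```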
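Taken together, the changes make the overlap proportional to the sample rate: SEGMENT_OVERLAP_RATIO seconds' worth of input samples (sr*SEGMENT_OVERLAP_RATIO) is re-fed to the model with each chunk, and the matching span of output samples (hr_sr*SEGMENT_OVERLAP_RATIO) is cropped from the new prediction and crossfaded into the tail of the previous one, so the seams stay aligned even when sr and hr_sr differ. The following is a runnable toy of that bookkeeping, not the commit's code: the range-based chunking, the upscale_chunked name, and the identity stand-in for the AERO model are all assumptions made for illustration.

```python
# Toy version of the chunked-inference stitching in upscaleAudio.
import torch
import torchaudio

SEGMENT_DURATION_SEC = 5
SEGMENT_OVERLAP_RATIO = 0.25

def crossfade_and_blend(out_clip, in_clip, n):
    return (torchaudio.transforms.Fade(0, n)(out_clip)
            + torchaudio.transforms.Fade(n, 0)(in_clip))

def upscale_chunked(lr_sig, model, sr=44100, hr_sr=44100):
    seg = SEGMENT_DURATION_SEC * sr
    lr_ov = int(sr * SEGMENT_OVERLAP_RATIO)     # overlap in input samples
    hr_ov = int(hr_sr * SEGMENT_OVERLAP_RATIO)  # the same span in output samples
    pr_chunks, previous_chunk = [], None
    for start in range(0, lr_sig.shape[-1], seg):
        lr_chunk = lr_sig[..., start:start + seg]
        if previous_chunk is not None:
            # Re-feed the tail of the previous input so the model sees context,
            combined = torch.cat((previous_chunk[..., -lr_ov:], lr_chunk), 1)
            pr_combined = model(combined.unsqueeze(0)).squeeze(0)
            # drop the regenerated overlap from the new prediction,
            pr_chunk = pr_combined[..., hr_ov:]
            # and crossfade it into the tail of the previous prediction.
            pr_chunks[-1][..., -hr_ov:] = crossfade_and_blend(
                pr_chunks[-1][..., -hr_ov:], pr_combined[..., :hr_ov], hr_ov)
        else:
            pr_chunk = model(lr_chunk.unsqueeze(0)).squeeze(0)
        pr_chunks.append(pr_chunk)
        previous_chunk = lr_chunk
    return torch.cat(pr_chunks, dim=-1)

sig = torch.randn(1, 44100 * 12)        # 12 s mono test signal
out = upscale_chunked(sig, lambda x: x) # identity model, so sr == hr_sr here
assert out.shape == sig.shape           # each input sample is emitted exactly once
```

Because every overlapped prediction is cropped before it is appended, each output sample is produced exactly once; the crossfade only smooths the join, which is why the identity run above preserves the signal length.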