import functools

import torch
from transformers import AutoImageProcessor, AutoModelForImageClassification

from process_audio import create_mel_spectrograms

# Hugging Face checkpoint fine-tuned for audio-deepfake classification on
# mel-spectrogram images.
_MODEL_NAME = "kubinooo/convnext-tiny-224-audio-deepfake-classification"


@functools.lru_cache(maxsize=1)
def _load_model_and_processor():
    """Load (once) and cache the image processor and classification model.

    Loading from the hub is expensive; caching avoids re-instantiating the
    model for every spectrogram passed to ``predict_image``.
    """
    processor = AutoImageProcessor.from_pretrained(_MODEL_NAME)
    model = AutoModelForImageClassification.from_pretrained(_MODEL_NAME)
    # Fix: the original used "gpu", which is not a valid torch device string
    # ("cuda" is) and made model.to(device) raise on CUDA machines.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    model.eval()
    return processor, model, device


def predict_image(image):
    """Classify a single mel-spectrogram PIL image as real or fake audio.

    Args:
        image: a PIL image (any mode; converted to RGB if needed).

    Returns:
        dict with keys "real" and "fake"; the predicted class gets 1.0 and
        the other 0.0 (hard one-hot confidence, as in the original code).
    """
    processor, model, device = _load_model_and_processor()

    if image.mode != 'RGB':
        image = image.convert('RGB')
    # Fix: PIL's resize() returns a new image — the original discarded the
    # result, so no resize actually happened. Assign it back. (The processor
    # also resizes to the model's expected 224x224 input, so this is a
    # belt-and-braces normalization.)
    image = image.resize((224, 224))

    pixel_values = processor(image, return_tensors="pt").pixel_values
    # Fix: move the input tensor to the model's device; the original left it
    # on CPU, which would fail against a CUDA-resident model.
    pixel_values = pixel_values.to(device)

    with torch.no_grad():
        outputs = model(pixel_values)

    logits = outputs.logits
    predicted_class_idx = logits.argmax(-1).item()
    prediction_label = model.config.id2label[predicted_class_idx]

    if prediction_label.lower() == "real":
        return {"real": 1.0, "fake": 0.0}  # 100% confidence for real
    else:  # prediction_label == "fake"
        return {"real": 0.0, "fake": 1.0}


def prediction(file_path):
    """Classify an audio file by averaging per-spectrogram predictions.

    The audio is split into mel-spectrogram images (2-second windows, no
    overlap — see create_mel_spectrograms), each image is classified, and
    the hard votes are averaged into fractional confidences.

    Args:
        file_path: path to the audio file to analyze.

    Returns:
        dict with "real" and "fake" average scores rounded to 2 decimals;
        both 0.0 when no spectrograms could be produced.
    """
    pil_images = create_mel_spectrograms(file_path, 2, 0)
    total = len(pil_images)
    if total == 0:
        # No analyzable audio — return neutral zeros rather than divide by 0.
        return {"real": 0.0, "fake": 0.0}

    total_real = 0.0
    total_fake = 0.0
    for image in pil_images:
        pred = predict_image(image)
        total_real += pred["real"]
        total_fake += pred["fake"]

    return {
        "real": round(total_real / total, 2),
        "fake": round(total_fake / total, 2)
    }