import os
import numpy as np
from resemblyzer import VoiceEncoder, preprocess_wav
from pathlib import Path
from pydub import AudioSegment
from pydub.effects import normalize
import noisereduce as nr
import librosa
import soundfile as sf

def preprocess_audio(input_path, output_path):
    # Normalize volume
    sound = AudioSegment.from_file(input_path)
    normalized_sound = normalize(sound)
    normalized_path = "temp_normalized.wav"
    normalized_sound.export(normalized_path, format="wav")

    # Noise reduction
    y, sr = librosa.load(normalized_path, sr=None)
    reduced_noise = nr.reduce_noise(y=y, sr=sr)
    sf.write(output_path, reduced_noise, sr)

    # Cleanup temp file
    os.remove(normalized_path)


def compare_voices(ref_file, test_file, threshold=0.80):
    encoder = VoiceEncoder(device="cpu")

    # Preprocess
    clean_ref = "clean_reference.wav"
    clean_test = "clean_test.wav"
    preprocess_audio(ref_file, clean_ref)
    preprocess_audio(test_file, clean_test)

    # Embeddings
    ref_wav = preprocess_wav(Path(clean_ref))
    test_wav = preprocess_wav(Path(clean_test))
    ref_emb = encoder.embed_utterance(ref_wav)
    test_emb = encoder.embed_utterance(test_wav)

    # Similarity
    similarity = np.inner(ref_emb, test_emb)

    # Cleanup processed files
    os.remove(clean_ref)
    os.remove(clean_test)

    return float(similarity), bool(similarity >= threshold)

