import numpy as np
import librosa
import pyloudnorm as pyln
from scipy.signal import butter, sosfilt
# ---------- Config ----------
# Module-level tuning constants shared by the loading, measurement and
# scoring helpers in this file.
TARGET_LUFS = -16.0  # target integrated loudness (LUFS); typical for spoken-word web audio (mono)
OK_RANGE = 2.0       # +/- LU tolerance around TARGET_LUFS that still counts as "near target"
FS_MIN = 16000       # minimum acceptable sample rate (Hz) for HF clarity; lower bound on the load rate
HPF = 100            # high-pass cutoff (Hz) applied on load to ignore rumble
EPS = 1e-9           # small constant keeping divisions, square roots and logs away from zero

def load_mono(path, target_sr=32000):
    """Load an audio file as mono and strip low-frequency rumble.

    Args:
        path: Audio file location, handed straight to ``librosa.load``.
        target_sr: Sample rate requested from ``librosa.load``.

    Returns:
        A ``(samples, sample_rate)`` tuple: the high-pass filtered signal
        and the sample rate reported by the loader.
    """
    samples, rate = librosa.load(path, sr=target_sr, mono=True)

    # butter() expects the cutoff as a fraction of the Nyquist frequency
    # (0..1), so normalise the HPF cutoff before designing the filter.
    nyquist = rate / 2.0
    rumble_filter = butter(N=2, Wn=HPF / nyquist, btype='highpass', output='sos')

    # 2nd-order Butterworth high-pass, applied as second-order sections.
    return sosfilt(rumble_filter, samples), rate

def integrated_lufs(y, sr):
    """Return the integrated loudness of ``y`` measured at sample rate ``sr``.

    A fresh pyloudnorm meter (BS.1770) is built for every call.
    """
    return pyln.Meter(sr).integrated_loudness(y)

def band_energy_ratio(y, sr, f_low=100, f_split=2000, f_high=7000, frame_len=2048, hop=512):
    """Return the share of in-band spectral energy that lies in the upper band.

    The power spectrogram of ``y`` is summed over all frames in two
    frequency ranges: the upper band ``[f_split, f_high]`` and the full
    band ``[f_low, f_high]``. The result is upper / (full + EPS).

    Args:
        y: Audio samples.
        sr: Sample rate of ``y`` in Hz.
        f_low: Lower edge (Hz) of the full band.
        f_split: Lower edge (Hz) of the upper band.
        f_high: Upper edge (Hz) of both bands (inclusive).
        frame_len: FFT size passed to ``librosa.stft``.
        hop: Hop length passed to ``librosa.stft``.

    Returns:
        The energy ratio; higher means crisper. It lies in 0..1 as long as
        ``f_low <= f_split``.
    """
    power = np.abs(librosa.stft(y, n_fft=frame_len, hop_length=hop)) ** 2
    freqs = librosa.fft_frequencies(sr=sr, n_fft=frame_len)

    upper_band = (freqs >= f_split) & (freqs <= f_high)
    full_band = (freqs >= f_low) & (freqs <= f_high)

    hi_e = power[upper_band].sum()
    # EPS keeps the division defined when the band holds no energy at all.
    total = power[full_band].sum() + EPS
    return hi_e / total

def snr_proxy(y, sr, frame_ms=30, silence_quantile=0.1):
    """Crude signal-to-noise estimate in dB from short-frame RMS levels.

    The noise floor is taken as the ``silence_quantile`` quantile of the
    per-frame RMS values and the signal level as their median.

    Args:
        y: Audio samples.
        sr: Sample rate of ``y`` in Hz.
        frame_ms: Frame length in milliseconds; frames overlap by half.
        silence_quantile: Quantile of frame RMS used as the noise floor.

    Returns:
        The level ratio in dB as a Python float, clipped to [-20, 60].
    """
    win = int(sr * frame_ms / 1000)
    # One frame per row (axis=0), hopping by half a window.
    windows = librosa.util.frame(y, frame_length=win, hop_length=win // 2, axis=0)
    level = np.sqrt(np.mean(windows ** 2, axis=1) + EPS)

    floor = np.quantile(level, silence_quantile)
    typical = np.median(level)

    ratio_db = 20 * np.log10((typical + EPS) / (floor + EPS))
    return float(np.clip(ratio_db, -20, 60))

def clarity_score(hfer, snr_db):
    """Blend the high-frequency energy ratio and the SNR into one clarity value.

    Each input goes through its own piecewise-linear curve (``np.interp``,
    which holds the end values outside the breakpoints); the two results
    are then averaged with equal weight, giving a value in 0..1.

    Args:
        hfer: High-frequency energy ratio.
        snr_db: Signal-to-noise estimate in dB.

    Returns:
        The clarity value in 0..1.
    """
    # Each curve is (input breakpoints, score at each breakpoint).
    # HFER: <0.20 muffled, 0.20-0.35 moderate, >0.35 crisp.
    hfer_curve = ([0.12, 0.20, 0.35, 0.50], [0.0, 0.25, 0.7, 1.0])
    # SNR: <10 dB noisy, 10-20 dB moderate, >25 dB good.
    snr_curve = ([5, 10, 20, 30], [0.0, 0.25, 0.7, 1.0])

    brightness = np.interp(hfer, *hfer_curve)
    cleanliness = np.interp(snr_db, *snr_curve)
    return 0.5 * brightness + 0.5 * cleanliness

def loudness_score(lufs):
    """Score how close ``lufs`` is to TARGET_LUFS, from 1.0 (on target) to 0.0.

    The absolute deviation from the target is mapped through a
    piecewise-linear curve and returned as a Python float in 0..1.
    """
    deviation = abs(lufs - TARGET_LUFS)

    # On target => 1.0; edge of the OK range => 0.7; 6 LU off => 0.2;
    # 12 LU or more off => 0.0.
    offsets = [0, OK_RANGE, 6, 12]
    scores = [1.0, 0.7, 0.2, 0.0]

    raw = np.interp(deviation, offsets, scores)
    return float(np.clip(raw, 0, 1))

def fuse_scores(clarity, loud):
    """Combine the clarity and loudness scores into a rating label.

    The scores are fused as ``0.6 * clarity + 0.4 * loud`` (clarity weighs
    slightly more) and the result is bucketed into one of
    'poor', 'moderate', 'good' or 'excellent'.
    """
    combined = 0.6 * clarity + 0.4 * loud

    # Exclusive upper bound of every rating below 'excellent', ascending.
    for ceiling, label in ((0.35, 'poor'), (0.6, 'moderate'), (0.8, 'good')):
        if combined < ceiling:
            return label
    return 'excellent'

def judge_audio_quality(path):
    """Rate the loudness and clarity of the audio file at ``path``.

    The file is loaded as mono, then integrated loudness, the
    high-frequency energy ratio and an SNR estimate are measured, turned
    into a clarity score and a loudness score, and fused into a rating.

    Args:
        path: Location of the audio file to analyse.

    Returns:
        A dict with the keys 'rating', 'lufs', 'hfer', 'snr_db',
        'clarity_score', 'loudness_score' and 'notes'. 'rating' is one of
        'poor', 'moderate', 'good', 'excellent'. For empty or near-silent
        audio the rating is 'poor' and every numeric field is None, so the
        key set is the same on both paths.
    """
    y, sr = load_mono(path, target_sr=max(FS_MIN, 32000))

    # Guard for empty or near-silent input. The size check comes first
    # because np.max raises on an empty array.
    if np.size(y) == 0 or np.max(np.abs(y)) < 1e-4:
        return {
            'rating': 'poor',
            'lufs': None,
            'hfer': None,
            'snr_db': None,
            'clarity_score': None,
            'loudness_score': None,
            'notes': 'Audio is near-silent.',
        }

    lufs = integrated_lufs(y, sr)
    hfer = band_energy_ratio(y, sr)          # 2–7kHz energy ratio
    snr = snr_proxy(y, sr)

    cscore = clarity_score(hfer, snr)
    lscore = loudness_score(lufs)
    rating = fuse_scores(cscore, lscore)

    # Describe the loudness relative to the target window.
    if lufs < TARGET_LUFS - OK_RANGE:
        loudness_note = f'too quiet ({lufs:.1f} LUFS)'
    elif lufs > TARGET_LUFS + OK_RANGE:
        loudness_note = f'too loud ({lufs:.1f} LUFS)'
    else:
        loudness_note = f'volume near target ({lufs:.1f} LUFS)'

    notes = [loudness_note, f'HF ratio {hfer:.2f}', f'SNR {snr:.1f} dB']

    return {
        'rating': rating,         # one of: poor, moderate, good, excellent
        'lufs': round(lufs, 1),
        'hfer': round(hfer, 3),
        'snr_db': round(snr, 1),
        'clarity_score': round(cscore, 3),
        'loudness_score': round(lscore, 3),
        'notes': '; '.join(notes),
    }

# Example:
# print(judge_audio_quality('sample.wav'))
