# BeyondCX_Insights/tests/unit/test_transcription.py

"""
CXInsights - Transcription Module Tests

Unit tests for transcription models and utilities.
Does NOT test actual API calls (those are in integration tests).
"""

from datetime import datetime
from pathlib import Path

import pytest

from src.transcription.models import (
    AudioMetadata,
    SpeakerTurn,
    Transcript,
    TranscriptMetadata,
    TranscriptionConfig,
    TranscriptionError,
    TranscriptionResult,
    TranscriptionStatus,
)


class TestSpeakerTurn:
    """Tests for the SpeakerTurn model: stored fields and derived values."""

    def test_create_valid_turn(self):
        """A turn built with every field exposes those values unchanged."""
        turn = SpeakerTurn(
            speaker="A",
            text="Hola, buenos días",
            start_time=0.0,
            end_time=2.5,
            confidence=0.95,
        )

        assert turn.speaker == "A"
        assert turn.text == "Hola, buenos días"
        assert turn.start_time == 0.0
        assert turn.end_time == 2.5
        assert turn.confidence == 0.95

    def test_duration_computed(self):
        """duration_sec equals end_time minus start_time."""
        turn = SpeakerTurn(
            speaker="A",
            text="Test",
            start_time=10.0,
            end_time=15.5,
        )

        # 15.5 - 10.0 is exactly representable, so plain equality is safe.
        assert turn.duration_sec == 5.5

    def test_word_count_computed(self):
        """word_count reflects the number of whitespace-separated tokens."""
        turn = SpeakerTurn(
            speaker="A",
            text="Esto es una prueba de conteo de palabras",
            start_time=0.0,
            end_time=5.0,
        )

        # Esto / es / una / prueba / de / conteo / de / palabras -> 8 tokens
        # ("de" appears twice and both occurrences count).
        assert turn.word_count == 8

    def test_empty_text_word_count(self):
        """word_count for an empty text string."""
        turn = SpeakerTurn(
            speaker="A",
            text="",
            start_time=0.0,
            end_time=1.0,
        )

        # NOTE(review): the expected value of 1 assumes the model splits on an
        # explicit separator ("".split(" ") == [""]); a bare "".split() returns
        # [] and would give 0 -- confirm against the model implementation.
        assert turn.word_count == 1

    def test_confidence_optional(self):
        """confidence is None when it is not supplied."""
        turn = SpeakerTurn(
            speaker="A",
            text="Test",
            start_time=0.0,
            end_time=1.0,
        )

        assert turn.confidence is None


class TestTranscriptMetadata:
    """Checks on TranscriptMetadata construction."""

    def test_create_metadata(self):
        """Every explicitly supplied field is readable back from the model."""
        supplied = {
            "audio_duration_sec": 420.5,
            "audio_file": "call_001.mp3",
            "language": "es",
            "provider": "assemblyai",
            "job_id": "abc123",
        }

        meta = TranscriptMetadata(**supplied)

        # Compare field by field, in the order the values were supplied.
        for field_name, expected in supplied.items():
            assert getattr(meta, field_name) == expected

    def test_created_at_default(self):
        """created_at is populated with a datetime when it is not supplied."""
        meta = TranscriptMetadata(
            audio_duration_sec=100.0,
            audio_file="test.mp3",
            provider="assemblyai",
        )
        stamp = meta.created_at

        assert stamp is not None
        assert isinstance(stamp, datetime)


class TestTranscript:
    """Checks on the Transcript model's aggregate and lookup helpers."""

    @pytest.fixture
    def sample_transcript(self):
        """Build a three-turn agent/customer transcript used by the tests below."""
        # (speaker, text, start_time, end_time) for each turn, in call order.
        dialogue = [
            ("agent", "Buenos días, ¿en qué puedo ayudarle?", 0.0, 3.0),
            ("customer", "Quiero cancelar mi servicio", 3.5, 6.0),
            ("agent", "Entiendo, ¿me puede indicar el motivo?", 6.5, 9.0),
        ]
        turns = [
            SpeakerTurn(speaker=who, text=said, start_time=begin, end_time=finish)
            for who, said, begin, finish in dialogue
        ]
        call_metadata = TranscriptMetadata(
            audio_duration_sec=420.0,
            audio_file="CALL001.mp3",
            provider="assemblyai",
            speaker_count=2,
        )
        return Transcript(call_id="CALL001", turns=turns, metadata=call_metadata)

    def test_total_turns(self, sample_transcript):
        """total_turns matches the number of turns in the fixture."""
        assert sample_transcript.total_turns == 3

    def test_total_words(self, sample_transcript):
        """total_words is the sum of the per-turn word counts."""
        # Per-turn token counts for the fixture: 6 (agent) + 4 (customer)
        # + 6 (agent) = 16.
        assert sample_transcript.total_words == 16

    def test_get_full_text(self, sample_transcript):
        """get_full_text includes text from both the agent and the customer."""
        combined = sample_transcript.get_full_text()

        for fragment in ("Buenos días", "cancelar mi servicio"):
            assert fragment in combined

    def test_get_speaker_text(self, sample_transcript):
        """get_speaker_text returns only the requested speaker's words."""
        from_agent = sample_transcript.get_speaker_text("agent")
        from_customer = sample_transcript.get_speaker_text("customer")

        assert "Buenos días" in from_agent
        # The customer's request must not leak into the agent's text.
        assert "cancelar" not in from_agent
        assert "cancelar mi servicio" in from_customer

    def test_get_speakers(self, sample_transcript):
        """get_speakers lists each distinct speaker once."""
        found = sample_transcript.get_speakers()

        assert len(found) == 2
        for label in ("agent", "customer"):
            assert label in found


class TestTranscriptionResult:
    """Checks on TranscriptionResult factories and derived timing."""

    def test_success_result(self):
        """The success factory yields a COMPLETED result carrying a transcript."""
        empty_metadata = TranscriptMetadata(
            audio_duration_sec=100.0,
            audio_file="test.mp3",
            provider="assemblyai",
        )
        empty_transcript = Transcript(
            call_id="CALL001",
            turns=[],
            metadata=empty_metadata,
        )

        outcome = TranscriptionResult.success(
            call_id="CALL001",
            audio_path=Path("test.mp3"),
            transcript=empty_transcript,
        )

        assert outcome.status == TranscriptionStatus.COMPLETED
        assert outcome.is_success is True
        assert outcome.transcript is not None
        assert outcome.error is None

    def test_failure_result(self):
        """The failure factory yields a FAILED result carrying error details."""
        failure_code = TranscriptionError.API_ERROR
        failure_text = "Rate limit exceeded"

        outcome = TranscriptionResult.failure(
            call_id="CALL001",
            audio_path=Path("test.mp3"),
            error=failure_code,
            error_message=failure_text,
        )

        assert outcome.status == TranscriptionStatus.FAILED
        assert outcome.is_success is False
        assert outcome.transcript is None
        assert outcome.error == TranscriptionError.API_ERROR
        assert outcome.error_message == "Rate limit exceeded"

    def test_processing_time_computed(self):
        """processing_time_sec is the gap between started_at and completed_at."""
        began = datetime(2024, 1, 1, 12, 0, 0)
        # Same instant, thirty seconds later.
        ended = began.replace(second=30)

        outcome = TranscriptionResult(
            call_id="CALL001",
            audio_path="test.mp3",
            status=TranscriptionStatus.COMPLETED,
            started_at=began,
            completed_at=ended,
        )

        assert outcome.processing_time_sec == 30.0


class TestAudioMetadata:
    """Checks on AudioMetadata fields and unit conversions."""

    def test_create_metadata(self):
        """A fully specified AudioMetadata keeps its path, duration and format."""
        five_megabytes = 5242880
        seven_minutes_sec = 420.0

        audio = AudioMetadata(
            file_path="/data/audio/call.mp3",
            file_size_bytes=five_megabytes,
            duration_sec=seven_minutes_sec,
            format="mp3",
            codec="mp3",
            sample_rate=44100,
            channels=2,
            bit_rate=128000,
        )

        assert audio.file_path == "/data/audio/call.mp3"
        assert audio.duration_sec == 420.0
        assert audio.format == "mp3"

    def test_duration_minutes(self):
        """duration_minutes reports 420 seconds as 7 minutes."""
        audio = AudioMetadata(
            file_path="test.mp3",
            file_size_bytes=1000000,
            duration_sec=420.0,
            format="mp3",
        )
        minutes = audio.duration_minutes

        assert minutes == 7.0

    def test_file_size_mb(self):
        """file_size_mb reports 5242880 bytes as 5 MB."""
        audio = AudioMetadata(
            file_path="test.mp3",
            file_size_bytes=5242880,
            duration_sec=100.0,
            format="mp3",
        )
        megabytes = audio.file_size_mb

        assert megabytes == 5.0


class TestTranscriptionConfig:
    """Checks on TranscriptionConfig defaults and overrides."""

    def test_default_config(self):
        """A config built with no arguments carries the expected defaults."""
        defaults = TranscriptionConfig()

        assert defaults.language_code == "es"
        # Boolean switches are checked by identity, not just truthiness.
        assert defaults.speaker_labels is True
        assert defaults.punctuate is True
        assert defaults.format_text is True
        assert defaults.auto_chapters is False

    def test_custom_config(self):
        """Explicitly passed options replace the defaults."""
        overrides = {
            "language_code": "en",
            "speaker_labels": False,
            "auto_chapters": True,
        }

        custom = TranscriptionConfig(**overrides)

        assert custom.language_code == "en"
        assert custom.speaker_labels is False
        assert custom.auto_chapters is True


class TestTranscriptionError:
    """Checks on the TranscriptionError enum."""

    def test_error_values(self):
        """The checked members compare equal to their own names as strings."""
        checked_members = ("FILE_NOT_FOUND", "API_ERROR", "RATE_LIMITED", "TIMEOUT")

        for member_name in checked_members:
            assert getattr(TranscriptionError, member_name) == member_name