""" CXInsights - Transcription Module Tests Unit tests for transcription models and utilities. Does NOT test actual API calls (those are in integration tests). """ from datetime import datetime from pathlib import Path import pytest from src.transcription.models import ( AudioMetadata, SpeakerTurn, Transcript, TranscriptMetadata, TranscriptionConfig, TranscriptionError, TranscriptionResult, TranscriptionStatus, ) class TestSpeakerTurn: """Tests for SpeakerTurn model.""" def test_create_valid_turn(self): """Test creating a valid speaker turn.""" turn = SpeakerTurn( speaker="A", text="Hola, buenos días", start_time=0.0, end_time=2.5, confidence=0.95, ) assert turn.speaker == "A" assert turn.text == "Hola, buenos días" assert turn.start_time == 0.0 assert turn.end_time == 2.5 assert turn.confidence == 0.95 def test_duration_computed(self): """Test that duration is computed correctly.""" turn = SpeakerTurn( speaker="A", text="Test", start_time=10.0, end_time=15.5, ) assert turn.duration_sec == 5.5 def test_word_count_computed(self): """Test that word count is computed correctly.""" turn = SpeakerTurn( speaker="A", text="Esto es una prueba de conteo de palabras", start_time=0.0, end_time=5.0, ) assert turn.word_count == 7 def test_empty_text_word_count(self): """Test word count with empty text.""" turn = SpeakerTurn( speaker="A", text="", start_time=0.0, end_time=1.0, ) assert turn.word_count == 1 # Empty string splits to [''] def test_confidence_optional(self): """Test that confidence is optional.""" turn = SpeakerTurn( speaker="A", text="Test", start_time=0.0, end_time=1.0, ) assert turn.confidence is None class TestTranscriptMetadata: """Tests for TranscriptMetadata model.""" def test_create_metadata(self): """Test creating transcript metadata.""" metadata = TranscriptMetadata( audio_duration_sec=420.5, audio_file="call_001.mp3", language="es", provider="assemblyai", job_id="abc123", ) assert metadata.audio_duration_sec == 420.5 assert metadata.audio_file == "call_001.mp3" assert metadata.language == "es" assert metadata.provider == "assemblyai" assert metadata.job_id == "abc123" def test_created_at_default(self): """Test that created_at defaults to now.""" metadata = TranscriptMetadata( audio_duration_sec=100.0, audio_file="test.mp3", provider="assemblyai", ) assert metadata.created_at is not None assert isinstance(metadata.created_at, datetime) class TestTranscript: """Tests for Transcript model.""" @pytest.fixture def sample_transcript(self): """Create a sample transcript for testing.""" return Transcript( call_id="CALL001", turns=[ SpeakerTurn( speaker="agent", text="Buenos días, ¿en qué puedo ayudarle?", start_time=0.0, end_time=3.0, ), SpeakerTurn( speaker="customer", text="Quiero cancelar mi servicio", start_time=3.5, end_time=6.0, ), SpeakerTurn( speaker="agent", text="Entiendo, ¿me puede indicar el motivo?", start_time=6.5, end_time=9.0, ), ], metadata=TranscriptMetadata( audio_duration_sec=420.0, audio_file="CALL001.mp3", provider="assemblyai", speaker_count=2, ), ) def test_total_turns(self, sample_transcript): """Test total turns count.""" assert sample_transcript.total_turns == 3 def test_total_words(self, sample_transcript): """Test total words count.""" # "Buenos días, ¿en qué puedo ayudarle?" = 6 words # "Quiero cancelar mi servicio" = 4 words # "Entiendo, ¿me puede indicar el motivo?" = 6 words assert sample_transcript.total_words == 16 def test_get_full_text(self, sample_transcript): """Test getting full text.""" full_text = sample_transcript.get_full_text() assert "Buenos días" in full_text assert "cancelar mi servicio" in full_text def test_get_speaker_text(self, sample_transcript): """Test getting text for a specific speaker.""" agent_text = sample_transcript.get_speaker_text("agent") customer_text = sample_transcript.get_speaker_text("customer") assert "Buenos días" in agent_text assert "cancelar" not in agent_text assert "cancelar mi servicio" in customer_text def test_get_speakers(self, sample_transcript): """Test getting unique speakers.""" speakers = sample_transcript.get_speakers() assert len(speakers) == 2 assert "agent" in speakers assert "customer" in speakers class TestTranscriptionResult: """Tests for TranscriptionResult model.""" def test_success_result(self): """Test creating a successful result.""" transcript = Transcript( call_id="CALL001", turns=[], metadata=TranscriptMetadata( audio_duration_sec=100.0, audio_file="test.mp3", provider="assemblyai", ), ) result = TranscriptionResult.success( call_id="CALL001", audio_path=Path("test.mp3"), transcript=transcript, ) assert result.status == TranscriptionStatus.COMPLETED assert result.is_success is True assert result.transcript is not None assert result.error is None def test_failure_result(self): """Test creating a failed result.""" result = TranscriptionResult.failure( call_id="CALL001", audio_path=Path("test.mp3"), error=TranscriptionError.API_ERROR, error_message="Rate limit exceeded", ) assert result.status == TranscriptionStatus.FAILED assert result.is_success is False assert result.transcript is None assert result.error == TranscriptionError.API_ERROR assert result.error_message == "Rate limit exceeded" def test_processing_time_computed(self): """Test processing time calculation.""" result = TranscriptionResult( call_id="CALL001", audio_path="test.mp3", status=TranscriptionStatus.COMPLETED, started_at=datetime(2024, 1, 1, 12, 0, 0), completed_at=datetime(2024, 1, 1, 12, 0, 30), ) assert result.processing_time_sec == 30.0 class TestAudioMetadata: """Tests for AudioMetadata model.""" def test_create_metadata(self): """Test creating audio metadata.""" metadata = AudioMetadata( file_path="/data/audio/call.mp3", file_size_bytes=5242880, # 5 MB duration_sec=420.0, # 7 minutes format="mp3", codec="mp3", sample_rate=44100, channels=2, bit_rate=128000, ) assert metadata.file_path == "/data/audio/call.mp3" assert metadata.duration_sec == 420.0 assert metadata.format == "mp3" def test_duration_minutes(self): """Test duration in minutes conversion.""" metadata = AudioMetadata( file_path="test.mp3", file_size_bytes=1000000, duration_sec=420.0, format="mp3", ) assert metadata.duration_minutes == 7.0 def test_file_size_mb(self): """Test file size in MB conversion.""" metadata = AudioMetadata( file_path="test.mp3", file_size_bytes=5242880, # 5 MB duration_sec=100.0, format="mp3", ) assert metadata.file_size_mb == 5.0 class TestTranscriptionConfig: """Tests for TranscriptionConfig model.""" def test_default_config(self): """Test default configuration values.""" config = TranscriptionConfig() assert config.language_code == "es" assert config.speaker_labels is True assert config.punctuate is True assert config.format_text is True assert config.auto_chapters is False def test_custom_config(self): """Test custom configuration.""" config = TranscriptionConfig( language_code="en", speaker_labels=False, auto_chapters=True, ) assert config.language_code == "en" assert config.speaker_labels is False assert config.auto_chapters is True class TestTranscriptionError: """Tests for TranscriptionError enum.""" def test_error_values(self): """Test that all error values are strings.""" assert TranscriptionError.FILE_NOT_FOUND == "FILE_NOT_FOUND" assert TranscriptionError.API_ERROR == "API_ERROR" assert TranscriptionError.RATE_LIMITED == "RATE_LIMITED" assert TranscriptionError.TIMEOUT == "TIMEOUT"