feat: Add Streamlit dashboard with Blueprint compliance (v2.1.0)

Dashboard Features: - 8 navigation sections: Overview, Outcomes, Poor CX, FCR, Churn, Agent, Call Explorer, Export - Beyond Brand Identity styling (colors #6D84E3, Outfit font) - RCA Sankey diagram (Driver → Outcome → Churn Risk flow) - Correlation heatmaps (driver co-occurrence, driver-outcome) - Outcome Deep Dive (root causes, correlation, duration analysis) - Export functionality (Excel, HTML, JSON) Blueprint Compliance: - FCR: 4 categories (Primera Llamada/Rellamada × Sin/Con Riesgo de Fuga) - Churn: Binary view (Sin Riesgo de Fuga / En Riesgo de Fuga) - Agent: Talento Para Replicar / Oportunidades de Mejora - Fixed FCR rate calculation (only FIRST_CALL counts as success) Technical: - Streamlit + Plotly for interactive visualizations - Light theme configuration (.streamlit/config.toml) - Fixed Plotly colorbar titlefont deprecation Documentation: - Updated PROJECT_CONTEXT.md, TODO.md, CHANGELOG.md - Added 4 new technical decisions (TD-014 to TD-017) - Created TROUBLESHOOTING.md with 10 common issues Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-19 16:27:30 +01:00
commit 75e7b9da3d
110 changed files with 28247 additions and 0 deletions
--- a/tests/unit/test_transcription.py
+++ b/tests/unit/test_transcription.py
@@ -0,0 +1,322 @@
+"""
+CXInsights - Transcription Module Tests
+
+Unit tests for transcription models and utilities.
+Does NOT test actual API calls (those are in integration tests).
+"""
+
+from datetime import datetime
+from pathlib import Path
+
+import pytest
+
+from src.transcription.models import (
+    AudioMetadata,
+    SpeakerTurn,
+    Transcript,
+    TranscriptMetadata,
+    TranscriptionConfig,
+    TranscriptionError,
+    TranscriptionResult,
+    TranscriptionStatus,
+)
+
+
+class TestSpeakerTurn:
+    """Unit tests for SpeakerTurn: stored fields plus derived duration and word count."""
+
+    def test_create_valid_turn(self):
+        """A turn built with every field exposes each value unchanged."""
+        turn = SpeakerTurn(
+            speaker="A",
+            text="Hola, buenos días",
+            start_time=0.0,
+            end_time=2.5,
+            confidence=0.95,
+        )
+
+        assert turn.speaker == "A"
+        assert turn.text == "Hola, buenos días"
+        assert turn.start_time == 0.0
+        assert turn.end_time == 2.5
+        assert turn.confidence == 0.95
+
+    def test_duration_computed(self):
+        """duration_sec equals end_time minus start_time (15.5 - 10.0)."""
+        turn = SpeakerTurn(
+            speaker="A",
+            text="Test",
+            start_time=10.0,
+            end_time=15.5,
+        )
+
+        assert turn.duration_sec == pytest.approx(5.5)  # derived float: compare with tolerance
+
+    def test_word_count_computed(self):
+        """word_count covers all eight words of the sentence, repeats included."""
+        turn = SpeakerTurn(
+            speaker="A",
+            text="Esto es una prueba de conteo de palabras",
+            start_time=0.0,
+            end_time=5.0,
+        )
+
+        assert turn.word_count == 8  # "de" occurs twice and both occurrences count
+
+    def test_empty_text_word_count(self):
+        """An empty text is currently expected to report a word count of 1."""
+        turn = SpeakerTurn(
+            speaker="A",
+            text="",
+            start_time=0.0,
+            end_time=1.0,
+        )
+
+        assert turn.word_count == 1  # NOTE(review): len("".split(" ")) is 1, len("".split()) is 0 - confirm
+
+    def test_confidence_optional(self):
+        """Omitting confidence leaves the attribute set to None."""
+        turn = SpeakerTurn(
+            speaker="A",
+            text="Test",
+            start_time=0.0,
+            end_time=1.0,
+        )
+
+        assert turn.confidence is None
+
+
+class TestTranscriptMetadata:
+    """Checks on the TranscriptMetadata model."""
+
+    def test_create_metadata(self):
+        """Explicitly supplied fields can be read back from the instance."""
+        expected = {
+            "audio_duration_sec": 420.5,
+            "audio_file": "call_001.mp3",
+            "language": "es",
+            "provider": "assemblyai",
+            "job_id": "abc123",
+        }
+        meta = TranscriptMetadata(**expected)
+
+        # Every constructor argument must round-trip through the attribute
+        # of the same name.
+        for field_name, value in expected.items():
+            assert getattr(meta, field_name) == value
+
+    def test_created_at_default(self):
+        """created_at is populated with a datetime when it is not passed in."""
+        meta = TranscriptMetadata(
+            provider="assemblyai",
+            audio_file="test.mp3",
+            audio_duration_sec=100.0,
+        )
+
+        assert meta.created_at is not None
+        assert isinstance(meta.created_at, datetime)
+
+
+class TestTranscript:
+    """Checks on the Transcript model's counters and text accessors."""
+
+    @pytest.fixture
+    def sample_transcript(self):
+        """Build a three-turn agent/customer/agent transcript shared by the tests."""
+        # (speaker, text, start_time, end_time) for each turn, in call order.
+        rows = [
+            ("agent", "Buenos días, ¿en qué puedo ayudarle?", 0.0, 3.0),
+            ("customer", "Quiero cancelar mi servicio", 3.5, 6.0),
+            ("agent", "Entiendo, ¿me puede indicar el motivo?", 6.5, 9.0),
+        ]
+
+        turns = [
+            SpeakerTurn(
+                speaker=who,
+                text=said,
+                start_time=begin,
+                end_time=finish,
+            )
+            for who, said, begin, finish in rows
+        ]
+
+        meta = TranscriptMetadata(
+            audio_duration_sec=420.0,
+            audio_file="CALL001.mp3",
+            provider="assemblyai",
+            speaker_count=2,
+        )
+
+        return Transcript(
+            call_id="CALL001",
+            turns=turns,
+            metadata=meta,
+        )
+
+    def test_total_turns(self, sample_transcript):
+        """total_turns matches the three turns in the fixture."""
+        assert sample_transcript.total_turns == 3
+
+    def test_total_words(self, sample_transcript):
+        """total_words matches the hand-counted words across all turns."""
+        # Expected total, counting whitespace-separated tokens by hand:
+        # 6 (first agent turn) + 4 (customer turn) + 6 (second agent turn).
+        # Punctuation stays attached to its word and adds nothing.
+        assert sample_transcript.total_words == 16
+
+    def test_get_full_text(self, sample_transcript):
+        """get_full_text() contains text from both the agent and the customer."""
+        combined = sample_transcript.get_full_text()
+        assert "cancelar mi servicio" in combined
+        assert "Buenos días" in combined
+
+    def test_get_speaker_text(self, sample_transcript):
+        """get_speaker_text() returns only the requested speaker's words."""
+        from_agent = sample_transcript.get_speaker_text("agent")
+        from_customer = sample_transcript.get_speaker_text("customer")
+
+        assert "cancelar mi servicio" in from_customer
+        assert "Buenos días" in from_agent
+        assert "cancelar" not in from_agent
+
+    def test_get_speakers(self, sample_transcript):
+        """get_speakers() reports exactly the two distinct speaker labels."""
+        found = sample_transcript.get_speakers()
+
+        assert "agent" in found
+        assert "customer" in found
+        assert len(found) == 2
+
+
+class TestTranscriptionResult:
+    """Checks on TranscriptionResult factories and its processing-time value."""
+
+    def test_success_result(self):
+        """success() yields a COMPLETED result holding the transcript and no error."""
+        meta = TranscriptMetadata(
+            audio_duration_sec=100.0,
+            audio_file="test.mp3",
+            provider="assemblyai",
+        )
+        empty_transcript = Transcript(call_id="CALL001", turns=[], metadata=meta)
+
+        outcome = TranscriptionResult.success(
+            call_id="CALL001",
+            audio_path=Path("test.mp3"),
+            transcript=empty_transcript,
+        )
+
+        # A successful result carries the transcript and leaves error unset.
+        assert outcome.is_success is True
+        assert outcome.status == TranscriptionStatus.COMPLETED
+        assert outcome.error is None
+        assert outcome.transcript is not None
+
+    def test_failure_result(self):
+        """failure() yields a FAILED result with the error details and no transcript."""
+        outcome = TranscriptionResult.failure(
+            call_id="CALL001",
+            audio_path=Path("test.mp3"),
+            error=TranscriptionError.API_ERROR,
+            error_message="Rate limit exceeded",
+        )
+
+        assert outcome.is_success is False
+        assert outcome.status == TranscriptionStatus.FAILED
+        assert outcome.error == TranscriptionError.API_ERROR
+        assert outcome.error_message == "Rate limit exceeded"
+        assert outcome.transcript is None
+
+    def test_processing_time_computed(self):
+        """processing_time_sec is the gap between started_at and completed_at."""
+        began = datetime(2024, 1, 1, 12, 0, 0)
+        ended = datetime(2024, 1, 1, 12, 0, 30)  # 30 seconds after `began`
+        outcome = TranscriptionResult(
+            call_id="CALL001",
+            audio_path="test.mp3",
+            status=TranscriptionStatus.COMPLETED,
+            started_at=began,
+            completed_at=ended,
+        )
+
+        assert outcome.processing_time_sec == 30.0
+
+
+class TestAudioMetadata:
+    """Checks on AudioMetadata fields and its unit-conversion values."""
+
+    def test_create_metadata(self):
+        """A fully specified AudioMetadata exposes the values it was given."""
+        audio = AudioMetadata(
+            file_path="/data/audio/call.mp3",
+            file_size_bytes=5 * 1024 * 1024,  # 5 MB
+            duration_sec=7 * 60.0,  # 7 minutes
+            format="mp3",
+            codec="mp3",
+            sample_rate=44100,
+            channels=2,
+            bit_rate=128000,
+        )
+
+        assert audio.format == "mp3"
+        assert audio.duration_sec == 420.0
+        assert audio.file_path == "/data/audio/call.mp3"
+
+    def test_duration_minutes(self):
+        """duration_minutes is 7.0 for a 420-second file."""
+        audio = AudioMetadata(
+            format="mp3",
+            duration_sec=420.0,
+            file_size_bytes=1000000,
+            file_path="test.mp3",
+        )
+
+        assert audio.duration_minutes == 7.0
+
+    def test_file_size_mb(self):
+        """file_size_mb is 5.0 for a 5 * 1024 * 1024 byte file."""
+        audio = AudioMetadata(
+            format="mp3",
+            duration_sec=100.0,
+            file_size_bytes=5 * 1024 * 1024,  # 5 MB
+            file_path="test.mp3",
+        )
+
+        assert audio.file_size_mb == 5.0
+
+
+class TestTranscriptionConfig:
+    """Checks on TranscriptionConfig defaults and overrides."""
+
+    def test_default_config(self):
+        """A config built with no arguments has the expected default values."""
+        cfg = TranscriptionConfig()
+
+        assert cfg.auto_chapters is False
+        assert cfg.format_text is True
+        assert cfg.punctuate is True
+        assert cfg.speaker_labels is True
+        assert cfg.language_code == "es"
+
+    def test_custom_config(self):
+        """Values passed to the constructor replace the defaults."""
+        cfg = TranscriptionConfig(
+            auto_chapters=True,
+            speaker_labels=False,
+            language_code="en",
+        )
+
+        assert cfg.auto_chapters is True
+        assert cfg.speaker_labels is False
+        assert cfg.language_code == "en"
+
+
+class TestTranscriptionError:
+    """Checks on the TranscriptionError enum members."""
+
+    def test_error_values(self):
+        """Each checked member compares equal to the string of its own name."""
+        # Member name and expected value coincide, so one tuple covers both.
+        member_names = ("FILE_NOT_FOUND", "API_ERROR", "RATE_LIMITED", "TIMEOUT")
+        for member_name in member_names:
+            assert getattr(TranscriptionError, member_name) == member_name