feat: Add Streamlit dashboard with Blueprint compliance (v2.1.0)
Dashboard Features: - 8 navigation sections: Overview, Outcomes, Poor CX, FCR, Churn, Agent, Call Explorer, Export - Beyond Brand Identity styling (colors #6D84E3, Outfit font) - RCA Sankey diagram (Driver → Outcome → Churn Risk flow) - Correlation heatmaps (driver co-occurrence, driver-outcome) - Outcome Deep Dive (root causes, correlation, duration analysis) - Export functionality (Excel, HTML, JSON) Blueprint Compliance: - FCR: 4 categories (Primera Llamada/Rellamada × Sin/Con Riesgo de Fuga) - Churn: Binary view (Sin Riesgo de Fuga / En Riesgo de Fuga) - Agent: Talento Para Replicar / Oportunidades de Mejora - Fixed FCR rate calculation (only FIRST_CALL counts as success) Technical: - Streamlit + Plotly for interactive visualizations - Light theme configuration (.streamlit/config.toml) - Fixed Plotly colorbar titlefont deprecation Documentation: - Updated PROJECT_CONTEXT.md, TODO.md, CHANGELOG.md - Added 4 new technical decisions (TD-014 to TD-017) - Created TROUBLESHOOTING.md with 10 common issues Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
582
tests/unit/test_aggregation.py
Normal file
582
tests/unit/test_aggregation.py
Normal file
@@ -0,0 +1,582 @@
|
||||
"""
|
||||
CXInsights - Aggregation Module Tests
|
||||
|
||||
Tests for statistics, severity scoring, and RCA tree building.
|
||||
v2.0: Updated with FCR, churn risk, and agent skill tests.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from src.aggregation import (
|
||||
AggregationConfig,
|
||||
BatchAggregation,
|
||||
DriverFrequency,
|
||||
DriverSeverity,
|
||||
ImpactLevel,
|
||||
RCANode,
|
||||
RCATree,
|
||||
RCATreeBuilder,
|
||||
SeverityCalculator,
|
||||
StatisticsCalculator,
|
||||
aggregate_batch,
|
||||
build_rca_tree,
|
||||
calculate_batch_statistics,
|
||||
calculate_driver_severities,
|
||||
)
|
||||
from src.models.call_analysis import (
|
||||
AgentClassification,
|
||||
AgentSkillIndicator,
|
||||
CallAnalysis,
|
||||
CallOutcome,
|
||||
ChurnRisk,
|
||||
EvidenceSpan,
|
||||
FCRStatus,
|
||||
ObservedFeatures,
|
||||
ProcessingStatus,
|
||||
RCALabel,
|
||||
Traceability,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
def sample_analyses():
    """Build five representative CallAnalysis objects (v2.0 fields).

    The set covers lost sales, poor CX, FCR repeat calls, churn risk,
    and all three agent classifications, so aggregation tests can
    exercise every category within a single batch.
    """

    def _label(code, confidence, text, start, end):
        # Shorthand: an RCALabel backed by a single evidence span.
        return RCALabel(
            driver_code=code,
            confidence=confidence,
            evidence_spans=[EvidenceSpan(text=text, start_time=start, end_time=end)],
        )

    observed = ObservedFeatures(audio_duration_sec=60.0, events=[])
    trace = Traceability(
        schema_version="1.0.0",
        prompt_version="v2.0",
        model_id="gpt-4o-mini",
    )

    # Call 1: lost sale on price, first call, customer at churn risk.
    call_1 = CallAnalysis(
        call_id="CALL001",
        batch_id="test_batch",
        status=ProcessingStatus.SUCCESS,
        observed=observed,
        outcome=CallOutcome.SALE_LOST,
        lost_sales_drivers=[_label("PRICE_TOO_HIGH", 0.9, "Es muy caro", 10, 12)],
        poor_cx_drivers=[],
        fcr_status=FCRStatus.FIRST_CALL,
        churn_risk=ChurnRisk.AT_RISK,
        churn_risk_drivers=[
            _label("COMPETITOR_MENTION", 0.85, "Vodafone me ofrece", 20, 22),
        ],
        agent_classification=AgentClassification.NEEDS_IMPROVEMENT,
        traceability=trace,
    )

    # Call 2: lost sale on price + competitor, repeat call, at risk.
    call_2 = CallAnalysis(
        call_id="CALL002",
        batch_id="test_batch",
        status=ProcessingStatus.SUCCESS,
        observed=observed,
        outcome=CallOutcome.SALE_LOST,
        lost_sales_drivers=[
            _label("PRICE_TOO_HIGH", 0.85, "Muy caro", 15, 17),
            _label("COMPETITOR_PREFERENCE", 0.8, "La competencia ofrece mejor", 20, 23),
        ],
        poor_cx_drivers=[],
        fcr_status=FCRStatus.REPEAT_CALL,
        fcr_failure_drivers=[
            _label("INCOMPLETE_RESOLUTION", 0.8, "Ya llamé antes", 5, 7),
        ],
        churn_risk=ChurnRisk.AT_RISK,
        agent_classification=AgentClassification.MIXED,
        traceability=trace,
    )

    # Call 3: resolved inquiry with a long hold; good agent, no risk.
    call_3 = CallAnalysis(
        call_id="CALL003",
        batch_id="test_batch",
        status=ProcessingStatus.SUCCESS,
        observed=observed,
        outcome=CallOutcome.INQUIRY_RESOLVED,
        lost_sales_drivers=[],
        poor_cx_drivers=[
            _label("LONG_HOLD", 0.95, "Mucho tiempo esperando", 5, 8),
        ],
        fcr_status=FCRStatus.FIRST_CALL,
        churn_risk=ChurnRisk.NO_RISK,
        agent_classification=AgentClassification.GOOD_PERFORMER,
        agent_positive_skills=[
            AgentSkillIndicator(
                skill_code="EMPATHY_SHOWN",
                skill_type="positive",
                confidence=0.9,
                evidence_spans=[
                    EvidenceSpan(text="Entiendo su frustración", start_time=10, end_time=12),
                ],
                description="Agent showed empathy",
            ),
        ],
        traceability=trace,
    )

    # Call 4: lost sale AND poor CX on a repeat call; agent needs work.
    call_4 = CallAnalysis(
        call_id="CALL004",
        batch_id="test_batch",
        status=ProcessingStatus.SUCCESS,
        observed=observed,
        outcome=CallOutcome.SALE_LOST,
        lost_sales_drivers=[_label("PRICE_TOO_HIGH", 0.75, "No puedo pagar", 30, 32)],
        poor_cx_drivers=[_label("LOW_EMPATHY", 0.7, "No me escucha", 25, 27)],
        fcr_status=FCRStatus.REPEAT_CALL,
        churn_risk=ChurnRisk.AT_RISK,
        agent_classification=AgentClassification.NEEDS_IMPROVEMENT,
        agent_improvement_areas=[
            AgentSkillIndicator(
                skill_code="POOR_CLOSING",
                skill_type="improvement_needed",
                confidence=0.8,
                evidence_spans=[
                    EvidenceSpan(text="Bueno, pues llame otro día", start_time=50, end_time=53),
                ],
                description="Agent failed to close",
            ),
        ],
        traceability=trace,
    )

    # Call 5: clean completed sale — no drivers, no risk, good agent.
    call_5 = CallAnalysis(
        call_id="CALL005",
        batch_id="test_batch",
        status=ProcessingStatus.SUCCESS,
        observed=observed,
        outcome=CallOutcome.SALE_COMPLETED,
        lost_sales_drivers=[],
        poor_cx_drivers=[],
        fcr_status=FCRStatus.FIRST_CALL,
        churn_risk=ChurnRisk.NO_RISK,
        agent_classification=AgentClassification.GOOD_PERFORMER,
        traceability=trace,
    )

    return [call_1, call_2, call_3, call_4, call_5]
|
||||
|
||||
class TestDriverFrequency:
    """Unit tests for the DriverFrequency model."""

    def test_valid_frequency(self):
        """A well-formed frequency record round-trips its fields."""
        frequency = DriverFrequency(
            driver_code="PRICE_TOO_HIGH",
            category="lost_sales",
            total_occurrences=3,
            calls_affected=3,
            total_calls_in_batch=5,
            occurrence_rate=0.6,
            call_rate=0.6,
            avg_confidence=0.83,
            min_confidence=0.75,
            max_confidence=0.9,
        )

        assert frequency.driver_code == "PRICE_TOO_HIGH"
        assert frequency.occurrence_rate == 0.6

    def test_invalid_rate(self):
        """An occurrence rate above 1.0 must be rejected."""
        bad_kwargs = dict(
            driver_code="TEST",
            category="lost_sales",
            total_occurrences=1,
            calls_affected=1,
            total_calls_in_batch=5,
            occurrence_rate=1.5,  # out of the valid [0, 1] range
            call_rate=0.2,
            avg_confidence=0.8,
            min_confidence=0.8,
            max_confidence=0.8,
        )

        with pytest.raises(ValueError):
            DriverFrequency(**bad_kwargs)
|
||||
|
||||
class TestDriverSeverity:
    """Unit tests for the DriverSeverity model."""

    def test_valid_severity(self):
        """A well-formed severity record round-trips its fields."""
        severity = DriverSeverity(
            driver_code="PRICE_TOO_HIGH",
            category="lost_sales",
            base_severity=0.8,
            frequency_factor=0.6,
            confidence_factor=0.85,
            co_occurrence_factor=0.3,
            severity_score=65.0,
            impact_level=ImpactLevel.HIGH,
        )

        assert severity.severity_score == 65.0
        assert severity.impact_level == ImpactLevel.HIGH

    def test_invalid_severity_score(self):
        """Scores outside the 0-100 range must be rejected."""
        bad_kwargs = dict(
            driver_code="TEST",
            category="lost_sales",
            base_severity=0.5,
            frequency_factor=0.5,
            confidence_factor=0.5,
            co_occurrence_factor=0.5,
            severity_score=150.0,  # above the 100-point ceiling
            impact_level=ImpactLevel.HIGH,
        )

        with pytest.raises(ValueError):
            DriverSeverity(**bad_kwargs)
|
||||
|
||||
class TestStatisticsCalculator:
    """Unit tests for StatisticsCalculator."""

    def test_calculate_frequencies(self, sample_analyses):
        """Frequencies come back keyed by category (v2.0 dict format)."""
        frequencies = StatisticsCalculator().calculate_frequencies(sample_analyses)

        # Every v2.0 category must be present in the result.
        for category in (
            "lost_sales",
            "poor_cx",
            "fcr_failure",
            "churn_risk",
            "agent_positive",
            "agent_improvement",
        ):
            assert category in frequencies

        # PRICE_TOO_HIGH occurs once in each of three fixture calls.
        by_code = {f.driver_code: f for f in frequencies["lost_sales"]}
        price = by_code["PRICE_TOO_HIGH"]
        assert price.total_occurrences == 3
        assert price.calls_affected == 3
        assert price.call_rate == 0.6  # 3 of 5 calls

        # Exactly one FCR-failure driver (INCOMPLETE_RESOLUTION) ...
        assert len(frequencies["fcr_failure"]) == 1
        # ... and one positive agent skill (EMPATHY_SHOWN).
        assert len(frequencies["agent_positive"]) == 1

    def test_calculate_outcome_rates(self, sample_analyses):
        """Outcome rates include the v2.0 FCR / churn / agent metrics."""
        rates = StatisticsCalculator().calculate_outcome_rates(sample_analyses)

        assert rates["total_calls"] == 5
        assert rates["lost_sales_count"] == 3  # calls with lost-sales drivers
        assert rates["poor_cx_count"] == 2  # calls with poor-CX drivers
        assert rates["both_count"] == 1  # calls flagged in both categories

        # v2.0: first-call-resolution metrics.
        fcr = rates["fcr"]
        assert fcr["first_call"] == 3
        assert fcr["repeat_call"] == 2
        assert fcr["repeat_rate"] == 0.4  # 2 of 5

        # v2.0: churn metrics.
        assert rates["churn"]["at_risk"] == 3
        assert rates["churn"]["no_risk"] == 2

        # v2.0: agent classification counts.
        agent = rates["agent"]
        assert agent["good_performer"] == 2
        assert agent["needs_improvement"] == 2
        assert agent["mixed"] == 1

    def test_empty_analyses(self):
        """An empty batch yields an empty frequency list per category."""
        frequencies = StatisticsCalculator().calculate_frequencies([])

        for category in ("lost_sales", "poor_cx", "fcr_failure", "churn_risk"):
            assert frequencies[category] == []

    def test_conditional_probabilities(self, sample_analyses):
        """Driver relationships are detected even at low support."""
        # min_support=1 keeps the tiny fixture batch above the threshold.
        calculator = StatisticsCalculator(config=AggregationConfig(min_support=1))
        probabilities = calculator.calculate_conditional_probabilities(sample_analyses)

        # At least one driver relationship should be found.
        assert len(probabilities) > 0
|
||||
|
||||
class TestSeverityCalculator:
    """Unit tests for SeverityCalculator."""

    def test_get_base_severity(self):
        """Base severities come from the taxonomy; unknowns get 0.5."""
        calc = SeverityCalculator()

        # Known taxonomy entries.
        assert calc.get_base_severity("PRICE_TOO_HIGH", "lost_sales") == 0.8
        assert calc.get_base_severity("RUDE_BEHAVIOR", "poor_cx") == 0.9

        # Drivers missing from the taxonomy fall back to a neutral 0.5.
        assert calc.get_base_severity("UNKNOWN", "lost_sales") == 0.5

    def test_calculate_severity(self):
        """Severity derives from a frequency record and stays bounded."""
        frequency = DriverFrequency(
            driver_code="PRICE_TOO_HIGH",
            category="lost_sales",
            total_occurrences=3,
            calls_affected=3,
            total_calls_in_batch=5,
            occurrence_rate=0.6,
            call_rate=0.6,
            avg_confidence=0.85,
            min_confidence=0.75,
            max_confidence=0.9,
            commonly_co_occurs_with=["COMPETITOR_PREFERENCE"],
        )

        severity = SeverityCalculator().calculate_severity(frequency)

        assert severity.driver_code == "PRICE_TOO_HIGH"
        assert severity.base_severity == 0.8
        assert 0 <= severity.severity_score <= 100
        assert severity.impact_level in (
            ImpactLevel.CRITICAL,
            ImpactLevel.HIGH,
            ImpactLevel.MEDIUM,
            ImpactLevel.LOW,
        )

    def test_impact_level_thresholds(self):
        """A frequent, high-confidence driver lands at HIGH or CRITICAL."""
        frequent = DriverFrequency(
            driver_code="TEST",
            category="lost_sales",
            total_occurrences=15,
            calls_affected=15,
            total_calls_in_batch=100,
            occurrence_rate=0.15,
            call_rate=0.15,  # above the 10% threshold
            avg_confidence=0.9,
            min_confidence=0.9,
            max_confidence=0.9,
        )

        result = SeverityCalculator().calculate_severity(frequent)

        # High frequency should push the level to at least HIGH.
        assert result.impact_level in (ImpactLevel.CRITICAL, ImpactLevel.HIGH)
|
||||
|
||||
class TestRCATreeBuilder:
    """Unit tests for RCATreeBuilder."""

    def test_build_tree(self, sample_analyses):
        """Building a tree populates both category roots."""
        tree = RCATreeBuilder().build("test_batch", sample_analyses)

        assert tree.batch_id == "test_batch"
        assert tree.total_calls == 5
        assert len(tree.lost_sales_root) > 0
        assert len(tree.poor_cx_root) > 0

    def test_top_drivers(self, sample_analyses):
        """The most frequent driver is ranked among the top drivers."""
        tree = RCATreeBuilder().build("test_batch", sample_analyses)

        # PRICE_TOO_HIGH dominates the fixture batch (3 of 5 calls).
        assert "PRICE_TOO_HIGH" in tree.top_lost_sales_drivers

    def test_tree_to_dict(self, sample_analyses):
        """Serialization exposes the summary and both sub-trees."""
        tree = RCATreeBuilder().build("test_batch", sample_analyses)
        serialized = tree.to_dict()

        for key in ("batch_id", "summary", "lost_sales_tree", "poor_cx_tree"):
            assert key in serialized

    def test_build_aggregation(self, sample_analyses):
        """build_aggregation wraps the tree in a BatchAggregation."""
        aggregation = RCATreeBuilder().build_aggregation("test_batch", sample_analyses)

        assert isinstance(aggregation, BatchAggregation)
        assert aggregation.total_calls_processed == 5
        assert aggregation.successful_analyses == 5
        assert aggregation.rca_tree is not None
|
||||
|
||||
class TestConvenienceFunctions:
    """Unit tests for the module-level convenience wrappers."""

    def test_calculate_batch_statistics(self, sample_analyses):
        """Statistics carry both the v1.0 and the v2.0 key sets."""
        stats = calculate_batch_statistics(sample_analyses)

        legacy_keys = (
            "outcome_rates",
            "lost_sales_frequencies",
            "poor_cx_frequencies",
        )
        v2_keys = (
            "fcr_failure_frequencies",
            "churn_risk_frequencies",
            "agent_positive_frequencies",
            "agent_improvement_frequencies",
        )
        for key in legacy_keys + v2_keys:
            assert key in stats

        # v2.0 nests FCR / churn / agent metrics inside outcome_rates.
        for nested in ("fcr", "churn", "agent"):
            assert nested in stats["outcome_rates"]

    def test_build_rca_tree_function(self, sample_analyses):
        """build_rca_tree returns an RCATree for the given batch."""
        tree = build_rca_tree("test_batch", sample_analyses)

        assert isinstance(tree, RCATree)
        assert tree.batch_id == "test_batch"

    def test_aggregate_batch_function(self, sample_analyses):
        """aggregate_batch returns a BatchAggregation for the batch."""
        aggregation = aggregate_batch("test_batch", sample_analyses)

        assert isinstance(aggregation, BatchAggregation)
        assert aggregation.batch_id == "test_batch"
|
||||
|
||||
class TestRCANode:
    """Unit tests for the RCANode model."""

    def test_node_to_dict(self):
        """Serialization keeps the code, rank, and nested statistics."""
        frequency = DriverFrequency(
            driver_code="PRICE_TOO_HIGH",
            category="lost_sales",
            total_occurrences=3,
            calls_affected=3,
            total_calls_in_batch=5,
            occurrence_rate=0.6,
            call_rate=0.6,
            avg_confidence=0.85,
            min_confidence=0.75,
            max_confidence=0.9,
        )
        severity = DriverSeverity(
            driver_code="PRICE_TOO_HIGH",
            category="lost_sales",
            base_severity=0.8,
            frequency_factor=0.6,
            confidence_factor=0.85,
            co_occurrence_factor=0.3,
            severity_score=65.0,
            impact_level=ImpactLevel.HIGH,
        )
        node = RCANode(
            driver_code="PRICE_TOO_HIGH",
            category="lost_sales",
            frequency=frequency,
            severity=severity,
            priority_rank=1,
            sample_evidence=["Es muy caro para mí"],
        )

        serialized = node.to_dict()

        assert serialized["driver_code"] == "PRICE_TOO_HIGH"
        assert serialized["priority_rank"] == 1
        assert "frequency" in serialized
        assert "severity" in serialized
|
||||
|
||||
class TestEmergentPatterns:
    """Unit tests for emergent pattern extraction."""

    def test_extract_emergent(self):
        """An OTHER_EMERGENT driver surfaces with its proposed label."""
        observed = ObservedFeatures(audio_duration_sec=60.0, events=[])
        trace = Traceability(
            schema_version="1.0.0",
            prompt_version="v1.0",
            model_id="gpt-4o-mini",
        )

        # A driver outside the fixed taxonomy, carrying a proposed label.
        emergent_driver = RCALabel(
            driver_code="OTHER_EMERGENT",
            confidence=0.7,
            evidence_spans=[
                EvidenceSpan(text="Nuevo patrón", start_time=0, end_time=1)
            ],
            proposed_label="NEW_PATTERN",
        )
        analyses = [
            CallAnalysis(
                call_id="EMG001",
                batch_id="test",
                status=ProcessingStatus.SUCCESS,
                observed=observed,
                outcome=CallOutcome.SALE_LOST,
                lost_sales_drivers=[emergent_driver],
                poor_cx_drivers=[],
                traceability=trace,
            )
        ]

        patterns = StatisticsCalculator().extract_emergent_patterns(analyses)

        assert len(patterns) == 1
        assert patterns[0]["proposed_label"] == "NEW_PATTERN"
        assert patterns[0]["occurrences"] == 1
Reference in New Issue
Block a user