""" CXInsights - Aggregation Module Tests Tests for statistics, severity scoring, and RCA tree building. v2.0: Updated with FCR, churn risk, and agent skill tests. """ import pytest from src.aggregation import ( AggregationConfig, BatchAggregation, DriverFrequency, DriverSeverity, ImpactLevel, RCANode, RCATree, RCATreeBuilder, SeverityCalculator, StatisticsCalculator, aggregate_batch, build_rca_tree, calculate_batch_statistics, calculate_driver_severities, ) from src.models.call_analysis import ( AgentClassification, AgentSkillIndicator, CallAnalysis, CallOutcome, ChurnRisk, EvidenceSpan, FCRStatus, ObservedFeatures, ProcessingStatus, RCALabel, Traceability, ) @pytest.fixture def sample_analyses(): """Create sample call analyses for testing (v2.0 with FCR, churn, agent).""" base_observed = ObservedFeatures( audio_duration_sec=60.0, events=[], ) base_traceability = Traceability( schema_version="1.0.0", prompt_version="v2.0", model_id="gpt-4o-mini", ) analyses = [] # Analysis 1: Lost sale due to price, first call, at risk analyses.append(CallAnalysis( call_id="CALL001", batch_id="test_batch", status=ProcessingStatus.SUCCESS, observed=base_observed, outcome=CallOutcome.SALE_LOST, lost_sales_drivers=[ RCALabel( driver_code="PRICE_TOO_HIGH", confidence=0.9, evidence_spans=[EvidenceSpan(text="Es muy caro", start_time=10, end_time=12)], ), ], poor_cx_drivers=[], fcr_status=FCRStatus.FIRST_CALL, churn_risk=ChurnRisk.AT_RISK, churn_risk_drivers=[ RCALabel( driver_code="COMPETITOR_MENTION", confidence=0.85, evidence_spans=[EvidenceSpan(text="Vodafone me ofrece", start_time=20, end_time=22)], ), ], agent_classification=AgentClassification.NEEDS_IMPROVEMENT, traceability=base_traceability, )) # Analysis 2: Lost sale due to price + competitor, repeat call analyses.append(CallAnalysis( call_id="CALL002", batch_id="test_batch", status=ProcessingStatus.SUCCESS, observed=base_observed, outcome=CallOutcome.SALE_LOST, lost_sales_drivers=[ RCALabel( driver_code="PRICE_TOO_HIGH", confidence=0.85, evidence_spans=[EvidenceSpan(text="Muy caro", start_time=15, end_time=17)], ), RCALabel( driver_code="COMPETITOR_PREFERENCE", confidence=0.8, evidence_spans=[EvidenceSpan(text="La competencia ofrece mejor", start_time=20, end_time=23)], ), ], poor_cx_drivers=[], fcr_status=FCRStatus.REPEAT_CALL, fcr_failure_drivers=[ RCALabel( driver_code="INCOMPLETE_RESOLUTION", confidence=0.8, evidence_spans=[EvidenceSpan(text="Ya llamé antes", start_time=5, end_time=7)], ), ], churn_risk=ChurnRisk.AT_RISK, agent_classification=AgentClassification.MIXED, traceability=base_traceability, )) # Analysis 3: Poor CX - long hold, first call, good agent analyses.append(CallAnalysis( call_id="CALL003", batch_id="test_batch", status=ProcessingStatus.SUCCESS, observed=base_observed, outcome=CallOutcome.INQUIRY_RESOLVED, lost_sales_drivers=[], poor_cx_drivers=[ RCALabel( driver_code="LONG_HOLD", confidence=0.95, evidence_spans=[EvidenceSpan(text="Mucho tiempo esperando", start_time=5, end_time=8)], ), ], fcr_status=FCRStatus.FIRST_CALL, churn_risk=ChurnRisk.NO_RISK, agent_classification=AgentClassification.GOOD_PERFORMER, agent_positive_skills=[ AgentSkillIndicator( skill_code="EMPATHY_SHOWN", skill_type="positive", confidence=0.9, evidence_spans=[EvidenceSpan(text="Entiendo su frustración", start_time=10, end_time=12)], description="Agent showed empathy", ), ], traceability=base_traceability, )) # Analysis 4: Both lost sale and poor CX, repeat call analyses.append(CallAnalysis( call_id="CALL004", batch_id="test_batch", status=ProcessingStatus.SUCCESS, observed=base_observed, outcome=CallOutcome.SALE_LOST, lost_sales_drivers=[ RCALabel( driver_code="PRICE_TOO_HIGH", confidence=0.75, evidence_spans=[EvidenceSpan(text="No puedo pagar", start_time=30, end_time=32)], ), ], poor_cx_drivers=[ RCALabel( driver_code="LOW_EMPATHY", confidence=0.7, evidence_spans=[EvidenceSpan(text="No me escucha", start_time=25, end_time=27)], ), ], fcr_status=FCRStatus.REPEAT_CALL, churn_risk=ChurnRisk.AT_RISK, agent_classification=AgentClassification.NEEDS_IMPROVEMENT, agent_improvement_areas=[ AgentSkillIndicator( skill_code="POOR_CLOSING", skill_type="improvement_needed", confidence=0.8, evidence_spans=[EvidenceSpan(text="Bueno, pues llame otro día", start_time=50, end_time=53)], description="Agent failed to close", ), ], traceability=base_traceability, )) # Analysis 5: Successful sale (no issues), first call, good agent analyses.append(CallAnalysis( call_id="CALL005", batch_id="test_batch", status=ProcessingStatus.SUCCESS, observed=base_observed, outcome=CallOutcome.SALE_COMPLETED, lost_sales_drivers=[], poor_cx_drivers=[], fcr_status=FCRStatus.FIRST_CALL, churn_risk=ChurnRisk.NO_RISK, agent_classification=AgentClassification.GOOD_PERFORMER, traceability=base_traceability, )) return analyses class TestDriverFrequency: """Tests for DriverFrequency model.""" def test_valid_frequency(self): """Test valid frequency creation.""" freq = DriverFrequency( driver_code="PRICE_TOO_HIGH", category="lost_sales", total_occurrences=3, calls_affected=3, total_calls_in_batch=5, occurrence_rate=0.6, call_rate=0.6, avg_confidence=0.83, min_confidence=0.75, max_confidence=0.9, ) assert freq.driver_code == "PRICE_TOO_HIGH" assert freq.occurrence_rate == 0.6 def test_invalid_rate(self): """Test that invalid rates raise error.""" with pytest.raises(ValueError): DriverFrequency( driver_code="TEST", category="lost_sales", total_occurrences=1, calls_affected=1, total_calls_in_batch=5, occurrence_rate=1.5, # Invalid! call_rate=0.2, avg_confidence=0.8, min_confidence=0.8, max_confidence=0.8, ) class TestDriverSeverity: """Tests for DriverSeverity model.""" def test_valid_severity(self): """Test valid severity creation.""" sev = DriverSeverity( driver_code="PRICE_TOO_HIGH", category="lost_sales", base_severity=0.8, frequency_factor=0.6, confidence_factor=0.85, co_occurrence_factor=0.3, severity_score=65.0, impact_level=ImpactLevel.HIGH, ) assert sev.severity_score == 65.0 assert sev.impact_level == ImpactLevel.HIGH def test_invalid_severity_score(self): """Test that invalid severity score raises error.""" with pytest.raises(ValueError): DriverSeverity( driver_code="TEST", category="lost_sales", base_severity=0.5, frequency_factor=0.5, confidence_factor=0.5, co_occurrence_factor=0.5, severity_score=150.0, # Invalid! impact_level=ImpactLevel.HIGH, ) class TestStatisticsCalculator: """Tests for StatisticsCalculator.""" def test_calculate_frequencies(self, sample_analyses): """Test frequency calculation (v2.0 dict format).""" calculator = StatisticsCalculator() frequencies = calculator.calculate_frequencies(sample_analyses) # Check all categories are present assert "lost_sales" in frequencies assert "poor_cx" in frequencies assert "fcr_failure" in frequencies assert "churn_risk" in frequencies assert "agent_positive" in frequencies assert "agent_improvement" in frequencies # PRICE_TOO_HIGH appears in 3 calls lost_sales = frequencies["lost_sales"] price_freq = next(f for f in lost_sales if f.driver_code == "PRICE_TOO_HIGH") assert price_freq.total_occurrences == 3 assert price_freq.calls_affected == 3 assert price_freq.call_rate == 0.6 # 3/5 calls # FCR failure drivers fcr_failure = frequencies["fcr_failure"] assert len(fcr_failure) == 1 # INCOMPLETE_RESOLUTION # Agent positive skills agent_positive = frequencies["agent_positive"] assert len(agent_positive) == 1 # EMPATHY_SHOWN def test_calculate_outcome_rates(self, sample_analyses): """Test outcome rate calculation with v2.0 metrics.""" calculator = StatisticsCalculator() rates = calculator.calculate_outcome_rates(sample_analyses) assert rates["total_calls"] == 5 assert rates["lost_sales_count"] == 3 # Calls with lost sales drivers assert rates["poor_cx_count"] == 2 # Calls with poor CX drivers assert rates["both_count"] == 1 # Calls with both # v2.0: FCR metrics assert rates["fcr"]["first_call"] == 3 assert rates["fcr"]["repeat_call"] == 2 assert rates["fcr"]["repeat_rate"] == 0.4 # 2/5 # v2.0: Churn metrics assert rates["churn"]["at_risk"] == 3 assert rates["churn"]["no_risk"] == 2 # v2.0: Agent metrics assert rates["agent"]["good_performer"] == 2 assert rates["agent"]["needs_improvement"] == 2 assert rates["agent"]["mixed"] == 1 def test_empty_analyses(self): """Test with empty analyses list.""" calculator = StatisticsCalculator() frequencies = calculator.calculate_frequencies([]) assert frequencies["lost_sales"] == [] assert frequencies["poor_cx"] == [] assert frequencies["fcr_failure"] == [] assert frequencies["churn_risk"] == [] def test_conditional_probabilities(self, sample_analyses): """Test conditional probability calculation.""" config = AggregationConfig(min_support=1) # Low threshold for test calculator = StatisticsCalculator(config=config) probs = calculator.calculate_conditional_probabilities(sample_analyses) # Should find relationships between drivers assert len(probs) > 0 class TestSeverityCalculator: """Tests for SeverityCalculator.""" def test_get_base_severity(self): """Test base severity lookup.""" calculator = SeverityCalculator() # From taxonomy assert calculator.get_base_severity("PRICE_TOO_HIGH", "lost_sales") == 0.8 assert calculator.get_base_severity("RUDE_BEHAVIOR", "poor_cx") == 0.9 # Unknown driver assert calculator.get_base_severity("UNKNOWN", "lost_sales") == 0.5 def test_calculate_severity(self): """Test severity calculation.""" calculator = SeverityCalculator() freq = DriverFrequency( driver_code="PRICE_TOO_HIGH", category="lost_sales", total_occurrences=3, calls_affected=3, total_calls_in_batch=5, occurrence_rate=0.6, call_rate=0.6, avg_confidence=0.85, min_confidence=0.75, max_confidence=0.9, commonly_co_occurs_with=["COMPETITOR_PREFERENCE"], ) severity = calculator.calculate_severity(freq) assert severity.driver_code == "PRICE_TOO_HIGH" assert severity.base_severity == 0.8 assert 0 <= severity.severity_score <= 100 assert severity.impact_level in [ ImpactLevel.CRITICAL, ImpactLevel.HIGH, ImpactLevel.MEDIUM, ImpactLevel.LOW, ] def test_impact_level_thresholds(self): """Test impact level determination.""" calculator = SeverityCalculator() # High severity + high frequency = CRITICAL high_freq = DriverFrequency( driver_code="TEST", category="lost_sales", total_occurrences=15, calls_affected=15, total_calls_in_batch=100, occurrence_rate=0.15, call_rate=0.15, # >10% avg_confidence=0.9, min_confidence=0.9, max_confidence=0.9, ) sev = calculator.calculate_severity(high_freq) # Should be HIGH or CRITICAL due to high frequency assert sev.impact_level in [ImpactLevel.CRITICAL, ImpactLevel.HIGH] class TestRCATreeBuilder: """Tests for RCATreeBuilder.""" def test_build_tree(self, sample_analyses): """Test RCA tree building.""" builder = RCATreeBuilder() tree = builder.build("test_batch", sample_analyses) assert tree.batch_id == "test_batch" assert tree.total_calls == 5 assert len(tree.lost_sales_root) > 0 assert len(tree.poor_cx_root) > 0 def test_top_drivers(self, sample_analyses): """Test top drivers extraction.""" builder = RCATreeBuilder() tree = builder.build("test_batch", sample_analyses) # PRICE_TOO_HIGH should be top driver assert "PRICE_TOO_HIGH" in tree.top_lost_sales_drivers def test_tree_to_dict(self, sample_analyses): """Test tree serialization.""" builder = RCATreeBuilder() tree = builder.build("test_batch", sample_analyses) tree_dict = tree.to_dict() assert "batch_id" in tree_dict assert "summary" in tree_dict assert "lost_sales_tree" in tree_dict assert "poor_cx_tree" in tree_dict def test_build_aggregation(self, sample_analyses): """Test full aggregation building.""" builder = RCATreeBuilder() agg = builder.build_aggregation("test_batch", sample_analyses) assert isinstance(agg, BatchAggregation) assert agg.total_calls_processed == 5 assert agg.successful_analyses == 5 assert agg.rca_tree is not None class TestConvenienceFunctions: """Tests for convenience functions.""" def test_calculate_batch_statistics(self, sample_analyses): """Test calculate_batch_statistics function (v2.0 enhanced).""" stats = calculate_batch_statistics(sample_analyses) # v1.0 keys assert "outcome_rates" in stats assert "lost_sales_frequencies" in stats assert "poor_cx_frequencies" in stats # v2.0 keys assert "fcr_failure_frequencies" in stats assert "churn_risk_frequencies" in stats assert "agent_positive_frequencies" in stats assert "agent_improvement_frequencies" in stats # v2.0 outcome_rates should have nested dicts assert "fcr" in stats["outcome_rates"] assert "churn" in stats["outcome_rates"] assert "agent" in stats["outcome_rates"] def test_build_rca_tree_function(self, sample_analyses): """Test build_rca_tree function.""" tree = build_rca_tree("test_batch", sample_analyses) assert isinstance(tree, RCATree) assert tree.batch_id == "test_batch" def test_aggregate_batch_function(self, sample_analyses): """Test aggregate_batch function.""" agg = aggregate_batch("test_batch", sample_analyses) assert isinstance(agg, BatchAggregation) assert agg.batch_id == "test_batch" class TestRCANode: """Tests for RCANode model.""" def test_node_to_dict(self): """Test node serialization.""" freq = DriverFrequency( driver_code="PRICE_TOO_HIGH", category="lost_sales", total_occurrences=3, calls_affected=3, total_calls_in_batch=5, occurrence_rate=0.6, call_rate=0.6, avg_confidence=0.85, min_confidence=0.75, max_confidence=0.9, ) sev = DriverSeverity( driver_code="PRICE_TOO_HIGH", category="lost_sales", base_severity=0.8, frequency_factor=0.6, confidence_factor=0.85, co_occurrence_factor=0.3, severity_score=65.0, impact_level=ImpactLevel.HIGH, ) node = RCANode( driver_code="PRICE_TOO_HIGH", category="lost_sales", frequency=freq, severity=sev, priority_rank=1, sample_evidence=["Es muy caro para mí"], ) node_dict = node.to_dict() assert node_dict["driver_code"] == "PRICE_TOO_HIGH" assert node_dict["priority_rank"] == 1 assert "frequency" in node_dict assert "severity" in node_dict class TestEmergentPatterns: """Tests for emergent pattern extraction.""" def test_extract_emergent(self): """Test emergent pattern extraction.""" base_observed = ObservedFeatures(audio_duration_sec=60.0, events=[]) base_trace = Traceability( schema_version="1.0.0", prompt_version="v1.0", model_id="gpt-4o-mini", ) analyses = [ CallAnalysis( call_id="EMG001", batch_id="test", status=ProcessingStatus.SUCCESS, observed=base_observed, outcome=CallOutcome.SALE_LOST, lost_sales_drivers=[ RCALabel( driver_code="OTHER_EMERGENT", confidence=0.7, evidence_spans=[ EvidenceSpan(text="Nuevo patrón", start_time=0, end_time=1) ], proposed_label="NEW_PATTERN", ) ], poor_cx_drivers=[], traceability=base_trace, ) ] calculator = StatisticsCalculator() emergent = calculator.extract_emergent_patterns(analyses) assert len(emergent) == 1 assert emergent[0]["proposed_label"] == "NEW_PATTERN" assert emergent[0]["occurrences"] == 1