Files
BeyondCX_Insights/tests/unit/test_aggregation.py
sujucu70 75e7b9da3d feat: Add Streamlit dashboard with Blueprint compliance (v2.1.0)
Dashboard Features:
- 8 navigation sections: Overview, Outcomes, Poor CX, FCR, Churn, Agent, Call Explorer, Export
- Beyond Brand Identity styling (colors #6D84E3, Outfit font)
- RCA Sankey diagram (Driver → Outcome → Churn Risk flow)
- Correlation heatmaps (driver co-occurrence, driver-outcome)
- Outcome Deep Dive (root causes, correlation, duration analysis)
- Export functionality (Excel, HTML, JSON)

Blueprint Compliance:
- FCR: 4 categories (Primera Llamada/Rellamada × Sin/Con Riesgo de Fuga)
- Churn: Binary view (Sin Riesgo de Fuga / En Riesgo de Fuga)
- Agent: Talento Para Replicar / Oportunidades de Mejora
- Fixed FCR rate calculation (only FIRST_CALL counts as success)

Technical:
- Streamlit + Plotly for interactive visualizations
- Light theme configuration (.streamlit/config.toml)
- Fixed Plotly colorbar titlefont deprecation

Documentation:
- Updated PROJECT_CONTEXT.md, TODO.md, CHANGELOG.md
- Added 4 new technical decisions (TD-014 to TD-017)
- Created TROUBLESHOOTING.md with 10 common issues

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-19 16:27:30 +01:00

583 lines
19 KiB
Python

"""
CXInsights - Aggregation Module Tests
Tests for statistics, severity scoring, and RCA tree building.
v2.0: Updated with FCR, churn risk, and agent skill tests.
"""
import pytest
from src.aggregation import (
AggregationConfig,
BatchAggregation,
DriverFrequency,
DriverSeverity,
ImpactLevel,
RCANode,
RCATree,
RCATreeBuilder,
SeverityCalculator,
StatisticsCalculator,
aggregate_batch,
build_rca_tree,
calculate_batch_statistics,
calculate_driver_severities,
)
from src.models.call_analysis import (
AgentClassification,
AgentSkillIndicator,
CallAnalysis,
CallOutcome,
ChurnRisk,
EvidenceSpan,
FCRStatus,
ObservedFeatures,
ProcessingStatus,
RCALabel,
Traceability,
)
@pytest.fixture
def sample_analyses():
    """Build five representative CallAnalysis objects for testing (v2.0 with FCR, churn, agent)."""
    observed = ObservedFeatures(
        audio_duration_sec=60.0,
        events=[],
    )
    trace = Traceability(
        schema_version="1.0.0",
        prompt_version="v2.0",
        model_id="gpt-4o-mini",
    )

    def span(text, start, end):
        """Shorthand for a single-element evidence-span list."""
        return [EvidenceSpan(text=text, start_time=start, end_time=end)]

    def make(call_id, **overrides):
        """Merge the shared per-call defaults with call-specific overrides."""
        fields = {
            "call_id": call_id,
            "batch_id": "test_batch",
            "status": ProcessingStatus.SUCCESS,
            "observed": observed,
            "lost_sales_drivers": [],
            "poor_cx_drivers": [],
            "traceability": trace,
        }
        fields.update(overrides)
        return CallAnalysis(**fields)

    return [
        # 1: lost sale due to price, first call, at churn risk
        make(
            "CALL001",
            outcome=CallOutcome.SALE_LOST,
            lost_sales_drivers=[
                RCALabel(
                    driver_code="PRICE_TOO_HIGH",
                    confidence=0.9,
                    evidence_spans=span("Es muy caro", 10, 12),
                ),
            ],
            fcr_status=FCRStatus.FIRST_CALL,
            churn_risk=ChurnRisk.AT_RISK,
            churn_risk_drivers=[
                RCALabel(
                    driver_code="COMPETITOR_MENTION",
                    confidence=0.85,
                    evidence_spans=span("Vodafone me ofrece", 20, 22),
                ),
            ],
            agent_classification=AgentClassification.NEEDS_IMPROVEMENT,
        ),
        # 2: lost sale due to price + competitor, repeat call
        make(
            "CALL002",
            outcome=CallOutcome.SALE_LOST,
            lost_sales_drivers=[
                RCALabel(
                    driver_code="PRICE_TOO_HIGH",
                    confidence=0.85,
                    evidence_spans=span("Muy caro", 15, 17),
                ),
                RCALabel(
                    driver_code="COMPETITOR_PREFERENCE",
                    confidence=0.8,
                    evidence_spans=span("La competencia ofrece mejor", 20, 23),
                ),
            ],
            fcr_status=FCRStatus.REPEAT_CALL,
            fcr_failure_drivers=[
                RCALabel(
                    driver_code="INCOMPLETE_RESOLUTION",
                    confidence=0.8,
                    evidence_spans=span("Ya llamé antes", 5, 7),
                ),
            ],
            churn_risk=ChurnRisk.AT_RISK,
            agent_classification=AgentClassification.MIXED,
        ),
        # 3: poor CX (long hold), first call, good agent
        make(
            "CALL003",
            outcome=CallOutcome.INQUIRY_RESOLVED,
            poor_cx_drivers=[
                RCALabel(
                    driver_code="LONG_HOLD",
                    confidence=0.95,
                    evidence_spans=span("Mucho tiempo esperando", 5, 8),
                ),
            ],
            fcr_status=FCRStatus.FIRST_CALL,
            churn_risk=ChurnRisk.NO_RISK,
            agent_classification=AgentClassification.GOOD_PERFORMER,
            agent_positive_skills=[
                AgentSkillIndicator(
                    skill_code="EMPATHY_SHOWN",
                    skill_type="positive",
                    confidence=0.9,
                    evidence_spans=span("Entiendo su frustración", 10, 12),
                    description="Agent showed empathy",
                ),
            ],
        ),
        # 4: both lost sale and poor CX, repeat call
        make(
            "CALL004",
            outcome=CallOutcome.SALE_LOST,
            lost_sales_drivers=[
                RCALabel(
                    driver_code="PRICE_TOO_HIGH",
                    confidence=0.75,
                    evidence_spans=span("No puedo pagar", 30, 32),
                ),
            ],
            poor_cx_drivers=[
                RCALabel(
                    driver_code="LOW_EMPATHY",
                    confidence=0.7,
                    evidence_spans=span("No me escucha", 25, 27),
                ),
            ],
            fcr_status=FCRStatus.REPEAT_CALL,
            churn_risk=ChurnRisk.AT_RISK,
            agent_classification=AgentClassification.NEEDS_IMPROVEMENT,
            agent_improvement_areas=[
                AgentSkillIndicator(
                    skill_code="POOR_CLOSING",
                    skill_type="improvement_needed",
                    confidence=0.8,
                    evidence_spans=span("Bueno, pues llame otro día", 50, 53),
                    description="Agent failed to close",
                ),
            ],
        ),
        # 5: successful sale with no issues, first call, good agent
        make(
            "CALL005",
            outcome=CallOutcome.SALE_COMPLETED,
            fcr_status=FCRStatus.FIRST_CALL,
            churn_risk=ChurnRisk.NO_RISK,
            agent_classification=AgentClassification.GOOD_PERFORMER,
        ),
    ]
class TestDriverFrequency:
    """Tests for the DriverFrequency model."""

    def test_valid_frequency(self):
        """A well-formed frequency round-trips its fields."""
        fields = {
            "driver_code": "PRICE_TOO_HIGH",
            "category": "lost_sales",
            "total_occurrences": 3,
            "calls_affected": 3,
            "total_calls_in_batch": 5,
            "occurrence_rate": 0.6,
            "call_rate": 0.6,
            "avg_confidence": 0.83,
            "min_confidence": 0.75,
            "max_confidence": 0.9,
        }
        freq = DriverFrequency(**fields)
        assert freq.driver_code == "PRICE_TOO_HIGH"
        assert freq.occurrence_rate == 0.6

    def test_invalid_rate(self):
        """An occurrence_rate outside [0, 1] must be rejected."""
        with pytest.raises(ValueError):
            DriverFrequency(
                driver_code="TEST",
                category="lost_sales",
                total_occurrences=1,
                calls_affected=1,
                total_calls_in_batch=5,
                occurrence_rate=1.5,  # out of range on purpose
                call_rate=0.2,
                avg_confidence=0.8,
                min_confidence=0.8,
                max_confidence=0.8,
            )
class TestDriverSeverity:
    """Tests for the DriverSeverity model."""

    def test_valid_severity(self):
        """A well-formed severity round-trips its score and impact level."""
        fields = {
            "driver_code": "PRICE_TOO_HIGH",
            "category": "lost_sales",
            "base_severity": 0.8,
            "frequency_factor": 0.6,
            "confidence_factor": 0.85,
            "co_occurrence_factor": 0.3,
            "severity_score": 65.0,
            "impact_level": ImpactLevel.HIGH,
        }
        sev = DriverSeverity(**fields)
        assert sev.severity_score == 65.0
        assert sev.impact_level == ImpactLevel.HIGH

    def test_invalid_severity_score(self):
        """A severity score above 100 must be rejected."""
        with pytest.raises(ValueError):
            DriverSeverity(
                driver_code="TEST",
                category="lost_sales",
                base_severity=0.5,
                frequency_factor=0.5,
                confidence_factor=0.5,
                co_occurrence_factor=0.5,
                severity_score=150.0,  # out of range on purpose
                impact_level=ImpactLevel.HIGH,
            )
class TestStatisticsCalculator:
    """Tests for StatisticsCalculator."""

    def test_calculate_frequencies(self, sample_analyses):
        """All v2.0 categories exist and per-driver counts are correct."""
        frequencies = StatisticsCalculator().calculate_frequencies(sample_analyses)
        for category in (
            "lost_sales",
            "poor_cx",
            "fcr_failure",
            "churn_risk",
            "agent_positive",
            "agent_improvement",
        ):
            assert category in frequencies
        # PRICE_TOO_HIGH shows up in 3 of the 5 sample calls.
        by_code = {f.driver_code: f for f in frequencies["lost_sales"]}
        price_freq = by_code["PRICE_TOO_HIGH"]
        assert price_freq.total_occurrences == 3
        assert price_freq.calls_affected == 3
        assert price_freq.call_rate == 0.6  # 3/5 calls
        # Single FCR failure driver (INCOMPLETE_RESOLUTION).
        assert len(frequencies["fcr_failure"]) == 1
        # Single positive agent skill (EMPATHY_SHOWN).
        assert len(frequencies["agent_positive"]) == 1

    def test_calculate_outcome_rates(self, sample_analyses):
        """Outcome rates include v1.0 counts plus v2.0 FCR/churn/agent metrics."""
        rates = StatisticsCalculator().calculate_outcome_rates(sample_analyses)
        assert rates["total_calls"] == 5
        # Calls with lost-sales drivers, poor-CX drivers, and both.
        assert (
            rates["lost_sales_count"],
            rates["poor_cx_count"],
            rates["both_count"],
        ) == (3, 2, 1)
        # v2.0 FCR metrics
        fcr = rates["fcr"]
        assert fcr["first_call"] == 3
        assert fcr["repeat_call"] == 2
        assert fcr["repeat_rate"] == 0.4  # 2/5
        # v2.0 churn metrics
        churn = rates["churn"]
        assert churn["at_risk"] == 3
        assert churn["no_risk"] == 2
        # v2.0 agent metrics
        agent = rates["agent"]
        assert agent["good_performer"] == 2
        assert agent["needs_improvement"] == 2
        assert agent["mixed"] == 1

    def test_empty_analyses(self):
        """An empty input yields empty frequency lists for every category."""
        frequencies = StatisticsCalculator().calculate_frequencies([])
        for category in ("lost_sales", "poor_cx", "fcr_failure", "churn_risk"):
            assert frequencies[category] == []

    def test_conditional_probabilities(self, sample_analyses):
        """With a low support threshold, driver relationships are found."""
        calculator = StatisticsCalculator(config=AggregationConfig(min_support=1))
        probs = calculator.calculate_conditional_probabilities(sample_analyses)
        assert len(probs) > 0
class TestSeverityCalculator:
    """Tests for SeverityCalculator."""

    def test_get_base_severity(self):
        """Known drivers come from the taxonomy; unknown ones fall back to 0.5."""
        calculator = SeverityCalculator()
        taxonomy_cases = [
            ("PRICE_TOO_HIGH", "lost_sales", 0.8),
            ("RUDE_BEHAVIOR", "poor_cx", 0.9),
            ("UNKNOWN", "lost_sales", 0.5),  # default for unlisted drivers
        ]
        for code, category, expected in taxonomy_cases:
            assert calculator.get_base_severity(code, category) == expected

    def test_calculate_severity(self):
        """The computed score stays in [0, 100] with a valid impact level."""
        freq = DriverFrequency(
            driver_code="PRICE_TOO_HIGH",
            category="lost_sales",
            total_occurrences=3,
            calls_affected=3,
            total_calls_in_batch=5,
            occurrence_rate=0.6,
            call_rate=0.6,
            avg_confidence=0.85,
            min_confidence=0.75,
            max_confidence=0.9,
            commonly_co_occurs_with=["COMPETITOR_PREFERENCE"],
        )
        severity = SeverityCalculator().calculate_severity(freq)
        assert severity.driver_code == "PRICE_TOO_HIGH"
        assert severity.base_severity == 0.8
        assert 0 <= severity.severity_score <= 100
        valid_levels = {
            ImpactLevel.CRITICAL,
            ImpactLevel.HIGH,
            ImpactLevel.MEDIUM,
            ImpactLevel.LOW,
        }
        assert severity.impact_level in valid_levels

    def test_impact_level_thresholds(self):
        """High base severity plus high frequency lands at HIGH or CRITICAL."""
        high_freq = DriverFrequency(
            driver_code="TEST",
            category="lost_sales",
            total_occurrences=15,
            calls_affected=15,
            total_calls_in_batch=100,
            occurrence_rate=0.15,
            call_rate=0.15,  # above the 10% threshold
            avg_confidence=0.9,
            min_confidence=0.9,
            max_confidence=0.9,
        )
        sev = SeverityCalculator().calculate_severity(high_freq)
        assert sev.impact_level in [ImpactLevel.CRITICAL, ImpactLevel.HIGH]
class TestRCATreeBuilder:
    """Tests for RCATreeBuilder."""

    def test_build_tree(self, sample_analyses):
        """Building a tree populates both lost-sales and poor-CX roots."""
        tree = RCATreeBuilder().build("test_batch", sample_analyses)
        assert tree.batch_id == "test_batch"
        assert tree.total_calls == 5
        assert tree.lost_sales_root
        assert tree.poor_cx_root

    def test_top_drivers(self, sample_analyses):
        """PRICE_TOO_HIGH (3 of 5 calls) must rank among the top drivers."""
        tree = RCATreeBuilder().build("test_batch", sample_analyses)
        assert "PRICE_TOO_HIGH" in tree.top_lost_sales_drivers

    def test_tree_to_dict(self, sample_analyses):
        """Serialization exposes the expected top-level keys."""
        tree = RCATreeBuilder().build("test_batch", sample_analyses)
        tree_dict = tree.to_dict()
        for key in ("batch_id", "summary", "lost_sales_tree", "poor_cx_tree"):
            assert key in tree_dict

    def test_build_aggregation(self, sample_analyses):
        """Full aggregation carries counts and an attached RCA tree."""
        agg = RCATreeBuilder().build_aggregation("test_batch", sample_analyses)
        assert isinstance(agg, BatchAggregation)
        assert agg.total_calls_processed == 5
        assert agg.successful_analyses == 5
        assert agg.rca_tree is not None
class TestConvenienceFunctions:
    """Tests for module-level convenience functions."""

    def test_calculate_batch_statistics(self, sample_analyses):
        """calculate_batch_statistics returns both v1.0 and v2.0 keys."""
        stats = calculate_batch_statistics(sample_analyses)
        expected_keys = (
            # v1.0 keys
            "outcome_rates",
            "lost_sales_frequencies",
            "poor_cx_frequencies",
            # v2.0 keys
            "fcr_failure_frequencies",
            "churn_risk_frequencies",
            "agent_positive_frequencies",
            "agent_improvement_frequencies",
        )
        for key in expected_keys:
            assert key in stats
        # v2.0 outcome_rates must carry nested metric dicts.
        for nested in ("fcr", "churn", "agent"):
            assert nested in stats["outcome_rates"]

    def test_build_rca_tree_function(self, sample_analyses):
        """build_rca_tree returns an RCATree tagged with the batch id."""
        tree = build_rca_tree("test_batch", sample_analyses)
        assert isinstance(tree, RCATree)
        assert tree.batch_id == "test_batch"

    def test_aggregate_batch_function(self, sample_analyses):
        """aggregate_batch returns a BatchAggregation tagged with the batch id."""
        agg = aggregate_batch("test_batch", sample_analyses)
        assert isinstance(agg, BatchAggregation)
        assert agg.batch_id == "test_batch"
class TestRCANode:
    """Tests for the RCANode model."""

    def test_node_to_dict(self):
        """A node serializes its code, rank, and nested frequency/severity."""
        frequency = DriverFrequency(
            driver_code="PRICE_TOO_HIGH",
            category="lost_sales",
            total_occurrences=3,
            calls_affected=3,
            total_calls_in_batch=5,
            occurrence_rate=0.6,
            call_rate=0.6,
            avg_confidence=0.85,
            min_confidence=0.75,
            max_confidence=0.9,
        )
        severity = DriverSeverity(
            driver_code="PRICE_TOO_HIGH",
            category="lost_sales",
            base_severity=0.8,
            frequency_factor=0.6,
            confidence_factor=0.85,
            co_occurrence_factor=0.3,
            severity_score=65.0,
            impact_level=ImpactLevel.HIGH,
        )
        node_dict = RCANode(
            driver_code="PRICE_TOO_HIGH",
            category="lost_sales",
            frequency=frequency,
            severity=severity,
            priority_rank=1,
            sample_evidence=["Es muy caro para mí"],
        ).to_dict()
        assert node_dict["driver_code"] == "PRICE_TOO_HIGH"
        assert node_dict["priority_rank"] == 1
        for key in ("frequency", "severity"):
            assert key in node_dict
class TestEmergentPatterns:
    """Tests for emergent pattern extraction."""

    def test_extract_emergent(self):
        """An OTHER_EMERGENT driver with a proposed label is surfaced once."""
        observed = ObservedFeatures(audio_duration_sec=60.0, events=[])
        trace = Traceability(
            schema_version="1.0.0",
            prompt_version="v1.0",
            model_id="gpt-4o-mini",
        )
        emergent_label = RCALabel(
            driver_code="OTHER_EMERGENT",
            confidence=0.7,
            evidence_spans=[
                EvidenceSpan(text="Nuevo patrón", start_time=0, end_time=1)
            ],
            proposed_label="NEW_PATTERN",
        )
        analysis = CallAnalysis(
            call_id="EMG001",
            batch_id="test",
            status=ProcessingStatus.SUCCESS,
            observed=observed,
            outcome=CallOutcome.SALE_LOST,
            lost_sales_drivers=[emergent_label],
            poor_cx_drivers=[],
            traceability=trace,
        )
        emergent = StatisticsCalculator().extract_emergent_patterns([analysis])
        assert len(emergent) == 1
        first = emergent[0]
        assert first["proposed_label"] == "NEW_PATTERN"
        assert first["occurrences"] == 1