Dashboard Features: - 8 navigation sections: Overview, Outcomes, Poor CX, FCR, Churn, Agent, Call Explorer, Export - Beyond Brand Identity styling (colors #6D84E3, Outfit font) - RCA Sankey diagram (Driver → Outcome → Churn Risk flow) - Correlation heatmaps (driver co-occurrence, driver-outcome) - Outcome Deep Dive (root causes, correlation, duration analysis) - Export functionality (Excel, HTML, JSON) Blueprint Compliance: - FCR: 4 categories (Primera Llamada/Rellamada × Sin/Con Riesgo de Fuga) - Churn: Binary view (Sin Riesgo de Fuga / En Riesgo de Fuga) - Agent: Talento Para Replicar / Oportunidades de Mejora - Fixed FCR rate calculation (only FIRST_CALL counts as success) Technical: - Streamlit + Plotly for interactive visualizations - Light theme configuration (.streamlit/config.toml) - Fixed Plotly colorbar titlefont deprecation Documentation: - Updated PROJECT_CONTEXT.md, TODO.md, CHANGELOG.md - Added 4 new technical decisions (TD-014 to TD-017) - Created TROUBLESHOOTING.md with 10 common issues Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
583 lines
19 KiB
Python
583 lines
19 KiB
Python
"""
|
|
CXInsights - Aggregation Module Tests
|
|
|
|
Tests for statistics, severity scoring, and RCA tree building.
|
|
v2.0: Updated with FCR, churn risk, and agent skill tests.
|
|
"""
|
|
|
|
import pytest
|
|
|
|
from src.aggregation import (
|
|
AggregationConfig,
|
|
BatchAggregation,
|
|
DriverFrequency,
|
|
DriverSeverity,
|
|
ImpactLevel,
|
|
RCANode,
|
|
RCATree,
|
|
RCATreeBuilder,
|
|
SeverityCalculator,
|
|
StatisticsCalculator,
|
|
aggregate_batch,
|
|
build_rca_tree,
|
|
calculate_batch_statistics,
|
|
calculate_driver_severities,
|
|
)
|
|
from src.models.call_analysis import (
|
|
AgentClassification,
|
|
AgentSkillIndicator,
|
|
CallAnalysis,
|
|
CallOutcome,
|
|
ChurnRisk,
|
|
EvidenceSpan,
|
|
FCRStatus,
|
|
ObservedFeatures,
|
|
ProcessingStatus,
|
|
RCALabel,
|
|
Traceability,
|
|
)
|
|
|
|
|
|
@pytest.fixture
def sample_analyses():
    """Return five successful ``CallAnalysis`` records for batch ``test_batch``.

    All records share one ``ObservedFeatures`` instance (60 s audio, no
    events) and one ``Traceability`` instance. They differ as follows:

    * CALL001 - sale lost (PRICE_TOO_HIGH), first call, at risk with a
      COMPETITOR_MENTION churn driver, agent needs improvement.
    * CALL002 - sale lost (PRICE_TOO_HIGH + COMPETITOR_PREFERENCE), repeat
      call with an INCOMPLETE_RESOLUTION FCR failure driver, at risk,
      mixed agent.
    * CALL003 - inquiry resolved, LONG_HOLD poor-CX driver, first call,
      no risk, good performer with an EMPATHY_SHOWN positive skill.
    * CALL004 - sale lost (PRICE_TOO_HIGH) plus LOW_EMPATHY poor-CX driver,
      repeat call, at risk, agent needs improvement with a POOR_CLOSING
      improvement area.
    * CALL005 - sale completed, no drivers, first call, no risk,
      good performer.
    """
    shared_observed = ObservedFeatures(
        audio_duration_sec=60.0,
        events=[],
    )
    shared_trace = Traceability(
        schema_version="1.0.0",
        prompt_version="v2.0",
        model_id="gpt-4o-mini",
    )

    def span(text, start, end):
        # Shorthand for the single evidence span attached to each label.
        return EvidenceSpan(text=text, start_time=start, end_time=end)

    # CALL001: lost sale due to price, first call, at risk
    lost_on_price = CallAnalysis(
        call_id="CALL001",
        batch_id="test_batch",
        status=ProcessingStatus.SUCCESS,
        observed=shared_observed,
        outcome=CallOutcome.SALE_LOST,
        lost_sales_drivers=[
            RCALabel(
                driver_code="PRICE_TOO_HIGH",
                confidence=0.9,
                evidence_spans=[span("Es muy caro", 10, 12)],
            ),
        ],
        poor_cx_drivers=[],
        fcr_status=FCRStatus.FIRST_CALL,
        churn_risk=ChurnRisk.AT_RISK,
        churn_risk_drivers=[
            RCALabel(
                driver_code="COMPETITOR_MENTION",
                confidence=0.85,
                evidence_spans=[span("Vodafone me ofrece", 20, 22)],
            ),
        ],
        agent_classification=AgentClassification.NEEDS_IMPROVEMENT,
        traceability=shared_trace,
    )

    # CALL002: lost sale due to price + competitor, repeat call
    lost_on_price_and_competitor = CallAnalysis(
        call_id="CALL002",
        batch_id="test_batch",
        status=ProcessingStatus.SUCCESS,
        observed=shared_observed,
        outcome=CallOutcome.SALE_LOST,
        lost_sales_drivers=[
            RCALabel(
                driver_code="PRICE_TOO_HIGH",
                confidence=0.85,
                evidence_spans=[span("Muy caro", 15, 17)],
            ),
            RCALabel(
                driver_code="COMPETITOR_PREFERENCE",
                confidence=0.8,
                evidence_spans=[span("La competencia ofrece mejor", 20, 23)],
            ),
        ],
        poor_cx_drivers=[],
        fcr_status=FCRStatus.REPEAT_CALL,
        fcr_failure_drivers=[
            RCALabel(
                driver_code="INCOMPLETE_RESOLUTION",
                confidence=0.8,
                evidence_spans=[span("Ya llamé antes", 5, 7)],
            ),
        ],
        churn_risk=ChurnRisk.AT_RISK,
        agent_classification=AgentClassification.MIXED,
        traceability=shared_trace,
    )

    # CALL003: poor CX (long hold), first call, good agent
    long_hold_resolved = CallAnalysis(
        call_id="CALL003",
        batch_id="test_batch",
        status=ProcessingStatus.SUCCESS,
        observed=shared_observed,
        outcome=CallOutcome.INQUIRY_RESOLVED,
        lost_sales_drivers=[],
        poor_cx_drivers=[
            RCALabel(
                driver_code="LONG_HOLD",
                confidence=0.95,
                evidence_spans=[span("Mucho tiempo esperando", 5, 8)],
            ),
        ],
        fcr_status=FCRStatus.FIRST_CALL,
        churn_risk=ChurnRisk.NO_RISK,
        agent_classification=AgentClassification.GOOD_PERFORMER,
        agent_positive_skills=[
            AgentSkillIndicator(
                skill_code="EMPATHY_SHOWN",
                skill_type="positive",
                confidence=0.9,
                evidence_spans=[span("Entiendo su frustración", 10, 12)],
                description="Agent showed empathy",
            ),
        ],
        traceability=shared_trace,
    )

    # CALL004: both lost sale and poor CX, repeat call
    lost_with_poor_cx = CallAnalysis(
        call_id="CALL004",
        batch_id="test_batch",
        status=ProcessingStatus.SUCCESS,
        observed=shared_observed,
        outcome=CallOutcome.SALE_LOST,
        lost_sales_drivers=[
            RCALabel(
                driver_code="PRICE_TOO_HIGH",
                confidence=0.75,
                evidence_spans=[span("No puedo pagar", 30, 32)],
            ),
        ],
        poor_cx_drivers=[
            RCALabel(
                driver_code="LOW_EMPATHY",
                confidence=0.7,
                evidence_spans=[span("No me escucha", 25, 27)],
            ),
        ],
        fcr_status=FCRStatus.REPEAT_CALL,
        churn_risk=ChurnRisk.AT_RISK,
        agent_classification=AgentClassification.NEEDS_IMPROVEMENT,
        agent_improvement_areas=[
            AgentSkillIndicator(
                skill_code="POOR_CLOSING",
                skill_type="improvement_needed",
                confidence=0.8,
                evidence_spans=[span("Bueno, pues llame otro día", 50, 53)],
                description="Agent failed to close",
            ),
        ],
        traceability=shared_trace,
    )

    # CALL005: successful sale (no issues), first call, good agent
    clean_sale = CallAnalysis(
        call_id="CALL005",
        batch_id="test_batch",
        status=ProcessingStatus.SUCCESS,
        observed=shared_observed,
        outcome=CallOutcome.SALE_COMPLETED,
        lost_sales_drivers=[],
        poor_cx_drivers=[],
        fcr_status=FCRStatus.FIRST_CALL,
        churn_risk=ChurnRisk.NO_RISK,
        agent_classification=AgentClassification.GOOD_PERFORMER,
        traceability=shared_trace,
    )

    return [
        lost_on_price,
        lost_on_price_and_competitor,
        long_hold_resolved,
        lost_with_poor_cx,
        clean_sale,
    ]
|
|
|
|
|
|
class TestDriverFrequency:
    """Construction and validation checks for the DriverFrequency model."""

    def test_valid_frequency(self):
        """An in-range frequency is accepted and exposes the given values."""
        valid_fields = dict(
            driver_code="PRICE_TOO_HIGH",
            category="lost_sales",
            total_occurrences=3,
            calls_affected=3,
            total_calls_in_batch=5,
            occurrence_rate=0.6,
            call_rate=0.6,
            avg_confidence=0.83,
            min_confidence=0.75,
            max_confidence=0.9,
        )

        frequency = DriverFrequency(**valid_fields)

        assert frequency.driver_code == "PRICE_TOO_HIGH"
        assert frequency.occurrence_rate == 0.6

    def test_invalid_rate(self):
        """Constructing with occurrence_rate=1.5 must raise ValueError."""
        out_of_range_fields = dict(
            driver_code="TEST",
            category="lost_sales",
            total_occurrences=1,
            calls_affected=1,
            total_calls_in_batch=5,
            occurrence_rate=1.5,  # Invalid!
            call_rate=0.2,
            avg_confidence=0.8,
            min_confidence=0.8,
            max_confidence=0.8,
        )

        with pytest.raises(ValueError):
            DriverFrequency(**out_of_range_fields)
|
|
|
|
|
|
class TestDriverSeverity:
    """Construction and validation checks for the DriverSeverity model."""

    def test_valid_severity(self):
        """A severity built from in-range values exposes score and level."""
        valid_fields = dict(
            driver_code="PRICE_TOO_HIGH",
            category="lost_sales",
            base_severity=0.8,
            frequency_factor=0.6,
            confidence_factor=0.85,
            co_occurrence_factor=0.3,
            severity_score=65.0,
            impact_level=ImpactLevel.HIGH,
        )

        severity = DriverSeverity(**valid_fields)

        assert severity.severity_score == 65.0
        assert severity.impact_level == ImpactLevel.HIGH

    def test_invalid_severity_score(self):
        """Constructing with severity_score=150.0 must raise ValueError."""
        out_of_range_fields = dict(
            driver_code="TEST",
            category="lost_sales",
            base_severity=0.5,
            frequency_factor=0.5,
            confidence_factor=0.5,
            co_occurrence_factor=0.5,
            severity_score=150.0,  # Invalid!
            impact_level=ImpactLevel.HIGH,
        )

        with pytest.raises(ValueError):
            DriverSeverity(**out_of_range_fields)
|
|
|
|
|
|
class TestStatisticsCalculator:
    """Tests for StatisticsCalculator.

    Rates produced by the calculator are compared with ``pytest.approx`` so
    the tests do not depend on the exact floating-point arithmetic used to
    derive them.
    """

    def test_calculate_frequencies(self, sample_analyses):
        """Test frequency calculation (v2.0 dict format)."""
        calculator = StatisticsCalculator()
        frequencies = calculator.calculate_frequencies(sample_analyses)

        # Check all categories are present
        assert "lost_sales" in frequencies
        assert "poor_cx" in frequencies
        assert "fcr_failure" in frequencies
        assert "churn_risk" in frequencies
        assert "agent_positive" in frequencies
        assert "agent_improvement" in frequencies

        # PRICE_TOO_HIGH appears in 3 of the fixture's calls.
        # Use a default so a missing driver fails with a readable assertion
        # instead of a bare StopIteration.
        lost_sales = frequencies["lost_sales"]
        price_freq = next(
            (f for f in lost_sales if f.driver_code == "PRICE_TOO_HIGH"),
            None,
        )
        assert price_freq is not None, (
            "PRICE_TOO_HIGH not found in lost_sales frequencies"
        )
        assert price_freq.total_occurrences == 3
        assert price_freq.calls_affected == 3
        assert price_freq.call_rate == pytest.approx(0.6)  # 3/5 calls

        # FCR failure drivers
        fcr_failure = frequencies["fcr_failure"]
        assert len(fcr_failure) == 1  # INCOMPLETE_RESOLUTION

        # Agent positive skills
        agent_positive = frequencies["agent_positive"]
        assert len(agent_positive) == 1  # EMPATHY_SHOWN

    def test_calculate_outcome_rates(self, sample_analyses):
        """Test outcome rate calculation with v2.0 metrics."""
        calculator = StatisticsCalculator()
        rates = calculator.calculate_outcome_rates(sample_analyses)

        assert rates["total_calls"] == 5
        assert rates["lost_sales_count"] == 3  # Calls with lost sales drivers
        assert rates["poor_cx_count"] == 2  # Calls with poor CX drivers
        assert rates["both_count"] == 1  # Calls with both

        # v2.0: FCR metrics
        assert rates["fcr"]["first_call"] == 3
        assert rates["fcr"]["repeat_call"] == 2
        assert rates["fcr"]["repeat_rate"] == pytest.approx(0.4)  # 2/5

        # v2.0: Churn metrics
        assert rates["churn"]["at_risk"] == 3
        assert rates["churn"]["no_risk"] == 2

        # v2.0: Agent metrics
        assert rates["agent"]["good_performer"] == 2
        assert rates["agent"]["needs_improvement"] == 2
        assert rates["agent"]["mixed"] == 1

    def test_empty_analyses(self):
        """Test with empty analyses list."""
        calculator = StatisticsCalculator()
        frequencies = calculator.calculate_frequencies([])

        assert frequencies["lost_sales"] == []
        assert frequencies["poor_cx"] == []
        assert frequencies["fcr_failure"] == []
        assert frequencies["churn_risk"] == []

    def test_conditional_probabilities(self, sample_analyses):
        """Test conditional probability calculation."""
        config = AggregationConfig(min_support=1)  # Low threshold for test
        calculator = StatisticsCalculator(config=config)
        probs = calculator.calculate_conditional_probabilities(sample_analyses)

        # Should find relationships between drivers
        assert len(probs) > 0
|
|
|
|
|
|
class TestSeverityCalculator:
    """Checks for SeverityCalculator lookups and severity scoring."""

    def test_get_base_severity(self):
        """Base severity is looked up per (driver, category) pair."""
        scorer = SeverityCalculator()

        expected_by_driver = (
            # From taxonomy
            ("PRICE_TOO_HIGH", "lost_sales", 0.8),
            ("RUDE_BEHAVIOR", "poor_cx", 0.9),
            # Unknown driver
            ("UNKNOWN", "lost_sales", 0.5),
        )
        for driver_code, category, expected in expected_by_driver:
            assert scorer.get_base_severity(driver_code, category) == expected

    def test_calculate_severity(self):
        """Scoring a frequency yields a bounded score and a known level."""
        scorer = SeverityCalculator()
        price_frequency = DriverFrequency(
            driver_code="PRICE_TOO_HIGH",
            category="lost_sales",
            total_occurrences=3,
            calls_affected=3,
            total_calls_in_batch=5,
            occurrence_rate=0.6,
            call_rate=0.6,
            avg_confidence=0.85,
            min_confidence=0.75,
            max_confidence=0.9,
            commonly_co_occurs_with=["COMPETITOR_PREFERENCE"],
        )

        result = scorer.calculate_severity(price_frequency)

        assert result.driver_code == "PRICE_TOO_HIGH"
        assert result.base_severity == 0.8
        assert 0 <= result.severity_score <= 100

        known_levels = [
            ImpactLevel.CRITICAL,
            ImpactLevel.HIGH,
            ImpactLevel.MEDIUM,
            ImpactLevel.LOW,
        ]
        assert result.impact_level in known_levels

    def test_impact_level_thresholds(self):
        """A driver affecting 15% of calls lands in one of the top two levels."""
        scorer = SeverityCalculator()

        # 15 of 100 calls affected (call_rate above 10%), confidence 0.9.
        # NOTE(review): the assertion below accepts HIGH as well as CRITICAL;
        # confirm which level the thresholds are meant to produce here.
        frequent_driver = DriverFrequency(
            driver_code="TEST",
            category="lost_sales",
            total_occurrences=15,
            calls_affected=15,
            total_calls_in_batch=100,
            occurrence_rate=0.15,
            call_rate=0.15,  # >10%
            avg_confidence=0.9,
            min_confidence=0.9,
            max_confidence=0.9,
        )

        result = scorer.calculate_severity(frequent_driver)

        top_levels = [ImpactLevel.CRITICAL, ImpactLevel.HIGH]
        assert result.impact_level in top_levels
|
|
|
|
|
|
class TestRCATreeBuilder:
    """Checks for RCATreeBuilder tree and aggregation construction."""

    @staticmethod
    def _build_sample_tree(analyses):
        # Build the tree for the shared "test_batch" id with a fresh builder.
        return RCATreeBuilder().build("test_batch", analyses)

    def test_build_tree(self, sample_analyses):
        """The built tree carries batch id, call count and both roots."""
        rca_tree = self._build_sample_tree(sample_analyses)

        assert rca_tree.batch_id == "test_batch"
        assert rca_tree.total_calls == 5
        assert len(rca_tree.lost_sales_root) > 0
        assert len(rca_tree.poor_cx_root) > 0

    def test_top_drivers(self, sample_analyses):
        """PRICE_TOO_HIGH is listed among the top lost-sales drivers."""
        rca_tree = self._build_sample_tree(sample_analyses)

        top_lost_sales = rca_tree.top_lost_sales_drivers
        assert "PRICE_TOO_HIGH" in top_lost_sales

    def test_tree_to_dict(self, sample_analyses):
        """Serialising the tree produces the expected top-level keys."""
        rca_tree = self._build_sample_tree(sample_analyses)

        serialized = rca_tree.to_dict()

        for expected_key in (
            "batch_id",
            "summary",
            "lost_sales_tree",
            "poor_cx_tree",
        ):
            assert expected_key in serialized

    def test_build_aggregation(self, sample_analyses):
        """build_aggregation returns a populated BatchAggregation."""
        aggregation = RCATreeBuilder().build_aggregation(
            "test_batch", sample_analyses
        )

        assert isinstance(aggregation, BatchAggregation)
        assert aggregation.total_calls_processed == 5
        assert aggregation.successful_analyses == 5
        assert aggregation.rca_tree is not None
|
|
|
|
|
|
class TestConvenienceFunctions:
    """Checks for the module-level convenience wrappers."""

    def test_calculate_batch_statistics(self, sample_analyses):
        """calculate_batch_statistics exposes the v1.0 and v2.0 result keys."""
        batch_stats = calculate_batch_statistics(sample_analyses)

        v1_keys = (
            "outcome_rates",
            "lost_sales_frequencies",
            "poor_cx_frequencies",
        )
        v2_keys = (
            "fcr_failure_frequencies",
            "churn_risk_frequencies",
            "agent_positive_frequencies",
            "agent_improvement_frequencies",
        )
        for key in v1_keys + v2_keys:
            assert key in batch_stats

        # v2.0 outcome_rates should have nested dicts
        outcome_rates = batch_stats["outcome_rates"]
        for section in ("fcr", "churn", "agent"):
            assert section in outcome_rates

    def test_build_rca_tree_function(self, sample_analyses):
        """build_rca_tree returns an RCATree tagged with the batch id."""
        rca_tree = build_rca_tree("test_batch", sample_analyses)

        assert isinstance(rca_tree, RCATree)
        assert rca_tree.batch_id == "test_batch"

    def test_aggregate_batch_function(self, sample_analyses):
        """aggregate_batch returns a BatchAggregation tagged with the batch id."""
        aggregation = aggregate_batch("test_batch", sample_analyses)

        assert isinstance(aggregation, BatchAggregation)
        assert aggregation.batch_id == "test_batch"
|
|
|
|
|
|
class TestRCANode:
    """Checks for the RCANode model."""

    def test_node_to_dict(self):
        """to_dict keeps the driver code and rank and nests both sub-models."""
        driver, category = "PRICE_TOO_HIGH", "lost_sales"

        node_frequency = DriverFrequency(
            driver_code=driver,
            category=category,
            total_occurrences=3,
            calls_affected=3,
            total_calls_in_batch=5,
            occurrence_rate=0.6,
            call_rate=0.6,
            avg_confidence=0.85,
            min_confidence=0.75,
            max_confidence=0.9,
        )
        node_severity = DriverSeverity(
            driver_code=driver,
            category=category,
            base_severity=0.8,
            frequency_factor=0.6,
            confidence_factor=0.85,
            co_occurrence_factor=0.3,
            severity_score=65.0,
            impact_level=ImpactLevel.HIGH,
        )
        rca_node = RCANode(
            driver_code=driver,
            category=category,
            frequency=node_frequency,
            severity=node_severity,
            priority_rank=1,
            sample_evidence=["Es muy caro para mí"],
        )

        serialized = rca_node.to_dict()

        assert serialized["driver_code"] == "PRICE_TOO_HIGH"
        assert serialized["priority_rank"] == 1
        for nested_key in ("frequency", "severity"):
            assert nested_key in serialized
|
|
|
|
|
|
class TestEmergentPatterns:
    """Checks for emergent pattern extraction."""

    def test_extract_emergent(self):
        """One OTHER_EMERGENT label with a proposed label yields one pattern."""
        emergent_label = RCALabel(
            driver_code="OTHER_EMERGENT",
            confidence=0.7,
            evidence_spans=[
                EvidenceSpan(text="Nuevo patrón", start_time=0, end_time=1)
            ],
            proposed_label="NEW_PATTERN",
        )
        single_call = CallAnalysis(
            call_id="EMG001",
            batch_id="test",
            status=ProcessingStatus.SUCCESS,
            observed=ObservedFeatures(audio_duration_sec=60.0, events=[]),
            outcome=CallOutcome.SALE_LOST,
            lost_sales_drivers=[emergent_label],
            poor_cx_drivers=[],
            traceability=Traceability(
                schema_version="1.0.0",
                prompt_version="v1.0",
                model_id="gpt-4o-mini",
            ),
        )

        stats = StatisticsCalculator()
        patterns = stats.extract_emergent_patterns([single_call])

        assert len(patterns) == 1
        first_pattern = patterns[0]
        assert first_pattern["proposed_label"] == "NEW_PATTERN"
        assert first_pattern["occurrences"] == 1
|