Dashboard Features: - 8 navigation sections: Overview, Outcomes, Poor CX, FCR, Churn, Agent, Call Explorer, Export - Beyond Brand Identity styling (colors #6D84E3, Outfit font) - RCA Sankey diagram (Driver → Outcome → Churn Risk flow) - Correlation heatmaps (driver co-occurrence, driver-outcome) - Outcome Deep Dive (root causes, correlation, duration analysis) - Export functionality (Excel, HTML, JSON) Blueprint Compliance: - FCR: 4 categories (Primera Llamada/Rellamada × Sin/Con Riesgo de Fuga) - Churn: Binary view (Sin Riesgo de Fuga / En Riesgo de Fuga) - Agent: Talento Para Replicar / Oportunidades de Mejora - Fixed FCR rate calculation (only FIRST_CALL counts as success) Technical: - Streamlit + Plotly for interactive visualizations - Light theme configuration (.streamlit/config.toml) - Fixed Plotly colorbar titlefont deprecation Documentation: - Updated PROJECT_CONTEXT.md, TODO.md, CHANGELOG.md - Added 4 new technical decisions (TD-014 to TD-017) - Created TROUBLESHOOTING.md with 10 common issues Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
350 lines
11 KiB
Python
350 lines
11 KiB
Python
"""
|
|
CXInsights - Aggregation Models
|
|
|
|
Data models for aggregated RCA analysis.
|
|
Transforms individual call analyses into actionable insights.
|
|
"""
|
|
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime
|
|
from enum import Enum
|
|
from typing import Literal
|
|
|
|
|
|
class ImpactLevel(str, Enum):
    """Impact level assigned to RCA drivers.

    Members are ordered from most to least severe; the comments give the
    call-rate thresholds used when classifying (see AggregationConfig).
    """

    # >10% of calls, high severity
    CRITICAL = "critical"
    # >5% of calls or high severity
    HIGH = "high"
    # >2% of calls
    MEDIUM = "medium"
    # <2% of calls
    LOW = "low"
|
|
|
|
|
|
class TrendDirection(str, Enum):
    """Direction of a metric's trend in time-series analysis.

    UNKNOWN covers the case where not enough data exists to call a trend.
    """

    INCREASING = "increasing"
    STABLE = "stable"
    DECREASING = "decreasing"
    UNKNOWN = "unknown"
|
|
|
|
|
|
# Closed set of driver category identifiers (taxonomy v2.0).
DriverCategory = Literal[
    "lost_sales",
    "poor_cx",
    "fcr_failure",
    "churn_risk",
    "agent_positive",
    "agent_improvement",
]
|
|
|
|
|
|
@dataclass
class DriverFrequency:
    """Frequency statistics for a single driver within one batch."""

    driver_code: str
    category: DriverCategory

    # Raw counts
    total_occurrences: int
    calls_affected: int
    total_calls_in_batch: int

    # Normalized rates, each a fraction of total calls in the batch
    occurrence_rate: float  # occurrences / total_calls
    call_rate: float  # calls_affected / total_calls

    # Confidence statistics over the underlying detections
    avg_confidence: float
    min_confidence: float
    max_confidence: float

    # Driver codes frequently observed alongside this one
    commonly_co_occurs_with: list[str] = field(default_factory=list)

    def __post_init__(self):
        """Reject rate values that fall outside [0, 1]."""
        # occurrence_rate is checked first, matching the declaration order.
        for attr in ("occurrence_rate", "call_rate"):
            value = getattr(self, attr)
            if not 0 <= value <= 1:
                raise ValueError(f"{attr} must be 0-1, got {value}")
|
|
|
|
|
|
@dataclass
class DriverSeverity:
    """Severity scoring for a single driver."""

    driver_code: str
    category: DriverCategory

    # Base severity taken from the taxonomy config, on a 0-1 scale
    base_severity: float

    # Computed factors that feed the final score
    frequency_factor: float  # higher frequency => higher impact
    confidence_factor: float  # higher confidence => more reliable
    co_occurrence_factor: float  # often seen with other issues => systemic

    # Final combined score on a 0-100 scale
    severity_score: float

    # Bucketed classification derived from the score
    impact_level: ImpactLevel

    def __post_init__(self):
        """Reject severity scores that fall outside [0, 100]."""
        score_ok = 0 <= self.severity_score <= 100
        if not score_ok:
            raise ValueError(f"severity_score must be 0-100, got {self.severity_score}")
|
|
|
|
|
|
@dataclass
class ConditionalProbability:
    """Conditional probability relationship between two drivers."""

    driver_a: str  # given this driver...
    driver_b: str  # ...probability of this driver
    category_a: DriverCategory
    category_b: DriverCategory

    # P(B|A): probability of B given A
    probability: float
    # Number of observed co-occurrences backing the estimate
    support: int

    # Lift = P(B|A) / P(B): how much A increases the likelihood of B
    lift: float

    def __post_init__(self):
        """Reject probabilities that fall outside [0, 1]."""
        if not 0 <= self.probability <= 1:
            raise ValueError(f"probability must be 0-1, got {self.probability}")
|
|
|
|
|
|
@dataclass
class RCANode:
    """A single driver node in the RCA tree."""

    driver_code: str
    category: DriverCategory

    # Aggregated statistics for this driver
    frequency: DriverFrequency
    severity: DriverSeverity

    # Tree hierarchy (children may themselves carry children)
    parent_code: str | None = None
    children: list["RCANode"] = field(default_factory=list)

    # Actionability
    recommended_actions: list[str] = field(default_factory=list)
    priority_rank: int = 0

    # Evidence summary snippets
    sample_evidence: list[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Serialize this node and its subtree into a plain dict.

        Evidence is truncated to three samples to keep payloads small;
        children are serialized recursively.
        """
        frequency_part = {
            "total_occurrences": self.frequency.total_occurrences,
            "calls_affected": self.frequency.calls_affected,
            "occurrence_rate": self.frequency.occurrence_rate,
            "call_rate": self.frequency.call_rate,
        }
        severity_part = {
            "severity_score": self.severity.severity_score,
            "impact_level": self.severity.impact_level.value,
        }
        return {
            "driver_code": self.driver_code,
            "category": self.category,
            "frequency": frequency_part,
            "severity": severity_part,
            "priority_rank": self.priority_rank,
            "children": [child.to_dict() for child in self.children],
            "sample_evidence": self.sample_evidence[:3],
        }
|
|
|
|
|
|
@dataclass
class RCATree:
    """Complete RCA tree for a batch.

    Holds the per-category root driver nodes plus batch-level summary
    statistics and cross-category conditional-probability patterns.
    """

    batch_id: str
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12; it is
    # kept here because switching to datetime.now(timezone.utc) would change
    # naive timestamps into aware ones — confirm callers before migrating.
    created_at: datetime = field(default_factory=datetime.utcnow)

    # Root nodes (top-level drivers) per category
    lost_sales_root: list[RCANode] = field(default_factory=list)
    poor_cx_root: list[RCANode] = field(default_factory=list)
    fcr_failure_root: list[RCANode] = field(default_factory=list)  # v2.0
    churn_risk_root: list[RCANode] = field(default_factory=list)  # v2.0

    # Summary stats
    total_calls: int = 0
    calls_with_lost_sales: int = 0
    calls_with_poor_cx: int = 0
    calls_with_both: int = 0

    # FCR stats (v2.0)
    calls_first_call: int = 0
    calls_repeat_call: int = 0
    repeat_call_rate: float = 0.0

    # Churn stats (v2.0)
    calls_at_risk: int = 0
    churn_risk_rate: float = 0.0

    # Agent stats (v2.0)
    agents_good_performer: int = 0
    agents_needs_improvement: int = 0
    agents_mixed: int = 0

    # Top drivers by impact
    top_lost_sales_drivers: list[str] = field(default_factory=list)
    top_poor_cx_drivers: list[str] = field(default_factory=list)
    top_fcr_failure_drivers: list[str] = field(default_factory=list)  # v2.0
    top_churn_risk_drivers: list[str] = field(default_factory=list)  # v2.0

    # Cross-category patterns
    conditional_probabilities: list[ConditionalProbability] = field(default_factory=list)

    def get_driver_by_code(self, code: str) -> RCANode | None:
        """Find a driver node by code anywhere in the tree.

        Fix: the previous implementation only inspected root nodes and
        their direct children, so drivers nested two or more levels deep
        (RCANode.children is recursive) could never be found. This version
        walks the whole tree in pre-order, which visits the previously
        reachable nodes in the same order as before.
        """

        def _search(nodes: list[RCANode]) -> RCANode | None:
            # Pre-order depth-first search: node first, then its subtree.
            for node in nodes:
                if node.driver_code == code:
                    return node
                found = _search(node.children)
                if found is not None:
                    return found
            return None

        return _search(
            self.lost_sales_root + self.poor_cx_root +
            self.fcr_failure_root + self.churn_risk_root
        )

    def to_dict(self) -> dict:
        """Convert to a plain dict for serialization.

        Rates derived here are guarded against division by zero; top-driver
        lists are capped at five entries per category.
        """
        return {
            "batch_id": self.batch_id,
            "created_at": self.created_at.isoformat(),
            "summary": {
                "total_calls": self.total_calls,
                "calls_with_lost_sales": self.calls_with_lost_sales,
                "calls_with_poor_cx": self.calls_with_poor_cx,
                "calls_with_both": self.calls_with_both,
                "lost_sales_rate": self.calls_with_lost_sales / self.total_calls if self.total_calls > 0 else 0,
                "poor_cx_rate": self.calls_with_poor_cx / self.total_calls if self.total_calls > 0 else 0,
                # v2.0 stats
                "calls_first_call": self.calls_first_call,
                "calls_repeat_call": self.calls_repeat_call,
                "repeat_call_rate": self.repeat_call_rate,
                "calls_at_risk": self.calls_at_risk,
                "churn_risk_rate": self.churn_risk_rate,
                "agents_good_performer": self.agents_good_performer,
                "agents_needs_improvement": self.agents_needs_improvement,
            },
            "top_drivers": {
                "lost_sales": self.top_lost_sales_drivers[:5],
                "poor_cx": self.top_poor_cx_drivers[:5],
                "fcr_failure": self.top_fcr_failure_drivers[:5],
                "churn_risk": self.top_churn_risk_drivers[:5],
            },
            "lost_sales_tree": [n.to_dict() for n in self.lost_sales_root],
            "poor_cx_tree": [n.to_dict() for n in self.poor_cx_root],
            "fcr_failure_tree": [n.to_dict() for n in self.fcr_failure_root],
            "churn_risk_tree": [n.to_dict() for n in self.churn_risk_root],
        }
|
|
|
|
|
|
@dataclass
class BatchAggregation:
    """Complete aggregation results for a batch."""

    batch_id: str
    # Naive UTC timestamp, consistent with the rest of this module.
    created_at: datetime = field(default_factory=datetime.utcnow)

    # Input stats
    total_calls_processed: int = 0
    successful_analyses: int = 0
    failed_analyses: int = 0

    # Driver frequencies, one list per category
    lost_sales_frequencies: list[DriverFrequency] = field(default_factory=list)
    poor_cx_frequencies: list[DriverFrequency] = field(default_factory=list)
    fcr_failure_frequencies: list[DriverFrequency] = field(default_factory=list)  # v2.0
    churn_risk_frequencies: list[DriverFrequency] = field(default_factory=list)  # v2.0
    agent_positive_frequencies: list[DriverFrequency] = field(default_factory=list)  # v2.0
    agent_improvement_frequencies: list[DriverFrequency] = field(default_factory=list)  # v2.0

    # Severity scores, one list per non-agent category
    lost_sales_severities: list[DriverSeverity] = field(default_factory=list)
    poor_cx_severities: list[DriverSeverity] = field(default_factory=list)
    fcr_failure_severities: list[DriverSeverity] = field(default_factory=list)  # v2.0
    churn_risk_severities: list[DriverSeverity] = field(default_factory=list)  # v2.0

    # RCA Tree
    rca_tree: RCATree | None = None

    # Emergent patterns (OTHER_EMERGENT analysis)
    emergent_patterns: list[dict] = field(default_factory=list)

    # v2.0 aggregate stats
    fcr_stats: dict = field(default_factory=dict)
    churn_stats: dict = field(default_factory=dict)
    agent_stats: dict = field(default_factory=dict)

    def get_top_drivers(
        self,
        category: DriverCategory,
        n: int = 5,
        by: Literal["frequency", "severity"] = "severity",
    ) -> list[str]:
        """Return the codes of the top *n* drivers in *category*.

        Drivers are ranked descending by occurrence rate when
        ``by="frequency"`` or by severity score when ``by="severity"``.
        NOTE(review): the agent_* categories have frequency lists only, so
        ``by="severity"`` yields [] for them — presumably intentional;
        confirm with callers.
        """
        if by == "frequency":
            pool = {
                "lost_sales": self.lost_sales_frequencies,
                "poor_cx": self.poor_cx_frequencies,
                "fcr_failure": self.fcr_failure_frequencies,
                "churn_risk": self.churn_risk_frequencies,
                "agent_positive": self.agent_positive_frequencies,
                "agent_improvement": self.agent_improvement_frequencies,
            }.get(category, [])
            rank_key = lambda entry: entry.occurrence_rate
        else:
            pool = {
                "lost_sales": self.lost_sales_severities,
                "poor_cx": self.poor_cx_severities,
                "fcr_failure": self.fcr_failure_severities,
                "churn_risk": self.churn_risk_severities,
            }.get(category, [])
            rank_key = lambda entry: entry.severity_score

        ranked = sorted(pool, key=rank_key, reverse=True)
        return [entry.driver_code for entry in ranked[:n]]
|
|
|
|
|
|
@dataclass
class AggregationConfig:
    """Tunable parameters for the aggregation pipeline."""

    # Weights combined into the severity score (defaults sum to 1.0)
    frequency_weight: float = 0.4
    confidence_weight: float = 0.3
    co_occurrence_weight: float = 0.3

    # Call-rate thresholds that bucket drivers into impact levels
    critical_threshold: float = 0.10  # >10% of calls
    high_threshold: float = 0.05  # >5% of calls
    medium_threshold: float = 0.02  # >2% of calls

    # Minimum co-occurrence count for a conditional probability to be kept
    min_support: int = 5

    # How many drivers to surface in reports
    top_n_drivers: int = 10

    # Whether OTHER_EMERGENT pattern analysis is included
    include_emergent: bool = True
|