feat: Add Streamlit dashboard with Blueprint compliance (v2.1.0)
Dashboard Features: - 8 navigation sections: Overview, Outcomes, Poor CX, FCR, Churn, Agent, Call Explorer, Export - Beyond Brand Identity styling (colors #6D84E3, Outfit font) - RCA Sankey diagram (Driver → Outcome → Churn Risk flow) - Correlation heatmaps (driver co-occurrence, driver-outcome) - Outcome Deep Dive (root causes, correlation, duration analysis) - Export functionality (Excel, HTML, JSON) Blueprint Compliance: - FCR: 4 categories (Primera Llamada/Rellamada × Sin/Con Riesgo de Fuga) - Churn: Binary view (Sin Riesgo de Fuga / En Riesgo de Fuga) - Agent: Talento Para Replicar / Oportunidades de Mejora - Fixed FCR rate calculation (only FIRST_CALL counts as success) Technical: - Streamlit + Plotly for interactive visualizations - Light theme configuration (.streamlit/config.toml) - Fixed Plotly colorbar titlefont deprecation Documentation: - Updated PROJECT_CONTEXT.md, TODO.md, CHANGELOG.md - Added 4 new technical decisions (TD-014 to TD-017) - Created TROUBLESHOOTING.md with 10 common issues Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
349
src/aggregation/models.py
Normal file
349
src/aggregation/models.py
Normal file
@@ -0,0 +1,349 @@
|
||||
"""
|
||||
CXInsights - Aggregation Models
|
||||
|
||||
Data models for aggregated RCA analysis.
|
||||
Transforms individual call analyses into actionable insights.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
from datetime import datetime, timezone
from enum import Enum
from typing import Literal
|
||||
|
||||
|
||||
class ImpactLevel(str, Enum):
    """Impact level for RCA drivers.

    Inherits from ``str`` so members compare and serialize as plain
    strings (e.g. in JSON exports).
    """

    CRITICAL = "critical"  # >10% of calls, high severity
    HIGH = "high"  # >5% of calls or high severity
    MEDIUM = "medium"  # >2% of calls
    LOW = "low"  # <2% of calls
|
||||
|
||||
|
||||
class TrendDirection(str, Enum):
    """Trend direction for time-series analysis.

    Inherits from ``str`` for transparent string comparison and
    serialization. UNKNOWN covers batches without enough history to
    establish a trend.
    """

    INCREASING = "increasing"
    STABLE = "stable"
    DECREASING = "decreasing"
    UNKNOWN = "unknown"
|
||||
|
||||
|
||||
# Closed set of driver category labels introduced for v2.0.
DriverCategory = Literal[
    "lost_sales",
    "poor_cx",
    "fcr_failure",
    "churn_risk",
    "agent_positive",
    "agent_improvement",
]
|
||||
|
||||
|
||||
@dataclass
class DriverFrequency:
    """Occurrence statistics for a single driver within a batch.

    Rates are fractions in [0, 1] and are validated on construction.
    """

    driver_code: str
    category: DriverCategory

    # Raw counts
    total_occurrences: int
    calls_affected: int
    total_calls_in_batch: int

    # Derived rates (fractions of total calls)
    occurrence_rate: float  # occurrences / total_calls
    call_rate: float  # calls_affected / total_calls

    # Confidence statistics for this driver's extractions
    avg_confidence: float
    min_confidence: float
    max_confidence: float

    # Driver codes frequently observed alongside this one
    commonly_co_occurs_with: list[str] = field(default_factory=list)

    def __post_init__(self):
        """Reject rates outside the [0, 1] interval."""
        for rate_name in ("occurrence_rate", "call_rate"):
            rate_value = getattr(self, rate_name)
            if not 0 <= rate_value <= 1:
                raise ValueError(f"{rate_name} must be 0-1, got {rate_value}")
|
||||
|
||||
|
||||
@dataclass
class DriverSeverity:
    """Severity scoring for a driver.

    Combines a taxonomy baseline with computed factors into a final
    0-100 score, validated on construction.
    """

    driver_code: str
    category: DriverCategory

    # Baseline severity from taxonomy config (0-1)
    base_severity: float

    # Computed severity factors
    frequency_factor: float  # more frequent -> larger impact
    confidence_factor: float  # higher confidence -> more reliable
    co_occurrence_factor: float  # co-occurs with other issues -> systemic

    # Final combined score on a 0-100 scale
    severity_score: float

    # Classification derived from the score
    impact_level: ImpactLevel

    def __post_init__(self):
        """Reject scores outside the 0-100 scale."""
        score = self.severity_score
        if not 0 <= score <= 100:
            raise ValueError(f"severity_score must be 0-100, got {score}")
|
||||
|
||||
|
||||
@dataclass
class ConditionalProbability:
    """P(B|A): likelihood of one driver given another was observed."""

    driver_a: str  # the conditioning driver ("given A...")
    driver_b: str  # the predicted driver ("...how likely is B")
    category_a: DriverCategory
    category_b: DriverCategory

    # P(B|A) and the number of co-occurrences backing it
    probability: float
    support: int

    # lift = P(B|A) / P(B): how much A raises the likelihood of B
    lift: float

    def __post_init__(self):
        """Reject probabilities outside [0, 1]."""
        p = self.probability
        if not 0 <= p <= 1:
            raise ValueError(f"probability must be 0-1, got {p}")
|
||||
|
||||
|
||||
@dataclass
class RCANode:
    """A node in the RCA tree.

    Bundles a driver's frequency and severity statistics with its
    position in the hierarchy and actionability metadata.
    """

    driver_code: str
    category: DriverCategory

    # Aggregate statistics for this driver
    frequency: DriverFrequency
    severity: DriverSeverity

    # Tree structure
    parent_code: str | None = None
    children: list["RCANode"] = field(default_factory=list)

    # Actionability
    recommended_actions: list[str] = field(default_factory=list)
    priority_rank: int = 0

    # Representative evidence snippets
    sample_evidence: list[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Convert to dictionary for serialization (recurses into children)."""
        freq = self.frequency
        sev = self.severity
        payload = {
            "driver_code": self.driver_code,
            "category": self.category,
            "frequency": {
                "total_occurrences": freq.total_occurrences,
                "calls_affected": freq.calls_affected,
                "occurrence_rate": freq.occurrence_rate,
                "call_rate": freq.call_rate,
            },
            "severity": {
                "severity_score": sev.severity_score,
                "impact_level": sev.impact_level.value,
            },
            "priority_rank": self.priority_rank,
            "children": [child.to_dict() for child in self.children],
            # Cap evidence at three samples to keep payloads small
            "sample_evidence": self.sample_evidence[:3],
        }
        return payload
|
||||
|
||||
|
||||
@dataclass
class RCATree:
    """Complete RCA tree for a batch.

    Groups per-category root driver nodes with batch-level summary
    statistics; serialized via :meth:`to_dict` for reports/dashboards.
    """

    batch_id: str
    # Aware UTC timestamp. datetime.utcnow() is deprecated since Python
    # 3.12 and returned a naive datetime; now(timezone.utc) is the
    # recommended replacement.
    created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))

    # Root nodes (top-level drivers) per category
    lost_sales_root: list[RCANode] = field(default_factory=list)
    poor_cx_root: list[RCANode] = field(default_factory=list)
    fcr_failure_root: list[RCANode] = field(default_factory=list)  # v2.0
    churn_risk_root: list[RCANode] = field(default_factory=list)  # v2.0

    # Summary stats
    total_calls: int = 0
    calls_with_lost_sales: int = 0
    calls_with_poor_cx: int = 0
    calls_with_both: int = 0

    # FCR stats (v2.0)
    calls_first_call: int = 0
    calls_repeat_call: int = 0
    repeat_call_rate: float = 0.0

    # Churn stats (v2.0)
    calls_at_risk: int = 0
    churn_risk_rate: float = 0.0

    # Agent stats (v2.0)
    agents_good_performer: int = 0
    agents_needs_improvement: int = 0
    agents_mixed: int = 0

    # Top drivers by impact
    top_lost_sales_drivers: list[str] = field(default_factory=list)
    top_poor_cx_drivers: list[str] = field(default_factory=list)
    top_fcr_failure_drivers: list[str] = field(default_factory=list)  # v2.0
    top_churn_risk_drivers: list[str] = field(default_factory=list)  # v2.0

    # Cross-category patterns
    conditional_probabilities: list[ConditionalProbability] = field(default_factory=list)

    def get_driver_by_code(self, code: str) -> RCANode | None:
        """Find a driver node by code anywhere in the tree.

        Fix: the previous implementation only inspected root nodes and
        their direct children, so drivers nested deeper than two levels
        were never found. This walks the full tree iteratively
        (depth-first) instead. Returns None when no node matches.
        """
        stack: list[RCANode] = (
            self.lost_sales_root + self.poor_cx_root +
            self.fcr_failure_root + self.churn_risk_root
        )
        while stack:
            node = stack.pop()
            if node.driver_code == code:
                return node
            stack.extend(node.children)
        return None

    def to_dict(self) -> dict:
        """Convert to dictionary for serialization.

        Lost-sales / poor-CX rates are derived here; division is guarded
        so an empty batch (total_calls == 0) yields 0 rather than raising.
        """
        total = self.total_calls
        return {
            "batch_id": self.batch_id,
            "created_at": self.created_at.isoformat(),
            "summary": {
                "total_calls": total,
                "calls_with_lost_sales": self.calls_with_lost_sales,
                "calls_with_poor_cx": self.calls_with_poor_cx,
                "calls_with_both": self.calls_with_both,
                "lost_sales_rate": self.calls_with_lost_sales / total if total > 0 else 0,
                "poor_cx_rate": self.calls_with_poor_cx / total if total > 0 else 0,
                # v2.0 stats
                "calls_first_call": self.calls_first_call,
                "calls_repeat_call": self.calls_repeat_call,
                "repeat_call_rate": self.repeat_call_rate,
                "calls_at_risk": self.calls_at_risk,
                "churn_risk_rate": self.churn_risk_rate,
                "agents_good_performer": self.agents_good_performer,
                "agents_needs_improvement": self.agents_needs_improvement,
            },
            "top_drivers": {
                "lost_sales": self.top_lost_sales_drivers[:5],
                "poor_cx": self.top_poor_cx_drivers[:5],
                "fcr_failure": self.top_fcr_failure_drivers[:5],
                "churn_risk": self.top_churn_risk_drivers[:5],
            },
            "lost_sales_tree": [n.to_dict() for n in self.lost_sales_root],
            "poor_cx_tree": [n.to_dict() for n in self.poor_cx_root],
            "fcr_failure_tree": [n.to_dict() for n in self.fcr_failure_root],
            "churn_risk_tree": [n.to_dict() for n in self.churn_risk_root],
        }
|
||||
|
||||
|
||||
@dataclass
class BatchAggregation:
    """Complete aggregation results for a batch.

    Collects per-category driver frequencies and severities, the RCA
    tree, emergent patterns, and v2.0 aggregate stats.
    """

    batch_id: str
    # Aware UTC timestamp. datetime.utcnow() is deprecated since Python
    # 3.12 and returned a naive datetime; now(timezone.utc) is the
    # recommended replacement.
    created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))

    # Input stats
    total_calls_processed: int = 0
    successful_analyses: int = 0
    failed_analyses: int = 0

    # Driver frequencies
    lost_sales_frequencies: list[DriverFrequency] = field(default_factory=list)
    poor_cx_frequencies: list[DriverFrequency] = field(default_factory=list)
    fcr_failure_frequencies: list[DriverFrequency] = field(default_factory=list)  # v2.0
    churn_risk_frequencies: list[DriverFrequency] = field(default_factory=list)  # v2.0
    agent_positive_frequencies: list[DriverFrequency] = field(default_factory=list)  # v2.0
    agent_improvement_frequencies: list[DriverFrequency] = field(default_factory=list)  # v2.0

    # Severity scores (note: agent categories carry no severities)
    lost_sales_severities: list[DriverSeverity] = field(default_factory=list)
    poor_cx_severities: list[DriverSeverity] = field(default_factory=list)
    fcr_failure_severities: list[DriverSeverity] = field(default_factory=list)  # v2.0
    churn_risk_severities: list[DriverSeverity] = field(default_factory=list)  # v2.0

    # RCA Tree
    rca_tree: RCATree | None = None

    # Emergent patterns (OTHER_EMERGENT analysis)
    emergent_patterns: list[dict] = field(default_factory=list)

    # v2.0 aggregate stats
    fcr_stats: dict = field(default_factory=dict)
    churn_stats: dict = field(default_factory=dict)
    agent_stats: dict = field(default_factory=dict)

    def get_top_drivers(
        self,
        category: DriverCategory,
        n: int = 5,
        by: Literal["frequency", "severity"] = "severity",
    ) -> list[str]:
        """Get top N driver codes for *category*, ranked by *by*.

        Fix: the agent categories (``agent_positive`` /
        ``agent_improvement``) have no severity scores, so the previous
        implementation silently returned ``[]`` for them under the
        default ``by="severity"``. They now fall back to frequency
        ranking; categories with severity data behave as before.
        """
        freq_map = {
            "lost_sales": self.lost_sales_frequencies,
            "poor_cx": self.poor_cx_frequencies,
            "fcr_failure": self.fcr_failure_frequencies,
            "churn_risk": self.churn_risk_frequencies,
            "agent_positive": self.agent_positive_frequencies,
            "agent_improvement": self.agent_improvement_frequencies,
        }
        sev_map = {
            "lost_sales": self.lost_sales_severities,
            "poor_cx": self.poor_cx_severities,
            "fcr_failure": self.fcr_failure_severities,
            "churn_risk": self.churn_risk_severities,
        }

        if by == "severity" and category in sev_map:
            items = sorted(
                sev_map[category],
                key=lambda s: s.severity_score,
                reverse=True,
            )
        else:
            # by == "frequency", or severity requested for a category
            # without severity data (agent categories).
            items = sorted(
                freq_map.get(category, []),
                key=lambda f: f.occurrence_rate,
                reverse=True,
            )

        return [item.driver_code for item in items[:n]]
|
||||
|
||||
|
||||
@dataclass
class AggregationConfig:
    """Configuration for aggregation.

    Holds the knobs used when computing severity scores, classifying
    impact, and sizing report output.
    """

    # Relative weights for severity-score factors (defaults sum to 1.0)
    frequency_weight: float = 0.4
    confidence_weight: float = 0.3
    co_occurrence_weight: float = 0.3

    # Impact thresholds, as fractions of calls affected
    critical_threshold: float = 0.10  # >10% of calls
    high_threshold: float = 0.05  # >5% of calls
    medium_threshold: float = 0.02  # >2% of calls

    # Minimum number of co-occurrences before a conditional
    # probability is considered reliable enough to report
    min_support: int = 5

    # How many top drivers to include in reports
    top_n_drivers: int = 10

    # Whether to include OTHER_EMERGENT pattern analysis
    include_emergent: bool = True
|
||||
Reference in New Issue
Block a user