feat: Add Streamlit dashboard with Blueprint compliance (v2.1.0)

Dashboard Features:
- 8 navigation sections: Overview, Outcomes, Poor CX, FCR, Churn, Agent, Call Explorer, Export
- Beyond Brand Identity styling (colors #6D84E3, Outfit font)
- RCA Sankey diagram (Driver → Outcome → Churn Risk flow)
- Correlation heatmaps (driver co-occurrence, driver-outcome)
- Outcome Deep Dive (root causes, correlation, duration analysis)
- Export functionality (Excel, HTML, JSON)

Blueprint Compliance:
- FCR: 4 categories (Primera Llamada/Rellamada × Sin/Con Riesgo de Fuga)
- Churn: Binary view (Sin Riesgo de Fuga / En Riesgo de Fuga)
- Agent: Talento Para Replicar / Oportunidades de Mejora
- Fixed FCR rate calculation (only FIRST_CALL counts as success)

Technical:
- Streamlit + Plotly for interactive visualizations
- Light theme configuration (.streamlit/config.toml)
- Fixed Plotly colorbar titlefont deprecation

Documentation:
- Updated PROJECT_CONTEXT.md, TODO.md, CHANGELOG.md
- Added 4 new technical decisions (TD-014 to TD-017)
- Created TROUBLESHOOTING.md with 10 common issues

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
sujucu70
2026-01-19 16:27:30 +01:00
commit 75e7b9da3d
110 changed files with 28247 additions and 0 deletions

349
src/aggregation/models.py Normal file
View File

@@ -0,0 +1,349 @@
"""
CXInsights - Aggregation Models
Data models for aggregated RCA analysis.
Transforms individual call analyses into actionable insights.
"""
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Literal
class ImpactLevel(str, Enum):
    """Impact level for RCA drivers.

    Thresholds mirror AggregationConfig (critical/high/medium thresholds);
    str mixin keeps values JSON-serializable.
    """

    CRITICAL = "critical"  # >10% of calls, high severity
    HIGH = "high"  # >5% of calls or high severity
    MEDIUM = "medium"  # >2% of calls
    LOW = "low"  # <2% of calls
class TrendDirection(str, Enum):
    """Trend direction for time-series analysis."""

    INCREASING = "increasing"
    STABLE = "stable"
    DECREASING = "decreasing"
    UNKNOWN = "unknown"  # presumably when there is too little history to classify — confirm with callers
# Category types for v2.0.
# Closed set of driver categories shared by every aggregation model below.
DriverCategory = Literal[
    "lost_sales", "poor_cx", "fcr_failure", "churn_risk",
    "agent_positive", "agent_improvement"
]
@dataclass
class DriverFrequency:
    """Frequency statistics for a single driver within a batch."""

    driver_code: str
    category: DriverCategory

    # Raw counts
    total_occurrences: int
    calls_affected: int
    total_calls_in_batch: int

    # Derived rates, validated to lie in [0, 1]
    occurrence_rate: float  # occurrences / total_calls
    call_rate: float  # calls_affected / total_calls

    # Confidence statistics across the driver's taggings
    avg_confidence: float
    min_confidence: float
    max_confidence: float

    # Driver codes frequently seen alongside this one
    commonly_co_occurs_with: list[str] = field(default_factory=list)

    def __post_init__(self):
        """Reject rates outside the [0, 1] interval."""
        for rate_name in ("occurrence_rate", "call_rate"):
            rate_value = getattr(self, rate_name)
            if not 0 <= rate_value <= 1:
                raise ValueError(f"{rate_name} must be 0-1, got {rate_value}")
@dataclass
class DriverSeverity:
    """Severity scoring for a driver."""

    driver_code: str
    category: DriverCategory

    # Baseline severity taken from the taxonomy config (0-1)
    base_severity: float

    # Computed factors feeding the final score
    frequency_factor: float  # higher frequency -> higher impact
    confidence_factor: float  # higher confidence -> more reliable
    co_occurrence_factor: float  # often with other issues -> systemic

    # Final score on a 0-100 scale (validated in __post_init__)
    severity_score: float

    # Classification derived from the score
    impact_level: ImpactLevel

    def __post_init__(self):
        """Reject scores outside the 0-100 scale."""
        score = self.severity_score
        if not 0 <= score <= 100:
            raise ValueError(f"severity_score must be 0-100, got {score}")
@dataclass
class ConditionalProbability:
    """Conditional probability P(B|A) between two drivers."""

    driver_a: str  # the conditioning driver ("given A...")
    driver_b: str  # the conditioned driver ("...probability of B")
    category_a: DriverCategory
    category_b: DriverCategory

    # P(B|A), validated to lie in [0, 1]
    probability: float
    # Number of observed co-occurrences backing the estimate
    support: int
    # Lift = P(B|A) / P(B): how much A increases the likelihood of B
    lift: float

    def __post_init__(self):
        """Reject probabilities outside [0, 1]."""
        p = self.probability
        if not 0 <= p <= 1:
            raise ValueError(f"probability must be 0-1, got {p}")
@dataclass
class RCANode:
    """A node in the RCA tree."""

    driver_code: str
    category: DriverCategory

    # Aggregated statistics for this driver
    frequency: DriverFrequency
    severity: DriverSeverity

    # Tree structure
    parent_code: str | None = None
    children: list["RCANode"] = field(default_factory=list)

    # Actionability
    recommended_actions: list[str] = field(default_factory=list)
    priority_rank: int = 0

    # Evidence summary
    sample_evidence: list[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Convert to dictionary for serialization."""
        freq = self.frequency
        sev = self.severity
        frequency_payload = {
            "total_occurrences": freq.total_occurrences,
            "calls_affected": freq.calls_affected,
            "occurrence_rate": freq.occurrence_rate,
            "call_rate": freq.call_rate,
        }
        severity_payload = {
            "severity_score": sev.severity_score,
            "impact_level": sev.impact_level.value,
        }
        return {
            "driver_code": self.driver_code,
            "category": self.category,
            "frequency": frequency_payload,
            "severity": severity_payload,
            "priority_rank": self.priority_rank,
            "children": [child.to_dict() for child in self.children],
            # Serialized evidence is capped at the first three samples.
            "sample_evidence": self.sample_evidence[:3],
        }
@dataclass
class RCATree:
    """Complete RCA tree for a batch.

    Holds per-category root driver nodes, batch-level summary statistics,
    and cross-category conditional-probability patterns.
    """

    batch_id: str
    # NOTE(review): datetime.utcnow() is naive and deprecated since Python 3.12;
    # datetime.now(timezone.utc) would be correct but changes isoformat output
    # (adds "+00:00") — confirm downstream consumers before switching.
    created_at: datetime = field(default_factory=datetime.utcnow)

    # Root nodes (top-level drivers), one list per category
    lost_sales_root: list[RCANode] = field(default_factory=list)
    poor_cx_root: list[RCANode] = field(default_factory=list)
    fcr_failure_root: list[RCANode] = field(default_factory=list)  # v2.0
    churn_risk_root: list[RCANode] = field(default_factory=list)  # v2.0

    # Summary stats
    total_calls: int = 0
    calls_with_lost_sales: int = 0
    calls_with_poor_cx: int = 0
    calls_with_both: int = 0

    # FCR stats (v2.0)
    calls_first_call: int = 0
    calls_repeat_call: int = 0
    repeat_call_rate: float = 0.0

    # Churn stats (v2.0)
    calls_at_risk: int = 0
    churn_risk_rate: float = 0.0

    # Agent stats (v2.0)
    agents_good_performer: int = 0
    agents_needs_improvement: int = 0
    agents_mixed: int = 0

    # Top drivers by impact
    top_lost_sales_drivers: list[str] = field(default_factory=list)
    top_poor_cx_drivers: list[str] = field(default_factory=list)
    top_fcr_failure_drivers: list[str] = field(default_factory=list)  # v2.0
    top_churn_risk_drivers: list[str] = field(default_factory=list)  # v2.0

    # Cross-category patterns
    conditional_probabilities: list[ConditionalProbability] = field(default_factory=list)

    def get_driver_by_code(self, code: str) -> RCANode | None:
        """Find a driver node by code, searching every category tree.

        Returns the first match in traversal order, or None if absent.

        Fix: the previous implementation only inspected root nodes and
        their direct children, so drivers nested deeper than two levels
        were never found even though RCANode.children allows arbitrary
        nesting. The search is now fully recursive.
        """
        def _search(nodes: list[RCANode]) -> RCANode | None:
            for node in nodes:
                if node.driver_code == code:
                    return node
                match = _search(node.children)
                if match is not None:
                    return match
            return None

        return _search(
            self.lost_sales_root + self.poor_cx_root +
            self.fcr_failure_root + self.churn_risk_root
        )

    def to_dict(self) -> dict:
        """Convert to dictionary for serialization.

        Rates are recomputed from counts here (guarded against a zero
        total); top-driver lists are truncated to five entries each.
        """
        return {
            "batch_id": self.batch_id,
            "created_at": self.created_at.isoformat(),
            "summary": {
                "total_calls": self.total_calls,
                "calls_with_lost_sales": self.calls_with_lost_sales,
                "calls_with_poor_cx": self.calls_with_poor_cx,
                "calls_with_both": self.calls_with_both,
                "lost_sales_rate": self.calls_with_lost_sales / self.total_calls if self.total_calls > 0 else 0,
                "poor_cx_rate": self.calls_with_poor_cx / self.total_calls if self.total_calls > 0 else 0,
                # v2.0 stats
                "calls_first_call": self.calls_first_call,
                "calls_repeat_call": self.calls_repeat_call,
                "repeat_call_rate": self.repeat_call_rate,
                "calls_at_risk": self.calls_at_risk,
                "churn_risk_rate": self.churn_risk_rate,
                "agents_good_performer": self.agents_good_performer,
                "agents_needs_improvement": self.agents_needs_improvement,
            },
            "top_drivers": {
                "lost_sales": self.top_lost_sales_drivers[:5],
                "poor_cx": self.top_poor_cx_drivers[:5],
                "fcr_failure": self.top_fcr_failure_drivers[:5],
                "churn_risk": self.top_churn_risk_drivers[:5],
            },
            "lost_sales_tree": [n.to_dict() for n in self.lost_sales_root],
            "poor_cx_tree": [n.to_dict() for n in self.poor_cx_root],
            "fcr_failure_tree": [n.to_dict() for n in self.fcr_failure_root],
            "churn_risk_tree": [n.to_dict() for n in self.churn_risk_root],
        }
@dataclass
class BatchAggregation:
    """Complete aggregation results for a batch.

    Collects per-category driver frequency and severity lists, the RCA
    tree, emergent patterns, and v2.0 aggregate stat dicts.
    """

    batch_id: str
    # NOTE(review): datetime.utcnow() is naive and deprecated since Python 3.12;
    # kept as-is for consistency with RCATree.created_at — revisit together.
    created_at: datetime = field(default_factory=datetime.utcnow)

    # Input stats
    total_calls_processed: int = 0
    successful_analyses: int = 0
    failed_analyses: int = 0

    # Driver frequencies, one list per category
    lost_sales_frequencies: list[DriverFrequency] = field(default_factory=list)
    poor_cx_frequencies: list[DriverFrequency] = field(default_factory=list)
    fcr_failure_frequencies: list[DriverFrequency] = field(default_factory=list)  # v2.0
    churn_risk_frequencies: list[DriverFrequency] = field(default_factory=list)  # v2.0
    agent_positive_frequencies: list[DriverFrequency] = field(default_factory=list)  # v2.0
    agent_improvement_frequencies: list[DriverFrequency] = field(default_factory=list)  # v2.0

    # Severity scores — note the agent categories deliberately have no
    # severity lists; get_top_drivers compensates for that.
    lost_sales_severities: list[DriverSeverity] = field(default_factory=list)
    poor_cx_severities: list[DriverSeverity] = field(default_factory=list)
    fcr_failure_severities: list[DriverSeverity] = field(default_factory=list)  # v2.0
    churn_risk_severities: list[DriverSeverity] = field(default_factory=list)  # v2.0

    # RCA Tree
    rca_tree: RCATree | None = None

    # Emergent patterns (OTHER_EMERGENT analysis)
    emergent_patterns: list[dict] = field(default_factory=list)

    # v2.0 aggregate stats
    fcr_stats: dict = field(default_factory=dict)
    churn_stats: dict = field(default_factory=dict)
    agent_stats: dict = field(default_factory=dict)

    def get_top_drivers(
        self,
        category: DriverCategory,
        n: int = 5,
        by: Literal["frequency", "severity"] = "severity",
    ) -> list[str]:
        """Get top N driver codes for a category.

        Args:
            category: Driver category to rank.
            n: Maximum number of driver codes to return.
            by: "severity" ranks by severity_score, "frequency" by
                occurrence_rate.

        Returns:
            Driver codes in descending order of the chosen metric.

        Fix: agent_positive / agent_improvement have no severity lists,
        so ranking by severity (the default) used to silently return []
        for them. We now fall back to frequency ranking whenever the
        category has no severity data; categories that do have severity
        data behave exactly as before.
        """
        freq_map = {
            "lost_sales": self.lost_sales_frequencies,
            "poor_cx": self.poor_cx_frequencies,
            "fcr_failure": self.fcr_failure_frequencies,
            "churn_risk": self.churn_risk_frequencies,
            "agent_positive": self.agent_positive_frequencies,
            "agent_improvement": self.agent_improvement_frequencies,
        }
        sev_map = {
            "lost_sales": self.lost_sales_severities,
            "poor_cx": self.poor_cx_severities,
            "fcr_failure": self.fcr_failure_severities,
            "churn_risk": self.churn_risk_severities,
        }
        severities = sev_map.get(category, [])
        if by == "frequency" or not severities:
            # Frequency ranking — also the fallback when no severity data exists.
            ranked = sorted(
                freq_map.get(category, []),
                key=lambda item: item.occurrence_rate,
                reverse=True,
            )
        else:
            ranked = sorted(
                severities,
                key=lambda item: item.severity_score,
                reverse=True,
            )
        return [item.driver_code for item in ranked[:n]]
@dataclass
class AggregationConfig:
    """Configuration for aggregation.

    Tunable weights and thresholds used when computing driver severity
    scores and impact levels.
    """

    # Severity weights (defaults sum to 1.0)
    frequency_weight: float = 0.4
    confidence_weight: float = 0.3
    co_occurrence_weight: float = 0.3

    # Impact thresholds, as a fraction of calls affected
    critical_threshold: float = 0.10  # >10% of calls
    high_threshold: float = 0.05  # >5% of calls
    medium_threshold: float = 0.02  # >2% of calls

    # Minimum co-occurrence count for conditional probabilities
    min_support: int = 5

    # Top N drivers to include in reports
    top_n_drivers: int = 10

    # Whether to include emergent (OTHER_EMERGENT) patterns
    include_emergent: bool = True