""" CXInsights - Aggregation Models Data models for aggregated RCA analysis. Transforms individual call analyses into actionable insights. """ from dataclasses import dataclass, field from datetime import datetime from enum import Enum from typing import Literal class ImpactLevel(str, Enum): """Impact level for RCA drivers.""" CRITICAL = "critical" # >10% of calls, high severity HIGH = "high" # >5% of calls or high severity MEDIUM = "medium" # >2% of calls LOW = "low" # <2% of calls class TrendDirection(str, Enum): """Trend direction for time-series analysis.""" INCREASING = "increasing" STABLE = "stable" DECREASING = "decreasing" UNKNOWN = "unknown" # Category types for v2.0 DriverCategory = Literal[ "lost_sales", "poor_cx", "fcr_failure", "churn_risk", "agent_positive", "agent_improvement" ] @dataclass class DriverFrequency: """Frequency statistics for a single driver.""" driver_code: str category: DriverCategory # Counts total_occurrences: int calls_affected: int total_calls_in_batch: int # Rates occurrence_rate: float # occurrences / total_calls call_rate: float # calls_affected / total_calls # Confidence stats avg_confidence: float min_confidence: float max_confidence: float # Co-occurrence commonly_co_occurs_with: list[str] = field(default_factory=list) def __post_init__(self): """Validate rates.""" if not 0 <= self.occurrence_rate <= 1: raise ValueError(f"occurrence_rate must be 0-1, got {self.occurrence_rate}") if not 0 <= self.call_rate <= 1: raise ValueError(f"call_rate must be 0-1, got {self.call_rate}") @dataclass class DriverSeverity: """Severity scoring for a driver.""" driver_code: str category: DriverCategory # Base severity from taxonomy base_severity: float # 0-1 from config # Computed severity factors frequency_factor: float # Higher frequency = higher impact confidence_factor: float # Higher confidence = more reliable co_occurrence_factor: float # Often with other issues = systemic # Final score severity_score: float # 0-100 scale # Impact classification impact_level: ImpactLevel def __post_init__(self): """Validate severity score.""" if not 0 <= self.severity_score <= 100: raise ValueError(f"severity_score must be 0-100, got {self.severity_score}") @dataclass class ConditionalProbability: """Conditional probability between drivers.""" driver_a: str # Given this driver... driver_b: str # ...probability of this driver category_a: DriverCategory category_b: DriverCategory # P(B|A) - probability of B given A probability: float support: int # Number of co-occurrences # Lift: P(B|A) / P(B) - how much A increases likelihood of B lift: float def __post_init__(self): """Validate probability.""" if not 0 <= self.probability <= 1: raise ValueError(f"probability must be 0-1, got {self.probability}") @dataclass class RCANode: """A node in the RCA tree.""" driver_code: str category: DriverCategory # Statistics frequency: DriverFrequency severity: DriverSeverity # Hierarchy parent_code: str | None = None children: list["RCANode"] = field(default_factory=list) # Actionability recommended_actions: list[str] = field(default_factory=list) priority_rank: int = 0 # Evidence summary sample_evidence: list[str] = field(default_factory=list) def to_dict(self) -> dict: """Convert to dictionary for serialization.""" return { "driver_code": self.driver_code, "category": self.category, "frequency": { "total_occurrences": self.frequency.total_occurrences, "calls_affected": self.frequency.calls_affected, "occurrence_rate": self.frequency.occurrence_rate, "call_rate": self.frequency.call_rate, }, "severity": { "severity_score": self.severity.severity_score, "impact_level": self.severity.impact_level.value, }, "priority_rank": self.priority_rank, "children": [c.to_dict() for c in self.children], "sample_evidence": self.sample_evidence[:3], } @dataclass class RCATree: """Complete RCA tree for a batch.""" batch_id: str created_at: datetime = field(default_factory=datetime.utcnow) # Root nodes (top-level drivers) lost_sales_root: list[RCANode] = field(default_factory=list) poor_cx_root: list[RCANode] = field(default_factory=list) fcr_failure_root: list[RCANode] = field(default_factory=list) # v2.0 churn_risk_root: list[RCANode] = field(default_factory=list) # v2.0 # Summary stats total_calls: int = 0 calls_with_lost_sales: int = 0 calls_with_poor_cx: int = 0 calls_with_both: int = 0 # FCR stats (v2.0) calls_first_call: int = 0 calls_repeat_call: int = 0 repeat_call_rate: float = 0.0 # Churn stats (v2.0) calls_at_risk: int = 0 churn_risk_rate: float = 0.0 # Agent stats (v2.0) agents_good_performer: int = 0 agents_needs_improvement: int = 0 agents_mixed: int = 0 # Top drivers by impact top_lost_sales_drivers: list[str] = field(default_factory=list) top_poor_cx_drivers: list[str] = field(default_factory=list) top_fcr_failure_drivers: list[str] = field(default_factory=list) # v2.0 top_churn_risk_drivers: list[str] = field(default_factory=list) # v2.0 # Cross-category patterns conditional_probabilities: list[ConditionalProbability] = field(default_factory=list) def get_driver_by_code(self, code: str) -> RCANode | None: """Find a driver node by code.""" all_roots = ( self.lost_sales_root + self.poor_cx_root + self.fcr_failure_root + self.churn_risk_root ) for node in all_roots: if node.driver_code == code: return node for child in node.children: if child.driver_code == code: return child return None def to_dict(self) -> dict: """Convert to dictionary for serialization.""" return { "batch_id": self.batch_id, "created_at": self.created_at.isoformat(), "summary": { "total_calls": self.total_calls, "calls_with_lost_sales": self.calls_with_lost_sales, "calls_with_poor_cx": self.calls_with_poor_cx, "calls_with_both": self.calls_with_both, "lost_sales_rate": self.calls_with_lost_sales / self.total_calls if self.total_calls > 0 else 0, "poor_cx_rate": self.calls_with_poor_cx / self.total_calls if self.total_calls > 0 else 0, # v2.0 stats "calls_first_call": self.calls_first_call, "calls_repeat_call": self.calls_repeat_call, "repeat_call_rate": self.repeat_call_rate, "calls_at_risk": self.calls_at_risk, "churn_risk_rate": self.churn_risk_rate, "agents_good_performer": self.agents_good_performer, "agents_needs_improvement": self.agents_needs_improvement, }, "top_drivers": { "lost_sales": self.top_lost_sales_drivers[:5], "poor_cx": self.top_poor_cx_drivers[:5], "fcr_failure": self.top_fcr_failure_drivers[:5], "churn_risk": self.top_churn_risk_drivers[:5], }, "lost_sales_tree": [n.to_dict() for n in self.lost_sales_root], "poor_cx_tree": [n.to_dict() for n in self.poor_cx_root], "fcr_failure_tree": [n.to_dict() for n in self.fcr_failure_root], "churn_risk_tree": [n.to_dict() for n in self.churn_risk_root], } @dataclass class BatchAggregation: """Complete aggregation results for a batch.""" batch_id: str created_at: datetime = field(default_factory=datetime.utcnow) # Input stats total_calls_processed: int = 0 successful_analyses: int = 0 failed_analyses: int = 0 # Driver frequencies lost_sales_frequencies: list[DriverFrequency] = field(default_factory=list) poor_cx_frequencies: list[DriverFrequency] = field(default_factory=list) fcr_failure_frequencies: list[DriverFrequency] = field(default_factory=list) # v2.0 churn_risk_frequencies: list[DriverFrequency] = field(default_factory=list) # v2.0 agent_positive_frequencies: list[DriverFrequency] = field(default_factory=list) # v2.0 agent_improvement_frequencies: list[DriverFrequency] = field(default_factory=list) # v2.0 # Severity scores lost_sales_severities: list[DriverSeverity] = field(default_factory=list) poor_cx_severities: list[DriverSeverity] = field(default_factory=list) fcr_failure_severities: list[DriverSeverity] = field(default_factory=list) # v2.0 churn_risk_severities: list[DriverSeverity] = field(default_factory=list) # v2.0 # RCA Tree rca_tree: RCATree | None = None # Emergent patterns (OTHER_EMERGENT analysis) emergent_patterns: list[dict] = field(default_factory=list) # v2.0 aggregate stats fcr_stats: dict = field(default_factory=dict) churn_stats: dict = field(default_factory=dict) agent_stats: dict = field(default_factory=dict) def get_top_drivers( self, category: DriverCategory, n: int = 5, by: Literal["frequency", "severity"] = "severity", ) -> list[str]: """Get top N drivers by frequency or severity.""" freq_map = { "lost_sales": self.lost_sales_frequencies, "poor_cx": self.poor_cx_frequencies, "fcr_failure": self.fcr_failure_frequencies, "churn_risk": self.churn_risk_frequencies, "agent_positive": self.agent_positive_frequencies, "agent_improvement": self.agent_improvement_frequencies, } sev_map = { "lost_sales": self.lost_sales_severities, "poor_cx": self.poor_cx_severities, "fcr_failure": self.fcr_failure_severities, "churn_risk": self.churn_risk_severities, } if by == "frequency": items = sorted( freq_map.get(category, []), key=lambda x: x.occurrence_rate, reverse=True, ) else: items = sorted( sev_map.get(category, []), key=lambda x: x.severity_score, reverse=True, ) return [item.driver_code for item in items[:n]] @dataclass class AggregationConfig: """Configuration for aggregation.""" # Severity weights frequency_weight: float = 0.4 confidence_weight: float = 0.3 co_occurrence_weight: float = 0.3 # Impact thresholds critical_threshold: float = 0.10 # >10% of calls high_threshold: float = 0.05 # >5% of calls medium_threshold: float = 0.02 # >2% of calls # Minimum support for conditional probabilities min_support: int = 5 # Top N for reports top_n_drivers: int = 10 # Include emergent patterns include_emergent: bool = True