Translate Phase 2 medium-priority files (frontend utils + backend dimensions)
Phase 2 of the Spanish-to-English translation, covering medium-priority files.

Frontend utils (2 files):
- dataTransformation.ts: Translated ~72 occurrences (comments, docs, console logs)
- segmentClassifier.ts: Translated ~20 occurrences (JSDoc, inline comments, UI strings)

Backend dimensions (3 files):
- OperationalPerformance.py: Translated ~117 lines (docstrings, comments)
- SatisfactionExperience.py: Translated ~33 lines (docstrings, comments)
- EconomyCost.py: Translated ~79 lines (docstrings, comments)

All function names and variable names preserved for API compatibility.
Frontend and backend compilation tested and verified successful.

Related to TRANSLATION_STATUS.md Phase 2 objectives.

https://claude.ai/code/session_01GNbnkFoESkRcnPr3bLCYDg
This commit is contained in:
@@ -23,17 +23,16 @@ REQUIRED_COLUMNS_ECON: List[str] = [
|
||||
@dataclass
|
||||
class EconomyConfig:
|
||||
"""
|
||||
Parámetros manuales para la dimensión de Economía y Costes.
|
||||
Manual parameters for the Economy and Cost dimension.
|
||||
|
||||
- labor_cost_per_hour: coste total/hora de un agente (fully loaded).
|
||||
- overhead_rate: % overhead variable (ej. 0.1 = 10% sobre labor).
|
||||
- tech_costs_annual: coste anual de tecnología (licencias, infra, ...).
|
||||
- automation_cpi: coste por interacción automatizada (ej. 0.15€).
|
||||
- automation_volume_share: % del volumen automatizable (0-1).
|
||||
- automation_success_rate: % éxito de la automatización (0-1).
|
||||
- labor_cost_per_hour: total cost/hour of an agent (fully loaded).
|
||||
- overhead_rate: % variable overhead (e.g. 0.1 = 10% over labor).
|
||||
- tech_costs_annual: annual technology cost (licenses, infrastructure, ...).
|
||||
- automation_cpi: cost per automated interaction (e.g. 0.15€).
|
||||
- automation_volume_share: % of automatable volume (0-1).
|
||||
- automation_success_rate: % automation success (0-1).
|
||||
|
||||
- customer_segments: mapping opcional skill -> segmento ("high"/"medium"/"low")
|
||||
para futuros insights de ROI por segmento.
|
||||
- customer_segments: optional mapping skill -> segment ("high"/"medium"/"low") for future ROI insights by segment.
|
||||
"""
|
||||
|
||||
labor_cost_per_hour: float
|
||||
@@ -48,20 +47,20 @@ class EconomyConfig:
|
||||
@dataclass
|
||||
class EconomyCostMetrics:
|
||||
"""
|
||||
DIMENSIÓN 4: ECONOMÍA y COSTES
|
||||
DIMENSION 4: ECONOMY and COSTS
|
||||
|
||||
Propósito:
|
||||
- Cuantificar el COSTE actual (CPI, coste anual).
|
||||
- Estimar el impacto de overhead y tecnología.
|
||||
- Calcular un primer estimado de "coste de ineficiencia" y ahorro potencial.
|
||||
Purpose:
|
||||
- Quantify the current COST (CPI, annual cost).
|
||||
- Estimate the impact of overhead and technology.
|
||||
- Calculate an initial estimate of "inefficiency cost" and potential savings.
|
||||
|
||||
Requiere:
|
||||
- Columnas del dataset transaccional (ver REQUIRED_COLUMNS_ECON).
|
||||
Requires:
|
||||
- Columns from the transactional dataset (see REQUIRED_COLUMNS_ECON).
|
||||
|
||||
Inputs opcionales vía EconomyConfig:
|
||||
- labor_cost_per_hour (obligatorio para cualquier cálculo de €).
|
||||
Optional inputs via EconomyConfig:
|
||||
- labor_cost_per_hour (required for any € calculation).
|
||||
- overhead_rate, tech_costs_annual, automation_*.
|
||||
- customer_segments (para insights de ROI por segmento).
|
||||
- customer_segments (for ROI insights by segment).
|
||||
"""
|
||||
|
||||
df: pd.DataFrame
|
||||
@@ -72,13 +71,13 @@ class EconomyCostMetrics:
|
||||
self._prepare_data()
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# Helpers internos
|
||||
# Internal helpers
|
||||
# ------------------------------------------------------------------ #
|
||||
def _validate_columns(self) -> None:
|
||||
missing = [c for c in REQUIRED_COLUMNS_ECON if c not in self.df.columns]
|
||||
if missing:
|
||||
raise ValueError(
|
||||
f"Faltan columnas obligatorias para EconomyCostMetrics: {missing}"
|
||||
f"Missing required columns for EconomyCostMetrics: {missing}"
|
||||
)
|
||||
|
||||
def _prepare_data(self) -> None:
|
||||
@@ -97,15 +96,15 @@ class EconomyCostMetrics:
|
||||
df["duration_talk"].fillna(0)
|
||||
+ df["hold_time"].fillna(0)
|
||||
+ df["wrap_up_time"].fillna(0)
|
||||
) # segundos
|
||||
) # seconds
|
||||
|
||||
# Filtrar por record_status para cálculos de AHT/CPI
|
||||
# Solo incluir registros VALID (excluir NOISE, ZOMBIE, ABANDON)
|
||||
# Filter by record_status for AHT/CPI calculations
|
||||
# Only include VALID records (exclude NOISE, ZOMBIE, ABANDON)
|
||||
if "record_status" in df.columns:
|
||||
df["record_status"] = df["record_status"].astype(str).str.strip().str.upper()
|
||||
df["_is_valid_for_cost"] = df["record_status"] == "VALID"
|
||||
else:
|
||||
# Legacy data sin record_status: incluir todo
|
||||
# Legacy data without record_status: include all
|
||||
df["_is_valid_for_cost"] = True
|
||||
|
||||
self.df = df
|
||||
@@ -118,11 +117,11 @@ class EconomyCostMetrics:
|
||||
return self.config is not None and self.config.labor_cost_per_hour is not None
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# KPI 1: CPI por canal/skill
|
||||
# KPI 1: CPI by channel/skill
|
||||
# ------------------------------------------------------------------ #
|
||||
def cpi_by_skill_channel(self) -> pd.DataFrame:
|
||||
"""
|
||||
CPI (Coste Por Interacción) por skill/canal.
|
||||
CPI (Cost Per Interaction) by skill/channel.
|
||||
|
||||
CPI = (Labor_cost_per_interaction + Overhead_variable) / EFFECTIVE_PRODUCTIVITY
|
||||
|
||||
@@ -130,19 +129,17 @@ class EconomyCostMetrics:
|
||||
- Overhead_variable = overhead_rate * Labor_cost_per_interaction
|
||||
- EFFECTIVE_PRODUCTIVITY = 0.70 (70% - accounts for non-productive time)
|
||||
|
||||
Excluye registros abandonados del cálculo de costes para consistencia
|
||||
con el path del frontend (fresh CSV).
|
||||
Excludes abandoned records from cost calculation for consistency with the frontend path (fresh CSV).
|
||||
|
||||
Si no hay config de costes -> devuelve DataFrame vacío.
|
||||
If there is no cost config -> returns empty DataFrame.
|
||||
|
||||
Incluye queue_skill y channel como columnas (no solo índice) para que
|
||||
el frontend pueda hacer lookup por nombre de skill.
|
||||
Includes queue_skill and channel as columns (not just index) so that the frontend can lookup by skill name.
|
||||
"""
|
||||
if not self._has_cost_config():
|
||||
return pd.DataFrame()
|
||||
|
||||
cfg = self.config
|
||||
assert cfg is not None # para el type checker
|
||||
assert cfg is not None # for the type checker
|
||||
|
||||
df = self.df.copy()
|
||||
if df.empty:
|
||||
@@ -154,15 +151,15 @@ class EconomyCostMetrics:
|
||||
else:
|
||||
df_cost = df
|
||||
|
||||
# Filtrar por record_status: solo VALID para cálculo de AHT
|
||||
# Excluye NOISE, ZOMBIE, ABANDON
|
||||
# Filter by record_status: only VALID for AHT calculation
|
||||
# Excludes NOISE, ZOMBIE, ABANDON
|
||||
if "_is_valid_for_cost" in df_cost.columns:
|
||||
df_cost = df_cost[df_cost["_is_valid_for_cost"] == True]
|
||||
|
||||
if df_cost.empty:
|
||||
return pd.DataFrame()
|
||||
|
||||
# AHT por skill/canal (en segundos) - solo registros VALID
|
||||
# AHT by skill/channel (in seconds) - only VALID records
|
||||
grouped = df_cost.groupby(["queue_skill", "channel"])["handle_time"].mean()
|
||||
|
||||
if grouped.empty:
|
||||
@@ -193,17 +190,16 @@ class EconomyCostMetrics:
|
||||
return out.sort_index().reset_index()
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# KPI 2: coste anual por skill/canal
|
||||
# KPI 2: annual cost by skill/channel
|
||||
# ------------------------------------------------------------------ #
|
||||
def annual_cost_by_skill_channel(self) -> pd.DataFrame:
|
||||
"""
|
||||
Coste anual por skill/canal.
|
||||
Annual cost by skill/channel.
|
||||
|
||||
cost_annual = CPI * volumen (cantidad de interacciones de la muestra).
|
||||
cost_annual = CPI * volume (number of interactions in the sample).
|
||||
|
||||
Nota: por simplicidad asumimos que el dataset refleja un periodo anual.
|
||||
Si en el futuro quieres anualizar (ej. dataset = 1 mes) se puede añadir
|
||||
un factor de escalado en EconomyConfig.
|
||||
Note: for simplicity we assume the dataset reflects an annual period.
|
||||
If in the future you want to annualize (e.g. dataset = 1 month) you can add a scaling factor in EconomyConfig.
|
||||
"""
|
||||
cpi_table = self.cpi_by_skill_channel()
|
||||
if cpi_table.empty:
|
||||
@@ -224,18 +220,18 @@ class EconomyCostMetrics:
|
||||
return joined
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# KPI 3: desglose de costes (labor / tech / overhead)
|
||||
# KPI 3: cost breakdown (labor / tech / overhead)
|
||||
# ------------------------------------------------------------------ #
|
||||
def cost_breakdown(self) -> Dict[str, float]:
|
||||
"""
|
||||
Desglose % de costes: labor, overhead, tech.
|
||||
Cost breakdown %: labor, overhead, tech.
|
||||
|
||||
labor_total = sum(labor_cost_per_interaction)
|
||||
overhead_total = labor_total * overhead_rate
|
||||
tech_total = tech_costs_annual (si se ha proporcionado)
|
||||
tech_total = tech_costs_annual (if provided)
|
||||
|
||||
Devuelve porcentajes sobre el total.
|
||||
Si falta configuración de coste -> devuelve {}.
|
||||
Returns percentages of the total.
|
||||
If cost configuration is missing -> returns {}.
|
||||
"""
|
||||
if not self._has_cost_config():
|
||||
return {}
|
||||
@@ -258,7 +254,7 @@ class EconomyCostMetrics:
|
||||
cpi_indexed = cpi_table.set_index(["queue_skill", "channel"])
|
||||
joined = cpi_indexed.join(volume, how="left").fillna({"volume": 0})
|
||||
|
||||
# Costes anuales de labor y overhead
|
||||
# Annual labor and overhead costs
|
||||
annual_labor = (joined["labor_cost"] * joined["volume"]).sum()
|
||||
annual_overhead = (joined["overhead_cost"] * joined["volume"]).sum()
|
||||
annual_tech = cfg.tech_costs_annual
|
||||
@@ -278,21 +274,21 @@ class EconomyCostMetrics:
|
||||
}
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# KPI 4: coste de ineficiencia (€ por variabilidad/escalación)
|
||||
# KPI 4: inefficiency cost (€ by variability/escalation)
|
||||
# ------------------------------------------------------------------ #
|
||||
def inefficiency_cost_by_skill_channel(self) -> pd.DataFrame:
|
||||
"""
|
||||
Estimación muy simplificada de coste de ineficiencia:
|
||||
Very simplified estimate of inefficiency cost:
|
||||
|
||||
Para cada skill/canal:
|
||||
For each skill/channel:
|
||||
|
||||
- AHT_p50, AHT_p90 (segundos).
|
||||
- AHT_p50, AHT_p90 (seconds).
|
||||
- Delta = max(0, AHT_p90 - AHT_p50).
|
||||
- Se asume que ~40% de las interacciones están por encima de la mediana.
|
||||
- Assumes that ~40% of interactions are above the median.
|
||||
- Ineff_seconds = Delta * volume * 0.4
|
||||
- Ineff_cost = LaborCPI_per_second * Ineff_seconds
|
||||
|
||||
NOTA: Es un modelo aproximado para cuantificar "orden de magnitud".
|
||||
NOTE: This is an approximate model to quantify "order of magnitude".
|
||||
"""
|
||||
if not self._has_cost_config():
|
||||
return pd.DataFrame()
|
||||
@@ -302,8 +298,8 @@ class EconomyCostMetrics:
|
||||
|
||||
df = self.df.copy()
|
||||
|
||||
# Filtrar por record_status: solo VALID para cálculo de AHT
|
||||
# Excluye NOISE, ZOMBIE, ABANDON
|
||||
# Filter by record_status: only VALID for AHT calculation
|
||||
# Excludes NOISE, ZOMBIE, ABANDON
|
||||
if "_is_valid_for_cost" in df.columns:
|
||||
df = df[df["_is_valid_for_cost"] == True]
|
||||
|
||||
@@ -318,7 +314,7 @@ class EconomyCostMetrics:
|
||||
if stats.empty:
|
||||
return pd.DataFrame()
|
||||
|
||||
# CPI para obtener coste/segundo de labor
|
||||
# CPI to get cost/second of labor
|
||||
# cpi_by_skill_channel now returns with reset_index, so we need to set index for join
|
||||
cpi_table_raw = self.cpi_by_skill_channel()
|
||||
if cpi_table_raw.empty:
|
||||
@@ -331,11 +327,11 @@ class EconomyCostMetrics:
|
||||
merged = merged.fillna(0.0)
|
||||
|
||||
delta = (merged["aht_p90"] - merged["aht_p50"]).clip(lower=0.0)
|
||||
affected_fraction = 0.4 # aproximación
|
||||
affected_fraction = 0.4 # approximation
|
||||
ineff_seconds = delta * merged["volume"] * affected_fraction
|
||||
|
||||
# labor_cost = coste por interacción con AHT medio;
|
||||
# aproximamos coste/segundo como labor_cost / AHT_medio
|
||||
# labor_cost = cost per interaction with average AHT;
|
||||
# approximate cost/second as labor_cost / average_AHT
|
||||
aht_mean = grouped["handle_time"].mean()
|
||||
merged["aht_mean"] = aht_mean
|
||||
|
||||
@@ -351,21 +347,21 @@ class EconomyCostMetrics:
|
||||
return merged[["aht_p50", "aht_p90", "volume", "ineff_seconds", "ineff_cost"]].reset_index()
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# KPI 5: ahorro potencial anual por automatización
|
||||
# KPI 5: potential annual savings from automation
|
||||
# ------------------------------------------------------------------ #
|
||||
def potential_savings(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Ahorro potencial anual basado en:
|
||||
Potential annual savings based on:
|
||||
|
||||
Ahorro = (CPI_humano - CPI_automatizado) * Volumen_automatizable * Tasa_éxito
|
||||
Savings = (Human_CPI - Automated_CPI) * Automatable_volume * Success_rate
|
||||
|
||||
Donde:
|
||||
- CPI_humano = media ponderada de cpi_total.
|
||||
- CPI_automatizado = config.automation_cpi
|
||||
- Volumen_automatizable = volume_total * automation_volume_share
|
||||
- Tasa_éxito = automation_success_rate
|
||||
Where:
|
||||
- Human_CPI = weighted average of cpi_total.
|
||||
- Automated_CPI = config.automation_cpi
|
||||
- Automatable_volume = volume_total * automation_volume_share
|
||||
- Success_rate = automation_success_rate
|
||||
|
||||
Si faltan parámetros en config -> devuelve {}.
|
||||
If config parameters are missing -> returns {}.
|
||||
"""
|
||||
if not self._has_cost_config():
|
||||
return {}
|
||||
@@ -384,7 +380,7 @@ class EconomyCostMetrics:
|
||||
if total_volume <= 0:
|
||||
return {}
|
||||
|
||||
# CPI humano medio ponderado
|
||||
# Weighted average human CPI
|
||||
weighted_cpi = (
|
||||
(cpi_table["cpi_total"] * cpi_table["volume"]).sum() / total_volume
|
||||
)
|
||||
@@ -409,12 +405,12 @@ class EconomyCostMetrics:
|
||||
# ------------------------------------------------------------------ #
|
||||
def plot_cost_waterfall(self) -> Axes:
|
||||
"""
|
||||
Waterfall de costes anuales (labor + tech + overhead).
|
||||
Waterfall of annual costs (labor + tech + overhead).
|
||||
"""
|
||||
breakdown = self.cost_breakdown()
|
||||
if not breakdown:
|
||||
fig, ax = plt.subplots()
|
||||
ax.text(0.5, 0.5, "Sin configuración de costes", ha="center", va="center")
|
||||
ax.text(0.5, 0.5, "No cost configuration", ha="center", va="center")
|
||||
ax.set_axis_off()
|
||||
return ax
|
||||
|
||||
@@ -436,14 +432,14 @@ class EconomyCostMetrics:
|
||||
bottoms.append(running)
|
||||
running += v
|
||||
|
||||
# barras estilo waterfall
|
||||
# waterfall style bars
|
||||
x = np.arange(len(labels))
|
||||
ax.bar(x, values)
|
||||
|
||||
ax.set_xticks(x)
|
||||
ax.set_xticklabels(labels)
|
||||
ax.set_ylabel("€ anuales")
|
||||
ax.set_title("Desglose anual de costes")
|
||||
ax.set_ylabel("€ annual")
|
||||
ax.set_title("Annual cost breakdown")
|
||||
|
||||
for idx, v in enumerate(values):
|
||||
ax.text(idx, v, f"{v:,.0f}", ha="center", va="bottom")
|
||||
@@ -454,12 +450,12 @@ class EconomyCostMetrics:
|
||||
|
||||
def plot_cpi_by_channel(self) -> Axes:
|
||||
"""
|
||||
Gráfico de barras de CPI medio por canal.
|
||||
Bar chart of average CPI by channel.
|
||||
"""
|
||||
cpi_table = self.cpi_by_skill_channel()
|
||||
if cpi_table.empty:
|
||||
fig, ax = plt.subplots()
|
||||
ax.text(0.5, 0.5, "Sin configuración de costes", ha="center", va="center")
|
||||
ax.text(0.5, 0.5, "No cost configuration", ha="center", va="center")
|
||||
ax.set_axis_off()
|
||||
return ax
|
||||
|
||||
@@ -474,7 +470,7 @@ class EconomyCostMetrics:
|
||||
cpi_indexed = cpi_table.set_index(["queue_skill", "channel"])
|
||||
joined = cpi_indexed.join(volume, how="left").fillna({"volume": 0})
|
||||
|
||||
# CPI medio ponderado por canal
|
||||
# Weighted average CPI by channel
|
||||
per_channel = (
|
||||
joined.reset_index()
|
||||
.groupby("channel")
|
||||
@@ -486,9 +482,9 @@ class EconomyCostMetrics:
|
||||
fig, ax = plt.subplots(figsize=(6, 4))
|
||||
per_channel.plot(kind="bar", ax=ax)
|
||||
|
||||
ax.set_xlabel("Canal")
|
||||
ax.set_ylabel("CPI medio (€)")
|
||||
ax.set_title("Coste por interacción (CPI) por canal")
|
||||
ax.set_xlabel("Channel")
|
||||
ax.set_ylabel("Average CPI (€)")
|
||||
ax.set_title("Cost per interaction (CPI) by channel")
|
||||
ax.grid(axis="y", alpha=0.3)
|
||||
|
||||
return ax
|
||||
|
||||
@@ -25,32 +25,31 @@ REQUIRED_COLUMNS_OP: List[str] = [
|
||||
@dataclass
|
||||
class OperationalPerformanceMetrics:
|
||||
"""
|
||||
Dimensión: RENDIMIENTO OPERACIONAL Y DE SERVICIO
|
||||
Dimension: OPERATIONAL PERFORMANCE AND SERVICE
|
||||
|
||||
Propósito: medir el balance entre rapidez (eficiencia) y calidad de resolución,
|
||||
más la variabilidad del servicio.
|
||||
Purpose: measure the balance between speed (efficiency) and resolution quality, plus service variability.
|
||||
|
||||
Requiere como mínimo:
|
||||
Requires at minimum:
|
||||
- interaction_id
|
||||
- datetime_start
|
||||
- queue_skill
|
||||
- channel
|
||||
- duration_talk (segundos)
|
||||
- hold_time (segundos)
|
||||
- wrap_up_time (segundos)
|
||||
- duration_talk (seconds)
|
||||
- hold_time (seconds)
|
||||
- wrap_up_time (seconds)
|
||||
- agent_id
|
||||
- transfer_flag (bool/int)
|
||||
|
||||
Columnas opcionales:
|
||||
- is_resolved (bool/int) -> para FCR
|
||||
- abandoned_flag (bool/int) -> para tasa de abandono
|
||||
- customer_id / caller_id -> para reincidencia y repetición de canal
|
||||
- logged_time (segundos) -> para occupancy_rate
|
||||
Optional columns:
|
||||
- is_resolved (bool/int) -> for FCR
|
||||
- abandoned_flag (bool/int) -> for abandonment rate
|
||||
- customer_id / caller_id -> for recurrence and channel repetition
|
||||
- logged_time (seconds) -> for occupancy_rate
|
||||
"""
|
||||
|
||||
df: pd.DataFrame
|
||||
|
||||
# Benchmarks / parámetros de normalización (puedes ajustarlos)
|
||||
# Benchmarks / normalization parameters (you can adjust them)
|
||||
AHT_GOOD: float = 300.0 # 5 min
|
||||
AHT_BAD: float = 900.0 # 15 min
|
||||
VAR_RATIO_GOOD: float = 1.2 # P90/P50 ~1.2 very stable
|
||||
@@ -61,19 +60,19 @@ class OperationalPerformanceMetrics:
|
||||
self._prepare_data()
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# Helpers internos
|
||||
# Internal helpers
|
||||
# ------------------------------------------------------------------ #
|
||||
def _validate_columns(self) -> None:
|
||||
missing = [c for c in REQUIRED_COLUMNS_OP if c not in self.df.columns]
|
||||
if missing:
|
||||
raise ValueError(
|
||||
f"Faltan columnas obligatorias para OperationalPerformanceMetrics: {missing}"
|
||||
f"Missing required columns for OperationalPerformanceMetrics: {missing}"
|
||||
)
|
||||
|
||||
def _prepare_data(self) -> None:
|
||||
df = self.df.copy()
|
||||
|
||||
# Tipos
|
||||
# Types
|
||||
df["datetime_start"] = pd.to_datetime(df["datetime_start"], errors="coerce")
|
||||
|
||||
for col in ["duration_talk", "hold_time", "wrap_up_time"]:
|
||||
@@ -86,13 +85,13 @@ class OperationalPerformanceMetrics:
|
||||
+ df["wrap_up_time"].fillna(0)
|
||||
)
|
||||
|
||||
# v3.0: Filtrar NOISE y ZOMBIE para cálculos de variabilidad
|
||||
# v3.0: Filter NOISE and ZOMBIE for variability calculations
|
||||
# record_status: 'VALID', 'NOISE', 'ZOMBIE', 'ABANDON'
|
||||
# Para AHT/CV solo usamos 'VALID' (excluye noise, zombie, abandon)
|
||||
# For AHT/CV we only use 'VALID' (excludes noise, zombie, abandon)
|
||||
if "record_status" in df.columns:
|
||||
df["record_status"] = df["record_status"].astype(str).str.strip().str.upper()
|
||||
# Crear máscara para registros válidos: SOLO "VALID"
|
||||
# Excluye explícitamente NOISE, ZOMBIE, ABANDON y cualquier otro valor
|
||||
# Create mask for valid records: ONLY "VALID"
|
||||
# Explicitly excludes NOISE, ZOMBIE, ABANDON and any other value
|
||||
df["_is_valid_for_cv"] = df["record_status"] == "VALID"
|
||||
|
||||
# Log record_status breakdown for debugging
|
||||
@@ -104,21 +103,21 @@ class OperationalPerformanceMetrics:
|
||||
print(f" - {status}: {count}")
|
||||
print(f" VALID rows for AHT calculation: {valid_count}")
|
||||
else:
|
||||
# Legacy data sin record_status: incluir todo
|
||||
# Legacy data without record_status: include all
|
||||
df["_is_valid_for_cv"] = True
|
||||
print(f"[OperationalPerformance] No record_status column - using all {len(df)} rows")
|
||||
|
||||
# Normalización básica
|
||||
# Basic normalization
|
||||
df["queue_skill"] = df["queue_skill"].astype(str).str.strip()
|
||||
df["channel"] = df["channel"].astype(str).str.strip()
|
||||
df["agent_id"] = df["agent_id"].astype(str).str.strip()
|
||||
|
||||
# Flags opcionales convertidos a bool cuando existan
|
||||
# Optional flags converted to bool when they exist
|
||||
for flag_col in ["is_resolved", "abandoned_flag", "transfer_flag"]:
|
||||
if flag_col in df.columns:
|
||||
df[flag_col] = df[flag_col].astype(int).astype(bool)
|
||||
|
||||
# customer_id: usamos customer_id si existe, si no caller_id
|
||||
# customer_id: we use customer_id if it exists, otherwise caller_id
|
||||
if "customer_id" in df.columns:
|
||||
df["customer_id"] = df["customer_id"].astype(str)
|
||||
elif "caller_id" in df.columns:
|
||||
@@ -126,8 +125,8 @@ class OperationalPerformanceMetrics:
|
||||
else:
|
||||
df["customer_id"] = None
|
||||
|
||||
# logged_time opcional
|
||||
# Normalizamos logged_time: siempre será una serie float con NaN si no existe
|
||||
# logged_time optional
|
||||
# Normalize logged_time: will always be a float series with NaN if it does not exist
|
||||
df["logged_time"] = pd.to_numeric(df.get("logged_time", np.nan), errors="coerce")
|
||||
|
||||
|
||||
@@ -138,16 +137,16 @@ class OperationalPerformanceMetrics:
|
||||
return self.df.empty
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# AHT y variabilidad
|
||||
# AHT and variability
|
||||
# ------------------------------------------------------------------ #
|
||||
def aht_distribution(self) -> Dict[str, float]:
|
||||
"""
|
||||
Devuelve P10, P50, P90 del AHT y el ratio P90/P50 como medida de variabilidad.
|
||||
Returns P10, P50, P90 of AHT and the P90/P50 ratio as a measure of variability.
|
||||
|
||||
v3.0: Filtra NOISE y ZOMBIE para el cálculo de variabilidad.
|
||||
Solo usa registros con record_status='valid' o sin status (legacy).
|
||||
v3.0: Filters NOISE and ZOMBIE for variability calculation.
|
||||
Only uses records whose record_status is VALID (compared case-insensitively), or all records when there is no record_status column (legacy).
|
||||
"""
|
||||
# Filtrar solo registros válidos para cálculo de variabilidad
|
||||
# Filter only valid records for variability calculation
|
||||
df_valid = self.df[self.df["_is_valid_for_cv"] == True]
|
||||
ht = df_valid["handle_time"].dropna().astype(float)
|
||||
if ht.empty:
|
||||
@@ -167,10 +166,9 @@ class OperationalPerformanceMetrics:
|
||||
|
||||
def talk_hold_acw_p50_by_skill(self) -> pd.DataFrame:
|
||||
"""
|
||||
P50 de talk_time, hold_time y wrap_up_time por skill.
|
||||
P50 of talk_time, hold_time and wrap_up_time by skill.
|
||||
|
||||
Incluye queue_skill como columna (no solo índice) para que
|
||||
el frontend pueda hacer lookup por nombre de skill.
|
||||
Includes queue_skill as a column (not just index) so that the frontend can lookup by skill name.
|
||||
"""
|
||||
df = self.df
|
||||
|
||||
@@ -192,24 +190,24 @@ class OperationalPerformanceMetrics:
|
||||
return result.round(2).sort_index().reset_index()
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# FCR, escalación, abandono, reincidencia, repetición canal
|
||||
# FCR, escalation, abandonment, recurrence, channel repetition
|
||||
# ------------------------------------------------------------------ #
|
||||
def fcr_rate(self) -> float:
|
||||
"""
|
||||
FCR (First Contact Resolution).
|
||||
|
||||
Prioridad 1: Usar fcr_real_flag del CSV si existe
|
||||
Prioridad 2: Calcular como 100 - escalation_rate
|
||||
Priority 1: Use fcr_real_flag from CSV if it exists
|
||||
Priority 2: Calculate as 100 - escalation_rate
|
||||
"""
|
||||
df = self.df
|
||||
total = len(df)
|
||||
if total == 0:
|
||||
return float("nan")
|
||||
|
||||
# Prioridad 1: Usar fcr_real_flag si existe
|
||||
# Priority 1: Use fcr_real_flag if it exists
|
||||
if "fcr_real_flag" in df.columns:
|
||||
col = df["fcr_real_flag"]
|
||||
# Normalizar a booleano
|
||||
# Normalize to boolean
|
||||
if col.dtype == "O":
|
||||
fcr_mask = (
|
||||
col.astype(str)
|
||||
@@ -224,7 +222,7 @@ class OperationalPerformanceMetrics:
|
||||
fcr = (fcr_count / total) * 100.0
|
||||
return float(max(0.0, min(100.0, round(fcr, 2))))
|
||||
|
||||
# Prioridad 2: Fallback a 100 - escalation_rate
|
||||
# Priority 2: Fallback to 100 - escalation_rate
|
||||
try:
|
||||
esc = self.escalation_rate()
|
||||
except Exception:
|
||||
@@ -239,7 +237,7 @@ class OperationalPerformanceMetrics:
|
||||
|
||||
def escalation_rate(self) -> float:
|
||||
"""
|
||||
% de interacciones que requieren escalación (transfer_flag == True).
|
||||
% of interactions that require escalation (transfer_flag == True).
|
||||
"""
|
||||
df = self.df
|
||||
total = len(df)
|
||||
@@ -251,17 +249,17 @@ class OperationalPerformanceMetrics:
|
||||
|
||||
def abandonment_rate(self) -> float:
|
||||
"""
|
||||
% de interacciones abandonadas.
|
||||
% of abandoned interactions.
|
||||
|
||||
Busca en orden: is_abandoned, abandoned_flag, abandoned
|
||||
Si ninguna columna existe, devuelve NaN.
|
||||
Searches in order: is_abandoned, abandoned_flag, abandoned
|
||||
If no column exists, returns NaN.
|
||||
"""
|
||||
df = self.df
|
||||
total = len(df)
|
||||
if total == 0:
|
||||
return float("nan")
|
||||
|
||||
# Buscar columna de abandono en orden de prioridad
|
||||
# Search for abandonment column in priority order
|
||||
abandon_col = None
|
||||
for col_name in ["is_abandoned", "abandoned_flag", "abandoned"]:
|
||||
if col_name in df.columns:
|
||||
@@ -273,7 +271,7 @@ class OperationalPerformanceMetrics:
|
||||
|
||||
col = df[abandon_col]
|
||||
|
||||
# Normalizar a booleano
|
||||
# Normalize to boolean
|
||||
if col.dtype == "O":
|
||||
abandon_mask = (
|
||||
col.astype(str)
|
||||
@@ -289,10 +287,9 @@ class OperationalPerformanceMetrics:
|
||||
|
||||
def high_hold_time_rate(self, threshold_seconds: float = 60.0) -> float:
|
||||
"""
|
||||
% de interacciones con hold_time > threshold (por defecto 60s).
|
||||
% of interactions with hold_time > threshold (default 60s).
|
||||
|
||||
Proxy de complejidad: si el agente tuvo que poner en espera al cliente
|
||||
más de 60 segundos, probablemente tuvo que consultar/investigar.
|
||||
Complexity proxy: if the agent had to put the customer on hold for more than 60 seconds, they probably had to consult/investigate.
|
||||
"""
|
||||
df = self.df
|
||||
total = len(df)
|
||||
@@ -306,44 +303,43 @@ class OperationalPerformanceMetrics:
|
||||
|
||||
def recurrence_rate_7d(self) -> float:
|
||||
"""
|
||||
% de clientes que vuelven a contactar en < 7 días para el MISMO skill.
|
||||
% of customers who contact again in < 7 days for the SAME skill.
|
||||
|
||||
Se basa en customer_id (o caller_id si no hay customer_id) + queue_skill.
|
||||
Calcula:
|
||||
- Para cada combinación cliente + skill, ordena por datetime_start
|
||||
- Si hay dos contactos consecutivos separados < 7 días (mismo cliente, mismo skill),
|
||||
cuenta como "recurrente"
|
||||
- Tasa = nº clientes recurrentes / nº total de clientes
|
||||
Based on customer_id (or caller_id if no customer_id) + queue_skill.
|
||||
Calculates:
|
||||
- For each client + skill combination, sorts by datetime_start
|
||||
- If there are two consecutive contacts separated by < 7 days (same client, same skill), counts as "recurrent"
|
||||
- Rate = number of recurrent clients / total number of clients
|
||||
|
||||
NOTA: Solo cuenta como recurrencia si el cliente llama por el MISMO skill.
|
||||
Un cliente que llama a "Ventas" y luego a "Soporte" NO es recurrente.
|
||||
NOTE: Only counts as recurrence if the client calls for the SAME skill.
|
||||
A client who calls "Sales" and then "Support" is NOT recurrent.
|
||||
"""
|
||||
|
||||
df = self.df.dropna(subset=["datetime_start"]).copy()
|
||||
|
||||
# Normalizar identificador de cliente
|
||||
# Normalize client identifier
|
||||
if "customer_id" not in df.columns:
|
||||
if "caller_id" in df.columns:
|
||||
df["customer_id"] = df["caller_id"]
|
||||
else:
|
||||
# No hay identificador de cliente -> no se puede calcular
|
||||
# No client identifier -> cannot calculate
|
||||
return float("nan")
|
||||
|
||||
df = df.dropna(subset=["customer_id"])
|
||||
if df.empty:
|
||||
return float("nan")
|
||||
|
||||
# Ordenar por cliente + skill + fecha
|
||||
# Sort by client + skill + date
|
||||
df = df.sort_values(["customer_id", "queue_skill", "datetime_start"])
|
||||
|
||||
# Diferencia de tiempo entre contactos consecutivos por cliente Y skill
|
||||
# Esto asegura que solo contamos recontactos del mismo cliente para el mismo skill
|
||||
# Time difference between consecutive contacts by client AND skill
|
||||
# This ensures we only count re-contacts from the same client for the same skill
|
||||
df["delta"] = df.groupby(["customer_id", "queue_skill"])["datetime_start"].diff()
|
||||
|
||||
# Marcamos los contactos que ocurren a menos de 7 días del anterior (mismo skill)
|
||||
# Mark contacts that occur less than 7 days from the previous one (same skill)
|
||||
recurrence_mask = df["delta"] < pd.Timedelta(days=7)
|
||||
|
||||
# Nº de clientes que tienen al menos un contacto recurrente (para cualquier skill)
|
||||
# Number of clients who have at least one recurrent contact (for any skill)
|
||||
recurrent_customers = df.loc[recurrence_mask, "customer_id"].nunique()
|
||||
total_customers = df["customer_id"].nunique()
|
||||
|
||||
@@ -356,9 +352,9 @@ class OperationalPerformanceMetrics:
|
||||
|
||||
def repeat_channel_rate(self) -> float:
|
||||
"""
|
||||
% de reincidencias (<7 días) en las que el cliente usa el MISMO canal.
|
||||
% of recurrences (<7 days) in which the client uses the SAME channel.
|
||||
|
||||
Si no hay customer_id/caller_id o solo un contacto por cliente, devuelve NaN.
|
||||
If there is no customer_id/caller_id or only one contact per client, returns NaN.
|
||||
"""
|
||||
df = self.df.dropna(subset=["datetime_start"]).copy()
|
||||
if df["customer_id"].isna().all():
|
||||
@@ -387,11 +383,11 @@ class OperationalPerformanceMetrics:
|
||||
# ------------------------------------------------------------------ #
|
||||
def occupancy_rate(self) -> float:
|
||||
"""
|
||||
Tasa de ocupación:
|
||||
Occupancy rate:
|
||||
|
||||
occupancy = sum(handle_time) / sum(logged_time) * 100.
|
||||
|
||||
Requiere columna 'logged_time'. Si no existe o es todo 0, devuelve NaN.
|
||||
Requires 'logged_time' column. If it does not exist or is all 0, returns NaN.
|
||||
"""
|
||||
df = self.df
|
||||
if "logged_time" not in df.columns:
|
||||
@@ -408,23 +404,23 @@ class OperationalPerformanceMetrics:
|
||||
return float(round(occ * 100, 2))
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# Score de rendimiento 0-10
|
||||
# Performance score 0-10
|
||||
# ------------------------------------------------------------------ #
|
||||
def performance_score(self) -> Dict[str, float]:
|
||||
"""
|
||||
Calcula un score 0-10 combinando:
|
||||
- AHT (bajo es mejor)
|
||||
- FCR (alto es mejor)
|
||||
- Variabilidad (P90/P50, bajo es mejor)
|
||||
- Otros factores (ocupación / escalación)
|
||||
Calculates a 0-10 score combining:
|
||||
- AHT (lower is better)
|
||||
- FCR (higher is better)
|
||||
- Variability (P90/P50, lower is better)
|
||||
- Other factors (occupancy / escalation)
|
||||
|
||||
Fórmula:
|
||||
Formula:
|
||||
score = 0.4 * (10 - AHT_norm) +
|
||||
0.3 * FCR_norm +
|
||||
0.2 * (10 - Var_norm) +
|
||||
0.1 * Other_score
|
||||
|
||||
Donde *_norm son valores en escala 0-10.
|
||||
Where *_norm are values on a 0-10 scale.
|
||||
"""
|
||||
dist = self.aht_distribution()
|
||||
if not dist:
|
||||
@@ -433,15 +429,15 @@ class OperationalPerformanceMetrics:
|
||||
p50 = dist["p50"]
|
||||
ratio = dist["p90_p50_ratio"]
|
||||
|
||||
# AHT_normalized: 0 (mejor) a 10 (peor)
|
||||
# AHT_normalized: 0 (better) to 10 (worse)
|
||||
aht_norm = self._scale_to_0_10(p50, self.AHT_GOOD, self.AHT_BAD)
|
||||
# FCR_normalized: 0-10 directamente desde % (0-100)
|
||||
# FCR_normalized: 0-10 directly from % (0-100)
|
||||
fcr_pct = self.fcr_rate()
|
||||
fcr_norm = fcr_pct / 10.0 if not np.isnan(fcr_pct) else 0.0
|
||||
# Variabilidad_normalized: 0 (ratio bueno) a 10 (ratio malo)
|
||||
# Variability_normalized: 0 (good ratio) to 10 (bad ratio)
|
||||
var_norm = self._scale_to_0_10(ratio, self.VAR_RATIO_GOOD, self.VAR_RATIO_BAD)
|
||||
|
||||
# Otros factores: combinamos ocupación (ideal ~80%) y escalación (ideal baja)
|
||||
# Other factors: combine occupancy (ideal ~80%) and escalation (ideal low)
|
||||
occ = self.occupancy_rate()
|
||||
esc = self.escalation_rate()
|
||||
|
||||
@@ -467,26 +463,26 @@ class OperationalPerformanceMetrics:
|
||||
|
||||
def _scale_to_0_10(self, value: float, good: float, bad: float) -> float:
|
||||
"""
|
||||
Escala linealmente un valor:
|
||||
Linearly scales a value:
|
||||
- good -> 0
|
||||
- bad -> 10
|
||||
Con saturación fuera de rango.
|
||||
With saturation outside range.
|
||||
"""
|
||||
if np.isnan(value):
|
||||
return 5.0 # neutro
|
||||
return 5.0 # neutral
|
||||
|
||||
if good == bad:
|
||||
return 5.0
|
||||
|
||||
if good < bad:
|
||||
# Menor es mejor
|
||||
# Lower is better
|
||||
if value <= good:
|
||||
return 0.0
|
||||
if value >= bad:
|
||||
return 10.0
|
||||
return 10.0 * (value - good) / (bad - good)
|
||||
else:
|
||||
# Mayor es mejor
|
||||
# Higher is better
|
||||
if value >= good:
|
||||
return 0.0
|
||||
if value <= bad:
|
||||
@@ -495,19 +491,19 @@ class OperationalPerformanceMetrics:
|
||||
|
||||
def _compute_other_factors_score(self, occ_pct: float, esc_pct: float) -> float:
|
||||
"""
|
||||
Otros factores (0-10) basados en:
|
||||
- ocupación ideal alrededor de 80%
|
||||
- tasa de escalación ideal baja (<10%)
|
||||
Other factors (0-10) based on:
|
||||
- ideal occupancy around 80%
|
||||
- ideal escalation rate low (<10%)
|
||||
"""
|
||||
# Ocupación: 0 penalización si está entre 75-85, se penaliza fuera
|
||||
# Occupancy: 0 penalty if between 75-85, penalized outside
|
||||
if np.isnan(occ_pct):
|
||||
occ_penalty = 5.0
|
||||
else:
|
||||
deviation = abs(occ_pct - 80.0)
|
||||
occ_penalty = min(10.0, deviation / 5.0 * 2.0) # cada 5 puntos se suman 2, máx 10
|
||||
occ_penalty = min(10.0, deviation / 5.0 * 2.0) # each 5 points add 2, max 10
|
||||
occ_score = max(0.0, 10.0 - occ_penalty)
|
||||
|
||||
# Escalación: 0-10 donde 0% -> 10 puntos, >=40% -> 0
|
||||
# Escalation: 0-10 where 0% -> 10 points, >=40% -> 0
|
||||
if np.isnan(esc_pct):
|
||||
esc_score = 5.0
|
||||
else:
|
||||
@@ -518,7 +514,7 @@ class OperationalPerformanceMetrics:
|
||||
else:
|
||||
esc_score = 10.0 * (1.0 - esc_pct / 40.0)
|
||||
|
||||
# Media simple de ambos
|
||||
# Simple average of both
|
||||
return (occ_score + esc_score) / 2.0
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
@@ -526,29 +522,29 @@ class OperationalPerformanceMetrics:
|
||||
# ------------------------------------------------------------------ #
|
||||
def plot_aht_boxplot_by_skill(self) -> Axes:
|
||||
"""
|
||||
Boxplot del AHT por skill (P10-P50-P90 visual).
|
||||
Boxplot of AHT by skill (P10-P50-P90 visual).
|
||||
"""
|
||||
df = self.df.copy()
|
||||
|
||||
if df.empty or "handle_time" not in df.columns:
|
||||
fig, ax = plt.subplots()
|
||||
ax.text(0.5, 0.5, "Sin datos de AHT", ha="center", va="center")
|
||||
ax.text(0.5, 0.5, "No AHT data", ha="center", va="center")
|
||||
ax.set_axis_off()
|
||||
return ax
|
||||
|
||||
df = df.dropna(subset=["handle_time"])
|
||||
if df.empty:
|
||||
fig, ax = plt.subplots()
|
||||
ax.text(0.5, 0.5, "AHT no disponible", ha="center", va="center")
|
||||
ax.text(0.5, 0.5, "AHT not available", ha="center", va="center")
|
||||
ax.set_axis_off()
|
||||
return ax
|
||||
|
||||
fig, ax = plt.subplots(figsize=(8, 4))
|
||||
df.boxplot(column="handle_time", by="queue_skill", ax=ax, showfliers=False)
|
||||
|
||||
ax.set_xlabel("Skill / Cola")
|
||||
ax.set_ylabel("AHT (segundos)")
|
||||
ax.set_title("Distribución de AHT por skill")
|
||||
ax.set_xlabel("Skill / Queue")
|
||||
ax.set_ylabel("AHT (seconds)")
|
||||
ax.set_title("AHT distribution by skill")
|
||||
plt.suptitle("")
|
||||
plt.xticks(rotation=45, ha="right")
|
||||
ax.grid(axis="y", alpha=0.3)
|
||||
@@ -557,14 +553,14 @@ class OperationalPerformanceMetrics:
|
||||
|
||||
def plot_resolution_funnel_by_skill(self) -> Axes:
|
||||
"""
|
||||
Funnel / barras apiladas de Talk + Hold + ACW por skill (P50).
|
||||
Funnel / stacked bars of Talk + Hold + ACW by skill (P50).
|
||||
|
||||
Permite ver el equilibrio de tiempos por skill.
|
||||
Allows viewing the time balance by skill.
|
||||
"""
|
||||
p50 = self.talk_hold_acw_p50_by_skill()
|
||||
if p50.empty:
|
||||
fig, ax = plt.subplots()
|
||||
ax.text(0.5, 0.5, "Sin datos para funnel", ha="center", va="center")
|
||||
ax.text(0.5, 0.5, "No data for funnel", ha="center", va="center")
|
||||
ax.set_axis_off()
|
||||
return ax
|
||||
|
||||
@@ -583,27 +579,26 @@ class OperationalPerformanceMetrics:
|
||||
|
||||
ax.set_xticks(x)
|
||||
ax.set_xticklabels(skills, rotation=45, ha="right")
|
||||
ax.set_ylabel("Segundos")
|
||||
ax.set_title("Funnel de resolución (P50) por skill")
|
||||
ax.set_ylabel("Seconds")
|
||||
ax.set_title("Resolution funnel (P50) by skill")
|
||||
ax.legend()
|
||||
ax.grid(axis="y", alpha=0.3)
|
||||
|
||||
return ax
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# Métricas por skill (para consistencia frontend cached/fresh)
|
||||
# Metrics by skill (for frontend cached/fresh consistency)
|
||||
# ------------------------------------------------------------------ #
|
||||
def metrics_by_skill(self) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Calcula métricas operacionales por skill:
|
||||
- transfer_rate: % de interacciones con transfer_flag == True
|
||||
- abandonment_rate: % de interacciones abandonadas
|
||||
- fcr_tecnico: 100 - transfer_rate (sin transferencia)
|
||||
- fcr_real: % sin transferencia Y sin recontacto 7d (si hay datos)
|
||||
- volume: número de interacciones
|
||||
Calculates operational metrics by skill:
|
||||
- transfer_rate: % of interactions with transfer_flag == True
|
||||
- abandonment_rate: % of abandoned interactions
|
||||
- fcr_tecnico: 100 - transfer_rate (without transfer)
|
||||
- fcr_real: % without transfer AND without 7d re-contact (if there is data)
|
||||
- volume: number of interactions
|
||||
|
||||
Devuelve una lista de dicts, uno por skill, para que el frontend
|
||||
tenga acceso a las métricas reales por skill (no estimadas).
|
||||
Returns a list of dicts, one per skill, so that the frontend has access to real metrics by skill (not estimated).
|
||||
"""
|
||||
df = self.df
|
||||
if df.empty:
|
||||
@@ -611,14 +606,14 @@ class OperationalPerformanceMetrics:
|
||||
|
||||
results = []
|
||||
|
||||
# Detectar columna de abandono
|
||||
# Detect abandonment column
|
||||
abandon_col = None
|
||||
for col_name in ["is_abandoned", "abandoned_flag", "abandoned"]:
|
||||
if col_name in df.columns:
|
||||
abandon_col = col_name
|
||||
break
|
||||
|
||||
# Detectar columna de repeat_call_7d para FCR real
|
||||
# Detect repeat_call_7d column for real FCR
|
||||
repeat_col = None
|
||||
for col_name in ["repeat_call_7d", "repeat_7d", "is_repeat_7d"]:
|
||||
if col_name in df.columns:
|
||||
@@ -637,7 +632,7 @@ class OperationalPerformanceMetrics:
|
||||
else:
|
||||
transfer_rate = 0.0
|
||||
|
||||
# FCR Técnico = 100 - transfer_rate
|
||||
# Technical FCR = 100 - transfer_rate
|
||||
fcr_tecnico = float(round(100.0 - transfer_rate, 2))
|
||||
|
||||
# Abandonment rate
|
||||
@@ -656,7 +651,7 @@ class OperationalPerformanceMetrics:
|
||||
abandoned = int(abandon_mask.sum())
|
||||
abandonment_rate = float(round(abandoned / total * 100, 2))
|
||||
|
||||
# FCR Real (sin transferencia Y sin recontacto 7d)
|
||||
# Real FCR (without transfer AND without 7d re-contact)
|
||||
fcr_real = fcr_tecnico # default to fcr_tecnico if no repeat data
|
||||
if repeat_col and "transfer_flag" in group.columns:
|
||||
repeat_data = group[repeat_col]
|
||||
@@ -670,13 +665,13 @@ class OperationalPerformanceMetrics:
|
||||
else:
|
||||
repeat_mask = pd.to_numeric(repeat_data, errors="coerce").fillna(0) > 0
|
||||
|
||||
# FCR Real: no transfer AND no repeat
|
||||
# Real FCR: no transfer AND no repeat
|
||||
fcr_real_mask = (~group["transfer_flag"]) & (~repeat_mask)
|
||||
fcr_real_count = fcr_real_mask.sum()
|
||||
fcr_real = float(round(fcr_real_count / total * 100, 2))
|
||||
|
||||
# AHT Mean (promedio de handle_time sobre registros válidos)
|
||||
# Filtramos solo registros 'valid' (excluye noise/zombie) para consistencia
|
||||
# AHT Mean (average of handle_time over valid records)
|
||||
# Filter only 'valid' records (excludes noise/zombie) for consistency
|
||||
if "_is_valid_for_cv" in group.columns:
|
||||
valid_records = group[group["_is_valid_for_cv"]]
|
||||
else:
|
||||
@@ -687,15 +682,15 @@ class OperationalPerformanceMetrics:
|
||||
else:
|
||||
aht_mean = 0.0
|
||||
|
||||
# AHT Total (promedio de handle_time sobre TODOS los registros)
|
||||
# Incluye NOISE, ZOMBIE, ABANDON - solo para información/comparación
|
||||
# AHT Total (average of handle_time over ALL records)
|
||||
# Includes NOISE, ZOMBIE, ABANDON - for information/comparison only
|
||||
if len(group) > 0 and "handle_time" in group.columns:
|
||||
aht_total = float(round(group["handle_time"].mean(), 2))
|
||||
else:
|
||||
aht_total = 0.0
|
||||
|
||||
# Hold Time Mean (promedio de hold_time sobre registros válidos)
|
||||
# Consistente con fresh path que usa MEAN, no P50
|
||||
# Hold Time Mean (average of hold_time over valid records)
|
||||
# Consistent with fresh path that uses MEAN, not P50
|
||||
if len(valid_records) > 0 and "hold_time" in valid_records.columns:
|
||||
hold_time_mean = float(round(valid_records["hold_time"].mean(), 2))
|
||||
else:
|
||||
|
||||
@@ -24,11 +24,10 @@ REQUIRED_COLUMNS_SAT: List[str] = [
|
||||
@dataclass
|
||||
class SatisfactionExperienceMetrics:
|
||||
"""
|
||||
Dimensión 3: SATISFACCIÓN y EXPERIENCIA
|
||||
Dimension 3: SATISFACTION and EXPERIENCE
|
||||
|
||||
Todas las columnas de satisfacción (csat/nps/ces/aht) son OPCIONALES.
|
||||
Si no están, las métricas que las usan devuelven vacío/NaN pero
|
||||
nunca rompen el pipeline.
|
||||
All satisfaction columns (csat/nps/ces/aht) are OPTIONAL.
|
||||
If they are not present, the metrics that use them return empty/NaN but never break the pipeline.
|
||||
"""
|
||||
|
||||
df: pd.DataFrame
|
||||
@@ -44,7 +43,7 @@ class SatisfactionExperienceMetrics:
|
||||
missing = [c for c in REQUIRED_COLUMNS_SAT if c not in self.df.columns]
|
||||
if missing:
|
||||
raise ValueError(
|
||||
f"Faltan columnas obligatorias para SatisfactionExperienceMetrics: {missing}"
|
||||
f"Missing required columns for SatisfactionExperienceMetrics: {missing}"
|
||||
)
|
||||
|
||||
def _prepare_data(self) -> None:
|
||||
@@ -52,7 +51,7 @@ class SatisfactionExperienceMetrics:
|
||||
|
||||
df["datetime_start"] = pd.to_datetime(df["datetime_start"], errors="coerce")
|
||||
|
||||
# Duraciones base siempre existen
|
||||
# Base durations always exist
|
||||
for col in ["duration_talk", "hold_time", "wrap_up_time"]:
|
||||
df[col] = pd.to_numeric(df[col], errors="coerce")
|
||||
|
||||
@@ -63,16 +62,16 @@ class SatisfactionExperienceMetrics:
|
||||
+ df["wrap_up_time"].fillna(0)
|
||||
)
|
||||
|
||||
# csat_score opcional
|
||||
# csat_score optional
|
||||
df["csat_score"] = pd.to_numeric(df.get("csat_score", np.nan), errors="coerce")
|
||||
|
||||
# aht opcional: si existe columna explícita la usamos, si no usamos handle_time
|
||||
# aht optional: if explicit column exists we use it, otherwise we use handle_time
|
||||
if "aht" in df.columns:
|
||||
df["aht"] = pd.to_numeric(df["aht"], errors="coerce")
|
||||
else:
|
||||
df["aht"] = df["handle_time"]
|
||||
|
||||
# NPS / CES opcionales
|
||||
# NPS / CES optional
|
||||
df["nps_score"] = pd.to_numeric(df.get("nps_score", np.nan), errors="coerce")
|
||||
df["ces_score"] = pd.to_numeric(df.get("ces_score", np.nan), errors="coerce")
|
||||
|
||||
@@ -90,8 +89,8 @@ class SatisfactionExperienceMetrics:
|
||||
# ------------------------------------------------------------------ #
|
||||
def csat_avg_by_skill_channel(self) -> pd.DataFrame:
|
||||
"""
|
||||
CSAT promedio por skill/canal.
|
||||
Si no hay csat_score, devuelve DataFrame vacío.
|
||||
Average CSAT by skill/channel.
|
||||
If there is no csat_score, returns empty DataFrame.
|
||||
"""
|
||||
df = self.df
|
||||
if "csat_score" not in df.columns or df["csat_score"].notna().sum() == 0:
|
||||
@@ -115,7 +114,7 @@ class SatisfactionExperienceMetrics:
|
||||
|
||||
def nps_avg_by_skill_channel(self) -> pd.DataFrame:
|
||||
"""
|
||||
NPS medio por skill/canal, si existe nps_score.
|
||||
Average NPS by skill/channel, if nps_score exists.
|
||||
"""
|
||||
df = self.df
|
||||
if "nps_score" not in df.columns or df["nps_score"].notna().sum() == 0:
|
||||
@@ -139,7 +138,7 @@ class SatisfactionExperienceMetrics:
|
||||
|
||||
def ces_avg_by_skill_channel(self) -> pd.DataFrame:
|
||||
"""
|
||||
CES medio por skill/canal, si existe ces_score.
|
||||
Average CES by skill/channel, if ces_score exists.
|
||||
"""
|
||||
df = self.df
|
||||
if "ces_score" not in df.columns or df["ces_score"].notna().sum() == 0:
|
||||
@@ -163,11 +162,11 @@ class SatisfactionExperienceMetrics:
|
||||
|
||||
def csat_global(self) -> float:
|
||||
"""
|
||||
CSAT medio global (todas las interacciones).
|
||||
Global average CSAT (all interactions).
|
||||
|
||||
Usa la columna opcional `csat_score`:
|
||||
- Si no existe, devuelve NaN.
|
||||
- Si todos los valores son NaN / vacíos, devuelve NaN.
|
||||
Uses the optional `csat_score` column:
|
||||
- If it does not exist, returns NaN.
|
||||
- If all values are NaN / empty, returns NaN.
|
||||
"""
|
||||
df = self.df
|
||||
if "csat_score" not in df.columns:
|
||||
@@ -183,8 +182,8 @@ class SatisfactionExperienceMetrics:
|
||||
|
||||
def csat_aht_correlation(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Correlación Pearson CSAT vs AHT.
|
||||
Si falta csat o aht, o no hay varianza, devuelve NaN y código adecuado.
|
||||
Pearson correlation CSAT vs AHT.
|
||||
If csat or aht is missing, or there is no variance, returns NaN and an appropriate code.
|
||||
"""
|
||||
df = self.df
|
||||
if "csat_score" not in df.columns or df["csat_score"].notna().sum() == 0:
|
||||
@@ -216,8 +215,8 @@ class SatisfactionExperienceMetrics:
|
||||
|
||||
def csat_aht_skill_summary(self) -> pd.DataFrame:
|
||||
"""
|
||||
Resumen por skill con clasificación del "sweet spot".
|
||||
Si falta csat o aht, devuelve DataFrame vacío.
|
||||
Summary by skill with "sweet spot" classification.
|
||||
If csat or aht is missing, returns empty DataFrame.
|
||||
"""
|
||||
df = self.df
|
||||
if df["csat_score"].notna().sum() == 0 or df["aht"].notna().sum() == 0:
|
||||
@@ -258,20 +257,20 @@ class SatisfactionExperienceMetrics:
|
||||
# ------------------------------------------------------------------ #
|
||||
def plot_csat_vs_aht_scatter(self) -> Axes:
|
||||
"""
|
||||
Scatter CSAT vs AHT por skill.
|
||||
Si no hay datos suficientes, devuelve un Axes con mensaje.
|
||||
Scatter CSAT vs AHT by skill.
|
||||
If there is insufficient data, returns an Axes with a message.
|
||||
"""
|
||||
df = self.df
|
||||
if df["csat_score"].notna().sum() == 0 or df["aht"].notna().sum() == 0:
|
||||
fig, ax = plt.subplots()
|
||||
ax.text(0.5, 0.5, "Sin datos de CSAT/AHT", ha="center", va="center")
|
||||
ax.text(0.5, 0.5, "No CSAT/AHT data", ha="center", va="center")
|
||||
ax.set_axis_off()
|
||||
return ax
|
||||
|
||||
df = df.dropna(subset=["csat_score", "aht"]).copy()
|
||||
if df.empty:
|
||||
fig, ax = plt.subplots()
|
||||
ax.text(0.5, 0.5, "Sin datos de CSAT/AHT", ha="center", va="center")
|
||||
ax.text(0.5, 0.5, "No CSAT/AHT data", ha="center", va="center")
|
||||
ax.set_axis_off()
|
||||
return ax
|
||||
|
||||
@@ -280,9 +279,9 @@ class SatisfactionExperienceMetrics:
|
||||
for skill, sub in df.groupby("queue_skill"):
|
||||
ax.scatter(sub["aht"], sub["csat_score"], label=skill, alpha=0.7)
|
||||
|
||||
ax.set_xlabel("AHT (segundos)")
|
||||
ax.set_xlabel("AHT (seconds)")
|
||||
ax.set_ylabel("CSAT")
|
||||
ax.set_title("CSAT vs AHT por skill")
|
||||
ax.set_title("CSAT vs AHT by skill")
|
||||
ax.grid(alpha=0.3)
|
||||
ax.legend(title="Skill", bbox_to_anchor=(1.05, 1), loc="upper left")
|
||||
|
||||
@@ -291,28 +290,28 @@ class SatisfactionExperienceMetrics:
|
||||
|
||||
def plot_csat_distribution(self) -> Axes:
    """
    Plot a histogram of the CSAT scores.

    Returns:
        Axes: a 10-bin histogram of the non-null ``csat_score`` values.
        If the ``csat_score`` column is missing or holds no non-null
        values, a blank Axes showing "No CSAT data" is returned instead.
    """
    df = self.df

    # Work on the non-null scores only; an absent column counts as no data.
    if "csat_score" in df.columns:
        scores = df["csat_score"].dropna()
    else:
        scores = None

    if scores is None or scores.empty:
        # Placeholder figure: message anchored at (0.5, 0.5), axes hidden.
        _, ax = plt.subplots()
        ax.text(0.5, 0.5, "No CSAT data", ha="center", va="center")
        ax.set_axis_off()
        return ax

    _, ax = plt.subplots(figsize=(6, 4))
    ax.hist(scores, bins=10, alpha=0.7)
    ax.set_xlabel("CSAT")
    ax.set_ylabel("Frequency")
    ax.set_title("CSAT distribution")
    ax.grid(axis="y", alpha=0.3)

    return ax
|
||||
|
||||
Reference in New Issue
Block a user