Translate Phase 2 medium-priority files (frontend utils + backend dimensions)

Phase 2 of Spanish-to-English translation for medium-priority files:

Frontend utils (2 files):
- dataTransformation.ts: Translated ~72 occurrences (comments, docs, console logs)
- segmentClassifier.ts: Translated ~20 occurrences (JSDoc, inline comments, UI strings)

Backend dimensions (3 files):
- OperationalPerformance.py: Translated ~117 lines (docstrings, comments)
- SatisfactionExperience.py: Translated ~33 lines (docstrings, comments)
- EconomyCost.py: Translated ~79 lines (docstrings, comments)

All function names and variable names preserved for API compatibility.
Frontend and backend compilation tested and verified successful.

Related to TRANSLATION_STATUS.md Phase 2 objectives.

https://claude.ai/code/session_01GNbnkFoESkRcnPr3bLCYDg
This commit is contained in:
Claude
2026-02-07 11:03:00 +00:00
parent 94178eaaae
commit 8c7f5fa827
5 changed files with 325 additions and 335 deletions

View File

@@ -23,17 +23,16 @@ REQUIRED_COLUMNS_ECON: List[str] = [
@dataclass @dataclass
class EconomyConfig: class EconomyConfig:
""" """
Parámetros manuales para la dimensión de Economía y Costes. Manual parameters for the Economy and Cost dimension.
- labor_cost_per_hour: coste total/hora de un agente (fully loaded). - labor_cost_per_hour: total cost/hour of an agent (fully loaded).
- overhead_rate: % overhead variable (ej. 0.1 = 10% sobre labor). - overhead_rate: % variable overhead (e.g. 0.1 = 10% over labor).
- tech_costs_annual: coste anual de tecnología (licencias, infra, ...). - tech_costs_annual: annual technology cost (licenses, infrastructure, ...).
- automation_cpi: coste por interacción automatizada (ej. 0.15€). - automation_cpi: cost per automated interaction (e.g. 0.15€).
- automation_volume_share: % del volumen automatizable (0-1). - automation_volume_share: % of automatable volume (0-1).
- automation_success_rate: % éxito de la automatización (0-1). - automation_success_rate: % automation success (0-1).
- customer_segments: mapping opcional skill -> segmento ("high"/"medium"/"low") - customer_segments: optional mapping skill -> segment ("high"/"medium"/"low") for future ROI insights by segment.
para futuros insights de ROI por segmento.
""" """
labor_cost_per_hour: float labor_cost_per_hour: float
@@ -48,20 +47,20 @@ class EconomyConfig:
@dataclass @dataclass
class EconomyCostMetrics: class EconomyCostMetrics:
""" """
DIMENSIÓN 4: ECONOMÍA y COSTES DIMENSION 4: ECONOMY and COSTS
Propósito: Purpose:
- Cuantificar el COSTE actual (CPI, coste anual). - Quantify the current COST (CPI, annual cost).
- Estimar el impacto de overhead y tecnología. - Estimate the impact of overhead and technology.
- Calcular un primer estimado de "coste de ineficiencia" y ahorro potencial. - Calculate an initial estimate of "inefficiency cost" and potential savings.
Requiere: Requires:
- Columnas del dataset transaccional (ver REQUIRED_COLUMNS_ECON). - Columns from the transactional dataset (see REQUIRED_COLUMNS_ECON).
Inputs opcionales vía EconomyConfig: Optional inputs via EconomyConfig:
- labor_cost_per_hour (obligatorio para cualquier cálculo de €). - labor_cost_per_hour (required for any € calculation).
- overhead_rate, tech_costs_annual, automation_*. - overhead_rate, tech_costs_annual, automation_*.
- customer_segments (para insights de ROI por segmento). - customer_segments (for ROI insights by segment).
""" """
df: pd.DataFrame df: pd.DataFrame
@@ -72,13 +71,13 @@ class EconomyCostMetrics:
self._prepare_data() self._prepare_data()
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
# Helpers internos # Internal helpers
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
def _validate_columns(self) -> None: def _validate_columns(self) -> None:
missing = [c for c in REQUIRED_COLUMNS_ECON if c not in self.df.columns] missing = [c for c in REQUIRED_COLUMNS_ECON if c not in self.df.columns]
if missing: if missing:
raise ValueError( raise ValueError(
f"Faltan columnas obligatorias para EconomyCostMetrics: {missing}" f"Missing required columns for EconomyCostMetrics: {missing}"
) )
def _prepare_data(self) -> None: def _prepare_data(self) -> None:
@@ -97,15 +96,15 @@ class EconomyCostMetrics:
df["duration_talk"].fillna(0) df["duration_talk"].fillna(0)
+ df["hold_time"].fillna(0) + df["hold_time"].fillna(0)
+ df["wrap_up_time"].fillna(0) + df["wrap_up_time"].fillna(0)
) # segundos ) # seconds
# Filtrar por record_status para cálculos de AHT/CPI # Filter by record_status for AHT/CPI calculations
# Solo incluir registros VALID (excluir NOISE, ZOMBIE, ABANDON) # Only include VALID records (exclude NOISE, ZOMBIE, ABANDON)
if "record_status" in df.columns: if "record_status" in df.columns:
df["record_status"] = df["record_status"].astype(str).str.strip().str.upper() df["record_status"] = df["record_status"].astype(str).str.strip().str.upper()
df["_is_valid_for_cost"] = df["record_status"] == "VALID" df["_is_valid_for_cost"] = df["record_status"] == "VALID"
else: else:
# Legacy data sin record_status: incluir todo # Legacy data without record_status: include all
df["_is_valid_for_cost"] = True df["_is_valid_for_cost"] = True
self.df = df self.df = df
@@ -118,11 +117,11 @@ class EconomyCostMetrics:
return self.config is not None and self.config.labor_cost_per_hour is not None return self.config is not None and self.config.labor_cost_per_hour is not None
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
# KPI 1: CPI por canal/skill # KPI 1: CPI by channel/skill
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
def cpi_by_skill_channel(self) -> pd.DataFrame: def cpi_by_skill_channel(self) -> pd.DataFrame:
""" """
CPI (Coste Por Interacción) por skill/canal. CPI (Cost Per Interaction) by skill/channel.
CPI = (Labor_cost_per_interaction + Overhead_variable) / EFFECTIVE_PRODUCTIVITY CPI = (Labor_cost_per_interaction + Overhead_variable) / EFFECTIVE_PRODUCTIVITY
@@ -130,19 +129,17 @@ class EconomyCostMetrics:
- Overhead_variable = overhead_rate * Labor_cost_per_interaction - Overhead_variable = overhead_rate * Labor_cost_per_interaction
- EFFECTIVE_PRODUCTIVITY = 0.70 (70% - accounts for non-productive time) - EFFECTIVE_PRODUCTIVITY = 0.70 (70% - accounts for non-productive time)
Excluye registros abandonados del cálculo de costes para consistencia Excludes abandoned records from cost calculation for consistency with the frontend path (fresh CSV).
con el path del frontend (fresh CSV).
Si no hay config de costes -> devuelve DataFrame vacío. If there is no cost config -> returns empty DataFrame.
Incluye queue_skill y channel como columnas (no solo índice) para que Includes queue_skill and channel as columns (not just index) so that the frontend can lookup by skill name.
el frontend pueda hacer lookup por nombre de skill.
""" """
if not self._has_cost_config(): if not self._has_cost_config():
return pd.DataFrame() return pd.DataFrame()
cfg = self.config cfg = self.config
assert cfg is not None # para el type checker assert cfg is not None # for the type checker
df = self.df.copy() df = self.df.copy()
if df.empty: if df.empty:
@@ -154,15 +151,15 @@ class EconomyCostMetrics:
else: else:
df_cost = df df_cost = df
# Filtrar por record_status: solo VALID para cálculo de AHT # Filter by record_status: only VALID for AHT calculation
# Excluye NOISE, ZOMBIE, ABANDON # Excludes NOISE, ZOMBIE, ABANDON
if "_is_valid_for_cost" in df_cost.columns: if "_is_valid_for_cost" in df_cost.columns:
df_cost = df_cost[df_cost["_is_valid_for_cost"] == True] df_cost = df_cost[df_cost["_is_valid_for_cost"] == True]
if df_cost.empty: if df_cost.empty:
return pd.DataFrame() return pd.DataFrame()
# AHT por skill/canal (en segundos) - solo registros VALID # AHT by skill/channel (in seconds) - only VALID records
grouped = df_cost.groupby(["queue_skill", "channel"])["handle_time"].mean() grouped = df_cost.groupby(["queue_skill", "channel"])["handle_time"].mean()
if grouped.empty: if grouped.empty:
@@ -193,17 +190,16 @@ class EconomyCostMetrics:
return out.sort_index().reset_index() return out.sort_index().reset_index()
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
# KPI 2: coste anual por skill/canal # KPI 2: annual cost by skill/channel
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
def annual_cost_by_skill_channel(self) -> pd.DataFrame: def annual_cost_by_skill_channel(self) -> pd.DataFrame:
""" """
Coste anual por skill/canal. Annual cost by skill/channel.
cost_annual = CPI * volumen (cantidad de interacciones de la muestra). cost_annual = CPI * volume (number of interactions in the sample).
Nota: por simplicidad asumimos que el dataset refleja un periodo anual. Note: for simplicity we assume the dataset reflects an annual period.
Si en el futuro quieres anualizar (ej. dataset = 1 mes) se puede añadir If in the future you want to annualize (e.g. dataset = 1 month) you can add a scaling factor in EconomyConfig.
un factor de escalado en EconomyConfig.
""" """
cpi_table = self.cpi_by_skill_channel() cpi_table = self.cpi_by_skill_channel()
if cpi_table.empty: if cpi_table.empty:
@@ -224,18 +220,18 @@ class EconomyCostMetrics:
return joined return joined
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
# KPI 3: desglose de costes (labor / tech / overhead) # KPI 3: cost breakdown (labor / tech / overhead)
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
def cost_breakdown(self) -> Dict[str, float]: def cost_breakdown(self) -> Dict[str, float]:
""" """
Desglose % de costes: labor, overhead, tech. Cost breakdown %: labor, overhead, tech.
labor_total = sum(labor_cost_per_interaction) labor_total = sum(labor_cost_per_interaction)
overhead_total = labor_total * overhead_rate overhead_total = labor_total * overhead_rate
tech_total = tech_costs_annual (si se ha proporcionado) tech_total = tech_costs_annual (if provided)
Devuelve porcentajes sobre el total. Returns percentages of the total.
Si falta configuración de coste -> devuelve {}. If cost configuration is missing -> returns {}.
""" """
if not self._has_cost_config(): if not self._has_cost_config():
return {} return {}
@@ -258,7 +254,7 @@ class EconomyCostMetrics:
cpi_indexed = cpi_table.set_index(["queue_skill", "channel"]) cpi_indexed = cpi_table.set_index(["queue_skill", "channel"])
joined = cpi_indexed.join(volume, how="left").fillna({"volume": 0}) joined = cpi_indexed.join(volume, how="left").fillna({"volume": 0})
# Costes anuales de labor y overhead # Annual labor and overhead costs
annual_labor = (joined["labor_cost"] * joined["volume"]).sum() annual_labor = (joined["labor_cost"] * joined["volume"]).sum()
annual_overhead = (joined["overhead_cost"] * joined["volume"]).sum() annual_overhead = (joined["overhead_cost"] * joined["volume"]).sum()
annual_tech = cfg.tech_costs_annual annual_tech = cfg.tech_costs_annual
@@ -278,21 +274,21 @@ class EconomyCostMetrics:
} }
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
# KPI 4: coste de ineficiencia (€ por variabilidad/escalación) # KPI 4: inefficiency cost (€ by variability/escalation)
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
def inefficiency_cost_by_skill_channel(self) -> pd.DataFrame: def inefficiency_cost_by_skill_channel(self) -> pd.DataFrame:
""" """
Estimación muy simplificada de coste de ineficiencia: Very simplified estimate of inefficiency cost:
Para cada skill/canal: For each skill/channel:
- AHT_p50, AHT_p90 (segundos). - AHT_p50, AHT_p90 (seconds).
- Delta = max(0, AHT_p90 - AHT_p50). - Delta = max(0, AHT_p90 - AHT_p50).
- Se asume que ~40% de las interacciones están por encima de la mediana. - Assumes that ~40% of interactions are above the median.
- Ineff_seconds = Delta * volume * 0.4 - Ineff_seconds = Delta * volume * 0.4
- Ineff_cost = LaborCPI_per_second * Ineff_seconds - Ineff_cost = LaborCPI_per_second * Ineff_seconds
NOTA: Es un modelo aproximado para cuantificar "orden de magnitud". NOTE: This is an approximate model to quantify "order of magnitude".
""" """
if not self._has_cost_config(): if not self._has_cost_config():
return pd.DataFrame() return pd.DataFrame()
@@ -302,8 +298,8 @@ class EconomyCostMetrics:
df = self.df.copy() df = self.df.copy()
# Filtrar por record_status: solo VALID para cálculo de AHT # Filter by record_status: only VALID for AHT calculation
# Excluye NOISE, ZOMBIE, ABANDON # Excludes NOISE, ZOMBIE, ABANDON
if "_is_valid_for_cost" in df.columns: if "_is_valid_for_cost" in df.columns:
df = df[df["_is_valid_for_cost"] == True] df = df[df["_is_valid_for_cost"] == True]
@@ -318,7 +314,7 @@ class EconomyCostMetrics:
if stats.empty: if stats.empty:
return pd.DataFrame() return pd.DataFrame()
# CPI para obtener coste/segundo de labor # CPI to get cost/second of labor
# cpi_by_skill_channel now returns with reset_index, so we need to set index for join # cpi_by_skill_channel now returns with reset_index, so we need to set index for join
cpi_table_raw = self.cpi_by_skill_channel() cpi_table_raw = self.cpi_by_skill_channel()
if cpi_table_raw.empty: if cpi_table_raw.empty:
@@ -331,11 +327,11 @@ class EconomyCostMetrics:
merged = merged.fillna(0.0) merged = merged.fillna(0.0)
delta = (merged["aht_p90"] - merged["aht_p50"]).clip(lower=0.0) delta = (merged["aht_p90"] - merged["aht_p50"]).clip(lower=0.0)
affected_fraction = 0.4 # aproximación affected_fraction = 0.4 # approximation
ineff_seconds = delta * merged["volume"] * affected_fraction ineff_seconds = delta * merged["volume"] * affected_fraction
# labor_cost = coste por interacción con AHT medio; # labor_cost = cost per interaction with average AHT;
# aproximamos coste/segundo como labor_cost / AHT_medio # approximate cost/second as labor_cost / average_AHT
aht_mean = grouped["handle_time"].mean() aht_mean = grouped["handle_time"].mean()
merged["aht_mean"] = aht_mean merged["aht_mean"] = aht_mean
@@ -351,21 +347,21 @@ class EconomyCostMetrics:
return merged[["aht_p50", "aht_p90", "volume", "ineff_seconds", "ineff_cost"]].reset_index() return merged[["aht_p50", "aht_p90", "volume", "ineff_seconds", "ineff_cost"]].reset_index()
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
# KPI 5: ahorro potencial anual por automatización # KPI 5: potential annual savings from automation
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
def potential_savings(self) -> Dict[str, Any]: def potential_savings(self) -> Dict[str, Any]:
""" """
Ahorro potencial anual basado en: Potential annual savings based on:
Ahorro = (CPI_humano - CPI_automatizado) * Volumen_automatizable * Tasa_éxito Savings = (Human_CPI - Automated_CPI) * Automatable_volume * Success_rate
Donde: Where:
- CPI_humano = media ponderada de cpi_total. - Human_CPI = weighted average of cpi_total.
- CPI_automatizado = config.automation_cpi - Automated_CPI = config.automation_cpi
- Volumen_automatizable = volume_total * automation_volume_share - Automatable_volume = volume_total * automation_volume_share
- Tasa_éxito = automation_success_rate - Success_rate = automation_success_rate
Si faltan parámetros en config -> devuelve {}. If config parameters are missing -> returns {}.
""" """
if not self._has_cost_config(): if not self._has_cost_config():
return {} return {}
@@ -384,7 +380,7 @@ class EconomyCostMetrics:
if total_volume <= 0: if total_volume <= 0:
return {} return {}
# CPI humano medio ponderado # Weighted average human CPI
weighted_cpi = ( weighted_cpi = (
(cpi_table["cpi_total"] * cpi_table["volume"]).sum() / total_volume (cpi_table["cpi_total"] * cpi_table["volume"]).sum() / total_volume
) )
@@ -409,12 +405,12 @@ class EconomyCostMetrics:
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
def plot_cost_waterfall(self) -> Axes: def plot_cost_waterfall(self) -> Axes:
""" """
Waterfall de costes anuales (labor + tech + overhead). Waterfall of annual costs (labor + tech + overhead).
""" """
breakdown = self.cost_breakdown() breakdown = self.cost_breakdown()
if not breakdown: if not breakdown:
fig, ax = plt.subplots() fig, ax = plt.subplots()
ax.text(0.5, 0.5, "Sin configuración de costes", ha="center", va="center") ax.text(0.5, 0.5, "No cost configuration", ha="center", va="center")
ax.set_axis_off() ax.set_axis_off()
return ax return ax
@@ -436,14 +432,14 @@ class EconomyCostMetrics:
bottoms.append(running) bottoms.append(running)
running += v running += v
# barras estilo waterfall # waterfall style bars
x = np.arange(len(labels)) x = np.arange(len(labels))
ax.bar(x, values) ax.bar(x, values)
ax.set_xticks(x) ax.set_xticks(x)
ax.set_xticklabels(labels) ax.set_xticklabels(labels)
ax.set_ylabel("€ anuales") ax.set_ylabel("€ annual")
ax.set_title("Desglose anual de costes") ax.set_title("Annual cost breakdown")
for idx, v in enumerate(values): for idx, v in enumerate(values):
ax.text(idx, v, f"{v:,.0f}", ha="center", va="bottom") ax.text(idx, v, f"{v:,.0f}", ha="center", va="bottom")
@@ -454,12 +450,12 @@ class EconomyCostMetrics:
def plot_cpi_by_channel(self) -> Axes: def plot_cpi_by_channel(self) -> Axes:
""" """
Gráfico de barras de CPI medio por canal. Bar chart of average CPI by channel.
""" """
cpi_table = self.cpi_by_skill_channel() cpi_table = self.cpi_by_skill_channel()
if cpi_table.empty: if cpi_table.empty:
fig, ax = plt.subplots() fig, ax = plt.subplots()
ax.text(0.5, 0.5, "Sin configuración de costes", ha="center", va="center") ax.text(0.5, 0.5, "No cost configuration", ha="center", va="center")
ax.set_axis_off() ax.set_axis_off()
return ax return ax
@@ -474,7 +470,7 @@ class EconomyCostMetrics:
cpi_indexed = cpi_table.set_index(["queue_skill", "channel"]) cpi_indexed = cpi_table.set_index(["queue_skill", "channel"])
joined = cpi_indexed.join(volume, how="left").fillna({"volume": 0}) joined = cpi_indexed.join(volume, how="left").fillna({"volume": 0})
# CPI medio ponderado por canal # Weighted average CPI by channel
per_channel = ( per_channel = (
joined.reset_index() joined.reset_index()
.groupby("channel") .groupby("channel")
@@ -486,9 +482,9 @@ class EconomyCostMetrics:
fig, ax = plt.subplots(figsize=(6, 4)) fig, ax = plt.subplots(figsize=(6, 4))
per_channel.plot(kind="bar", ax=ax) per_channel.plot(kind="bar", ax=ax)
ax.set_xlabel("Canal") ax.set_xlabel("Channel")
ax.set_ylabel("CPI medio (€)") ax.set_ylabel("Average CPI (€)")
ax.set_title("Coste por interacción (CPI) por canal") ax.set_title("Cost per interaction (CPI) by channel")
ax.grid(axis="y", alpha=0.3) ax.grid(axis="y", alpha=0.3)
return ax return ax

View File

@@ -25,32 +25,31 @@ REQUIRED_COLUMNS_OP: List[str] = [
@dataclass @dataclass
class OperationalPerformanceMetrics: class OperationalPerformanceMetrics:
""" """
Dimensión: RENDIMIENTO OPERACIONAL Y DE SERVICIO Dimension: OPERATIONAL PERFORMANCE AND SERVICE
Propósito: medir el balance entre rapidez (eficiencia) y calidad de resolución, Purpose: measure the balance between speed (efficiency) and resolution quality, plus service variability.
más la variabilidad del servicio.
Requiere como mínimo: Requires at minimum:
- interaction_id - interaction_id
- datetime_start - datetime_start
- queue_skill - queue_skill
- channel - channel
- duration_talk (segundos) - duration_talk (seconds)
- hold_time (segundos) - hold_time (seconds)
- wrap_up_time (segundos) - wrap_up_time (seconds)
- agent_id - agent_id
- transfer_flag (bool/int) - transfer_flag (bool/int)
Columnas opcionales: Optional columns:
- is_resolved (bool/int) -> para FCR - is_resolved (bool/int) -> for FCR
- abandoned_flag (bool/int) -> para tasa de abandono - abandoned_flag (bool/int) -> for abandonment rate
- customer_id / caller_id -> para reincidencia y repetición de canal - customer_id / caller_id -> for recurrence and channel repetition
- logged_time (segundos) -> para occupancy_rate - logged_time (seconds) -> for occupancy_rate
""" """
df: pd.DataFrame df: pd.DataFrame
# Benchmarks / parámetros de normalización (puedes ajustarlos) # Benchmarks / normalization parameters (you can adjust them)
AHT_GOOD: float = 300.0 # 5 min AHT_GOOD: float = 300.0 # 5 min
AHT_BAD: float = 900.0 # 15 min AHT_BAD: float = 900.0 # 15 min
VAR_RATIO_GOOD: float = 1.2 # P90/P50 ~1.2 muy estable VAR_RATIO_GOOD: float = 1.2 # P90/P50 ~1.2 muy estable
@@ -61,19 +60,19 @@ class OperationalPerformanceMetrics:
self._prepare_data() self._prepare_data()
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
# Helpers internos # Internal helpers
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
def _validate_columns(self) -> None: def _validate_columns(self) -> None:
missing = [c for c in REQUIRED_COLUMNS_OP if c not in self.df.columns] missing = [c for c in REQUIRED_COLUMNS_OP if c not in self.df.columns]
if missing: if missing:
raise ValueError( raise ValueError(
f"Faltan columnas obligatorias para OperationalPerformanceMetrics: {missing}" f"Missing required columns for OperationalPerformanceMetrics: {missing}"
) )
def _prepare_data(self) -> None: def _prepare_data(self) -> None:
df = self.df.copy() df = self.df.copy()
# Tipos # Types
df["datetime_start"] = pd.to_datetime(df["datetime_start"], errors="coerce") df["datetime_start"] = pd.to_datetime(df["datetime_start"], errors="coerce")
for col in ["duration_talk", "hold_time", "wrap_up_time"]: for col in ["duration_talk", "hold_time", "wrap_up_time"]:
@@ -86,13 +85,13 @@ class OperationalPerformanceMetrics:
+ df["wrap_up_time"].fillna(0) + df["wrap_up_time"].fillna(0)
) )
# v3.0: Filtrar NOISE y ZOMBIE para cálculos de variabilidad # v3.0: Filter NOISE and ZOMBIE for variability calculations
# record_status: 'VALID', 'NOISE', 'ZOMBIE', 'ABANDON' # record_status: 'VALID', 'NOISE', 'ZOMBIE', 'ABANDON'
# Para AHT/CV solo usamos 'VALID' (excluye noise, zombie, abandon) # For AHT/CV we only use 'VALID' (excludes noise, zombie, abandon)
if "record_status" in df.columns: if "record_status" in df.columns:
df["record_status"] = df["record_status"].astype(str).str.strip().str.upper() df["record_status"] = df["record_status"].astype(str).str.strip().str.upper()
# Crear máscara para registros válidos: SOLO "VALID" # Create mask for valid records: ONLY "VALID"
# Excluye explícitamente NOISE, ZOMBIE, ABANDON y cualquier otro valor # Explicitly excludes NOISE, ZOMBIE, ABANDON and any other value
df["_is_valid_for_cv"] = df["record_status"] == "VALID" df["_is_valid_for_cv"] = df["record_status"] == "VALID"
# Log record_status breakdown for debugging # Log record_status breakdown for debugging
@@ -104,21 +103,21 @@ class OperationalPerformanceMetrics:
print(f" - {status}: {count}") print(f" - {status}: {count}")
print(f" VALID rows for AHT calculation: {valid_count}") print(f" VALID rows for AHT calculation: {valid_count}")
else: else:
# Legacy data sin record_status: incluir todo # Legacy data without record_status: include all
df["_is_valid_for_cv"] = True df["_is_valid_for_cv"] = True
print(f"[OperationalPerformance] No record_status column - using all {len(df)} rows") print(f"[OperationalPerformance] No record_status column - using all {len(df)} rows")
# Normalización básica # Basic normalization
df["queue_skill"] = df["queue_skill"].astype(str).str.strip() df["queue_skill"] = df["queue_skill"].astype(str).str.strip()
df["channel"] = df["channel"].astype(str).str.strip() df["channel"] = df["channel"].astype(str).str.strip()
df["agent_id"] = df["agent_id"].astype(str).str.strip() df["agent_id"] = df["agent_id"].astype(str).str.strip()
# Flags opcionales convertidos a bool cuando existan # Optional flags converted to bool when they exist
for flag_col in ["is_resolved", "abandoned_flag", "transfer_flag"]: for flag_col in ["is_resolved", "abandoned_flag", "transfer_flag"]:
if flag_col in df.columns: if flag_col in df.columns:
df[flag_col] = df[flag_col].astype(int).astype(bool) df[flag_col] = df[flag_col].astype(int).astype(bool)
# customer_id: usamos customer_id si existe, si no caller_id # customer_id: we use customer_id if it exists, otherwise caller_id
if "customer_id" in df.columns: if "customer_id" in df.columns:
df["customer_id"] = df["customer_id"].astype(str) df["customer_id"] = df["customer_id"].astype(str)
elif "caller_id" in df.columns: elif "caller_id" in df.columns:
@@ -126,8 +125,8 @@ class OperationalPerformanceMetrics:
else: else:
df["customer_id"] = None df["customer_id"] = None
# logged_time opcional # logged_time optional
# Normalizamos logged_time: siempre será una serie float con NaN si no existe # Normalize logged_time: will always be a float series with NaN if it does not exist
df["logged_time"] = pd.to_numeric(df.get("logged_time", np.nan), errors="coerce") df["logged_time"] = pd.to_numeric(df.get("logged_time", np.nan), errors="coerce")
@@ -138,16 +137,16 @@ class OperationalPerformanceMetrics:
return self.df.empty return self.df.empty
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
# AHT y variabilidad # AHT and variability
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
def aht_distribution(self) -> Dict[str, float]: def aht_distribution(self) -> Dict[str, float]:
""" """
Devuelve P10, P50, P90 del AHT y el ratio P90/P50 como medida de variabilidad. Returns P10, P50, P90 of AHT and the P90/P50 ratio as a measure of variability.
v3.0: Filtra NOISE y ZOMBIE para el cálculo de variabilidad. v3.0: Filters NOISE and ZOMBIE for variability calculation.
Solo usa registros con record_status='valid' o sin status (legacy). Only uses records with record_status='valid' or without status (legacy).
""" """
# Filtrar solo registros válidos para cálculo de variabilidad # Filter only valid records for variability calculation
df_valid = self.df[self.df["_is_valid_for_cv"] == True] df_valid = self.df[self.df["_is_valid_for_cv"] == True]
ht = df_valid["handle_time"].dropna().astype(float) ht = df_valid["handle_time"].dropna().astype(float)
if ht.empty: if ht.empty:
@@ -167,10 +166,9 @@ class OperationalPerformanceMetrics:
def talk_hold_acw_p50_by_skill(self) -> pd.DataFrame: def talk_hold_acw_p50_by_skill(self) -> pd.DataFrame:
""" """
P50 de talk_time, hold_time y wrap_up_time por skill. P50 of talk_time, hold_time and wrap_up_time by skill.
Incluye queue_skill como columna (no solo índice) para que Includes queue_skill as a column (not just index) so that the frontend can lookup by skill name.
el frontend pueda hacer lookup por nombre de skill.
""" """
df = self.df df = self.df
@@ -192,24 +190,24 @@ class OperationalPerformanceMetrics:
return result.round(2).sort_index().reset_index() return result.round(2).sort_index().reset_index()
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
# FCR, escalación, abandono, reincidencia, repetición canal # FCR, escalation, abandonment, recurrence, channel repetition
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
def fcr_rate(self) -> float: def fcr_rate(self) -> float:
""" """
FCR (First Contact Resolution). FCR (First Contact Resolution).
Prioridad 1: Usar fcr_real_flag del CSV si existe Priority 1: Use fcr_real_flag from CSV if it exists
Prioridad 2: Calcular como 100 - escalation_rate Priority 2: Calculate as 100 - escalation_rate
""" """
df = self.df df = self.df
total = len(df) total = len(df)
if total == 0: if total == 0:
return float("nan") return float("nan")
# Prioridad 1: Usar fcr_real_flag si existe # Priority 1: Use fcr_real_flag if it exists
if "fcr_real_flag" in df.columns: if "fcr_real_flag" in df.columns:
col = df["fcr_real_flag"] col = df["fcr_real_flag"]
# Normalizar a booleano # Normalize to boolean
if col.dtype == "O": if col.dtype == "O":
fcr_mask = ( fcr_mask = (
col.astype(str) col.astype(str)
@@ -224,7 +222,7 @@ class OperationalPerformanceMetrics:
fcr = (fcr_count / total) * 100.0 fcr = (fcr_count / total) * 100.0
return float(max(0.0, min(100.0, round(fcr, 2)))) return float(max(0.0, min(100.0, round(fcr, 2))))
# Prioridad 2: Fallback a 100 - escalation_rate # Priority 2: Fallback to 100 - escalation_rate
try: try:
esc = self.escalation_rate() esc = self.escalation_rate()
except Exception: except Exception:
@@ -239,7 +237,7 @@ class OperationalPerformanceMetrics:
def escalation_rate(self) -> float: def escalation_rate(self) -> float:
""" """
% de interacciones que requieren escalación (transfer_flag == True). % of interactions that require escalation (transfer_flag == True).
""" """
df = self.df df = self.df
total = len(df) total = len(df)
@@ -251,17 +249,17 @@ class OperationalPerformanceMetrics:
def abandonment_rate(self) -> float: def abandonment_rate(self) -> float:
""" """
% de interacciones abandonadas. % of abandoned interactions.
Busca en orden: is_abandoned, abandoned_flag, abandoned Searches in order: is_abandoned, abandoned_flag, abandoned
Si ninguna columna existe, devuelve NaN. If no column exists, returns NaN.
""" """
df = self.df df = self.df
total = len(df) total = len(df)
if total == 0: if total == 0:
return float("nan") return float("nan")
# Buscar columna de abandono en orden de prioridad # Search for abandonment column in priority order
abandon_col = None abandon_col = None
for col_name in ["is_abandoned", "abandoned_flag", "abandoned"]: for col_name in ["is_abandoned", "abandoned_flag", "abandoned"]:
if col_name in df.columns: if col_name in df.columns:
@@ -273,7 +271,7 @@ class OperationalPerformanceMetrics:
col = df[abandon_col] col = df[abandon_col]
# Normalizar a booleano # Normalize to boolean
if col.dtype == "O": if col.dtype == "O":
abandon_mask = ( abandon_mask = (
col.astype(str) col.astype(str)
@@ -289,10 +287,9 @@ class OperationalPerformanceMetrics:
def high_hold_time_rate(self, threshold_seconds: float = 60.0) -> float: def high_hold_time_rate(self, threshold_seconds: float = 60.0) -> float:
""" """
% de interacciones con hold_time > threshold (por defecto 60s). % of interactions with hold_time > threshold (default 60s).
Proxy de complejidad: si el agente tuvo que poner en espera al cliente Complexity proxy: if the agent had to put the customer on hold for more than 60 seconds, they probably had to consult/investigate.
más de 60 segundos, probablemente tuvo que consultar/investigar.
""" """
df = self.df df = self.df
total = len(df) total = len(df)
@@ -306,44 +303,43 @@ class OperationalPerformanceMetrics:
def recurrence_rate_7d(self) -> float: def recurrence_rate_7d(self) -> float:
""" """
% de clientes que vuelven a contactar en < 7 días para el MISMO skill. % of customers who contact again in < 7 days for the SAME skill.
Se basa en customer_id (o caller_id si no hay customer_id) + queue_skill. Based on customer_id (or caller_id if no customer_id) + queue_skill.
Calcula: Calculates:
- Para cada combinación cliente + skill, ordena por datetime_start - For each client + skill combination, sorts by datetime_start
- Si hay dos contactos consecutivos separados < 7 días (mismo cliente, mismo skill), - If there are two consecutive contacts separated by < 7 days (same client, same skill), counts as "recurrent"
cuenta como "recurrente" - Rate = number of recurrent clients / total number of clients
- Tasa = nº clientes recurrentes / nº total de clientes
NOTA: Solo cuenta como recurrencia si el cliente llama por el MISMO skill. NOTE: Only counts as recurrence if the client calls for the SAME skill.
Un cliente que llama a "Ventas" y luego a "Soporte" NO es recurrente. A client who calls "Sales" and then "Support" is NOT recurrent.
""" """
df = self.df.dropna(subset=["datetime_start"]).copy() df = self.df.dropna(subset=["datetime_start"]).copy()
# Normalizar identificador de cliente # Normalize client identifier
if "customer_id" not in df.columns: if "customer_id" not in df.columns:
if "caller_id" in df.columns: if "caller_id" in df.columns:
df["customer_id"] = df["caller_id"] df["customer_id"] = df["caller_id"]
else: else:
# No hay identificador de cliente -> no se puede calcular # No client identifier -> cannot calculate
return float("nan") return float("nan")
df = df.dropna(subset=["customer_id"]) df = df.dropna(subset=["customer_id"])
if df.empty: if df.empty:
return float("nan") return float("nan")
# Ordenar por cliente + skill + fecha # Sort by client + skill + date
df = df.sort_values(["customer_id", "queue_skill", "datetime_start"]) df = df.sort_values(["customer_id", "queue_skill", "datetime_start"])
# Diferencia de tiempo entre contactos consecutivos por cliente Y skill # Time difference between consecutive contacts by client AND skill
# Esto asegura que solo contamos recontactos del mismo cliente para el mismo skill # This ensures we only count re-contacts from the same client for the same skill
df["delta"] = df.groupby(["customer_id", "queue_skill"])["datetime_start"].diff() df["delta"] = df.groupby(["customer_id", "queue_skill"])["datetime_start"].diff()
# Marcamos los contactos que ocurren a menos de 7 días del anterior (mismo skill) # Mark contacts that occur less than 7 days from the previous one (same skill)
recurrence_mask = df["delta"] < pd.Timedelta(days=7) recurrence_mask = df["delta"] < pd.Timedelta(days=7)
# Nº de clientes que tienen al menos un contacto recurrente (para cualquier skill) # Number of clients who have at least one recurrent contact (for any skill)
recurrent_customers = df.loc[recurrence_mask, "customer_id"].nunique() recurrent_customers = df.loc[recurrence_mask, "customer_id"].nunique()
total_customers = df["customer_id"].nunique() total_customers = df["customer_id"].nunique()
@@ -356,9 +352,9 @@ class OperationalPerformanceMetrics:
def repeat_channel_rate(self) -> float: def repeat_channel_rate(self) -> float:
""" """
% de reincidencias (<7 días) en las que el cliente usa el MISMO canal. % of recurrences (<7 days) in which the client uses the SAME channel.
Si no hay customer_id/caller_id o solo un contacto por cliente, devuelve NaN. If there is no customer_id/caller_id or only one contact per client, returns NaN.
""" """
df = self.df.dropna(subset=["datetime_start"]).copy() df = self.df.dropna(subset=["datetime_start"]).copy()
if df["customer_id"].isna().all(): if df["customer_id"].isna().all():
@@ -387,11 +383,11 @@ class OperationalPerformanceMetrics:
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
def occupancy_rate(self) -> float: def occupancy_rate(self) -> float:
""" """
Tasa de ocupación: Occupancy rate:
occupancy = sum(handle_time) / sum(logged_time) * 100. occupancy = sum(handle_time) / sum(logged_time) * 100.
Requiere columna 'logged_time'. Si no existe o es todo 0, devuelve NaN. Requires 'logged_time' column. If it does not exist or is all 0, returns NaN.
""" """
df = self.df df = self.df
if "logged_time" not in df.columns: if "logged_time" not in df.columns:
@@ -408,23 +404,23 @@ class OperationalPerformanceMetrics:
return float(round(occ * 100, 2)) return float(round(occ * 100, 2))
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
# Score de rendimiento 0-10 # Performance score 0-10
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
def performance_score(self) -> Dict[str, float]: def performance_score(self) -> Dict[str, float]:
""" """
Calcula un score 0-10 combinando: Calculates a 0-10 score combining:
- AHT (bajo es mejor) - AHT (lower is better)
- FCR (alto es mejor) - FCR (higher is better)
- Variabilidad (P90/P50, bajo es mejor) - Variability (P90/P50, lower is better)
- Otros factores (ocupación / escalación) - Other factors (occupancy / escalation)
Fórmula: Formula:
score = 0.4 * (10 - AHT_norm) + score = 0.4 * (10 - AHT_norm) +
0.3 * FCR_norm + 0.3 * FCR_norm +
0.2 * (10 - Var_norm) + 0.2 * (10 - Var_norm) +
0.1 * Otros_score 0.1 * Otros_score
Donde *_norm son valores en escala 0-10. Where *_norm are values on a 0-10 scale.
""" """
dist = self.aht_distribution() dist = self.aht_distribution()
if not dist: if not dist:
@@ -433,15 +429,15 @@ class OperationalPerformanceMetrics:
p50 = dist["p50"] p50 = dist["p50"]
ratio = dist["p90_p50_ratio"] ratio = dist["p90_p50_ratio"]
# AHT_normalized: 0 (mejor) a 10 (peor) # AHT_normalized: 0 (better) to 10 (worse)
aht_norm = self._scale_to_0_10(p50, self.AHT_GOOD, self.AHT_BAD) aht_norm = self._scale_to_0_10(p50, self.AHT_GOOD, self.AHT_BAD)
# FCR_normalized: 0-10 directamente desde % (0-100) # FCR_normalized: 0-10 directly from % (0-100)
fcr_pct = self.fcr_rate() fcr_pct = self.fcr_rate()
fcr_norm = fcr_pct / 10.0 if not np.isnan(fcr_pct) else 0.0 fcr_norm = fcr_pct / 10.0 if not np.isnan(fcr_pct) else 0.0
# Variabilidad_normalized: 0 (ratio bueno) a 10 (ratio malo) # Variability_normalized: 0 (good ratio) to 10 (bad ratio)
var_norm = self._scale_to_0_10(ratio, self.VAR_RATIO_GOOD, self.VAR_RATIO_BAD) var_norm = self._scale_to_0_10(ratio, self.VAR_RATIO_GOOD, self.VAR_RATIO_BAD)
# Otros factores: combinamos ocupación (ideal ~80%) y escalación (ideal baja) # Other factors: combine occupancy (ideal ~80%) and escalation (ideal low)
occ = self.occupancy_rate() occ = self.occupancy_rate()
esc = self.escalation_rate() esc = self.escalation_rate()
@@ -467,26 +463,26 @@ class OperationalPerformanceMetrics:
def _scale_to_0_10(self, value: float, good: float, bad: float) -> float: def _scale_to_0_10(self, value: float, good: float, bad: float) -> float:
""" """
Escala linealmente un valor: Linearly scales a value:
- good -> 0 - good -> 0
- bad -> 10 - bad -> 10
Con saturación fuera de rango. With saturation outside range.
""" """
if np.isnan(value): if np.isnan(value):
return 5.0 # neutro return 5.0 # neutral
if good == bad: if good == bad:
return 5.0 return 5.0
if good < bad: if good < bad:
# Menor es mejor # Lower is better
if value <= good: if value <= good:
return 0.0 return 0.0
if value >= bad: if value >= bad:
return 10.0 return 10.0
return 10.0 * (value - good) / (bad - good) return 10.0 * (value - good) / (bad - good)
else: else:
# Mayor es mejor # Higher is better
if value >= good: if value >= good:
return 0.0 return 0.0
if value <= bad: if value <= bad:
@@ -495,19 +491,19 @@ class OperationalPerformanceMetrics:
def _compute_other_factors_score(self, occ_pct: float, esc_pct: float) -> float: def _compute_other_factors_score(self, occ_pct: float, esc_pct: float) -> float:
""" """
Otros factores (0-10) basados en: Other factors (0-10) based on:
- ocupación ideal alrededor de 80% - ideal occupancy around 80%
- tasa de escalación ideal baja (<10%) - ideal escalation rate low (<10%)
""" """
# Ocupación: 0 penalización si está entre 75-85, se penaliza fuera # Occupancy: 0 penalty if between 75-85, penalized outside
if np.isnan(occ_pct): if np.isnan(occ_pct):
occ_penalty = 5.0 occ_penalty = 5.0
else: else:
deviation = abs(occ_pct - 80.0) deviation = abs(occ_pct - 80.0)
occ_penalty = min(10.0, deviation / 5.0 * 2.0) # cada 5 puntos se suman 2, máx 10 occ_penalty = min(10.0, deviation / 5.0 * 2.0) # each 5 points add 2, max 10
occ_score = max(0.0, 10.0 - occ_penalty) occ_score = max(0.0, 10.0 - occ_penalty)
# Escalación: 0-10 donde 0% -> 10 puntos, >=40% -> 0 # Escalation: 0-10 where 0% -> 10 points, >=40% -> 0
if np.isnan(esc_pct): if np.isnan(esc_pct):
esc_score = 5.0 esc_score = 5.0
else: else:
@@ -518,7 +514,7 @@ class OperationalPerformanceMetrics:
else: else:
esc_score = 10.0 * (1.0 - esc_pct / 40.0) esc_score = 10.0 * (1.0 - esc_pct / 40.0)
# Media simple de ambos # Simple average of both
return (occ_score + esc_score) / 2.0 return (occ_score + esc_score) / 2.0
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
@@ -526,29 +522,29 @@ class OperationalPerformanceMetrics:
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
def plot_aht_boxplot_by_skill(self) -> Axes: def plot_aht_boxplot_by_skill(self) -> Axes:
""" """
Boxplot del AHT por skill (P10-P50-P90 visual). Boxplot of AHT by skill (P10-P50-P90 visual).
""" """
df = self.df.copy() df = self.df.copy()
if df.empty or "handle_time" not in df.columns: if df.empty or "handle_time" not in df.columns:
fig, ax = plt.subplots() fig, ax = plt.subplots()
ax.text(0.5, 0.5, "Sin datos de AHT", ha="center", va="center") ax.text(0.5, 0.5, "No AHT data", ha="center", va="center")
ax.set_axis_off() ax.set_axis_off()
return ax return ax
df = df.dropna(subset=["handle_time"]) df = df.dropna(subset=["handle_time"])
if df.empty: if df.empty:
fig, ax = plt.subplots() fig, ax = plt.subplots()
ax.text(0.5, 0.5, "AHT no disponible", ha="center", va="center") ax.text(0.5, 0.5, "AHT not available", ha="center", va="center")
ax.set_axis_off() ax.set_axis_off()
return ax return ax
fig, ax = plt.subplots(figsize=(8, 4)) fig, ax = plt.subplots(figsize=(8, 4))
df.boxplot(column="handle_time", by="queue_skill", ax=ax, showfliers=False) df.boxplot(column="handle_time", by="queue_skill", ax=ax, showfliers=False)
ax.set_xlabel("Skill / Cola") ax.set_xlabel("Skill / Queue")
ax.set_ylabel("AHT (segundos)") ax.set_ylabel("AHT (seconds)")
ax.set_title("Distribución de AHT por skill") ax.set_title("AHT distribution by skill")
plt.suptitle("") plt.suptitle("")
plt.xticks(rotation=45, ha="right") plt.xticks(rotation=45, ha="right")
ax.grid(axis="y", alpha=0.3) ax.grid(axis="y", alpha=0.3)
@@ -557,14 +553,14 @@ class OperationalPerformanceMetrics:
def plot_resolution_funnel_by_skill(self) -> Axes: def plot_resolution_funnel_by_skill(self) -> Axes:
""" """
Funnel / barras apiladas de Talk + Hold + ACW por skill (P50). Funnel / stacked bars of Talk + Hold + ACW by skill (P50).
Permite ver el equilibrio de tiempos por skill. Allows viewing the time balance by skill.
""" """
p50 = self.talk_hold_acw_p50_by_skill() p50 = self.talk_hold_acw_p50_by_skill()
if p50.empty: if p50.empty:
fig, ax = plt.subplots() fig, ax = plt.subplots()
ax.text(0.5, 0.5, "Sin datos para funnel", ha="center", va="center") ax.text(0.5, 0.5, "No data for funnel", ha="center", va="center")
ax.set_axis_off() ax.set_axis_off()
return ax return ax
@@ -583,27 +579,26 @@ class OperationalPerformanceMetrics:
ax.set_xticks(x) ax.set_xticks(x)
ax.set_xticklabels(skills, rotation=45, ha="right") ax.set_xticklabels(skills, rotation=45, ha="right")
ax.set_ylabel("Segundos") ax.set_ylabel("Seconds")
ax.set_title("Funnel de resolución (P50) por skill") ax.set_title("Resolution funnel (P50) by skill")
ax.legend() ax.legend()
ax.grid(axis="y", alpha=0.3) ax.grid(axis="y", alpha=0.3)
return ax return ax
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
# Métricas por skill (para consistencia frontend cached/fresh) # Metrics by skill (for frontend cached/fresh consistency)
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
def metrics_by_skill(self) -> List[Dict[str, Any]]: def metrics_by_skill(self) -> List[Dict[str, Any]]:
""" """
Calcula métricas operacionales por skill: Calculates operational metrics by skill:
- transfer_rate: % de interacciones con transfer_flag == True - transfer_rate: % of interactions with transfer_flag == True
- abandonment_rate: % de interacciones abandonadas - abandonment_rate: % of abandoned interactions
- fcr_tecnico: 100 - transfer_rate (sin transferencia) - fcr_tecnico: 100 - transfer_rate (without transfer)
- fcr_real: % sin transferencia Y sin recontacto 7d (si hay datos) - fcr_real: % without transfer AND without 7d re-contact (if there is data)
- volume: número de interacciones - volume: number of interactions
Devuelve una lista de dicts, uno por skill, para que el frontend Returns a list of dicts, one per skill, so that the frontend has access to real metrics by skill (not estimated).
tenga acceso a las métricas reales por skill (no estimadas).
""" """
df = self.df df = self.df
if df.empty: if df.empty:
@@ -611,14 +606,14 @@ class OperationalPerformanceMetrics:
results = [] results = []
# Detectar columna de abandono # Detect abandonment column
abandon_col = None abandon_col = None
for col_name in ["is_abandoned", "abandoned_flag", "abandoned"]: for col_name in ["is_abandoned", "abandoned_flag", "abandoned"]:
if col_name in df.columns: if col_name in df.columns:
abandon_col = col_name abandon_col = col_name
break break
# Detectar columna de repeat_call_7d para FCR real # Detect repeat_call_7d column for real FCR
repeat_col = None repeat_col = None
for col_name in ["repeat_call_7d", "repeat_7d", "is_repeat_7d"]: for col_name in ["repeat_call_7d", "repeat_7d", "is_repeat_7d"]:
if col_name in df.columns: if col_name in df.columns:
@@ -637,7 +632,7 @@ class OperationalPerformanceMetrics:
else: else:
transfer_rate = 0.0 transfer_rate = 0.0
# FCR Técnico = 100 - transfer_rate # Technical FCR = 100 - transfer_rate
fcr_tecnico = float(round(100.0 - transfer_rate, 2)) fcr_tecnico = float(round(100.0 - transfer_rate, 2))
# Abandonment rate # Abandonment rate
@@ -656,7 +651,7 @@ class OperationalPerformanceMetrics:
abandoned = int(abandon_mask.sum()) abandoned = int(abandon_mask.sum())
abandonment_rate = float(round(abandoned / total * 100, 2)) abandonment_rate = float(round(abandoned / total * 100, 2))
# FCR Real (sin transferencia Y sin recontacto 7d) # Real FCR (without transfer AND without 7d re-contact)
fcr_real = fcr_tecnico # default to fcr_tecnico if no repeat data fcr_real = fcr_tecnico # default to fcr_tecnico if no repeat data
if repeat_col and "transfer_flag" in group.columns: if repeat_col and "transfer_flag" in group.columns:
repeat_data = group[repeat_col] repeat_data = group[repeat_col]
@@ -670,13 +665,13 @@ class OperationalPerformanceMetrics:
else: else:
repeat_mask = pd.to_numeric(repeat_data, errors="coerce").fillna(0) > 0 repeat_mask = pd.to_numeric(repeat_data, errors="coerce").fillna(0) > 0
# FCR Real: no transfer AND no repeat # Real FCR: no transfer AND no repeat
fcr_real_mask = (~group["transfer_flag"]) & (~repeat_mask) fcr_real_mask = (~group["transfer_flag"]) & (~repeat_mask)
fcr_real_count = fcr_real_mask.sum() fcr_real_count = fcr_real_mask.sum()
fcr_real = float(round(fcr_real_count / total * 100, 2)) fcr_real = float(round(fcr_real_count / total * 100, 2))
# AHT Mean (promedio de handle_time sobre registros válidos) # AHT Mean (average of handle_time over valid records)
# Filtramos solo registros 'valid' (excluye noise/zombie) para consistencia # Filter only 'valid' records (excludes noise/zombie) for consistency
if "_is_valid_for_cv" in group.columns: if "_is_valid_for_cv" in group.columns:
valid_records = group[group["_is_valid_for_cv"]] valid_records = group[group["_is_valid_for_cv"]]
else: else:
@@ -687,15 +682,15 @@ class OperationalPerformanceMetrics:
else: else:
aht_mean = 0.0 aht_mean = 0.0
# AHT Total (promedio de handle_time sobre TODOS los registros) # AHT Total (average of handle_time over ALL records)
# Incluye NOISE, ZOMBIE, ABANDON - solo para información/comparación # Includes NOISE, ZOMBIE, ABANDON - for information/comparison only
if len(group) > 0 and "handle_time" in group.columns: if len(group) > 0 and "handle_time" in group.columns:
aht_total = float(round(group["handle_time"].mean(), 2)) aht_total = float(round(group["handle_time"].mean(), 2))
else: else:
aht_total = 0.0 aht_total = 0.0
# Hold Time Mean (promedio de hold_time sobre registros válidos) # Hold Time Mean (average of hold_time over valid records)
# Consistente con fresh path que usa MEAN, no P50 # Consistent with fresh path that uses MEAN, not P50
if len(valid_records) > 0 and "hold_time" in valid_records.columns: if len(valid_records) > 0 and "hold_time" in valid_records.columns:
hold_time_mean = float(round(valid_records["hold_time"].mean(), 2)) hold_time_mean = float(round(valid_records["hold_time"].mean(), 2))
else: else:

View File

@@ -24,11 +24,10 @@ REQUIRED_COLUMNS_SAT: List[str] = [
@dataclass @dataclass
class SatisfactionExperienceMetrics: class SatisfactionExperienceMetrics:
""" """
Dimensión 3: SATISFACCIÓN y EXPERIENCIA Dimension 3: SATISFACTION and EXPERIENCE
Todas las columnas de satisfacción (csat/nps/ces/aht) son OPCIONALES. All satisfaction columns (csat/nps/ces/aht) are OPTIONAL.
Si no están, las métricas que las usan devuelven vacío/NaN pero If they are not present, the metrics that use them return empty/NaN but never break the pipeline.
nunca rompen el pipeline.
""" """
df: pd.DataFrame df: pd.DataFrame
@@ -44,7 +43,7 @@ class SatisfactionExperienceMetrics:
missing = [c for c in REQUIRED_COLUMNS_SAT if c not in self.df.columns] missing = [c for c in REQUIRED_COLUMNS_SAT if c not in self.df.columns]
if missing: if missing:
raise ValueError( raise ValueError(
f"Faltan columnas obligatorias para SatisfactionExperienceMetrics: {missing}" f"Missing required columns for SatisfactionExperienceMetrics: {missing}"
) )
def _prepare_data(self) -> None: def _prepare_data(self) -> None:
@@ -52,7 +51,7 @@ class SatisfactionExperienceMetrics:
df["datetime_start"] = pd.to_datetime(df["datetime_start"], errors="coerce") df["datetime_start"] = pd.to_datetime(df["datetime_start"], errors="coerce")
# Duraciones base siempre existen # Base durations always exist
for col in ["duration_talk", "hold_time", "wrap_up_time"]: for col in ["duration_talk", "hold_time", "wrap_up_time"]:
df[col] = pd.to_numeric(df[col], errors="coerce") df[col] = pd.to_numeric(df[col], errors="coerce")
@@ -63,16 +62,16 @@ class SatisfactionExperienceMetrics:
+ df["wrap_up_time"].fillna(0) + df["wrap_up_time"].fillna(0)
) )
# csat_score opcional # csat_score optional
df["csat_score"] = pd.to_numeric(df.get("csat_score", np.nan), errors="coerce") df["csat_score"] = pd.to_numeric(df.get("csat_score", np.nan), errors="coerce")
# aht opcional: si existe columna explícita la usamos, si no usamos handle_time # aht optional: if explicit column exists we use it, otherwise we use handle_time
if "aht" in df.columns: if "aht" in df.columns:
df["aht"] = pd.to_numeric(df["aht"], errors="coerce") df["aht"] = pd.to_numeric(df["aht"], errors="coerce")
else: else:
df["aht"] = df["handle_time"] df["aht"] = df["handle_time"]
# NPS / CES opcionales # NPS / CES optional
df["nps_score"] = pd.to_numeric(df.get("nps_score", np.nan), errors="coerce") df["nps_score"] = pd.to_numeric(df.get("nps_score", np.nan), errors="coerce")
df["ces_score"] = pd.to_numeric(df.get("ces_score", np.nan), errors="coerce") df["ces_score"] = pd.to_numeric(df.get("ces_score", np.nan), errors="coerce")
@@ -90,8 +89,8 @@ class SatisfactionExperienceMetrics:
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
def csat_avg_by_skill_channel(self) -> pd.DataFrame: def csat_avg_by_skill_channel(self) -> pd.DataFrame:
""" """
CSAT promedio por skill/canal. Average CSAT by skill/channel.
Si no hay csat_score, devuelve DataFrame vacío. If there is no csat_score, returns empty DataFrame.
""" """
df = self.df df = self.df
if "csat_score" not in df.columns or df["csat_score"].notna().sum() == 0: if "csat_score" not in df.columns or df["csat_score"].notna().sum() == 0:
@@ -115,7 +114,7 @@ class SatisfactionExperienceMetrics:
def nps_avg_by_skill_channel(self) -> pd.DataFrame: def nps_avg_by_skill_channel(self) -> pd.DataFrame:
""" """
NPS medio por skill/canal, si existe nps_score. Average NPS by skill/channel, if nps_score exists.
""" """
df = self.df df = self.df
if "nps_score" not in df.columns or df["nps_score"].notna().sum() == 0: if "nps_score" not in df.columns or df["nps_score"].notna().sum() == 0:
@@ -139,7 +138,7 @@ class SatisfactionExperienceMetrics:
def ces_avg_by_skill_channel(self) -> pd.DataFrame: def ces_avg_by_skill_channel(self) -> pd.DataFrame:
""" """
CES medio por skill/canal, si existe ces_score. Average CES by skill/channel, if ces_score exists.
""" """
df = self.df df = self.df
if "ces_score" not in df.columns or df["ces_score"].notna().sum() == 0: if "ces_score" not in df.columns or df["ces_score"].notna().sum() == 0:
@@ -163,11 +162,11 @@ class SatisfactionExperienceMetrics:
def csat_global(self) -> float: def csat_global(self) -> float:
""" """
CSAT medio global (todas las interacciones). Global average CSAT (all interactions).
Usa la columna opcional `csat_score`: Uses the optional `csat_score` column:
- Si no existe, devuelve NaN. - If it does not exist, returns NaN.
- Si todos los valores son NaN / vacíos, devuelve NaN. - If all values are NaN / empty, returns NaN.
""" """
df = self.df df = self.df
if "csat_score" not in df.columns: if "csat_score" not in df.columns:
@@ -183,8 +182,8 @@ class SatisfactionExperienceMetrics:
def csat_aht_correlation(self) -> Dict[str, Any]: def csat_aht_correlation(self) -> Dict[str, Any]:
""" """
Correlación Pearson CSAT vs AHT. Pearson correlation CSAT vs AHT.
Si falta csat o aht, o no hay varianza, devuelve NaN y código adecuado. If csat or aht is missing, or there is no variance, returns NaN and appropriate code.
""" """
df = self.df df = self.df
if "csat_score" not in df.columns or df["csat_score"].notna().sum() == 0: if "csat_score" not in df.columns or df["csat_score"].notna().sum() == 0:
@@ -216,8 +215,8 @@ class SatisfactionExperienceMetrics:
def csat_aht_skill_summary(self) -> pd.DataFrame: def csat_aht_skill_summary(self) -> pd.DataFrame:
""" """
Resumen por skill con clasificación del "sweet spot". Summary by skill with "sweet spot" classification.
Si falta csat o aht, devuelve DataFrame vacío. If csat or aht is missing, returns empty DataFrame.
""" """
df = self.df df = self.df
if df["csat_score"].notna().sum() == 0 or df["aht"].notna().sum() == 0: if df["csat_score"].notna().sum() == 0 or df["aht"].notna().sum() == 0:
@@ -258,20 +257,20 @@ class SatisfactionExperienceMetrics:
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
def plot_csat_vs_aht_scatter(self) -> Axes: def plot_csat_vs_aht_scatter(self) -> Axes:
""" """
Scatter CSAT vs AHT por skill. Scatter CSAT vs AHT by skill.
Si no hay datos suficientes, devuelve un Axes con mensaje. If there is insufficient data, returns an Axes with message.
""" """
df = self.df df = self.df
if df["csat_score"].notna().sum() == 0 or df["aht"].notna().sum() == 0: if df["csat_score"].notna().sum() == 0 or df["aht"].notna().sum() == 0:
fig, ax = plt.subplots() fig, ax = plt.subplots()
ax.text(0.5, 0.5, "Sin datos de CSAT/AHT", ha="center", va="center") ax.text(0.5, 0.5, "No CSAT/AHT data", ha="center", va="center")
ax.set_axis_off() ax.set_axis_off()
return ax return ax
df = df.dropna(subset=["csat_score", "aht"]).copy() df = df.dropna(subset=["csat_score", "aht"]).copy()
if df.empty: if df.empty:
fig, ax = plt.subplots() fig, ax = plt.subplots()
ax.text(0.5, 0.5, "Sin datos de CSAT/AHT", ha="center", va="center") ax.text(0.5, 0.5, "No CSAT/AHT data", ha="center", va="center")
ax.set_axis_off() ax.set_axis_off()
return ax return ax
@@ -280,9 +279,9 @@ class SatisfactionExperienceMetrics:
for skill, sub in df.groupby("queue_skill"): for skill, sub in df.groupby("queue_skill"):
ax.scatter(sub["aht"], sub["csat_score"], label=skill, alpha=0.7) ax.scatter(sub["aht"], sub["csat_score"], label=skill, alpha=0.7)
ax.set_xlabel("AHT (segundos)") ax.set_xlabel("AHT (seconds)")
ax.set_ylabel("CSAT") ax.set_ylabel("CSAT")
ax.set_title("CSAT vs AHT por skill") ax.set_title("CSAT vs AHT by skill")
ax.grid(alpha=0.3) ax.grid(alpha=0.3)
ax.legend(title="Skill", bbox_to_anchor=(1.05, 1), loc="upper left") ax.legend(title="Skill", bbox_to_anchor=(1.05, 1), loc="upper left")
@@ -291,28 +290,28 @@ class SatisfactionExperienceMetrics:
def plot_csat_distribution(self) -> Axes: def plot_csat_distribution(self) -> Axes:
""" """
Histograma de CSAT. CSAT histogram.
Si no hay csat_score, devuelve un Axes con mensaje. If there is no csat_score, returns an Axes with message.
""" """
df = self.df df = self.df
if "csat_score" not in df.columns or df["csat_score"].notna().sum() == 0: if "csat_score" not in df.columns or df["csat_score"].notna().sum() == 0:
fig, ax = plt.subplots() fig, ax = plt.subplots()
ax.text(0.5, 0.5, "Sin datos de CSAT", ha="center", va="center") ax.text(0.5, 0.5, "No CSAT data", ha="center", va="center")
ax.set_axis_off() ax.set_axis_off()
return ax return ax
df = df.dropna(subset=["csat_score"]).copy() df = df.dropna(subset=["csat_score"]).copy()
if df.empty: if df.empty:
fig, ax = plt.subplots() fig, ax = plt.subplots()
ax.text(0.5, 0.5, "Sin datos de CSAT", ha="center", va="center") ax.text(0.5, 0.5, "No CSAT data", ha="center", va="center")
ax.set_axis_off() ax.set_axis_off()
return ax return ax
fig, ax = plt.subplots(figsize=(6, 4)) fig, ax = plt.subplots(figsize=(6, 4))
ax.hist(df["csat_score"], bins=10, alpha=0.7) ax.hist(df["csat_score"], bins=10, alpha=0.7)
ax.set_xlabel("CSAT") ax.set_xlabel("CSAT")
ax.set_ylabel("Frecuencia") ax.set_ylabel("Frequency")
ax.set_title("Distribución de CSAT") ax.set_title("CSAT distribution")
ax.grid(axis="y", alpha=0.3) ax.grid(axis="y", alpha=0.3)
return ax return ax

View File

@@ -1,11 +1,11 @@
// utils/dataTransformation.ts // utils/dataTransformation.ts
// Pipeline de transformación de datos raw a métricas procesadas // Raw data to processed metrics transformation pipeline
import type { RawInteraction } from '../types'; import type { RawInteraction } from '../types';
/** /**
* Paso 1: Limpieza de Ruido * Step 1: Noise Cleanup
* Elimina interacciones con duration < 10 segundos (falsos contactos o errores de sistema) * Removes interactions with duration < 10 seconds (false contacts or system errors)
*/ */
export function cleanNoiseFromData(interactions: RawInteraction[]): RawInteraction[] { export function cleanNoiseFromData(interactions: RawInteraction[]): RawInteraction[] {
const MIN_DURATION_SECONDS = 10; const MIN_DURATION_SECONDS = 10;
@@ -22,30 +22,30 @@ export function cleanNoiseFromData(interactions: RawInteraction[]): RawInteracti
const removedCount = interactions.length - cleaned.length; const removedCount = interactions.length - cleaned.length;
const removedPercentage = ((removedCount / interactions.length) * 100).toFixed(1); const removedPercentage = ((removedCount / interactions.length) * 100).toFixed(1);
console.log(`🧹 Limpieza de Ruido: ${removedCount} interacciones eliminadas (${removedPercentage}% del total)`); console.log(`🧹 Noise Cleanup: ${removedCount} interactions removed (${removedPercentage}% of total)`);
console.log(`Interacciones limpias: ${cleaned.length}`); console.log(`Clean interactions: ${cleaned.length}`);
return cleaned; return cleaned;
} }
/** /**
* tricas base calculadas por skill * Base metrics calculated by skill
*/ */
export interface SkillBaseMetrics { export interface SkillBaseMetrics {
skill: string; skill: string;
volume: number; // Número de interacciones volume: number; // Number of interactions
aht_mean: number; // AHT promedio (segundos) aht_mean: number; // Average AHT (seconds)
aht_std: number; // Desviación estándar del AHT aht_std: number; // AHT standard deviation
transfer_rate: number; // Tasa de transferencia (0-100) transfer_rate: number; // Transfer rate (0-100)
total_cost: number; // Coste total (€) total_cost: number; // Total cost (€)
// Datos auxiliares para cálculos posteriores // Auxiliary data for subsequent calculations
aht_values: number[]; // Array de todos los AHT para percentiles aht_values: number[]; // Array of all AHT values for percentiles
} }
/** /**
* Paso 2: Calcular Métricas Base por Skill * Step 2: Calculate Base Metrics by Skill
* Agrupa por skill y calcula volumen, AHT promedio, desviación estándar, tasa de transferencia y coste * Groups by skill and calculates volume, average AHT, standard deviation, transfer rate and cost
*/ */
export function calculateSkillBaseMetrics( export function calculateSkillBaseMetrics(
interactions: RawInteraction[], interactions: RawInteraction[],
@@ -53,7 +53,7 @@ export function calculateSkillBaseMetrics(
): SkillBaseMetrics[] { ): SkillBaseMetrics[] {
const COST_PER_SECOND = costPerHour / 3600; const COST_PER_SECOND = costPerHour / 3600;
// Agrupar por skill // Group by skill
const skillGroups = new Map<string, RawInteraction[]>(); const skillGroups = new Map<string, RawInteraction[]>();
interactions.forEach(interaction => { interactions.forEach(interaction => {
@@ -64,31 +64,31 @@ export function calculateSkillBaseMetrics(
skillGroups.get(skill)!.push(interaction); skillGroups.get(skill)!.push(interaction);
}); });
// Calcular métricas por skill // Calculate metrics per skill
const metrics: SkillBaseMetrics[] = []; const metrics: SkillBaseMetrics[] = [];
skillGroups.forEach((skillInteractions, skill) => { skillGroups.forEach((skillInteractions, skill) => {
const volume = skillInteractions.length; const volume = skillInteractions.length;
// Calcular AHT para cada interacción // Calculate AHT for each interaction
const ahtValues = skillInteractions.map(i => const ahtValues = skillInteractions.map(i =>
i.duration_talk + i.hold_time + i.wrap_up_time i.duration_talk + i.hold_time + i.wrap_up_time
); );
// AHT promedio // Average AHT
const ahtMean = ahtValues.reduce((sum, val) => sum + val, 0) / volume; const ahtMean = ahtValues.reduce((sum, val) => sum + val, 0) / volume;
// Desviación estándar del AHT // AHT standard deviation
const variance = ahtValues.reduce((sum, val) => const variance = ahtValues.reduce((sum, val) =>
sum + Math.pow(val - ahtMean, 2), 0 sum + Math.pow(val - ahtMean, 2), 0
) / volume; ) / volume;
const ahtStd = Math.sqrt(variance); const ahtStd = Math.sqrt(variance);
// Tasa de transferencia // Transfer rate
const transferCount = skillInteractions.filter(i => i.transfer_flag).length; const transferCount = skillInteractions.filter(i => i.transfer_flag).length;
const transferRate = (transferCount / volume) * 100; const transferRate = (transferCount / volume) * 100;
// Coste total // Total cost
const totalCost = ahtValues.reduce((sum, aht) => const totalCost = ahtValues.reduce((sum, aht) =>
sum + (aht * COST_PER_SECOND), 0 sum + (aht * COST_PER_SECOND), 0
); );
@@ -104,82 +104,82 @@ export function calculateSkillBaseMetrics(
}); });
}); });
// Ordenar por volumen descendente // Sort by descending volume
metrics.sort((a, b) => b.volume - a.volume); metrics.sort((a, b) => b.volume - a.volume);
console.log(`📊 Métricas Base calculadas para ${metrics.length} skills`); console.log(`📊 Base Metrics calculated for ${metrics.length} skills`);
return metrics; return metrics;
} }
/** /**
* Dimensiones transformadas para Agentic Readiness Score * Transformed dimensions for Agentic Readiness Score
*/ */
export interface SkillDimensions { export interface SkillDimensions {
skill: string; skill: string;
volume: number; volume: number;
// Dimensión 1: Predictibilidad (0-10) // Dimension 1: Predictability (0-10)
predictability_score: number; predictability_score: number;
predictability_cv: number; // Coeficiente de Variación (para referencia) predictability_cv: number; // Coefficient of Variation (for reference)
// Dimensión 2: Complejidad Inversa (0-10) // Dimension 2: Inverse Complexity (0-10)
complexity_inverse_score: number; complexity_inverse_score: number;
complexity_transfer_rate: number; // Tasa de transferencia (para referencia) complexity_transfer_rate: number; // Transfer rate (for reference)
// Dimensión 3: Repetitividad/Impacto (0-10) // Dimension 3: Repetitiveness/Impact (0-10)
repetitivity_score: number; repetitivity_score: number;
// Datos auxiliares // Auxiliary data
aht_mean: number; aht_mean: number;
total_cost: number; total_cost: number;
} }
/** /**
* Paso 3: Transformar Métricas Base a Dimensiones * Step 3: Transform Base Metrics to Dimensions
* Aplica las fórmulas de normalización para obtener scores 0-10 * Applies normalization formulas to obtain 0-10 scores
*/ */
export function transformToDimensions( export function transformToDimensions(
baseMetrics: SkillBaseMetrics[] baseMetrics: SkillBaseMetrics[]
): SkillDimensions[] { ): SkillDimensions[] {
return baseMetrics.map(metric => { return baseMetrics.map(metric => {
// Dimensión 1: Predictibilidad (Proxy: Variabilidad del AHT) // Dimension 1: Predictability (Proxy: AHT Variability)
// CV = desviación estándar / media // CV = standard deviation / mean
const cv = metric.aht_std / metric.aht_mean; const cv = metric.aht_std / metric.aht_mean;
// Normalización: CV <= 0.3 → 10, CV >= 1.5 → 0 // Normalization: CV <= 0.3 → 10, CV >= 1.5 → 0
// Fórmula: MAX(0, MIN(10, 10 - ((CV - 0.3) / 1.2 * 10))) // Formula: MAX(0, MIN(10, 10 - ((CV - 0.3) / 1.2 * 10)))
const predictabilityScore = Math.max(0, Math.min(10, const predictabilityScore = Math.max(0, Math.min(10,
10 - ((cv - 0.3) / 1.2 * 10) 10 - ((cv - 0.3) / 1.2 * 10)
)); ));
// Dimensión 2: Complejidad Inversa (Proxy: Tasa de Transferencia) // Dimension 2: Inverse Complexity (Proxy: Transfer Rate)
// T = tasa de transferencia (%) // T = transfer rate (%)
const transferRate = metric.transfer_rate; const transferRate = metric.transfer_rate;
// Normalización: T <= 5% → 10, T >= 30% → 0 // Normalization: T <= 5% → 10, T >= 30% → 0
// Fórmula: MAX(0, MIN(10, 10 - ((T - 0.05) / 0.25 * 10))) // Formula: MAX(0, MIN(10, 10 - ((T - 0.05) / 0.25 * 10)))
const complexityInverseScore = Math.max(0, Math.min(10, const complexityInverseScore = Math.max(0, Math.min(10,
10 - ((transferRate / 100 - 0.05) / 0.25 * 10) 10 - ((transferRate / 100 - 0.05) / 0.25 * 10)
)); ));
// Dimensión 3: Repetitividad/Impacto (Proxy: Volumen) // Dimension 3: Repetitiveness/Impact (Proxy: Volume)
// Normalización fija: > 5,000 llamadas/mes = 10, < 100 = 0 // Fixed normalization: > 5,000 calls/month = 10, < 100 = 0
let repetitivityScore: number; let repetitivityScore: number;
if (metric.volume >= 5000) { if (metric.volume >= 5000) {
repetitivityScore = 10; repetitivityScore = 10;
} else if (metric.volume <= 100) { } else if (metric.volume <= 100) {
repetitivityScore = 0; repetitivityScore = 0;
} else { } else {
// Interpolación lineal entre 100 y 5000 // Linear interpolation between 100 and 5000
repetitivityScore = ((metric.volume - 100) / (5000 - 100)) * 10; repetitivityScore = ((metric.volume - 100) / (5000 - 100)) * 10;
} }
return { return {
skill: metric.skill, skill: metric.skill,
volume: metric.volume, volume: metric.volume,
predictability_score: Math.round(predictabilityScore * 10) / 10, // 1 decimal predictability_score: Math.round(predictabilityScore * 10) / 10, // 1 decimal place
predictability_cv: Math.round(cv * 100) / 100, // 2 decimales predictability_cv: Math.round(cv * 100) / 100, // 2 decimal places
complexity_inverse_score: Math.round(complexityInverseScore * 10) / 10, complexity_inverse_score: Math.round(complexityInverseScore * 10) / 10,
complexity_transfer_rate: Math.round(transferRate * 10) / 10, complexity_transfer_rate: Math.round(transferRate * 10) / 10,
repetitivity_score: Math.round(repetitivityScore * 10) / 10, repetitivity_score: Math.round(repetitivityScore * 10) / 10,
@@ -190,7 +190,7 @@ export function transformToDimensions(
} }
/** /**
* Resultado final con Agentic Readiness Score * Final result with Agentic Readiness Score
*/ */
export interface SkillAgenticReadiness extends SkillDimensions { export interface SkillAgenticReadiness extends SkillDimensions {
agentic_readiness_score: number; // 0-10 agentic_readiness_score: number; // 0-10
@@ -199,28 +199,28 @@ export interface SkillAgenticReadiness extends SkillDimensions {
} }
/** /**
* Paso 4: Calcular Agentic Readiness Score * Step 4: Calculate Agentic Readiness Score
* Promedio ponderado de las 3 dimensiones * Weighted average of the 3 dimensions
*/ */
export function calculateAgenticReadinessScore( export function calculateAgenticReadinessScore(
dimensions: SkillDimensions[], dimensions: SkillDimensions[],
weights?: { predictability: number; complexity: number; repetitivity: number } weights?: { predictability: number; complexity: number; repetitivity: number }
): SkillAgenticReadiness[] { ): SkillAgenticReadiness[] {
// Pesos por defecto (ajustables) // Default weights (adjustable)
const w = weights || { const w = weights || {
predictability: 0.40, // 40% - Más importante predictability: 0.40, // 40% - Most important
complexity: 0.35, // 35% complexity: 0.35, // 35%
repetitivity: 0.25 // 25% repetitivity: 0.25 // 25%
}; };
return dimensions.map(dim => { return dimensions.map(dim => {
// Promedio ponderado // Weighted average
const score = const score =
dim.predictability_score * w.predictability + dim.predictability_score * w.predictability +
dim.complexity_inverse_score * w.complexity + dim.complexity_inverse_score * w.complexity +
dim.repetitivity_score * w.repetitivity; dim.repetitivity_score * w.repetitivity;
// Categorizar // Categorize
let category: 'automate_now' | 'assist_copilot' | 'optimize_first'; let category: 'automate_now' | 'assist_copilot' | 'optimize_first';
let label: string; let label: string;
@@ -245,29 +245,29 @@ export function calculateAgenticReadinessScore(
} }
/** /**
* Pipeline completo: Raw Data → Agentic Readiness Score * Complete pipeline: Raw Data → Agentic Readiness Score
*/ */
export function transformRawDataToAgenticReadiness( export function transformRawDataToAgenticReadiness(
rawInteractions: RawInteraction[], rawInteractions: RawInteraction[],
costPerHour: number, costPerHour: number,
weights?: { predictability: number; complexity: number; repetitivity: number } weights?: { predictability: number; complexity: number; repetitivity: number }
): SkillAgenticReadiness[] { ): SkillAgenticReadiness[] {
console.log(`🚀 Iniciando pipeline de transformación con ${rawInteractions.length} interacciones...`); console.log(`🚀 Starting transformation pipeline with ${rawInteractions.length} interactions...`);
// Paso 1: Limpieza de ruido // Step 1: Noise cleanup
const cleanedData = cleanNoiseFromData(rawInteractions); const cleanedData = cleanNoiseFromData(rawInteractions);
// Paso 2: Calcular métricas base // Step 2: Calculate base metrics
const baseMetrics = calculateSkillBaseMetrics(cleanedData, costPerHour); const baseMetrics = calculateSkillBaseMetrics(cleanedData, costPerHour);
// Paso 3: Transformar a dimensiones // Step 3: Transform to dimensions
const dimensions = transformToDimensions(baseMetrics); const dimensions = transformToDimensions(baseMetrics);
// Paso 4: Calcular Agentic Readiness Score // Step 4: Calculate Agentic Readiness Score
const agenticReadiness = calculateAgenticReadinessScore(dimensions, weights); const agenticReadiness = calculateAgenticReadinessScore(dimensions, weights);
console.log(`✅ Pipeline completado: ${agenticReadiness.length} skills procesados`); console.log(`✅ Pipeline completed: ${agenticReadiness.length} skills processed`);
console.log(`📈 Distribución:`); console.log(`📈 Distribution:`);
const automateCount = agenticReadiness.filter(s => s.readiness_category === 'automate_now').length; const automateCount = agenticReadiness.filter(s => s.readiness_category === 'automate_now').length;
const assistCount = agenticReadiness.filter(s => s.readiness_category === 'assist_copilot').length; const assistCount = agenticReadiness.filter(s => s.readiness_category === 'assist_copilot').length;
const optimizeCount = agenticReadiness.filter(s => s.readiness_category === 'optimize_first').length; const optimizeCount = agenticReadiness.filter(s => s.readiness_category === 'optimize_first').length;
@@ -279,7 +279,7 @@ export function transformRawDataToAgenticReadiness(
} }
/** /**
* Utilidad: Generar resumen de estadísticas * Utility: Generate statistics summary
*/ */
export function generateTransformationSummary( export function generateTransformationSummary(
originalCount: number, originalCount: number,
@@ -300,11 +300,11 @@ export function generateTransformationSummary(
const optimizePercent = skillsCount > 0 ? ((optimizeCount/skillsCount)*100).toFixed(0) : '0'; const optimizePercent = skillsCount > 0 ? ((optimizeCount/skillsCount)*100).toFixed(0) : '0';
return ` return `
📊 Resumen de Transformación: 📊 Transformation Summary:
Interacciones originales: ${originalCount.toLocaleString()} Original interactions: ${originalCount.toLocaleString()}
Ruido eliminado: ${removedCount.toLocaleString()} (${removedPercentage}%) Noise removed: ${removedCount.toLocaleString()} (${removedPercentage}%)
Interacciones limpias: ${cleanedCount.toLocaleString()} Clean interactions: ${cleanedCount.toLocaleString()}
Skills únicos: ${skillsCount} Unique skills: ${skillsCount}
🎯 Agentic Readiness: 🎯 Agentic Readiness:
• 🟢 Automate Now: ${automateCount} skills (${automatePercent}%) • 🟢 Automate Now: ${automateCount} skills (${automatePercent}%)

View File

@@ -1,5 +1,5 @@
// utils/segmentClassifier.ts // utils/segmentClassifier.ts
// Utilidad para clasificar colas/skills en segmentos de cliente // Utility to classify queues/skills into customer segments
import type { CustomerSegment, RawInteraction, StaticConfig } from '../types'; import type { CustomerSegment, RawInteraction, StaticConfig } from '../types';
@@ -10,8 +10,8 @@ export interface SegmentMapping {
} }
/** /**
* Parsea string de colas separadas por comas * Parses queue string separated by commas
* Ejemplo: "VIP, Premium, Enterprise" → ["VIP", "Premium", "Enterprise"] * Example: "VIP, Premium, Enterprise" → ["VIP", "Premium", "Enterprise"]
*/ */
export function parseQueueList(input: string): string[] { export function parseQueueList(input: string): string[] {
if (!input || input.trim().length === 0) { if (!input || input.trim().length === 0) {
@@ -25,13 +25,13 @@ export function parseQueueList(input: string): string[] {
} }
/** /**
* Clasifica una cola según el mapeo proporcionado * Classifies a queue according to the provided mapping
* Usa matching parcial y case-insensitive * Uses partial and case-insensitive matching
* *
* Ejemplo: * Example:
* - queue: "VIP_Support" + mapping.high: ["VIP"] → "high" * - queue: "VIP_Support" + mapping.high: ["VIP"] → "high"
* - queue: "Soporte_General_N1" + mapping.medium: ["Soporte_General"] → "medium" * - queue: "General_Support_L1" + mapping.medium: ["General_Support"] → "medium"
* - queue: "Retencion" (no match) → "medium" (default) * - queue: "Retention" (no match) → "medium" (default)
*/ */
export function classifyQueue( export function classifyQueue(
queue: string, queue: string,
@@ -39,7 +39,7 @@ export function classifyQueue(
): CustomerSegment { ): CustomerSegment {
const normalizedQueue = queue.toLowerCase().trim(); const normalizedQueue = queue.toLowerCase().trim();
// Buscar en high value // Search in high value
for (const highQueue of mapping.high_value_queues) { for (const highQueue of mapping.high_value_queues) {
const normalizedHigh = highQueue.toLowerCase().trim(); const normalizedHigh = highQueue.toLowerCase().trim();
if (normalizedQueue.includes(normalizedHigh) || normalizedHigh.includes(normalizedQueue)) { if (normalizedQueue.includes(normalizedHigh) || normalizedHigh.includes(normalizedQueue)) {
@@ -47,7 +47,7 @@ export function classifyQueue(
} }
} }
// Buscar en low value // Search in low value
for (const lowQueue of mapping.low_value_queues) { for (const lowQueue of mapping.low_value_queues) {
const normalizedLow = lowQueue.toLowerCase().trim(); const normalizedLow = lowQueue.toLowerCase().trim();
if (normalizedQueue.includes(normalizedLow) || normalizedLow.includes(normalizedQueue)) { if (normalizedQueue.includes(normalizedLow) || normalizedLow.includes(normalizedQueue)) {
@@ -55,7 +55,7 @@ export function classifyQueue(
} }
} }
// Buscar en medium value (explícito) // Search in medium value (explicit)
for (const mediumQueue of mapping.medium_value_queues) { for (const mediumQueue of mapping.medium_value_queues) {
const normalizedMedium = mediumQueue.toLowerCase().trim(); const normalizedMedium = mediumQueue.toLowerCase().trim();
if (normalizedQueue.includes(normalizedMedium) || normalizedMedium.includes(normalizedQueue)) { if (normalizedQueue.includes(normalizedMedium) || normalizedMedium.includes(normalizedQueue)) {
@@ -63,13 +63,13 @@ export function classifyQueue(
} }
} }
// Default: medium (para colas no mapeadas) // Default: medium (for unmapped queues)
return 'medium'; return 'medium';
} }
/** /**
* Clasifica todas las colas únicas de un conjunto de interacciones * Classifies all unique queues from a set of interactions
* Retorna un mapa de cola → segmento * Returns a map of queue → segment
*/ */
export function classifyAllQueues( export function classifyAllQueues(
interactions: RawInteraction[], interactions: RawInteraction[],
@@ -77,10 +77,10 @@ export function classifyAllQueues(
): Map<string, CustomerSegment> { ): Map<string, CustomerSegment> {
const queueSegments = new Map<string, CustomerSegment>(); const queueSegments = new Map<string, CustomerSegment>();
// Obtener colas únicas // Get unique queues
const uniqueQueues = [...new Set(interactions.map(i => i.queue_skill))]; const uniqueQueues = [...new Set(interactions.map(i => i.queue_skill))];
// Clasificar cada cola // Classify each queue
uniqueQueues.forEach(queue => { uniqueQueues.forEach(queue => {
queueSegments.set(queue, classifyQueue(queue, mapping)); queueSegments.set(queue, classifyQueue(queue, mapping));
}); });
@@ -89,8 +89,8 @@ export function classifyAllQueues(
} }
/** /**
* Genera estadísticas de segmentación * Generates segmentation statistics
* Retorna conteo, porcentaje y lista de colas por segmento * Returns count, percentage and list of queues by segment
*/ */
export function getSegmentationStats( export function getSegmentationStats(
interactions: RawInteraction[], interactions: RawInteraction[],
@@ -108,13 +108,13 @@ export function getSegmentationStats(
total: interactions.length total: interactions.length
}; };
// Contar interacciones por segmento // Count interactions by segment
interactions.forEach(interaction => { interactions.forEach(interaction => {
const segment = queueSegments.get(interaction.queue_skill) || 'medium'; const segment = queueSegments.get(interaction.queue_skill) || 'medium';
stats[segment].count++; stats[segment].count++;
}); });
// Calcular porcentajes // Calculate percentages
const total = interactions.length; const total = interactions.length;
if (total > 0) { if (total > 0) {
stats.high.percentage = Math.round((stats.high.count / total) * 100); stats.high.percentage = Math.round((stats.high.count / total) * 100);
@@ -122,7 +122,7 @@ export function getSegmentationStats(
stats.low.percentage = Math.round((stats.low.count / total) * 100); stats.low.percentage = Math.round((stats.low.count / total) * 100);
} }
// Obtener colas por segmento (únicas) // Get queues by segment (unique)
queueSegments.forEach((segment, queue) => { queueSegments.forEach((segment, queue) => {
if (!stats[segment].queues.includes(queue)) { if (!stats[segment].queues.includes(queue)) {
stats[segment].queues.push(queue); stats[segment].queues.push(queue);
@@ -133,7 +133,7 @@ export function getSegmentationStats(
} }
/** /**
* Valida que el mapeo tenga al menos una cola en algún segmento * Validates that the mapping has at least one queue in some segment
*/ */
export function isValidMapping(mapping: SegmentMapping): boolean { export function isValidMapping(mapping: SegmentMapping): boolean {
return ( return (
@@ -144,8 +144,8 @@ export function isValidMapping(mapping: SegmentMapping): boolean {
} }
/** /**
* Crea un mapeo desde StaticConfig * Creates a mapping from StaticConfig
* Si no hay segment_mapping, retorna mapeo vacío * If there is no segment_mapping, returns empty mapping
*/ */
export function getMappingFromConfig(config: StaticConfig): SegmentMapping | null { export function getMappingFromConfig(config: StaticConfig): SegmentMapping | null {
if (!config.segment_mapping) { if (!config.segment_mapping) {
@@ -160,8 +160,8 @@ export function getMappingFromConfig(config: StaticConfig): SegmentMapping | nul
} }
/** /**
* Obtiene el segmento para una cola específica desde el config * Gets the segment for a specific queue from the config
* Si no hay mapeo, retorna 'medium' por defecto * If there is no mapping, returns 'medium' by default
*/ */
export function getSegmentForQueue( export function getSegmentForQueue(
queue: string, queue: string,
@@ -177,7 +177,7 @@ export function getSegmentForQueue(
} }
/** /**
* Formatea estadísticas para mostrar en UI * Formats statistics for display in UI
*/ */
export function formatSegmentationSummary( export function formatSegmentationSummary(
stats: ReturnType<typeof getSegmentationStats> stats: ReturnType<typeof getSegmentationStats>
@@ -185,15 +185,15 @@ export function formatSegmentationSummary(
const parts: string[] = []; const parts: string[] = [];
if (stats.high.count > 0) { if (stats.high.count > 0) {
parts.push(`${stats.high.percentage}% High Value (${stats.high.count} interacciones)`); parts.push(`${stats.high.percentage}% High Value (${stats.high.count} interactions)`);
} }
if (stats.medium.count > 0) { if (stats.medium.count > 0) {
parts.push(`${stats.medium.percentage}% Medium Value (${stats.medium.count} interacciones)`); parts.push(`${stats.medium.percentage}% Medium Value (${stats.medium.count} interactions)`);
} }
if (stats.low.count > 0) { if (stats.low.count > 0) {
parts.push(`${stats.low.percentage}% Low Value (${stats.low.count} interacciones)`); parts.push(`${stats.low.percentage}% Low Value (${stats.low.count} interactions)`);
} }
return parts.join(' | '); return parts.join(' | ');