diff --git a/backend/beyond_metrics/dimensions/EconomyCost.py b/backend/beyond_metrics/dimensions/EconomyCost.py index 09261f0..f53f31a 100644 --- a/backend/beyond_metrics/dimensions/EconomyCost.py +++ b/backend/beyond_metrics/dimensions/EconomyCost.py @@ -23,17 +23,16 @@ REQUIRED_COLUMNS_ECON: List[str] = [ @dataclass class EconomyConfig: """ - Parámetros manuales para la dimensión de Economía y Costes. + Manual parameters for the Economy and Cost dimension. - - labor_cost_per_hour: coste total/hora de un agente (fully loaded). - - overhead_rate: % overhead variable (ej. 0.1 = 10% sobre labor). - - tech_costs_annual: coste anual de tecnología (licencias, infra, ...). - - automation_cpi: coste por interacción automatizada (ej. 0.15€). - - automation_volume_share: % del volumen automatizable (0-1). - - automation_success_rate: % éxito de la automatización (0-1). + - labor_cost_per_hour: total cost/hour of an agent (fully loaded). + - overhead_rate: % variable overhead (e.g. 0.1 = 10% over labor). + - tech_costs_annual: annual technology cost (licenses, infrastructure, ...). + - automation_cpi: cost per automated interaction (e.g. 0.15€). + - automation_volume_share: % of automatable volume (0-1). + - automation_success_rate: % automation success (0-1). - - customer_segments: mapping opcional skill -> segmento ("high"/"medium"/"low") - para futuros insights de ROI por segmento. + - customer_segments: optional mapping skill -> segment ("high"/"medium"/"low") for future ROI insights by segment. """ labor_cost_per_hour: float @@ -48,20 +47,20 @@ class EconomyConfig: @dataclass class EconomyCostMetrics: """ - DIMENSIÓN 4: ECONOMÍA y COSTES + DIMENSION 4: ECONOMY and COSTS - Propósito: - - Cuantificar el COSTE actual (CPI, coste anual). - - Estimar el impacto de overhead y tecnología. - - Calcular un primer estimado de "coste de ineficiencia" y ahorro potencial. + Purpose: + - Quantify the current COST (CPI, annual cost). 
+ - Estimate the impact of overhead and technology. + - Calculate an initial estimate of "inefficiency cost" and potential savings. - Requiere: - - Columnas del dataset transaccional (ver REQUIRED_COLUMNS_ECON). + Requires: + - Columns from the transactional dataset (see REQUIRED_COLUMNS_ECON). - Inputs opcionales vía EconomyConfig: - - labor_cost_per_hour (obligatorio para cualquier cálculo de €). + Optional inputs via EconomyConfig: + - labor_cost_per_hour (required for any € calculation). - overhead_rate, tech_costs_annual, automation_*. - - customer_segments (para insights de ROI por segmento). + - customer_segments (for ROI insights by segment). """ df: pd.DataFrame @@ -72,13 +71,13 @@ class EconomyCostMetrics: self._prepare_data() # ------------------------------------------------------------------ # - # Helpers internos + # Internal helpers # ------------------------------------------------------------------ # def _validate_columns(self) -> None: missing = [c for c in REQUIRED_COLUMNS_ECON if c not in self.df.columns] if missing: raise ValueError( - f"Faltan columnas obligatorias para EconomyCostMetrics: {missing}" + f"Missing required columns for EconomyCostMetrics: {missing}" ) def _prepare_data(self) -> None: @@ -97,15 +96,15 @@ class EconomyCostMetrics: df["duration_talk"].fillna(0) + df["hold_time"].fillna(0) + df["wrap_up_time"].fillna(0) - ) # segundos + ) # seconds - # Filtrar por record_status para cálculos de AHT/CPI - # Solo incluir registros VALID (excluir NOISE, ZOMBIE, ABANDON) + # Filter by record_status for AHT/CPI calculations + # Only include VALID records (exclude NOISE, ZOMBIE, ABANDON) if "record_status" in df.columns: df["record_status"] = df["record_status"].astype(str).str.strip().str.upper() df["_is_valid_for_cost"] = df["record_status"] == "VALID" else: - # Legacy data sin record_status: incluir todo + # Legacy data without record_status: include all df["_is_valid_for_cost"] = True self.df = df @@ -118,11 +117,11 @@ class 
EconomyCostMetrics: return self.config is not None and self.config.labor_cost_per_hour is not None # ------------------------------------------------------------------ # - # KPI 1: CPI por canal/skill + # KPI 1: CPI by channel/skill # ------------------------------------------------------------------ # def cpi_by_skill_channel(self) -> pd.DataFrame: """ - CPI (Coste Por Interacción) por skill/canal. + CPI (Cost Per Interaction) by skill/channel. CPI = (Labor_cost_per_interaction + Overhead_variable) / EFFECTIVE_PRODUCTIVITY @@ -130,19 +129,17 @@ class EconomyCostMetrics: - Overhead_variable = overhead_rate * Labor_cost_per_interaction - EFFECTIVE_PRODUCTIVITY = 0.70 (70% - accounts for non-productive time) - Excluye registros abandonados del cálculo de costes para consistencia - con el path del frontend (fresh CSV). + Excludes abandoned records from cost calculation for consistency with the frontend path (fresh CSV). - Si no hay config de costes -> devuelve DataFrame vacío. + If there is no cost config -> returns empty DataFrame. - Incluye queue_skill y channel como columnas (no solo índice) para que - el frontend pueda hacer lookup por nombre de skill. + Includes queue_skill and channel as columns (not just index) so that the frontend can lookup by skill name. 
""" if not self._has_cost_config(): return pd.DataFrame() cfg = self.config - assert cfg is not None # para el type checker + assert cfg is not None # for the type checker df = self.df.copy() if df.empty: @@ -154,15 +151,15 @@ class EconomyCostMetrics: else: df_cost = df - # Filtrar por record_status: solo VALID para cálculo de AHT - # Excluye NOISE, ZOMBIE, ABANDON + # Filter by record_status: only VALID for AHT calculation + # Excludes NOISE, ZOMBIE, ABANDON if "_is_valid_for_cost" in df_cost.columns: df_cost = df_cost[df_cost["_is_valid_for_cost"] == True] if df_cost.empty: return pd.DataFrame() - # AHT por skill/canal (en segundos) - solo registros VALID + # AHT by skill/channel (in seconds) - only VALID records grouped = df_cost.groupby(["queue_skill", "channel"])["handle_time"].mean() if grouped.empty: @@ -193,17 +190,16 @@ class EconomyCostMetrics: return out.sort_index().reset_index() # ------------------------------------------------------------------ # - # KPI 2: coste anual por skill/canal + # KPI 2: annual cost by skill/channel # ------------------------------------------------------------------ # def annual_cost_by_skill_channel(self) -> pd.DataFrame: """ - Coste anual por skill/canal. + Annual cost by skill/channel. - cost_annual = CPI * volumen (cantidad de interacciones de la muestra). + cost_annual = CPI * volume (number of interactions in the sample). - Nota: por simplicidad asumimos que el dataset refleja un periodo anual. - Si en el futuro quieres anualizar (ej. dataset = 1 mes) se puede añadir - un factor de escalado en EconomyConfig. + Note: for simplicity we assume the dataset reflects an annual period. + If in the future you want to annualize (e.g. dataset = 1 month) you can add a scaling factor in EconomyConfig. 
""" cpi_table = self.cpi_by_skill_channel() if cpi_table.empty: @@ -224,18 +220,18 @@ class EconomyCostMetrics: return joined # ------------------------------------------------------------------ # - # KPI 3: desglose de costes (labor / tech / overhead) + # KPI 3: cost breakdown (labor / tech / overhead) # ------------------------------------------------------------------ # def cost_breakdown(self) -> Dict[str, float]: """ - Desglose % de costes: labor, overhead, tech. + Cost breakdown %: labor, overhead, tech. labor_total = sum(labor_cost_per_interaction) overhead_total = labor_total * overhead_rate - tech_total = tech_costs_annual (si se ha proporcionado) + tech_total = tech_costs_annual (if provided) - Devuelve porcentajes sobre el total. - Si falta configuración de coste -> devuelve {}. + Returns percentages of the total. + If cost configuration is missing -> returns {}. """ if not self._has_cost_config(): return {} @@ -258,7 +254,7 @@ class EconomyCostMetrics: cpi_indexed = cpi_table.set_index(["queue_skill", "channel"]) joined = cpi_indexed.join(volume, how="left").fillna({"volume": 0}) - # Costes anuales de labor y overhead + # Annual labor and overhead costs annual_labor = (joined["labor_cost"] * joined["volume"]).sum() annual_overhead = (joined["overhead_cost"] * joined["volume"]).sum() annual_tech = cfg.tech_costs_annual @@ -278,21 +274,21 @@ class EconomyCostMetrics: } # ------------------------------------------------------------------ # - # KPI 4: coste de ineficiencia (€ por variabilidad/escalación) + # KPI 4: inefficiency cost (€ by variability/escalation) # ------------------------------------------------------------------ # def inefficiency_cost_by_skill_channel(self) -> pd.DataFrame: """ - Estimación muy simplificada de coste de ineficiencia: + Very simplified estimate of inefficiency cost: - Para cada skill/canal: + For each skill/channel: - - AHT_p50, AHT_p90 (segundos). + - AHT_p50, AHT_p90 (seconds). - Delta = max(0, AHT_p90 - AHT_p50). 
- - Se asume que ~40% de las interacciones están por encima de la mediana. + - Assumes that ~40% of interactions are above the median. - Ineff_seconds = Delta * volume * 0.4 - Ineff_cost = LaborCPI_per_second * Ineff_seconds - NOTA: Es un modelo aproximado para cuantificar "orden de magnitud". + NOTE: This is an approximate model to quantify "order of magnitude". """ if not self._has_cost_config(): return pd.DataFrame() @@ -302,8 +298,8 @@ class EconomyCostMetrics: df = self.df.copy() - # Filtrar por record_status: solo VALID para cálculo de AHT - # Excluye NOISE, ZOMBIE, ABANDON + # Filter by record_status: only VALID for AHT calculation + # Excludes NOISE, ZOMBIE, ABANDON if "_is_valid_for_cost" in df.columns: df = df[df["_is_valid_for_cost"] == True] @@ -318,7 +314,7 @@ class EconomyCostMetrics: if stats.empty: return pd.DataFrame() - # CPI para obtener coste/segundo de labor + # CPI to get cost/second of labor # cpi_by_skill_channel now returns with reset_index, so we need to set index for join cpi_table_raw = self.cpi_by_skill_channel() if cpi_table_raw.empty: @@ -331,11 +327,11 @@ class EconomyCostMetrics: merged = merged.fillna(0.0) delta = (merged["aht_p90"] - merged["aht_p50"]).clip(lower=0.0) - affected_fraction = 0.4 # aproximación + affected_fraction = 0.4 # approximation ineff_seconds = delta * merged["volume"] * affected_fraction - # labor_cost = coste por interacción con AHT medio; - # aproximamos coste/segundo como labor_cost / AHT_medio + # labor_cost = cost per interaction with average AHT; + # approximate cost/second as labor_cost / average_AHT aht_mean = grouped["handle_time"].mean() merged["aht_mean"] = aht_mean @@ -351,21 +347,21 @@ class EconomyCostMetrics: return merged[["aht_p50", "aht_p90", "volume", "ineff_seconds", "ineff_cost"]].reset_index() # ------------------------------------------------------------------ # - # KPI 5: ahorro potencial anual por automatización + # KPI 5: potential annual savings from automation # 
------------------------------------------------------------------ # def potential_savings(self) -> Dict[str, Any]: """ - Ahorro potencial anual basado en: + Potential annual savings based on: - Ahorro = (CPI_humano - CPI_automatizado) * Volumen_automatizable * Tasa_éxito + Savings = (Human_CPI - Automated_CPI) * Automatable_volume * Success_rate - Donde: - - CPI_humano = media ponderada de cpi_total. - - CPI_automatizado = config.automation_cpi - - Volumen_automatizable = volume_total * automation_volume_share - - Tasa_éxito = automation_success_rate + Where: + - Human_CPI = weighted average of cpi_total. + - Automated_CPI = config.automation_cpi + - Automatable_volume = volume_total * automation_volume_share + - Success_rate = automation_success_rate - Si faltan parámetros en config -> devuelve {}. + If config parameters are missing -> returns {}. """ if not self._has_cost_config(): return {} @@ -384,7 +380,7 @@ class EconomyCostMetrics: if total_volume <= 0: return {} - # CPI humano medio ponderado + # Weighted average human CPI weighted_cpi = ( (cpi_table["cpi_total"] * cpi_table["volume"]).sum() / total_volume ) @@ -409,12 +405,12 @@ class EconomyCostMetrics: # ------------------------------------------------------------------ # def plot_cost_waterfall(self) -> Axes: """ - Waterfall de costes anuales (labor + tech + overhead). + Waterfall of annual costs (labor + tech + overhead). 
""" breakdown = self.cost_breakdown() if not breakdown: fig, ax = plt.subplots() - ax.text(0.5, 0.5, "Sin configuración de costes", ha="center", va="center") + ax.text(0.5, 0.5, "No cost configuration", ha="center", va="center") ax.set_axis_off() return ax @@ -436,14 +432,14 @@ class EconomyCostMetrics: bottoms.append(running) running += v - # barras estilo waterfall + # waterfall style bars x = np.arange(len(labels)) ax.bar(x, values) ax.set_xticks(x) ax.set_xticklabels(labels) - ax.set_ylabel("€ anuales") - ax.set_title("Desglose anual de costes") + ax.set_ylabel("€ annual") + ax.set_title("Annual cost breakdown") for idx, v in enumerate(values): ax.text(idx, v, f"{v:,.0f}", ha="center", va="bottom") @@ -454,12 +450,12 @@ class EconomyCostMetrics: def plot_cpi_by_channel(self) -> Axes: """ - Gráfico de barras de CPI medio por canal. + Bar chart of average CPI by channel. """ cpi_table = self.cpi_by_skill_channel() if cpi_table.empty: fig, ax = plt.subplots() - ax.text(0.5, 0.5, "Sin configuración de costes", ha="center", va="center") + ax.text(0.5, 0.5, "No cost configuration", ha="center", va="center") ax.set_axis_off() return ax @@ -474,7 +470,7 @@ class EconomyCostMetrics: cpi_indexed = cpi_table.set_index(["queue_skill", "channel"]) joined = cpi_indexed.join(volume, how="left").fillna({"volume": 0}) - # CPI medio ponderado por canal + # Weighted average CPI by channel per_channel = ( joined.reset_index() .groupby("channel") @@ -486,9 +482,9 @@ class EconomyCostMetrics: fig, ax = plt.subplots(figsize=(6, 4)) per_channel.plot(kind="bar", ax=ax) - ax.set_xlabel("Canal") - ax.set_ylabel("CPI medio (€)") - ax.set_title("Coste por interacción (CPI) por canal") + ax.set_xlabel("Channel") + ax.set_ylabel("Average CPI (€)") + ax.set_title("Cost per interaction (CPI) by channel") ax.grid(axis="y", alpha=0.3) return ax diff --git a/backend/beyond_metrics/dimensions/OperationalPerformance.py b/backend/beyond_metrics/dimensions/OperationalPerformance.py index 
db0a2e9..d99d455 100644 --- a/backend/beyond_metrics/dimensions/OperationalPerformance.py +++ b/backend/beyond_metrics/dimensions/OperationalPerformance.py @@ -25,32 +25,31 @@ REQUIRED_COLUMNS_OP: List[str] = [ @dataclass class OperationalPerformanceMetrics: """ - Dimensión: RENDIMIENTO OPERACIONAL Y DE SERVICIO + Dimension: OPERATIONAL PERFORMANCE AND SERVICE - Propósito: medir el balance entre rapidez (eficiencia) y calidad de resolución, - más la variabilidad del servicio. + Purpose: measure the balance between speed (efficiency) and resolution quality, plus service variability. - Requiere como mínimo: + Requires at minimum: - interaction_id - datetime_start - queue_skill - channel - - duration_talk (segundos) - - hold_time (segundos) - - wrap_up_time (segundos) + - duration_talk (seconds) + - hold_time (seconds) + - wrap_up_time (seconds) - agent_id - transfer_flag (bool/int) - Columnas opcionales: - - is_resolved (bool/int) -> para FCR - - abandoned_flag (bool/int) -> para tasa de abandono - - customer_id / caller_id -> para reincidencia y repetición de canal - - logged_time (segundos) -> para occupancy_rate + Optional columns: + - is_resolved (bool/int) -> for FCR + - abandoned_flag (bool/int) -> for abandonment rate + - customer_id / caller_id -> for recurrence and channel repetition + - logged_time (seconds) -> for occupancy_rate """ df: pd.DataFrame - # Benchmarks / parámetros de normalización (puedes ajustarlos) + # Benchmarks / normalization parameters (you can adjust them) AHT_GOOD: float = 300.0 # 5 min AHT_BAD: float = 900.0 # 15 min VAR_RATIO_GOOD: float = 1.2 # P90/P50 ~1.2 muy estable @@ -61,19 +60,19 @@ class OperationalPerformanceMetrics: self._prepare_data() # ------------------------------------------------------------------ # - # Helpers internos + # Internal helpers # ------------------------------------------------------------------ # def _validate_columns(self) -> None: missing = [c for c in REQUIRED_COLUMNS_OP if c not in self.df.columns] 
if missing: raise ValueError( - f"Faltan columnas obligatorias para OperationalPerformanceMetrics: {missing}" + f"Missing required columns for OperationalPerformanceMetrics: {missing}" ) def _prepare_data(self) -> None: df = self.df.copy() - # Tipos + # Types df["datetime_start"] = pd.to_datetime(df["datetime_start"], errors="coerce") for col in ["duration_talk", "hold_time", "wrap_up_time"]: @@ -86,13 +85,13 @@ class OperationalPerformanceMetrics: + df["wrap_up_time"].fillna(0) ) - # v3.0: Filtrar NOISE y ZOMBIE para cálculos de variabilidad + # v3.0: Filter NOISE and ZOMBIE for variability calculations # record_status: 'VALID', 'NOISE', 'ZOMBIE', 'ABANDON' - # Para AHT/CV solo usamos 'VALID' (excluye noise, zombie, abandon) + # For AHT/CV we only use 'VALID' (excludes noise, zombie, abandon) if "record_status" in df.columns: df["record_status"] = df["record_status"].astype(str).str.strip().str.upper() - # Crear máscara para registros válidos: SOLO "VALID" - # Excluye explícitamente NOISE, ZOMBIE, ABANDON y cualquier otro valor + # Create mask for valid records: ONLY "VALID" + # Explicitly excludes NOISE, ZOMBIE, ABANDON and any other value df["_is_valid_for_cv"] = df["record_status"] == "VALID" # Log record_status breakdown for debugging @@ -104,21 +103,21 @@ class OperationalPerformanceMetrics: print(f" - {status}: {count}") print(f" VALID rows for AHT calculation: {valid_count}") else: - # Legacy data sin record_status: incluir todo + # Legacy data without record_status: include all df["_is_valid_for_cv"] = True print(f"[OperationalPerformance] No record_status column - using all {len(df)} rows") - # Normalización básica + # Basic normalization df["queue_skill"] = df["queue_skill"].astype(str).str.strip() df["channel"] = df["channel"].astype(str).str.strip() df["agent_id"] = df["agent_id"].astype(str).str.strip() - # Flags opcionales convertidos a bool cuando existan + # Optional flags converted to bool when they exist for flag_col in ["is_resolved", 
"abandoned_flag", "transfer_flag"]: if flag_col in df.columns: df[flag_col] = df[flag_col].astype(int).astype(bool) - # customer_id: usamos customer_id si existe, si no caller_id + # customer_id: we use customer_id if it exists, otherwise caller_id if "customer_id" in df.columns: df["customer_id"] = df["customer_id"].astype(str) elif "caller_id" in df.columns: @@ -126,8 +125,8 @@ class OperationalPerformanceMetrics: else: df["customer_id"] = None - # logged_time opcional - # Normalizamos logged_time: siempre será una serie float con NaN si no existe + # logged_time optional + # Normalize logged_time: will always be a float series with NaN if it does not exist df["logged_time"] = pd.to_numeric(df.get("logged_time", np.nan), errors="coerce") @@ -138,16 +137,16 @@ class OperationalPerformanceMetrics: return self.df.empty # ------------------------------------------------------------------ # - # AHT y variabilidad + # AHT and variability # ------------------------------------------------------------------ # def aht_distribution(self) -> Dict[str, float]: """ - Devuelve P10, P50, P90 del AHT y el ratio P90/P50 como medida de variabilidad. + Returns P10, P50, P90 of AHT and the P90/P50 ratio as a measure of variability. - v3.0: Filtra NOISE y ZOMBIE para el cálculo de variabilidad. - Solo usa registros con record_status='valid' o sin status (legacy). + v3.0: Filters NOISE and ZOMBIE for variability calculation. + Only uses records with record_status='valid' or without status (legacy). """ - # Filtrar solo registros válidos para cálculo de variabilidad + # Filter only valid records for variability calculation df_valid = self.df[self.df["_is_valid_for_cv"] == True] ht = df_valid["handle_time"].dropna().astype(float) if ht.empty: @@ -167,10 +166,9 @@ class OperationalPerformanceMetrics: def talk_hold_acw_p50_by_skill(self) -> pd.DataFrame: """ - P50 de talk_time, hold_time y wrap_up_time por skill. + P50 of talk_time, hold_time and wrap_up_time by skill. 
- Incluye queue_skill como columna (no solo índice) para que - el frontend pueda hacer lookup por nombre de skill. + Includes queue_skill as a column (not just index) so that the frontend can lookup by skill name. """ df = self.df @@ -192,24 +190,24 @@ class OperationalPerformanceMetrics: return result.round(2).sort_index().reset_index() # ------------------------------------------------------------------ # - # FCR, escalación, abandono, reincidencia, repetición canal + # FCR, escalation, abandonment, recurrence, channel repetition # ------------------------------------------------------------------ # def fcr_rate(self) -> float: """ FCR (First Contact Resolution). - Prioridad 1: Usar fcr_real_flag del CSV si existe - Prioridad 2: Calcular como 100 - escalation_rate + Priority 1: Use fcr_real_flag from CSV if it exists + Priority 2: Calculate as 100 - escalation_rate """ df = self.df total = len(df) if total == 0: return float("nan") - # Prioridad 1: Usar fcr_real_flag si existe + # Priority 1: Use fcr_real_flag if it exists if "fcr_real_flag" in df.columns: col = df["fcr_real_flag"] - # Normalizar a booleano + # Normalize to boolean if col.dtype == "O": fcr_mask = ( col.astype(str) @@ -224,7 +222,7 @@ class OperationalPerformanceMetrics: fcr = (fcr_count / total) * 100.0 return float(max(0.0, min(100.0, round(fcr, 2)))) - # Prioridad 2: Fallback a 100 - escalation_rate + # Priority 2: Fallback to 100 - escalation_rate try: esc = self.escalation_rate() except Exception: @@ -239,7 +237,7 @@ class OperationalPerformanceMetrics: def escalation_rate(self) -> float: """ - % de interacciones que requieren escalación (transfer_flag == True). + % of interactions that require escalation (transfer_flag == True). """ df = self.df total = len(df) @@ -251,17 +249,17 @@ class OperationalPerformanceMetrics: def abandonment_rate(self) -> float: """ - % de interacciones abandonadas. + % of abandoned interactions. 
- Busca en orden: is_abandoned, abandoned_flag, abandoned - Si ninguna columna existe, devuelve NaN. + Searches in order: is_abandoned, abandoned_flag, abandoned + If no column exists, returns NaN. """ df = self.df total = len(df) if total == 0: return float("nan") - # Buscar columna de abandono en orden de prioridad + # Search for abandonment column in priority order abandon_col = None for col_name in ["is_abandoned", "abandoned_flag", "abandoned"]: if col_name in df.columns: @@ -273,7 +271,7 @@ class OperationalPerformanceMetrics: col = df[abandon_col] - # Normalizar a booleano + # Normalize to boolean if col.dtype == "O": abandon_mask = ( col.astype(str) @@ -289,10 +287,9 @@ class OperationalPerformanceMetrics: def high_hold_time_rate(self, threshold_seconds: float = 60.0) -> float: """ - % de interacciones con hold_time > threshold (por defecto 60s). + % of interactions with hold_time > threshold (default 60s). - Proxy de complejidad: si el agente tuvo que poner en espera al cliente - más de 60 segundos, probablemente tuvo que consultar/investigar. + Complexity proxy: if the agent had to put the customer on hold for more than 60 seconds, they probably had to consult/investigate. """ df = self.df total = len(df) @@ -306,44 +303,43 @@ class OperationalPerformanceMetrics: def recurrence_rate_7d(self) -> float: """ - % de clientes que vuelven a contactar en < 7 días para el MISMO skill. + % of customers who contact again in < 7 days for the SAME skill. - Se basa en customer_id (o caller_id si no hay customer_id) + queue_skill. - Calcula: - - Para cada combinación cliente + skill, ordena por datetime_start - - Si hay dos contactos consecutivos separados < 7 días (mismo cliente, mismo skill), - cuenta como "recurrente" - - Tasa = nº clientes recurrentes / nº total de clientes + Based on customer_id (or caller_id if no customer_id) + queue_skill. 
+ Calculates: + - For each client + skill combination, sorts by datetime_start + - If there are two consecutive contacts separated by < 7 days (same client, same skill), counts as "recurrent" + - Rate = number of recurrent clients / total number of clients - NOTA: Solo cuenta como recurrencia si el cliente llama por el MISMO skill. - Un cliente que llama a "Ventas" y luego a "Soporte" NO es recurrente. + NOTE: Only counts as recurrence if the client calls for the SAME skill. + A client who calls "Sales" and then "Support" is NOT recurrent. """ df = self.df.dropna(subset=["datetime_start"]).copy() - # Normalizar identificador de cliente + # Normalize client identifier if "customer_id" not in df.columns: if "caller_id" in df.columns: df["customer_id"] = df["caller_id"] else: - # No hay identificador de cliente -> no se puede calcular + # No client identifier -> cannot calculate return float("nan") df = df.dropna(subset=["customer_id"]) if df.empty: return float("nan") - # Ordenar por cliente + skill + fecha + # Sort by client + skill + date df = df.sort_values(["customer_id", "queue_skill", "datetime_start"]) - # Diferencia de tiempo entre contactos consecutivos por cliente Y skill - # Esto asegura que solo contamos recontactos del mismo cliente para el mismo skill + # Time difference between consecutive contacts by client AND skill + # This ensures we only count re-contacts from the same client for the same skill df["delta"] = df.groupby(["customer_id", "queue_skill"])["datetime_start"].diff() - # Marcamos los contactos que ocurren a menos de 7 días del anterior (mismo skill) + # Mark contacts that occur less than 7 days from the previous one (same skill) recurrence_mask = df["delta"] < pd.Timedelta(days=7) - # Nº de clientes que tienen al menos un contacto recurrente (para cualquier skill) + # Number of clients who have at least one recurrent contact (for any skill) recurrent_customers = df.loc[recurrence_mask, "customer_id"].nunique() total_customers = 
df["customer_id"].nunique() @@ -356,9 +352,9 @@ class OperationalPerformanceMetrics: def repeat_channel_rate(self) -> float: """ - % de reincidencias (<7 días) en las que el cliente usa el MISMO canal. + % of recurrences (<7 days) in which the client uses the SAME channel. - Si no hay customer_id/caller_id o solo un contacto por cliente, devuelve NaN. + If there is no customer_id/caller_id or only one contact per client, returns NaN. """ df = self.df.dropna(subset=["datetime_start"]).copy() if df["customer_id"].isna().all(): @@ -387,11 +383,11 @@ class OperationalPerformanceMetrics: # ------------------------------------------------------------------ # def occupancy_rate(self) -> float: """ - Tasa de ocupación: + Occupancy rate: occupancy = sum(handle_time) / sum(logged_time) * 100. - Requiere columna 'logged_time'. Si no existe o es todo 0, devuelve NaN. + Requires 'logged_time' column. If it does not exist or is all 0, returns NaN. """ df = self.df if "logged_time" not in df.columns: @@ -408,23 +404,23 @@ class OperationalPerformanceMetrics: return float(round(occ * 100, 2)) # ------------------------------------------------------------------ # - # Score de rendimiento 0-10 + # Performance score 0-10 # ------------------------------------------------------------------ # def performance_score(self) -> Dict[str, float]: """ - Calcula un score 0-10 combinando: - - AHT (bajo es mejor) - - FCR (alto es mejor) - - Variabilidad (P90/P50, bajo es mejor) - - Otros factores (ocupación / escalación) + Calculates a 0-10 score combining: + - AHT (lower is better) + - FCR (higher is better) + - Variability (P90/P50, lower is better) + - Other factors (occupancy / escalation) - Fórmula: + Formula: score = 0.4 * (10 - AHT_norm) + 0.3 * FCR_norm + 0.2 * (10 - Var_norm) + 0.1 * Otros_score - Donde *_norm son valores en escala 0-10. + Where *_norm are values on a 0-10 scale. 
""" dist = self.aht_distribution() if not dist: @@ -433,15 +429,15 @@ class OperationalPerformanceMetrics: p50 = dist["p50"] ratio = dist["p90_p50_ratio"] - # AHT_normalized: 0 (mejor) a 10 (peor) + # AHT_normalized: 0 (better) to 10 (worse) aht_norm = self._scale_to_0_10(p50, self.AHT_GOOD, self.AHT_BAD) - # FCR_normalized: 0-10 directamente desde % (0-100) + # FCR_normalized: 0-10 directly from % (0-100) fcr_pct = self.fcr_rate() fcr_norm = fcr_pct / 10.0 if not np.isnan(fcr_pct) else 0.0 - # Variabilidad_normalized: 0 (ratio bueno) a 10 (ratio malo) + # Variability_normalized: 0 (good ratio) to 10 (bad ratio) var_norm = self._scale_to_0_10(ratio, self.VAR_RATIO_GOOD, self.VAR_RATIO_BAD) - # Otros factores: combinamos ocupación (ideal ~80%) y escalación (ideal baja) + # Other factors: combine occupancy (ideal ~80%) and escalation (ideal low) occ = self.occupancy_rate() esc = self.escalation_rate() @@ -467,26 +463,26 @@ class OperationalPerformanceMetrics: def _scale_to_0_10(self, value: float, good: float, bad: float) -> float: """ - Escala linealmente un valor: + Linearly scales a value: - good -> 0 - bad -> 10 - Con saturación fuera de rango. + With saturation outside range. 
""" if np.isnan(value): - return 5.0 # neutro + return 5.0 # neutral if good == bad: return 5.0 if good < bad: - # Menor es mejor + # Lower is better if value <= good: return 0.0 if value >= bad: return 10.0 return 10.0 * (value - good) / (bad - good) else: - # Mayor es mejor + # Higher is better if value >= good: return 0.0 if value <= bad: @@ -495,19 +491,19 @@ class OperationalPerformanceMetrics: def _compute_other_factors_score(self, occ_pct: float, esc_pct: float) -> float: """ - Otros factores (0-10) basados en: - - ocupación ideal alrededor de 80% - - tasa de escalación ideal baja (<10%) + Other factors (0-10) based on: + - ideal occupancy around 80% + - ideal escalation rate low (<10%) """ - # Ocupación: 0 penalización si está entre 75-85, se penaliza fuera + # Occupancy: 0 penalty if between 75-85, penalized outside if np.isnan(occ_pct): occ_penalty = 5.0 else: deviation = abs(occ_pct - 80.0) - occ_penalty = min(10.0, deviation / 5.0 * 2.0) # cada 5 puntos se suman 2, máx 10 + occ_penalty = min(10.0, deviation / 5.0 * 2.0) # each 5 points add 2, max 10 occ_score = max(0.0, 10.0 - occ_penalty) - # Escalación: 0-10 donde 0% -> 10 puntos, >=40% -> 0 + # Escalation: 0-10 where 0% -> 10 points, >=40% -> 0 if np.isnan(esc_pct): esc_score = 5.0 else: @@ -518,7 +514,7 @@ class OperationalPerformanceMetrics: else: esc_score = 10.0 * (1.0 - esc_pct / 40.0) - # Media simple de ambos + # Simple average of both return (occ_score + esc_score) / 2.0 # ------------------------------------------------------------------ # @@ -526,29 +522,29 @@ class OperationalPerformanceMetrics: # ------------------------------------------------------------------ # def plot_aht_boxplot_by_skill(self) -> Axes: """ - Boxplot del AHT por skill (P10-P50-P90 visual). + Boxplot of AHT by skill (P10-P50-P90 visual). 
""" df = self.df.copy() if df.empty or "handle_time" not in df.columns: fig, ax = plt.subplots() - ax.text(0.5, 0.5, "Sin datos de AHT", ha="center", va="center") + ax.text(0.5, 0.5, "No AHT data", ha="center", va="center") ax.set_axis_off() return ax df = df.dropna(subset=["handle_time"]) if df.empty: fig, ax = plt.subplots() - ax.text(0.5, 0.5, "AHT no disponible", ha="center", va="center") + ax.text(0.5, 0.5, "AHT not available", ha="center", va="center") ax.set_axis_off() return ax fig, ax = plt.subplots(figsize=(8, 4)) df.boxplot(column="handle_time", by="queue_skill", ax=ax, showfliers=False) - ax.set_xlabel("Skill / Cola") - ax.set_ylabel("AHT (segundos)") - ax.set_title("Distribución de AHT por skill") + ax.set_xlabel("Skill / Queue") + ax.set_ylabel("AHT (seconds)") + ax.set_title("AHT distribution by skill") plt.suptitle("") plt.xticks(rotation=45, ha="right") ax.grid(axis="y", alpha=0.3) @@ -557,14 +553,14 @@ class OperationalPerformanceMetrics: def plot_resolution_funnel_by_skill(self) -> Axes: """ - Funnel / barras apiladas de Talk + Hold + ACW por skill (P50). + Funnel / stacked bars of Talk + Hold + ACW by skill (P50). - Permite ver el equilibrio de tiempos por skill. + Allows viewing the time balance by skill. 
""" p50 = self.talk_hold_acw_p50_by_skill() if p50.empty: fig, ax = plt.subplots() - ax.text(0.5, 0.5, "Sin datos para funnel", ha="center", va="center") + ax.text(0.5, 0.5, "No data for funnel", ha="center", va="center") ax.set_axis_off() return ax @@ -583,27 +579,26 @@ class OperationalPerformanceMetrics: ax.set_xticks(x) ax.set_xticklabels(skills, rotation=45, ha="right") - ax.set_ylabel("Segundos") - ax.set_title("Funnel de resolución (P50) por skill") + ax.set_ylabel("Seconds") + ax.set_title("Resolution funnel (P50) by skill") ax.legend() ax.grid(axis="y", alpha=0.3) return ax # ------------------------------------------------------------------ # - # Métricas por skill (para consistencia frontend cached/fresh) + # Metrics by skill (for frontend cached/fresh consistency) # ------------------------------------------------------------------ # def metrics_by_skill(self) -> List[Dict[str, Any]]: """ - Calcula métricas operacionales por skill: - - transfer_rate: % de interacciones con transfer_flag == True - - abandonment_rate: % de interacciones abandonadas - - fcr_tecnico: 100 - transfer_rate (sin transferencia) - - fcr_real: % sin transferencia Y sin recontacto 7d (si hay datos) - - volume: número de interacciones + Calculates operational metrics by skill: + - transfer_rate: % of interactions with transfer_flag == True + - abandonment_rate: % of abandoned interactions + - fcr_tecnico: 100 - transfer_rate (without transfer) + - fcr_real: % without transfer AND without 7d re-contact (if there is data) + - volume: number of interactions - Devuelve una lista de dicts, uno por skill, para que el frontend - tenga acceso a las métricas reales por skill (no estimadas). + Returns a list of dicts, one per skill, so that the frontend has access to real metrics by skill (not estimated). 
""" df = self.df if df.empty: @@ -611,14 +606,14 @@ class OperationalPerformanceMetrics: results = [] - # Detectar columna de abandono + # Detect abandonment column abandon_col = None for col_name in ["is_abandoned", "abandoned_flag", "abandoned"]: if col_name in df.columns: abandon_col = col_name break - # Detectar columna de repeat_call_7d para FCR real + # Detect repeat_call_7d column for real FCR repeat_col = None for col_name in ["repeat_call_7d", "repeat_7d", "is_repeat_7d"]: if col_name in df.columns: @@ -637,7 +632,7 @@ class OperationalPerformanceMetrics: else: transfer_rate = 0.0 - # FCR Técnico = 100 - transfer_rate + # Technical FCR = 100 - transfer_rate fcr_tecnico = float(round(100.0 - transfer_rate, 2)) # Abandonment rate @@ -656,7 +651,7 @@ class OperationalPerformanceMetrics: abandoned = int(abandon_mask.sum()) abandonment_rate = float(round(abandoned / total * 100, 2)) - # FCR Real (sin transferencia Y sin recontacto 7d) + # Real FCR (without transfer AND without 7d re-contact) fcr_real = fcr_tecnico # default to fcr_tecnico if no repeat data if repeat_col and "transfer_flag" in group.columns: repeat_data = group[repeat_col] @@ -670,13 +665,13 @@ class OperationalPerformanceMetrics: else: repeat_mask = pd.to_numeric(repeat_data, errors="coerce").fillna(0) > 0 - # FCR Real: no transfer AND no repeat + # Real FCR: no transfer AND no repeat fcr_real_mask = (~group["transfer_flag"]) & (~repeat_mask) fcr_real_count = fcr_real_mask.sum() fcr_real = float(round(fcr_real_count / total * 100, 2)) - # AHT Mean (promedio de handle_time sobre registros válidos) - # Filtramos solo registros 'valid' (excluye noise/zombie) para consistencia + # AHT Mean (average of handle_time over valid records) + # Filter only 'valid' records (excludes noise/zombie) for consistency if "_is_valid_for_cv" in group.columns: valid_records = group[group["_is_valid_for_cv"]] else: @@ -687,15 +682,15 @@ class OperationalPerformanceMetrics: else: aht_mean = 0.0 - # AHT Total (promedio 
de handle_time sobre TODOS los registros) - # Incluye NOISE, ZOMBIE, ABANDON - solo para información/comparación + # AHT Total (average of handle_time over ALL records) + # Includes NOISE, ZOMBIE, ABANDON - for information/comparison only if len(group) > 0 and "handle_time" in group.columns: aht_total = float(round(group["handle_time"].mean(), 2)) else: aht_total = 0.0 - # Hold Time Mean (promedio de hold_time sobre registros válidos) - # Consistente con fresh path que usa MEAN, no P50 + # Hold Time Mean (average of hold_time over valid records) + # Consistent with fresh path that uses MEAN, not P50 if len(valid_records) > 0 and "hold_time" in valid_records.columns: hold_time_mean = float(round(valid_records["hold_time"].mean(), 2)) else: diff --git a/backend/beyond_metrics/dimensions/SatisfactionExperience.py b/backend/beyond_metrics/dimensions/SatisfactionExperience.py index 59a78bb..d377857 100644 --- a/backend/beyond_metrics/dimensions/SatisfactionExperience.py +++ b/backend/beyond_metrics/dimensions/SatisfactionExperience.py @@ -24,11 +24,10 @@ REQUIRED_COLUMNS_SAT: List[str] = [ @dataclass class SatisfactionExperienceMetrics: """ - Dimensión 3: SATISFACCIÓN y EXPERIENCIA + Dimension 3: SATISFACTION and EXPERIENCE - Todas las columnas de satisfacción (csat/nps/ces/aht) son OPCIONALES. - Si no están, las métricas que las usan devuelven vacío/NaN pero - nunca rompen el pipeline. + All satisfaction columns (csat/nps/ces/aht) are OPTIONAL. + If they are not present, the metrics that use them return empty/NaN but never break the pipeline. 
""" df: pd.DataFrame @@ -44,7 +43,7 @@ class SatisfactionExperienceMetrics: missing = [c for c in REQUIRED_COLUMNS_SAT if c not in self.df.columns] if missing: raise ValueError( - f"Faltan columnas obligatorias para SatisfactionExperienceMetrics: {missing}" + f"Missing required columns for SatisfactionExperienceMetrics: {missing}" ) def _prepare_data(self) -> None: @@ -52,7 +51,7 @@ class SatisfactionExperienceMetrics: df["datetime_start"] = pd.to_datetime(df["datetime_start"], errors="coerce") - # Duraciones base siempre existen + # Base durations always exist for col in ["duration_talk", "hold_time", "wrap_up_time"]: df[col] = pd.to_numeric(df[col], errors="coerce") @@ -63,16 +62,16 @@ class SatisfactionExperienceMetrics: + df["wrap_up_time"].fillna(0) ) - # csat_score opcional + # csat_score optional df["csat_score"] = pd.to_numeric(df.get("csat_score", np.nan), errors="coerce") - # aht opcional: si existe columna explícita la usamos, si no usamos handle_time + # aht optional: if explicit column exists we use it, otherwise we use handle_time if "aht" in df.columns: df["aht"] = pd.to_numeric(df["aht"], errors="coerce") else: df["aht"] = df["handle_time"] - # NPS / CES opcionales + # NPS / CES optional df["nps_score"] = pd.to_numeric(df.get("nps_score", np.nan), errors="coerce") df["ces_score"] = pd.to_numeric(df.get("ces_score", np.nan), errors="coerce") @@ -90,8 +89,8 @@ class SatisfactionExperienceMetrics: # ------------------------------------------------------------------ # def csat_avg_by_skill_channel(self) -> pd.DataFrame: """ - CSAT promedio por skill/canal. - Si no hay csat_score, devuelve DataFrame vacío. + Average CSAT by skill/channel. + If there is no csat_score, returns empty DataFrame. """ df = self.df if "csat_score" not in df.columns or df["csat_score"].notna().sum() == 0: @@ -115,7 +114,7 @@ class SatisfactionExperienceMetrics: def nps_avg_by_skill_channel(self) -> pd.DataFrame: """ - NPS medio por skill/canal, si existe nps_score. 
+ Average NPS by skill/channel, if nps_score exists. """ df = self.df if "nps_score" not in df.columns or df["nps_score"].notna().sum() == 0: @@ -139,7 +138,7 @@ class SatisfactionExperienceMetrics: def ces_avg_by_skill_channel(self) -> pd.DataFrame: """ - CES medio por skill/canal, si existe ces_score. + Average CES by skill/channel, if ces_score exists. """ df = self.df if "ces_score" not in df.columns or df["ces_score"].notna().sum() == 0: @@ -163,11 +162,11 @@ class SatisfactionExperienceMetrics: def csat_global(self) -> float: """ - CSAT medio global (todas las interacciones). + Global average CSAT (all interactions). - Usa la columna opcional `csat_score`: - - Si no existe, devuelve NaN. - - Si todos los valores son NaN / vacíos, devuelve NaN. + Uses the optional `csat_score` column: + - If it does not exist, returns NaN. + - If all values are NaN / empty, returns NaN. """ df = self.df if "csat_score" not in df.columns: @@ -183,8 +182,8 @@ class SatisfactionExperienceMetrics: def csat_aht_correlation(self) -> Dict[str, Any]: """ - Correlación Pearson CSAT vs AHT. - Si falta csat o aht, o no hay varianza, devuelve NaN y código adecuado. + Pearson correlation CSAT vs AHT. + If csat or aht is missing, or there is no variance, returns NaN and appropriate code. """ df = self.df if "csat_score" not in df.columns or df["csat_score"].notna().sum() == 0: @@ -216,8 +215,8 @@ class SatisfactionExperienceMetrics: def csat_aht_skill_summary(self) -> pd.DataFrame: """ - Resumen por skill con clasificación del "sweet spot". - Si falta csat o aht, devuelve DataFrame vacío. + Summary by skill with "sweet spot" classification. + If csat or aht is missing, returns empty DataFrame. """ df = self.df if df["csat_score"].notna().sum() == 0 or df["aht"].notna().sum() == 0: @@ -258,20 +257,20 @@ class SatisfactionExperienceMetrics: # ------------------------------------------------------------------ # def plot_csat_vs_aht_scatter(self) -> Axes: """ - Scatter CSAT vs AHT por skill. 
- Si no hay datos suficientes, devuelve un Axes con mensaje. + Scatter CSAT vs AHT by skill. + If there is insufficient data, returns an Axes with message. """ df = self.df if df["csat_score"].notna().sum() == 0 or df["aht"].notna().sum() == 0: fig, ax = plt.subplots() - ax.text(0.5, 0.5, "Sin datos de CSAT/AHT", ha="center", va="center") + ax.text(0.5, 0.5, "No CSAT/AHT data", ha="center", va="center") ax.set_axis_off() return ax df = df.dropna(subset=["csat_score", "aht"]).copy() if df.empty: fig, ax = plt.subplots() - ax.text(0.5, 0.5, "Sin datos de CSAT/AHT", ha="center", va="center") + ax.text(0.5, 0.5, "No CSAT/AHT data", ha="center", va="center") ax.set_axis_off() return ax @@ -280,9 +279,9 @@ class SatisfactionExperienceMetrics: for skill, sub in df.groupby("queue_skill"): ax.scatter(sub["aht"], sub["csat_score"], label=skill, alpha=0.7) - ax.set_xlabel("AHT (segundos)") + ax.set_xlabel("AHT (seconds)") ax.set_ylabel("CSAT") - ax.set_title("CSAT vs AHT por skill") + ax.set_title("CSAT vs AHT by skill") ax.grid(alpha=0.3) ax.legend(title="Skill", bbox_to_anchor=(1.05, 1), loc="upper left") @@ -291,28 +290,28 @@ class SatisfactionExperienceMetrics: def plot_csat_distribution(self) -> Axes: """ - Histograma de CSAT. - Si no hay csat_score, devuelve un Axes con mensaje. + CSAT histogram. + If there is no csat_score, returns an Axes with message. 
""" df = self.df if "csat_score" not in df.columns or df["csat_score"].notna().sum() == 0: fig, ax = plt.subplots() - ax.text(0.5, 0.5, "Sin datos de CSAT", ha="center", va="center") + ax.text(0.5, 0.5, "No CSAT data", ha="center", va="center") ax.set_axis_off() return ax df = df.dropna(subset=["csat_score"]).copy() if df.empty: fig, ax = plt.subplots() - ax.text(0.5, 0.5, "Sin datos de CSAT", ha="center", va="center") + ax.text(0.5, 0.5, "No CSAT data", ha="center", va="center") ax.set_axis_off() return ax fig, ax = plt.subplots(figsize=(6, 4)) ax.hist(df["csat_score"], bins=10, alpha=0.7) ax.set_xlabel("CSAT") - ax.set_ylabel("Frecuencia") - ax.set_title("Distribución de CSAT") + ax.set_ylabel("Frequency") + ax.set_title("CSAT distribution") ax.grid(axis="y", alpha=0.3) return ax diff --git a/frontend/utils/dataTransformation.ts b/frontend/utils/dataTransformation.ts index bccf476..dba39a1 100644 --- a/frontend/utils/dataTransformation.ts +++ b/frontend/utils/dataTransformation.ts @@ -1,11 +1,11 @@ // utils/dataTransformation.ts -// Pipeline de transformación de datos raw a métricas procesadas +// Raw data to processed metrics transformation pipeline import type { RawInteraction } from '../types'; /** - * Paso 1: Limpieza de Ruido - * Elimina interacciones con duration < 10 segundos (falsos contactos o errores de sistema) + * Step 1: Noise Cleanup + * Removes interactions with duration < 10 seconds (false contacts or system errors) */ export function cleanNoiseFromData(interactions: RawInteraction[]): RawInteraction[] { const MIN_DURATION_SECONDS = 10; @@ -22,30 +22,30 @@ export function cleanNoiseFromData(interactions: RawInteraction[]): RawInteracti const removedCount = interactions.length - cleaned.length; const removedPercentage = ((removedCount / interactions.length) * 100).toFixed(1); - console.log(`🧹 Limpieza de Ruido: ${removedCount} interacciones eliminadas (${removedPercentage}% del total)`); - console.log(`✅ Interacciones limpias: ${cleaned.length}`); 
+ console.log(`🧹 Noise Cleanup: ${removedCount} interactions removed (${removedPercentage}% of total)`); + console.log(`✅ Clean interactions: ${cleaned.length}`); return cleaned; } /** - * Métricas base calculadas por skill + * Base metrics calculated by skill */ export interface SkillBaseMetrics { skill: string; - volume: number; // Número de interacciones - aht_mean: number; // AHT promedio (segundos) - aht_std: number; // Desviación estándar del AHT - transfer_rate: number; // Tasa de transferencia (0-100) - total_cost: number; // Coste total (€) + volume: number; // Number of interactions + aht_mean: number; // Average AHT (seconds) + aht_std: number; // AHT standard deviation + transfer_rate: number; // Transfer rate (0-100) + total_cost: number; // Total cost (€) - // Datos auxiliares para cálculos posteriores - aht_values: number[]; // Array de todos los AHT para percentiles + // Auxiliary data for subsequent calculations + aht_values: number[]; // Array of all AHT values for percentiles } /** - * Paso 2: Calcular Métricas Base por Skill - * Agrupa por skill y calcula volumen, AHT promedio, desviación estándar, tasa de transferencia y coste + * Step 2: Calculate Base Metrics by Skill + * Groups by skill and calculates volume, average AHT, standard deviation, transfer rate and cost */ export function calculateSkillBaseMetrics( interactions: RawInteraction[], @@ -53,7 +53,7 @@ export function calculateSkillBaseMetrics( ): SkillBaseMetrics[] { const COST_PER_SECOND = costPerHour / 3600; - // Agrupar por skill + // Group by skill const skillGroups = new Map(); interactions.forEach(interaction => { @@ -64,31 +64,31 @@ export function calculateSkillBaseMetrics( skillGroups.get(skill)!.push(interaction); }); - // Calcular métricas por skill + // Calculate metrics per skill const metrics: SkillBaseMetrics[] = []; skillGroups.forEach((skillInteractions, skill) => { const volume = skillInteractions.length; - // Calcular AHT para cada interacción + // Calculate AHT for 
each interaction const ahtValues = skillInteractions.map(i => i.duration_talk + i.hold_time + i.wrap_up_time ); - // AHT promedio + // Average AHT const ahtMean = ahtValues.reduce((sum, val) => sum + val, 0) / volume; - // Desviación estándar del AHT + // AHT standard deviation const variance = ahtValues.reduce((sum, val) => sum + Math.pow(val - ahtMean, 2), 0 ) / volume; const ahtStd = Math.sqrt(variance); - // Tasa de transferencia + // Transfer rate const transferCount = skillInteractions.filter(i => i.transfer_flag).length; const transferRate = (transferCount / volume) * 100; - // Coste total + // Total cost const totalCost = ahtValues.reduce((sum, aht) => sum + (aht * COST_PER_SECOND), 0 ); @@ -104,82 +104,82 @@ export function calculateSkillBaseMetrics( }); }); - // Ordenar por volumen descendente + // Sort by descending volume metrics.sort((a, b) => b.volume - a.volume); - console.log(`📊 Métricas Base calculadas para ${metrics.length} skills`); + console.log(`📊 Base Metrics calculated for ${metrics.length} skills`); return metrics; } /** - * Dimensiones transformadas para Agentic Readiness Score + * Transformed dimensions for Agentic Readiness Score */ export interface SkillDimensions { skill: string; volume: number; - // Dimensión 1: Predictibilidad (0-10) + // Dimension 1: Predictability (0-10) predictability_score: number; - predictability_cv: number; // Coeficiente de Variación (para referencia) + predictability_cv: number; // Coefficient of Variation (for reference) - // Dimensión 2: Complejidad Inversa (0-10) + // Dimension 2: Inverse Complexity (0-10) complexity_inverse_score: number; - complexity_transfer_rate: number; // Tasa de transferencia (para referencia) + complexity_transfer_rate: number; // Transfer rate (for reference) - // Dimensión 3: Repetitividad/Impacto (0-10) + // Dimension 3: Repetitiveness/Impact (0-10) repetitivity_score: number; - // Datos auxiliares + // Auxiliary data aht_mean: number; total_cost: number; } /** - * Paso 3: 
Transformar Métricas Base a Dimensiones - * Aplica las fórmulas de normalización para obtener scores 0-10 + * Step 3: Transform Base Metrics to Dimensions + * Applies normalization formulas to obtain 0-10 scores */ export function transformToDimensions( baseMetrics: SkillBaseMetrics[] ): SkillDimensions[] { return baseMetrics.map(metric => { - // Dimensión 1: Predictibilidad (Proxy: Variabilidad del AHT) - // CV = desviación estándar / media + // Dimension 1: Predictability (Proxy: AHT Variability) + // CV = standard deviation / mean const cv = metric.aht_std / metric.aht_mean; - // Normalización: CV <= 0.3 → 10, CV >= 1.5 → 0 - // Fórmula: MAX(0, MIN(10, 10 - ((CV - 0.3) / 1.2 * 10))) + // Normalization: CV <= 0.3 → 10, CV >= 1.5 → 0 + // Formula: MAX(0, MIN(10, 10 - ((CV - 0.3) / 1.2 * 10))) const predictabilityScore = Math.max(0, Math.min(10, 10 - ((cv - 0.3) / 1.2 * 10) )); - // Dimensión 2: Complejidad Inversa (Proxy: Tasa de Transferencia) - // T = tasa de transferencia (%) + // Dimension 2: Inverse Complexity (Proxy: Transfer Rate) + // T = transfer rate (%) const transferRate = metric.transfer_rate; - // Normalización: T <= 5% → 10, T >= 30% → 0 - // Fórmula: MAX(0, MIN(10, 10 - ((T - 0.05) / 0.25 * 10))) + // Normalization: T <= 5% → 10, T >= 30% → 0 + // Formula: MAX(0, MIN(10, 10 - ((T - 0.05) / 0.25 * 10))) const complexityInverseScore = Math.max(0, Math.min(10, 10 - ((transferRate / 100 - 0.05) / 0.25 * 10) )); - // Dimensión 3: Repetitividad/Impacto (Proxy: Volumen) - // Normalización fija: > 5,000 llamadas/mes = 10, < 100 = 0 + // Dimension 3: Repetitiveness/Impact (Proxy: Volume) + // Fixed normalization: > 5,000 calls/month = 10, < 100 = 0 let repetitivityScore: number; if (metric.volume >= 5000) { repetitivityScore = 10; } else if (metric.volume <= 100) { repetitivityScore = 0; } else { - // Interpolación lineal entre 100 y 5000 + // Linear interpolation between 100 and 5000 repetitivityScore = ((metric.volume - 100) / (5000 - 100)) * 10; } return 
{ skill: metric.skill, volume: metric.volume, - predictability_score: Math.round(predictabilityScore * 10) / 10, // 1 decimal - predictability_cv: Math.round(cv * 100) / 100, // 2 decimales + predictability_score: Math.round(predictabilityScore * 10) / 10, // 1 decimal place + predictability_cv: Math.round(cv * 100) / 100, // 2 decimal places complexity_inverse_score: Math.round(complexityInverseScore * 10) / 10, complexity_transfer_rate: Math.round(transferRate * 10) / 10, repetitivity_score: Math.round(repetitivityScore * 10) / 10, @@ -190,7 +190,7 @@ export function transformToDimensions( } /** - * Resultado final con Agentic Readiness Score + * Final result with Agentic Readiness Score */ export interface SkillAgenticReadiness extends SkillDimensions { agentic_readiness_score: number; // 0-10 @@ -199,28 +199,28 @@ export interface SkillAgenticReadiness extends SkillDimensions { } /** - * Paso 4: Calcular Agentic Readiness Score - * Promedio ponderado de las 3 dimensiones + * Step 4: Calculate Agentic Readiness Score + * Weighted average of the 3 dimensions */ export function calculateAgenticReadinessScore( dimensions: SkillDimensions[], weights?: { predictability: number; complexity: number; repetitivity: number } ): SkillAgenticReadiness[] { - // Pesos por defecto (ajustables) + // Default weights (adjustable) const w = weights || { - predictability: 0.40, // 40% - Más importante + predictability: 0.40, // 40% - Most important complexity: 0.35, // 35% repetitivity: 0.25 // 25% }; return dimensions.map(dim => { - // Promedio ponderado + // Weighted average const score = dim.predictability_score * w.predictability + dim.complexity_inverse_score * w.complexity + dim.repetitivity_score * w.repetitivity; - // Categorizar + // Categorize let category: 'automate_now' | 'assist_copilot' | 'optimize_first'; let label: string; @@ -245,29 +245,29 @@ export function calculateAgenticReadinessScore( } /** - * Pipeline completo: Raw Data → Agentic Readiness Score + * 
Complete pipeline: Raw Data → Agentic Readiness Score */ export function transformRawDataToAgenticReadiness( rawInteractions: RawInteraction[], costPerHour: number, weights?: { predictability: number; complexity: number; repetitivity: number } ): SkillAgenticReadiness[] { - console.log(`🚀 Iniciando pipeline de transformación con ${rawInteractions.length} interacciones...`); + console.log(`🚀 Starting transformation pipeline with ${rawInteractions.length} interactions...`); - // Paso 1: Limpieza de ruido + // Step 1: Noise cleanup const cleanedData = cleanNoiseFromData(rawInteractions); - // Paso 2: Calcular métricas base + // Step 2: Calculate base metrics const baseMetrics = calculateSkillBaseMetrics(cleanedData, costPerHour); - // Paso 3: Transformar a dimensiones + // Step 3: Transform to dimensions const dimensions = transformToDimensions(baseMetrics); - // Paso 4: Calcular Agentic Readiness Score + // Step 4: Calculate Agentic Readiness Score const agenticReadiness = calculateAgenticReadinessScore(dimensions, weights); - console.log(`✅ Pipeline completado: ${agenticReadiness.length} skills procesados`); - console.log(`📈 Distribución:`); + console.log(`✅ Pipeline completed: ${agenticReadiness.length} skills processed`); + console.log(`📈 Distribution:`); const automateCount = agenticReadiness.filter(s => s.readiness_category === 'automate_now').length; const assistCount = agenticReadiness.filter(s => s.readiness_category === 'assist_copilot').length; const optimizeCount = agenticReadiness.filter(s => s.readiness_category === 'optimize_first').length; @@ -279,7 +279,7 @@ export function transformRawDataToAgenticReadiness( } /** - * Utilidad: Generar resumen de estadísticas + * Utility: Generate statistics summary */ export function generateTransformationSummary( originalCount: number, @@ -300,11 +300,11 @@ export function generateTransformationSummary( const optimizePercent = skillsCount > 0 ? 
((optimizeCount/skillsCount)*100).toFixed(0) : '0'; return ` -📊 Resumen de Transformación: - • Interacciones originales: ${originalCount.toLocaleString()} - • Ruido eliminado: ${removedCount.toLocaleString()} (${removedPercentage}%) - • Interacciones limpias: ${cleanedCount.toLocaleString()} - • Skills únicos: ${skillsCount} +📊 Transformation Summary: + • Original interactions: ${originalCount.toLocaleString()} + • Noise removed: ${removedCount.toLocaleString()} (${removedPercentage}%) + • Clean interactions: ${cleanedCount.toLocaleString()} + • Unique skills: ${skillsCount} 🎯 Agentic Readiness: • 🟢 Automate Now: ${automateCount} skills (${automatePercent}%) diff --git a/frontend/utils/segmentClassifier.ts b/frontend/utils/segmentClassifier.ts index eee8562..6aa5718 100644 --- a/frontend/utils/segmentClassifier.ts +++ b/frontend/utils/segmentClassifier.ts @@ -1,5 +1,5 @@ // utils/segmentClassifier.ts -// Utilidad para clasificar colas/skills en segmentos de cliente +// Utility to classify queues/skills into customer segments import type { CustomerSegment, RawInteraction, StaticConfig } from '../types'; @@ -10,8 +10,8 @@ export interface SegmentMapping { } /** - * Parsea string de colas separadas por comas - * Ejemplo: "VIP, Premium, Enterprise" → ["VIP", "Premium", "Enterprise"] + * Parses queue string separated by commas + * Example: "VIP, Premium, Enterprise" → ["VIP", "Premium", "Enterprise"] */ export function parseQueueList(input: string): string[] { if (!input || input.trim().length === 0) { @@ -25,13 +25,13 @@ export function parseQueueList(input: string): string[] { } /** - * Clasifica una cola según el mapeo proporcionado - * Usa matching parcial y case-insensitive + * Classifies a queue according to the provided mapping + * Uses partial and case-insensitive matching * - * Ejemplo: + * Example: * - queue: "VIP_Support" + mapping.high: ["VIP"] → "high" - * - queue: "Soporte_General_N1" + mapping.medium: ["Soporte_General"] → "medium" - * - queue: 
"Retencion" (no match) → "medium" (default) + * - queue: "General_Support_L1" + mapping.medium: ["General_Support"] → "medium" + * - queue: "Retention" (no match) → "medium" (default) */ export function classifyQueue( queue: string, @@ -39,7 +39,7 @@ export function classifyQueue( ): CustomerSegment { const normalizedQueue = queue.toLowerCase().trim(); - // Buscar en high value + // Search in high value for (const highQueue of mapping.high_value_queues) { const normalizedHigh = highQueue.toLowerCase().trim(); if (normalizedQueue.includes(normalizedHigh) || normalizedHigh.includes(normalizedQueue)) { @@ -47,7 +47,7 @@ export function classifyQueue( } } - // Buscar en low value + // Search in low value for (const lowQueue of mapping.low_value_queues) { const normalizedLow = lowQueue.toLowerCase().trim(); if (normalizedQueue.includes(normalizedLow) || normalizedLow.includes(normalizedQueue)) { @@ -55,7 +55,7 @@ export function classifyQueue( } } - // Buscar en medium value (explícito) + // Search in medium value (explicit) for (const mediumQueue of mapping.medium_value_queues) { const normalizedMedium = mediumQueue.toLowerCase().trim(); if (normalizedQueue.includes(normalizedMedium) || normalizedMedium.includes(normalizedQueue)) { @@ -63,13 +63,13 @@ export function classifyQueue( } } - // Default: medium (para colas no mapeadas) + // Default: medium (for unmapped queues) return 'medium'; } /** - * Clasifica todas las colas únicas de un conjunto de interacciones - * Retorna un mapa de cola → segmento + * Classifies all unique queues from a set of interactions + * Returns a map of queue → segment */ export function classifyAllQueues( interactions: RawInteraction[], @@ -77,10 +77,10 @@ export function classifyAllQueues( ): Map { const queueSegments = new Map(); - // Obtener colas únicas + // Get unique queues const uniqueQueues = [...new Set(interactions.map(i => i.queue_skill))]; - // Clasificar cada cola + // Classify each queue uniqueQueues.forEach(queue => { 
queueSegments.set(queue, classifyQueue(queue, mapping)); }); @@ -89,8 +89,8 @@ export function classifyAllQueues( } /** - * Genera estadísticas de segmentación - * Retorna conteo, porcentaje y lista de colas por segmento + * Generates segmentation statistics + * Returns count, percentage and list of queues by segment */ export function getSegmentationStats( interactions: RawInteraction[], @@ -108,13 +108,13 @@ export function getSegmentationStats( total: interactions.length }; - // Contar interacciones por segmento + // Count interactions by segment interactions.forEach(interaction => { const segment = queueSegments.get(interaction.queue_skill) || 'medium'; stats[segment].count++; }); - // Calcular porcentajes + // Calculate percentages const total = interactions.length; if (total > 0) { stats.high.percentage = Math.round((stats.high.count / total) * 100); @@ -122,7 +122,7 @@ export function getSegmentationStats( stats.low.percentage = Math.round((stats.low.count / total) * 100); } - // Obtener colas por segmento (únicas) + // Get queues by segment (unique) queueSegments.forEach((segment, queue) => { if (!stats[segment].queues.includes(queue)) { stats[segment].queues.push(queue); @@ -133,7 +133,7 @@ export function getSegmentationStats( } /** - * Valida que el mapeo tenga al menos una cola en algún segmento + * Validates that the mapping has at least one queue in some segment */ export function isValidMapping(mapping: SegmentMapping): boolean { return ( @@ -144,8 +144,8 @@ export function isValidMapping(mapping: SegmentMapping): boolean { } /** - * Crea un mapeo desde StaticConfig - * Si no hay segment_mapping, retorna mapeo vacío + * Creates a mapping from StaticConfig + * If there is no segment_mapping, returns empty mapping */ export function getMappingFromConfig(config: StaticConfig): SegmentMapping | null { if (!config.segment_mapping) { @@ -160,8 +160,8 @@ export function getMappingFromConfig(config: StaticConfig): SegmentMapping | nul } /** - * Obtiene el 
segmento para una cola específica desde el config
- * Si no hay mapeo, retorna 'medium' por defecto
+ * Gets the segment for a specific queue from the config
+ * If there is no mapping, returns 'medium' by default
  */
 export function getSegmentForQueue(
   queue: string,
@@ -177,7 +177,7 @@
 }
 
 /**
- * Formatea estadísticas para mostrar en UI
+ * Formats statistics for display in UI
  */
 export function formatSegmentationSummary(
   stats: ReturnType<typeof getSegmentationStats>
@@ -185,15 +185,15 @@
   const parts: string[] = [];
 
   if (stats.high.count > 0) {
-    parts.push(`${stats.high.percentage}% High Value (${stats.high.count} interacciones)`);
+    parts.push(`${stats.high.percentage}% High Value (${stats.high.count} interactions)`);
   }
 
   if (stats.medium.count > 0) {
-    parts.push(`${stats.medium.percentage}% Medium Value (${stats.medium.count} interacciones)`);
+    parts.push(`${stats.medium.percentage}% Medium Value (${stats.medium.count} interactions)`);
   }
 
   if (stats.low.count > 0) {
-    parts.push(`${stats.low.percentage}% Low Value (${stats.low.count} interacciones)`);
+    parts.push(`${stats.low.percentage}% Low Value (${stats.low.count} interactions)`);
   }
 
   return parts.join(' | ');