Phase 2 of Spanish-to-English translation for medium-priority files: Frontend utils (2 files): - dataTransformation.ts: Translated ~72 occurrences (comments, docs, console logs) - segmentClassifier.ts: Translated ~20 occurrences (JSDoc, inline comments, UI strings) Backend dimensions (3 files): - OperationalPerformance.py: Translated ~117 lines (docstrings, comments) - SatisfactionExperience.py: Translated ~33 lines (docstrings, comments) - EconomyCost.py: Translated ~79 lines (docstrings, comments) All function names and variable names preserved for API compatibility. Frontend and backend compilation tested and verified successful. Related to TRANSLATION_STATUS.md Phase 2 objectives. https://claude.ai/code/session_01GNbnkFoESkRcnPr3bLCYDg
318 lines
10 KiB
Python
318 lines
10 KiB
Python
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from typing import Dict, List, Any
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
import matplotlib.pyplot as plt
|
|
from matplotlib.axes import Axes
|
|
|
|
|
|
# Only columns from the "core" dataset are required; satisfaction columns
# (csat_score / nps_score / ces_score / aht) are optional and handled separately.
REQUIRED_COLUMNS_SAT: List[str] = [
    "interaction_id",
    "datetime_start",
    "queue_skill",
    "channel",
    "duration_talk",
    "hold_time",
    "wrap_up_time",
]
|
|
|
|
|
|
@dataclass
class SatisfactionExperienceMetrics:
    """
    Dimension 3: SATISFACTION and EXPERIENCE

    Wraps an interactions DataFrame and exposes satisfaction KPIs and plots.

    All satisfaction columns (csat/nps/ces/aht) are OPTIONAL.
    If they are not present, the metrics that use them return empty/NaN but
    never break the pipeline.

    Attributes:
        df: Interactions table. On construction it is validated against
            REQUIRED_COLUMNS_SAT and replaced by a normalised copy (see
            `_prepare_data`); the caller's DataFrame is not modified.

    Raises:
        ValueError: at construction time, if a required column is missing.
    """

    df: pd.DataFrame

    def __post_init__(self) -> None:
        self._validate_columns()
        self._prepare_data()

    # ------------------------------------------------------------------ #
    # Helpers
    # ------------------------------------------------------------------ #
    def _validate_columns(self) -> None:
        """Raise ValueError listing every required column absent from `df`."""
        missing = [c for c in REQUIRED_COLUMNS_SAT if c not in self.df.columns]
        if missing:
            raise ValueError(
                f"Missing required columns for SatisfactionExperienceMetrics: {missing}"
            )

    def _prepare_data(self) -> None:
        """
        Normalise a copy of `df` and store it back on the instance.

        - `datetime_start` is parsed to datetime (unparseable values -> NaT).
        - Duration columns and optional score columns are coerced to numeric
          (unparseable values -> NaN).
        - `handle_time` = talk + hold + wrap-up, treating missing parts as 0.
        - `aht` uses the explicit column when present, otherwise `handle_time`.
        - `queue_skill` / `channel` are cast to str and stripped.
        """
        df = self.df.copy()

        df["datetime_start"] = pd.to_datetime(df["datetime_start"], errors="coerce")

        # Base durations always exist (they are required columns)
        for col in ["duration_talk", "hold_time", "wrap_up_time"]:
            df[col] = pd.to_numeric(df[col], errors="coerce")

        # Handle time: a missing component counts as 0 seconds
        df["handle_time"] = (
            df["duration_talk"].fillna(0)
            + df["hold_time"].fillna(0)
            + df["wrap_up_time"].fillna(0)
        )

        # csat_score optional: when absent the column is created filled with NaN
        df["csat_score"] = pd.to_numeric(df.get("csat_score", np.nan), errors="coerce")

        # aht optional: if explicit column exists we use it, otherwise we use handle_time
        if "aht" in df.columns:
            df["aht"] = pd.to_numeric(df["aht"], errors="coerce")
        else:
            df["aht"] = df["handle_time"]

        # NPS / CES optional
        df["nps_score"] = pd.to_numeric(df.get("nps_score", np.nan), errors="coerce")
        df["ces_score"] = pd.to_numeric(df.get("ces_score", np.nan), errors="coerce")

        df["queue_skill"] = df["queue_skill"].astype(str).str.strip()
        df["channel"] = df["channel"].astype(str).str.strip()

        self.df = df

    def _has_values(self, column: str) -> bool:
        """Return True when `column` exists in `df` and has at least one non-null value."""
        return column in self.df.columns and bool(self.df[column].notna().any())

    def _avg_by_skill_channel(self, score_col: str) -> pd.DataFrame:
        """
        Mean of `score_col` pivoted by queue_skill (rows) x channel (columns).

        Returns an empty DataFrame when the column is missing or entirely null.
        Values are rounded to 2 decimals and rows are sorted by skill.
        """
        if not self._has_values(score_col):
            return pd.DataFrame()

        # At least one non-null score exists, so the filtered frame is non-empty.
        scored = self.df.dropna(subset=[score_col])
        return (
            scored.pivot_table(
                index="queue_skill",
                columns="channel",
                values=score_col,
                aggfunc="mean",
            )
            .sort_index()
            .round(2)
        )

    @staticmethod
    def _empty_axes(message: str) -> Axes:
        """Create a new figure whose only content is a centred `message`, axes hidden."""
        _, ax = plt.subplots()
        ax.text(0.5, 0.5, message, ha="center", va="center")
        ax.set_axis_off()
        return ax

    @property
    def is_empty(self) -> bool:
        """True when the prepared DataFrame is empty."""
        return self.df.empty

    # ------------------------------------------------------------------ #
    # KPIs
    # ------------------------------------------------------------------ #
    def csat_avg_by_skill_channel(self) -> pd.DataFrame:
        """
        Average CSAT by skill/channel.
        If there is no csat_score, returns empty DataFrame.
        """
        return self._avg_by_skill_channel("csat_score")

    def nps_avg_by_skill_channel(self) -> pd.DataFrame:
        """
        Average NPS by skill/channel, if nps_score exists.
        Otherwise returns empty DataFrame.
        """
        return self._avg_by_skill_channel("nps_score")

    def ces_avg_by_skill_channel(self) -> pd.DataFrame:
        """
        Average CES by skill/channel, if ces_score exists.
        Otherwise returns empty DataFrame.
        """
        return self._avg_by_skill_channel("ces_score")

    def csat_global(self) -> float:
        """
        Global average CSAT (all interactions), rounded to 2 decimals.

        Uses the optional `csat_score` column:
        - If it does not exist, returns NaN.
        - If all values are NaN / empty, returns NaN.
        """
        df = self.df
        if "csat_score" not in df.columns:
            return float("nan")

        series = pd.to_numeric(df["csat_score"], errors="coerce").dropna()
        if series.empty:
            return float("nan")

        return float(round(series.mean(), 2))

    def csat_aht_correlation(self) -> Dict[str, Any]:
        """
        Pearson correlation CSAT vs AHT.

        Returns a dict with keys:
        - "r": correlation rounded to 3 decimals, or NaN when undefined.
        - "n": number of rows with both values present (as float).
        - "interpretation_code": one of
            "sin_datos"     - csat_score or aht missing / entirely null,
            "insuficiente"  - fewer than 2 paired rows,
            "sin_varianza"  - either variable has zero sample std,
            "negativo"      - r < -0.3,
            "positivo"      - r > 0.3,
            "neutral"       - otherwise.
        """
        if not self._has_values("csat_score") or not self._has_values("aht"):
            return {"r": float("nan"), "n": 0.0, "interpretation_code": "sin_datos"}

        paired = self.df.dropna(subset=["csat_score", "aht"])
        n = len(paired)
        if n < 2:
            return {"r": float("nan"), "n": float(n), "interpretation_code": "insuficiente"}

        x = paired["aht"].astype(float)
        y = paired["csat_score"].astype(float)

        # Pearson r is undefined when either variable is constant
        if x.std(ddof=1) == 0 or y.std(ddof=1) == 0:
            return {"r": float("nan"), "n": float(n), "interpretation_code": "sin_varianza"}

        r = float(np.corrcoef(x, y)[0, 1])

        if r < -0.3:
            interpretation = "negativo"
        elif r > 0.3:
            interpretation = "positivo"
        else:
            interpretation = "neutral"

        return {"r": round(r, 3), "n": float(n), "interpretation_code": interpretation}

    def csat_aht_skill_summary(self) -> pd.DataFrame:
        """
        Summary by skill with "sweet spot" classification.

        Result is indexed by queue_skill with columns `csat_avg`, `aht_avg`
        (both rounded to 2 decimals) and `classification`:
        - "ideal_automatizar": aht_avg <= P40(aht) and csat_avg >= P60(csat)
        - "requiere_humano":   aht_avg >= P60(aht) and csat_avg >= P40(csat)
        - "neutral":           otherwise
        Percentiles are computed over individual interactions that have both
        values; the first matching rule wins.

        If csat or aht is missing, returns an empty DataFrame with those columns.
        """
        result_columns = ["csat_avg", "aht_avg", "classification"]

        if not self._has_values("csat_score") or not self._has_values("aht"):
            return pd.DataFrame(columns=result_columns)

        # Both columns can hold data yet never on the same row
        paired = self.df.dropna(subset=["csat_score", "aht"])
        if paired.empty:
            return pd.DataFrame(columns=result_columns)

        grouped = paired.groupby("queue_skill").agg(
            csat_avg=("csat_score", "mean"),
            aht_avg=("aht", "mean"),
        )

        aht_all = paired["aht"].astype(float)
        csat_all = paired["csat_score"].astype(float)

        aht_p40 = float(np.percentile(aht_all, 40))
        aht_p60 = float(np.percentile(aht_all, 60))
        csat_p40 = float(np.percentile(csat_all, 40))
        csat_p60 = float(np.percentile(csat_all, 60))

        # np.select picks the first condition that holds, matching the rule priority
        fast_and_happy = (grouped["aht_avg"] <= aht_p40) & (grouped["csat_avg"] >= csat_p60)
        slow_but_ok = (grouped["aht_avg"] >= aht_p60) & (grouped["csat_avg"] >= csat_p40)
        grouped["classification"] = np.select(
            [fast_and_happy, slow_but_ok],
            ["ideal_automatizar", "requiere_humano"],
            default="neutral",
        )

        return grouped.round({"csat_avg": 2, "aht_avg": 2})

    # ------------------------------------------------------------------ #
    # Plots
    # ------------------------------------------------------------------ #
    def plot_csat_vs_aht_scatter(self) -> Axes:
        """
        Scatter CSAT vs AHT, one series per skill.
        If there is insufficient data, returns an Axes with message.
        """
        if not self._has_values("csat_score") or not self._has_values("aht"):
            return self._empty_axes("No CSAT/AHT data")

        paired = self.df.dropna(subset=["csat_score", "aht"])
        if paired.empty:
            return self._empty_axes("No CSAT/AHT data")

        fig, ax = plt.subplots(figsize=(8, 5))

        for skill, sub in paired.groupby("queue_skill"):
            ax.scatter(sub["aht"], sub["csat_score"], label=skill, alpha=0.7)

        ax.set_xlabel("AHT (seconds)")
        ax.set_ylabel("CSAT")
        ax.set_title("CSAT vs AHT by skill")
        ax.grid(alpha=0.3)
        # Legend is anchored outside the plotting area, to the right
        ax.legend(title="Skill", bbox_to_anchor=(1.05, 1), loc="upper left")

        # Lay out this figure explicitly rather than pyplot's "current" figure
        fig.tight_layout()
        return ax

    def plot_csat_distribution(self) -> Axes:
        """
        CSAT histogram (10 bins).
        If there is no csat_score, returns an Axes with message.
        """
        if not self._has_values("csat_score"):
            return self._empty_axes("No CSAT data")

        scored = self.df.dropna(subset=["csat_score"])

        _, ax = plt.subplots(figsize=(6, 4))
        ax.hist(scored["csat_score"], bins=10, alpha=0.7)
        ax.set_xlabel("CSAT")
        ax.set_ylabel("Frequency")
        ax.set_title("CSAT distribution")
        ax.grid(axis="y", alpha=0.3)

        return ax
|