Files
BeyondCXAnalytics-Demo/backend/beyond_metrics/dimensions/Volumetria.py
2026-02-04 11:08:21 +01:00

269 lines
7.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
from dataclasses import dataclass
from typing import List
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.axes import Axes
# Columns that must be present in the input DataFrame for the volume metrics.
# Any additional column is simply ignored.
REQUIRED_COLUMNS_VOLUMETRIA: List[str] = [
    "interaction_id",  # distinct values of this column are what gets counted
    "datetime_start",  # parsed to datetime; drives the time-based metrics
    "queue_skill",     # grouping key for the per-skill metrics
    "channel",         # grouping key for the per-channel metrics
]
@dataclass(eq=False)
class VolumetriaMetrics:
    """
    Volume metrics computed from an interactions DataFrame.

    Minimum required columns:
      - interaction_id
      - datetime_start
      - queue_skill
      - channel

    Other columns may be present; they are not used by these metrics.

    The DataFrame passed in is not modified: initialization stores a
    normalized copy on ``self.df`` (``datetime_start`` parsed to datetime,
    ``queue_skill`` and ``channel`` cast to stripped strings).

    ``eq=False`` is deliberate. The auto-generated dataclass ``__eq__`` would
    compare the ``df`` fields with ``==``, which for DataFrames yields an
    element-wise result instead of a boolean and makes instance comparison
    fail. Instances therefore compare (and hash) by identity.

    Raises:
        ValueError: on construction, if any required column is missing.
    """

    df: pd.DataFrame

    def __post_init__(self) -> None:
        self._validate_columns()
        self._prepare_data()

    # ------------------------------------------------------------------ #
    # Internal helpers
    # ------------------------------------------------------------------ #
    def _validate_columns(self) -> None:
        """Raise ``ValueError`` listing every required column that is absent."""
        missing = [c for c in REQUIRED_COLUMNS_VOLUMETRIA if c not in self.df.columns]
        if missing:
            raise ValueError(
                f"Faltan columnas obligatorias para VolumetriaMetrics: {missing}"
            )

    def _prepare_data(self) -> None:
        """Replace ``self.df`` with a normalized copy of the input frame."""
        df = self.df.copy()

        # Unparseable timestamps become NaT; the time-based metrics drop them.
        df["datetime_start"] = pd.to_datetime(df["datetime_start"], errors="coerce")

        # NOTE(review): astype(str) also stringifies missing values, so they
        # end up as their own category (e.g. "nan") in the grouped metrics.
        # Kept as-is because changing it would alter existing totals.
        df["queue_skill"] = df["queue_skill"].astype(str).str.strip()
        df["channel"] = df["channel"].astype(str).str.strip()

        self.df = df

    def _unique_interactions_by(self, column: str) -> pd.Series:
        """Count distinct ``interaction_id`` per value of *column*, descending."""
        return (
            self.df.groupby(column)["interaction_id"]
            .nunique()
            .sort_values(ascending=False)
        )

    @staticmethod
    def _as_percentages(counts: pd.Series) -> pd.Series:
        """Convert counts to percentages of their sum, rounded to 2 decimals."""
        total = counts.sum()
        if total == 0:
            # Avoid dividing by zero; keep the index and return float zeros.
            return counts * 0.0
        return (counts / total * 100).round(2)

    def _rows_with_valid_start(self) -> pd.DataFrame:
        """Return a copy of the rows whose ``datetime_start`` is not missing."""
        return self.df.dropna(subset=["datetime_start"]).copy()

    # ------------------------------------------------------------------ #
    # Convenience properties
    # ------------------------------------------------------------------ #
    @property
    def is_empty(self) -> bool:
        """True when the prepared DataFrame has no rows."""
        return self.df.empty

    # ------------------------------------------------------------------ #
    # Numeric / tabular metrics
    # ------------------------------------------------------------------ #
    def volume_by_channel(self) -> pd.Series:
        """
        Number of distinct interactions per channel, sorted descending.
        """
        return self._unique_interactions_by("channel")

    def volume_by_skill(self) -> pd.Series:
        """
        Number of distinct interactions per skill / queue, sorted descending.
        """
        return self._unique_interactions_by("queue_skill")

    def channel_distribution_pct(self) -> pd.Series:
        """
        Percentage distribution of volume per channel (2 decimals).
        """
        return self._as_percentages(self.volume_by_channel())

    def skill_distribution_pct(self) -> pd.Series:
        """
        Percentage distribution of volume per skill (2 decimals).
        """
        return self._as_percentages(self.volume_by_skill())

    def heatmap_24x7(self) -> pd.DataFrame:
        """
        Matrix [day_of_week x hour] with the number of distinct interactions.

        Rows are ``dayofweek`` values 0..6 (0 = Monday ... 6 = Sunday) and
        columns are hours 0..23. Rows without a valid ``datetime_start`` are
        ignored. The result always has shape (7, 24), with axes named
        ``dow`` / ``hour``, even when there is no data.
        """
        df = self._rows_with_valid_start()
        if df.empty:
            # All-zero frame with the same axis names as the populated result.
            return pd.DataFrame(
                0,
                index=pd.RangeIndex(7, name="dow"),
                columns=pd.RangeIndex(24, name="hour"),
            )

        df["dow"] = df["datetime_start"].dt.dayofweek
        df["hour"] = df["datetime_start"].dt.hour

        return (
            df.pivot_table(
                index="dow",
                columns="hour",
                values="interaction_id",
                aggfunc="nunique",
                fill_value=0,
            )
            # Make sure days/hours with no traffic still appear, as zeros.
            .reindex(index=range(7), fill_value=0)
            .reindex(columns=range(24), fill_value=0)
        )

    def monthly_seasonality_cv(self) -> float:
        """
        Coefficient of variation of the monthly volume, in percent.

        CV = sample std (ddof=1) / mean * 100, rounded to 2 decimals.
        Returns NaN when there are no valid timestamps, fewer than two
        months of data, or a zero mean.
        """
        df = self._rows_with_valid_start()
        if df.empty:
            return float("nan")

        df["year_month"] = df["datetime_start"].dt.to_period("M")
        monthly_counts = (
            df.groupby("year_month")["interaction_id"].nunique().astype(float)
        )

        # A sample standard deviation needs at least two observations.
        if len(monthly_counts) < 2:
            return float("nan")

        mean = monthly_counts.mean()
        if mean == 0:
            return float("nan")
        std = monthly_counts.std(ddof=1)

        return float(round(std / mean * 100, 2))

    def peak_offpeak_ratio(self) -> float:
        """
        Ratio between peak-hour and off-peak volume (3 decimals).

        Peak is defined as hours 10:00-19:59; everything else is off-peak.
        Returns NaN when there is no usable data, and +inf when there is
        peak volume but no off-peak volume.
        """
        df = self._rows_with_valid_start()
        if df.empty:
            return float("nan")

        peak_hours = list(range(10, 20))
        is_peak = df["datetime_start"].dt.hour.isin(peak_hours)

        peak_vol = df.loc[is_peak, "interaction_id"].nunique()
        off_vol = df.loc[~is_peak, "interaction_id"].nunique()

        if off_vol == 0:
            return float("inf") if peak_vol > 0 else float("nan")

        return float(round(peak_vol / off_vol, 3))

    def concentration_top20_skills_pct(self) -> float:
        """
        Percentage of volume concentrated in the top 20% of skills.

        Skills are ranked by number of distinct interactions; at least one
        skill is always included in the top group. Returns NaN when there
        are no skills or no volume.
        """
        counts = self.volume_by_skill()

        n_skills = len(counts)
        if n_skills == 0:
            return float("nan")

        total = counts.sum()
        if total == 0:
            return float("nan")

        top_n = max(1, int(np.ceil(0.2 * n_skills)))
        top_vol = counts.head(top_n).sum()

        return float(round(top_vol / total * 100, 2))

    # ------------------------------------------------------------------ #
    # Plots
    # ------------------------------------------------------------------ #
    def plot_heatmap_24x7(self) -> Axes:
        """
        Heatmap of volume by day of week (0-6) and hour (0-23).

        Returns the Axes so the caller can save the figure.
        """
        data = self.heatmap_24x7()

        fig, ax = plt.subplots(figsize=(10, 4))
        im = ax.imshow(data.values, aspect="auto", origin="lower")

        ax.set_xticks(range(24))
        ax.set_xticklabels([str(h) for h in range(24)])

        ax.set_yticks(range(7))
        ax.set_yticklabels(["L", "M", "X", "J", "V", "S", "D"])

        ax.set_xlabel("Hora del día")
        ax.set_ylabel("Día de la semana")
        ax.set_title("Volumen por día de la semana y hora")

        # Attach the colorbar to the figure created above rather than to
        # whatever figure pyplot currently considers "current".
        fig.colorbar(im, ax=ax, label="Nº interacciones")

        return ax

    def plot_channel_distribution(self) -> Axes:
        """
        Bar chart of volume per channel.

        Returns the Axes so the caller can save the figure.
        """
        series = self.volume_by_channel()

        _, ax = plt.subplots(figsize=(6, 4))
        # With nothing to draw, still return a labelled (blank) Axes instead
        # of depending on how pandas handles an empty bar plot.
        if not series.empty:
            series.plot(kind="bar", ax=ax)

        ax.set_xlabel("Canal")
        ax.set_ylabel("Nº interacciones")
        ax.set_title("Volumen por canal")
        ax.grid(axis="y", alpha=0.3)

        return ax

    def plot_skill_pareto(self) -> Axes:
        """
        Simple Pareto of volume per skill (volume bars only).

        Returns the Axes so the caller can save the figure.
        """
        series = self.volume_by_skill()

        _, ax = plt.subplots(figsize=(10, 4))
        # Same empty-data guard as the channel plot.
        if not series.empty:
            series.plot(kind="bar", ax=ax)

        ax.set_xlabel("Skill / Cola")
        ax.set_ylabel("Nº interacciones")
        ax.set_title("Pareto de volumen por skill")
        ax.grid(axis="y", alpha=0.3)

        # Rotate the labels of this Axes explicitly instead of going through
        # pyplot's implicit "current axes".
        for label in ax.get_xticklabels():
            label.set_rotation(45)
            label.set_horizontalalignment("right")

        return ax