Initial commit: frontend + backend integration

This commit is contained in:
Ignacio
2025-12-29 18:12:32 +01:00
commit 2cd6d6b95c
146 changed files with 31503 additions and 0 deletions

View File

@@ -0,0 +1,55 @@
"""
beyond_metrics package
======================
Capa pública del sistema BeyondMetrics.
Expone:
- Dimensiones (Volumetría, Eficiencia, ...)
- Pipeline principal
- Conectores de IO (local, S3, ...)
"""
from .dimensions import (
VolumetriaMetrics,
OperationalPerformanceMetrics,
SatisfactionExperienceMetrics,
EconomyCostMetrics,
)
from .pipeline import (
BeyondMetricsPipeline,
build_pipeline,
load_dimensions_config, # opcional, pero útil
)
from .io import (
DataSource,
ResultsSink,
LocalDataSource,
LocalResultsSink,
S3DataSource,
S3ResultsSink,
# si has añadido GoogleDrive, puedes exponerlo aquí también:
# GoogleDriveDataSource,
# GoogleDriveResultsSink,
)
__all__ = [
# Dimensiones
"VolumetriaMetrics",
"OperationalPerformanceMetrics",
"SatisfactionExperienceMetrics",
"EconomyCostMetrics",
# Pipeline
"BeyondMetricsPipeline",
"build_pipeline",
"load_dimensions_config",
# IO
"DataSource",
"ResultsSink",
"LocalDataSource",
"LocalResultsSink",
"S3DataSource",
"S3ResultsSink",
# "GoogleDriveDataSource",
# "GoogleDriveResultsSink",
]

View File

@@ -0,0 +1,310 @@
from __future__ import annotations
import json
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Optional, Sequence
from reportlab.lib.pagesizes import A4
from reportlab.pdfgen import canvas
from reportlab.lib.utils import ImageReader
from openai import OpenAI
# System prompt for the report model. Kept in Spanish — presumably the target
# audience of the generated report is Spanish-speaking; it frames the model as
# a contact-center consultant producing a business-oriented report from the
# BeyondMetrics JSON results.
DEFAULT_SYSTEM_PROMPT = (
    "Eres un consultor experto en contact centers. "
    "Vas a recibir resultados analíticos de un sistema de métricas "
    "(BeyondMetrics) en formato JSON. Tu tarea es generar un informe claro, "
    "accionable y orientado a negocio, destacando los principales hallazgos, "
    "riesgos y oportunidades de mejora."
)
@dataclass
class ReportAgentConfig:
    """
    Basic configuration for the report agent.

    openai_api_key:
        May be passed explicitly or read from the OPENAI_API_KEY
        environment variable.
    model:
        ChatGPT model to use, e.g. 'gpt-4.1-mini' or similar.
    system_prompt:
        System prompt controlling the style of the report.
    """

    openai_api_key: Optional[str] = None
    model: str = "gpt-4.1-mini"
    system_prompt: str = DEFAULT_SYSTEM_PROMPT
class BeyondMetricsReportAgent:
    """
    Minimal agent that:
    1) Reads the results JSON of a BeyondMetrics run.
    2) Builds a prompt from those results.
    3) Calls ChatGPT to generate the report text.
    4) Writes the report to a PDF on disk, EMBEDDING the PNG images
       produced by the pipeline as annexes.

    MVP: focused on text + embedded figures.
    """

    def __init__(self, config: Optional[ReportAgentConfig] = None) -> None:
        """
        Raises:
            RuntimeError: when no OpenAI API key is available, neither in
                the config nor in the OPENAI_API_KEY environment variable.
        """
        self.config = config or ReportAgentConfig()
        api_key = self.config.openai_api_key or os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise RuntimeError(
                "Falta la API key de OpenAI. "
                "Pásala en ReportAgentConfig(openai_api_key=...) o "
                "define la variable de entorno OPENAI_API_KEY."
            )
        # Client for the new-style OpenAI API.
        self._client = OpenAI(api_key=api_key)

    # ------------------------------------------------------------------
    # Main public API
    # ------------------------------------------------------------------
    def generate_pdf_report(
        self,
        run_base: str,
        output_pdf_path: Optional[str] = None,
        extra_user_prompt: str = "",
    ) -> str:
        """
        Generate a PDF report from a results folder.

        Parameters:
        - run_base:
            Base folder of the run. Must contain at least 'results.json'
            and, optionally, PNG images produced by the pipeline.
        - output_pdf_path:
            Full path of the output PDF. If None,
            'beyondmetrics_report.pdf' is created inside run_base.
        - extra_user_prompt:
            Additional text to fine-tune the request to the agent
            (e.g. "emphasise efficiency and SLA", etc.)

        Returns:
        - The path of the generated PDF.

        Raises:
            FileNotFoundError: when 'results.json' is missing in run_base.
        """
        run_dir = Path(run_base)
        results_json = run_dir / "results.json"
        if not results_json.exists():
            raise FileNotFoundError(
                f"No se ha encontrado {results_json}. "
                "Asegúrate de ejecutar primero el pipeline."
            )
        # 1) Load the results JSON.
        with results_json.open("r", encoding="utf-8") as f:
            results_data: Dict[str, Any] = json.load(f)
        # 2) Collect generated images (sorted for a stable annex order).
        image_files = sorted(run_dir.glob("*.png"))
        # 3) Build the user prompt.
        user_prompt = self._build_user_prompt(
            results=results_data,
            image_files=[p.name for p in image_files],
            extra_user_prompt=extra_user_prompt,
        )
        # 4) Ask ChatGPT for the report text.
        report_text = self._call_chatgpt(user_prompt)
        # 5) Write the PDF with text + embedded images.
        if output_pdf_path is None:
            output_pdf_path = str(run_dir / "beyondmetrics_report.pdf")
        self._write_pdf(output_pdf_path, report_text, image_files)
        return output_pdf_path

    # ------------------------------------------------------------------
    # Prompt construction
    # ------------------------------------------------------------------
    def _build_user_prompt(
        self,
        results: Dict[str, Any],
        image_files: Sequence[str],
        extra_user_prompt: str = "",
    ) -> str:
        """
        Build the user message that will be sent to the model.

        For the MVP the whole results JSON is serialised; it can be
        summarised later if the JSON grows too large.
        """
        results_str = json.dumps(results, indent=2, ensure_ascii=False)
        images_section = (
            "Imágenes generadas en la ejecución:\n"
            + "\n".join(f"- {name}" for name in image_files)
            if image_files
            else "No se han generado imágenes en esta ejecución."
        )
        extra = (
            f"\n\nInstrucciones adicionales del usuario:\n{extra_user_prompt}"
            if extra_user_prompt
            else ""
        )
        prompt = (
            "A continuación te proporciono los resultados de una ejecución de BeyondMetrics "
            "en formato JSON. Debes elaborar un INFORME EJECUTIVO para un cliente de "
            "contact center. El informe debe incluir:\n"
            "- Resumen ejecutivo en lenguaje de negocio.\n"
            "- Principales hallazgos por dimensión.\n"
            "- Riesgos o problemas detectados.\n"
            "- Recomendaciones accionables.\n\n"
            "Resultados (JSON):\n"
            f"{results_str}\n\n"
            f"{images_section}"
            f"{extra}"
        )
        return prompt

    # ------------------------------------------------------------------
    # ChatGPT call (new-style API)
    # ------------------------------------------------------------------
    def _call_chatgpt(self, user_prompt: str) -> str:
        """
        Call the ChatGPT model and return the content of the reply message.

        Raises:
            RuntimeError: when the model response contains no text.
        """
        resp = self._client.chat.completions.create(
            model=self.config.model,
            messages=[
                {"role": "system", "content": self.config.system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.3,
        )
        content = resp.choices[0].message.content
        if not isinstance(content, str):
            raise RuntimeError("La respuesta del modelo no contiene texto.")
        return content

    # ------------------------------------------------------------------
    # PDF writing (text + images)
    # ------------------------------------------------------------------
    def _write_pdf(
        self,
        output_path: str,
        text: str,
        image_paths: Sequence[Path],
    ) -> None:
        """
        Create an A4 PDF with:
        1) The report text (leading pages).
        2) An annex section embedding the PNG images produced by the
           pipeline, scaled to fit the page.
        """
        c = canvas.Canvas(str(output_path), pagesize=A4)
        width, height = A4
        margin_x = 50
        margin_y = 50
        max_width = width - 2 * margin_x
        line_height = 14
        c.setFont("Helvetica", 11)

        # --- Main text ---
        def _wrap_line(line: str, max_chars: int = 100) -> list[str]:
            # Greedy word wrap. A blank input line yields [""] so paragraph
            # breaks keep their vertical space in the PDF (previously they
            # were silently dropped).
            words = line.split()
            if not words:
                return [""]
            parts: list[str] = []
            current: list[str] = []
            count = 0
            for word in words:
                # Only break when the current segment is non-empty; this
                # avoids emitting an empty segment when the very first word
                # alone exceeds max_chars.
                if current and count + len(word) + 1 > max_chars:
                    parts.append(" ".join(current))
                    current = [word]
                    count = len(word) + 1
                else:
                    current.append(word)
                    count += len(word) + 1
            if current:
                parts.append(" ".join(current))
            return parts

        y = height - margin_y
        for raw_line in text.splitlines():
            for line in _wrap_line(raw_line):
                if y < margin_y:
                    c.showPage()
                    c.setFont("Helvetica", 11)
                    y = height - margin_y
                c.drawString(margin_x, y, line)
                y -= line_height

        # --- Annex: embedded figures ---
        if image_paths:
            # Start the figures on a fresh page.
            c.showPage()
            c.setFont("Helvetica-Bold", 14)
            c.drawString(margin_x, height - margin_y, "Anexo: Figuras")
            c.setFont("Helvetica", 11)
            current_y = height - margin_y - 2 * line_height
            for img_path in image_paths:
                # Move to a new page when the image cannot fit.
                available_height = current_y - margin_y
                if available_height < 100:  # minimum usable space
                    c.showPage()
                    c.setFont("Helvetica-Bold", 14)
                    c.drawString(margin_x, height - margin_y, "Anexo: Figuras (cont.)")
                    c.setFont("Helvetica", 11)
                    current_y = height - margin_y - 2 * line_height
                    available_height = current_y - margin_y
                # Figure caption.
                title = f"Figura: {img_path.name}"
                c.drawString(margin_x, current_y, title)
                current_y -= line_height
                # Load the image and scale it to the available space.
                try:
                    img = ImageReader(str(img_path))
                    iw, ih = img.getSize()
                    max_img_height = available_height - 2 * line_height
                    scale = min(max_width / iw, max_img_height / ih)
                    if scale <= 0:
                        scale = 1.0  # fallback
                    draw_w = iw * scale
                    draw_h = ih * scale
                    y_img = current_y - draw_h
                    c.drawImage(
                        img,
                        margin_x,
                        y_img,
                        width=draw_w,
                        height=draw_h,
                        preserveAspectRatio=True,
                        mask="auto",
                    )
                    current_y = y_img - 2 * line_height
                except Exception as e:
                    # Record the failure in the PDF instead of aborting.
                    err_msg = f"No se pudo cargar la imagen {img_path.name}: {e}"
                    c.drawString(margin_x, current_y, err_msg)
                    current_y -= 2 * line_height
        c.save()

View File

@@ -0,0 +1,27 @@
{
"dimensions": {
"volumetry": {
"class": "beyond_metrics.VolumetriaMetrics",
"enabled": true,
"metrics": [
"volume_by_channel",
"volume_by_skill"
]
},
"operational_performance": {
"class": "beyond_metrics.dimensions.OperationalPerformance.OperationalPerformanceMetrics",
"enabled": false,
"metrics": []
},
"customer_satisfaction": {
"class": "beyond_metrics.dimensions.SatisfactionExperience.SatisfactionExperienceMetrics",
"enabled": false,
"metrics": []
},
"economy_costs": {
"class": "beyond_metrics.dimensions.EconomyCost.EconomyCostMetrics",
"enabled": false,
"metrics": []
}
}
}

View File

@@ -0,0 +1,55 @@
{
"dimensions": {
"volumetry": {
"class": "beyond_metrics.VolumetriaMetrics",
"enabled": true,
"metrics": [
"volume_by_channel",
"volume_by_skill",
"channel_distribution_pct",
"skill_distribution_pct",
"heatmap_24x7",
"monthly_seasonality_cv",
"peak_offpeak_ratio",
"concentration_top20_skills_pct"
]
},
"operational_performance": {
"class": "beyond_metrics.dimensions.OperationalPerformance.OperationalPerformanceMetrics",
"enabled": true,
"metrics": [
"aht_distribution",
"talk_hold_acw_p50_by_skill",
"fcr_rate",
"escalation_rate",
"abandonment_rate",
"recurrence_rate_7d",
"repeat_channel_rate",
"occupancy_rate",
"performance_score"
]
},
"customer_satisfaction": {
"class": "beyond_metrics.dimensions.SatisfactionExperience.SatisfactionExperienceMetrics",
"enabled": true,
"metrics": [
"csat_avg_by_skill_channel",
"nps_avg_by_skill_channel",
"ces_avg_by_skill_channel",
"csat_aht_correlation",
"csat_aht_skill_summary"
]
},
"economy_costs": {
"class": "beyond_metrics.dimensions.EconomyCost.EconomyCostMetrics",
"enabled": true,
"metrics": [
"cpi_by_skill_channel",
"annual_cost_by_skill_channel",
"cost_breakdown",
"inefficiency_cost_by_skill_channel",
"potential_savings"
]
}
}
}

View File

@@ -0,0 +1,441 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Dict, List, Optional, Any
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.axes import Axes
# Minimum transactional-dataset schema needed by EconomyCostMetrics:
# identifier, timestamp, routing labels, and the handle-time components
# (in seconds, per the downstream calculations).
REQUIRED_COLUMNS_ECON: List[str] = [
    "interaction_id",
    "datetime_start",
    "queue_skill",
    "channel",
    "duration_talk",
    "hold_time",
    "wrap_up_time",
]
@dataclass
class EconomyConfig:
    """
    Manual parameters for the Economy & Costs dimension.

    - labor_cost_per_hour: fully loaded cost per agent hour.
    - overhead_rate: variable overhead share (e.g. 0.1 = 10% on top of labor).
    - tech_costs_annual: annual technology cost (licences, infra, ...).
    - automation_cpi: cost per automated interaction (e.g. 0.15€).
    - automation_volume_share: automatable share of the volume (0-1).
    - automation_success_rate: automation success rate (0-1).
    - customer_segments: optional skill -> segment mapping ("high"/"medium"/"low")
      for future per-segment ROI insights.
    """

    labor_cost_per_hour: float
    overhead_rate: float = 0.0
    tech_costs_annual: float = 0.0
    automation_cpi: Optional[float] = None
    automation_volume_share: float = 0.0
    automation_success_rate: float = 0.0
    customer_segments: Optional[Dict[str, str]] = None
@dataclass
class EconomyCostMetrics:
    """
    DIMENSION 4: ECONOMY AND COSTS

    Purpose:
        - Quantify the current COST (CPI, annual cost).
        - Estimate the impact of overhead and technology.
        - Produce a first estimate of the "inefficiency cost" and the
          potential savings.

    Requires:
        - The transactional dataset columns (see REQUIRED_COLUMNS_ECON).

    Optional inputs via EconomyConfig:
        - labor_cost_per_hour (mandatory for any monetary calculation).
        - overhead_rate, tech_costs_annual, automation_*.
        - customer_segments (for per-segment ROI insights).
    """

    df: pd.DataFrame
    config: Optional[EconomyConfig] = None

    def __post_init__(self) -> None:
        # Validate the schema and normalize the data once, up front.
        self._validate_columns()
        self._prepare_data()

    # ------------------------------------------------------------------ #
    # Internal helpers
    # ------------------------------------------------------------------ #
    def _validate_columns(self) -> None:
        """Raise ValueError listing every mandatory column that is missing."""
        missing = [c for c in REQUIRED_COLUMNS_ECON if c not in self.df.columns]
        if missing:
            raise ValueError(
                f"Faltan columnas obligatorias para EconomyCostMetrics: {missing}"
            )

    def _prepare_data(self) -> None:
        """Coerce dtypes, normalize labels and derive handle_time (seconds)."""
        df = self.df.copy()
        df["datetime_start"] = pd.to_datetime(df["datetime_start"], errors="coerce")
        for col in ["duration_talk", "hold_time", "wrap_up_time"]:
            df[col] = pd.to_numeric(df[col], errors="coerce")
        df["queue_skill"] = df["queue_skill"].astype(str).str.strip()
        df["channel"] = df["channel"].astype(str).str.strip()
        # Handle time = talk + hold + wrap-up; missing components count as 0.
        df["handle_time"] = (
            df["duration_talk"].fillna(0)
            + df["hold_time"].fillna(0)
            + df["wrap_up_time"].fillna(0)
        )  # seconds
        self.df = df

    @property
    def is_empty(self) -> bool:
        """True when the prepared dataset has no rows."""
        return self.df.empty

    def _has_cost_config(self) -> bool:
        # Monetary KPIs need at least a labor cost per hour.
        return self.config is not None and self.config.labor_cost_per_hour is not None

    def _volume_by_skill_channel(self) -> pd.Series:
        """Unique-interaction volume per (skill, channel), named 'volume'."""
        return (
            self.df.groupby(["queue_skill", "channel"])["interaction_id"]
            .nunique()
            .rename("volume")
        )

    # ------------------------------------------------------------------ #
    # KPI 1: CPI per channel/skill
    # ------------------------------------------------------------------ #
    def cpi_by_skill_channel(self) -> pd.DataFrame:
        """
        CPI (Cost Per Interaction) per skill/channel.

        CPI = labor_cost_per_interaction + variable_overhead
        - labor_cost_per_interaction = labor_cost_per_hour * AHT_hours
        - variable_overhead = overhead_rate * labor_cost_per_interaction

        Returns an empty DataFrame when no cost config is available.
        """
        if not self._has_cost_config():
            return pd.DataFrame()
        cfg = self.config
        assert cfg is not None  # for the type checker
        if self.df.empty:
            return pd.DataFrame()
        # Mean AHT per skill/channel (in seconds).
        aht_sec = self.df.groupby(["queue_skill", "channel"])["handle_time"].mean()
        if aht_sec.empty:
            return pd.DataFrame()
        aht_hours = aht_sec / 3600.0
        labor_cost = cfg.labor_cost_per_hour * aht_hours
        overhead = labor_cost * cfg.overhead_rate
        cpi = labor_cost + overhead
        out = pd.DataFrame(
            {
                "aht_seconds": aht_sec.round(2),
                "labor_cost": labor_cost.round(4),
                "overhead_cost": overhead.round(4),
                "cpi_total": cpi.round(4),
            }
        )
        return out.sort_index()

    # ------------------------------------------------------------------ #
    # KPI 2: annual cost per skill/channel
    # ------------------------------------------------------------------ #
    def annual_cost_by_skill_channel(self) -> pd.DataFrame:
        """
        Annual cost per skill/channel.

        annual_cost = CPI * volume (interaction count of the sample).

        Note: for simplicity the dataset is assumed to span one year. If it
        must be annualised later (e.g. the dataset covers one month) a
        scaling factor can be added to EconomyConfig.
        """
        cpi_table = self.cpi_by_skill_channel()
        if cpi_table.empty:
            return pd.DataFrame()
        joined = cpi_table.join(self._volume_by_skill_channel(), how="left").fillna(
            {"volume": 0}
        )
        joined["annual_cost"] = (joined["cpi_total"] * joined["volume"]).round(2)
        return joined

    # ------------------------------------------------------------------ #
    # KPI 3: cost breakdown (labor / tech / overhead)
    # ------------------------------------------------------------------ #
    def cost_breakdown(self) -> Dict[str, float]:
        """
        Percentage breakdown of costs: labor, overhead, tech.

        labor_total = sum(labor cost per interaction * volume)
        overhead_total = analogous, on the overhead component
        tech_total = tech_costs_annual (when provided)

        Returns percentages over the total plus the absolute annual figures.
        Returns {} when the cost config or the data is missing.
        """
        if not self._has_cost_config():
            return {}
        cfg = self.config
        assert cfg is not None
        cpi_table = self.cpi_by_skill_channel()
        if cpi_table.empty:
            return {}
        joined = cpi_table.join(self._volume_by_skill_channel(), how="left").fillna(
            {"volume": 0}
        )
        # Annual labor and overhead costs.
        annual_labor = (joined["labor_cost"] * joined["volume"]).sum()
        annual_overhead = (joined["overhead_cost"] * joined["volume"]).sum()
        annual_tech = cfg.tech_costs_annual
        total = annual_labor + annual_overhead + annual_tech
        if total <= 0:
            return {}
        return {
            "labor_pct": round(annual_labor / total * 100, 2),
            "overhead_pct": round(annual_overhead / total * 100, 2),
            "tech_pct": round(annual_tech / total * 100, 2),
            "labor_annual": round(annual_labor, 2),
            "overhead_annual": round(annual_overhead, 2),
            "tech_annual": round(annual_tech, 2),
            "total_annual": round(total, 2),
        }

    # ------------------------------------------------------------------ #
    # KPI 4: inefficiency cost (€ from variability/escalation)
    # ------------------------------------------------------------------ #
    def inefficiency_cost_by_skill_channel(self) -> pd.DataFrame:
        """
        Very simplified estimate of the inefficiency cost.

        For each skill/channel:
        - AHT_p50, AHT_p90 (seconds).
        - delta = max(0, AHT_p90 - AHT_p50).
        - ~40% of interactions are assumed to sit above the median.
        - ineff_seconds = delta * volume * 0.4
        - ineff_cost = labor cost per second * ineff_seconds

        ⚠️ This is an approximate, order-of-magnitude model.
        """
        if not self._has_cost_config():
            return pd.DataFrame()
        cfg = self.config
        assert cfg is not None

        def _pct(s: pd.Series, q: float) -> float:
            # np.percentile raises on empty input, so guard all-NaN groups.
            clean = s.dropna()
            return float(np.percentile(clean, q)) if not clean.empty else float("nan")

        grouped = self.df.groupby(["queue_skill", "channel"])
        stats = grouped["handle_time"].agg(
            aht_p50=lambda s: _pct(s, 50),
            aht_p90=lambda s: _pct(s, 90),
            volume="count",
        )
        if stats.empty:
            return pd.DataFrame()
        # The CPI table provides the labor cost per interaction.
        cpi_table = self.cpi_by_skill_channel()
        if cpi_table.empty:
            return pd.DataFrame()
        merged = stats.join(cpi_table[["labor_cost"]], how="left").fillna(0.0)
        delta = (merged["aht_p90"] - merged["aht_p50"]).clip(lower=0.0)
        affected_fraction = 0.4  # rough approximation
        ineff_seconds = delta * merged["volume"] * affected_fraction
        # labor_cost is the cost of an interaction with mean AHT; approximate
        # the cost per second as labor_cost / mean AHT.
        merged["aht_mean"] = grouped["handle_time"].mean()
        cost_per_second = (
            merged["labor_cost"] / merged["aht_mean"].replace(0, np.nan)
        ).fillna(0.0)
        merged["ineff_seconds"] = ineff_seconds.round(2)
        merged["ineff_cost"] = (ineff_seconds * cost_per_second).round(2)
        return merged[["aht_p50", "aht_p90", "volume", "ineff_seconds", "ineff_cost"]]

    # ------------------------------------------------------------------ #
    # KPI 5: potential annual savings from automation
    # ------------------------------------------------------------------ #
    def potential_savings(self) -> Dict[str, Any]:
        """
        Potential annual savings based on:

        savings = (human_CPI - automated_CPI) * automatable_volume * success_rate

        Where:
        - human_CPI = volume-weighted mean of cpi_total.
        - automated_CPI = config.automation_cpi
        - automatable_volume = total_volume * automation_volume_share
        - success_rate = automation_success_rate

        Returns {} when config parameters are missing.
        """
        if not self._has_cost_config():
            return {}
        cfg = self.config
        assert cfg is not None
        if (
            cfg.automation_cpi is None
            or cfg.automation_volume_share <= 0
            or cfg.automation_success_rate <= 0
        ):
            return {}
        cpi_table = self.annual_cost_by_skill_channel()
        if cpi_table.empty:
            return {}
        total_volume = cpi_table["volume"].sum()
        if total_volume <= 0:
            return {}
        # Volume-weighted mean human CPI.
        weighted_cpi = (
            (cpi_table["cpi_total"] * cpi_table["volume"]).sum() / total_volume
        )
        volume_automatizable = total_volume * cfg.automation_volume_share
        effective_volume = volume_automatizable * cfg.automation_success_rate
        delta_cpi = max(0.0, weighted_cpi - cfg.automation_cpi)
        annual_savings = delta_cpi * effective_volume
        return {
            "cpi_humano": round(weighted_cpi, 4),
            "cpi_automatizado": round(cfg.automation_cpi, 4),
            "volume_total": float(total_volume),
            "volume_automatizable": float(volume_automatizable),
            "effective_volume": float(effective_volume),
            "annual_savings": round(annual_savings, 2),
        }

    # ------------------------------------------------------------------ #
    # PLOTS
    # ------------------------------------------------------------------ #
    def plot_cost_waterfall(self) -> Axes:
        """
        Bar chart of the annual cost components (labor + overhead + tech).
        Shows a placeholder message when no cost config is available.
        """
        breakdown = self.cost_breakdown()
        if not breakdown:
            fig, ax = plt.subplots()
            ax.text(0.5, 0.5, "Sin configuración de costes", ha="center", va="center")
            ax.set_axis_off()
            return ax
        labels = ["Labor", "Overhead", "Tech"]
        values = [
            breakdown["labor_annual"],
            breakdown["overhead_annual"],
            breakdown["tech_annual"],
        ]
        fig, ax = plt.subplots(figsize=(8, 4))
        x = np.arange(len(labels))
        ax.bar(x, values)
        ax.set_xticks(x)
        ax.set_xticklabels(labels)
        ax.set_ylabel("€ anuales")
        ax.set_title("Desglose anual de costes")
        for idx, v in enumerate(values):
            ax.text(idx, v, f"{v:,.0f}", ha="center", va="bottom")
        ax.grid(axis="y", alpha=0.3)
        return ax

    def plot_cpi_by_channel(self) -> Axes:
        """
        Bar chart of the volume-weighted mean CPI per channel.
        Shows a placeholder message when no cost config is available.
        """
        cpi_table = self.cpi_by_skill_channel()
        if cpi_table.empty:
            fig, ax = plt.subplots()
            ax.text(0.5, 0.5, "Sin configuración de costes", ha="center", va="center")
            ax.set_axis_off()
            return ax
        joined = cpi_table.join(self._volume_by_skill_channel(), how="left").fillna(
            {"volume": 0}
        )
        # Volume-weighted mean CPI per channel, computed without
        # groupby().apply (avoids the pandas deprecation around operating
        # on the grouping columns).
        tmp = joined.reset_index()
        tmp["weighted_cpi"] = tmp["cpi_total"] * tmp["volume"]
        agg = tmp.groupby("channel")[["weighted_cpi", "volume"]].sum()
        per_channel = (
            (agg["weighted_cpi"] / agg["volume"].clip(lower=1))
            .rename("cpi_mean")
            .round(4)
        )
        fig, ax = plt.subplots(figsize=(6, 4))
        per_channel.plot(kind="bar", ax=ax)
        ax.set_xlabel("Canal")
        ax.set_ylabel("CPI medio (€)")
        ax.set_title("Coste por interacción (CPI) por canal")
        ax.grid(axis="y", alpha=0.3)
        return ax

View File

@@ -0,0 +1,481 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Dict, List
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.axes import Axes
# Minimum transactional-dataset schema for OperationalPerformanceMetrics:
# identifier, timestamp, routing labels, handle-time components (seconds,
# per the class docstring), agent id and the escalation flag.
REQUIRED_COLUMNS_OP: List[str] = [
    "interaction_id",
    "datetime_start",
    "queue_skill",
    "channel",
    "duration_talk",
    "hold_time",
    "wrap_up_time",
    "agent_id",
    "transfer_flag",
]
@dataclass
class OperationalPerformanceMetrics:
"""
Dimensión: RENDIMIENTO OPERACIONAL Y DE SERVICIO
Propósito: medir el balance entre rapidez (eficiencia) y calidad de resolución,
más la variabilidad del servicio.
Requiere como mínimo:
- interaction_id
- datetime_start
- queue_skill
- channel
- duration_talk (segundos)
- hold_time (segundos)
- wrap_up_time (segundos)
- agent_id
- transfer_flag (bool/int)
Columnas opcionales:
- is_resolved (bool/int) -> para FCR
- abandoned_flag (bool/int) -> para tasa de abandono
- customer_id / caller_id -> para reincidencia y repetición de canal
- logged_time (segundos) -> para occupancy_rate
"""
df: pd.DataFrame
# Benchmarks / parámetros de normalización (puedes ajustarlos)
AHT_GOOD: float = 300.0 # 5 min
AHT_BAD: float = 900.0 # 15 min
VAR_RATIO_GOOD: float = 1.2 # P90/P50 ~1.2 muy estable
VAR_RATIO_BAD: float = 3.0 # P90/P50 >=3 muy inestable
def __post_init__(self) -> None:
    # Validate the schema and normalize the data once, at construction time.
    self._validate_columns()
    self._prepare_data()
# ------------------------------------------------------------------ #
# Helpers internos
# ------------------------------------------------------------------ #
def _validate_columns(self) -> None:
    """Raise ValueError listing every mandatory column that is absent."""
    present = set(self.df.columns)
    missing = [col for col in REQUIRED_COLUMNS_OP if col not in present]
    if missing:
        raise ValueError(
            f"Faltan columnas obligatorias para OperationalPerformanceMetrics: {missing}"
        )
def _prepare_data(self) -> None:
    """
    Coerce dtypes, normalize labels and derive helper columns in place.

    Derived columns:
    - handle_time: talk + hold + wrap-up, in seconds (missing parts -> 0).
    - customer_id: from 'customer_id', else 'caller_id', else all-None.
    - logged_time: numeric series (NaN when the column is absent).
    """
    df = self.df.copy()
    # Types
    df["datetime_start"] = pd.to_datetime(df["datetime_start"], errors="coerce")
    for col in ["duration_talk", "hold_time", "wrap_up_time"]:
        df[col] = pd.to_numeric(df[col], errors="coerce")
    # Handle time
    df["handle_time"] = (
        df["duration_talk"].fillna(0)
        + df["hold_time"].fillna(0)
        + df["wrap_up_time"].fillna(0)
    )
    # Basic normalization of categorical labels.
    df["queue_skill"] = df["queue_skill"].astype(str).str.strip()
    df["channel"] = df["channel"].astype(str).str.strip()
    df["agent_id"] = df["agent_id"].astype(str).str.strip()
    # Optional flags cast to bool when they exist.
    # NOTE(review): .astype(int) raises on NaN — presumably the flag columns
    # are always fully populated; confirm upstream.
    for flag_col in ["is_resolved", "abandoned_flag", "transfer_flag"]:
        if flag_col in df.columns:
            df[flag_col] = df[flag_col].astype(int).astype(bool)
    # customer_id: prefer customer_id when present, fall back to caller_id.
    if "customer_id" in df.columns:
        df["customer_id"] = df["customer_id"].astype(str)
    elif "caller_id" in df.columns:
        df["customer_id"] = df["caller_id"].astype(str)
    else:
        df["customer_id"] = None
    # Optional logged_time.
    # Normalized so it is always a float series, NaN when absent.
    df["logged_time"] = pd.to_numeric(df.get("logged_time", np.nan), errors="coerce")
    self.df = df
@property
def is_empty(self) -> bool:
    """True when the prepared dataset has no rows."""
    return self.df.empty
# ------------------------------------------------------------------ #
# AHT y variabilidad
# ------------------------------------------------------------------ #
def aht_distribution(self) -> Dict[str, float]:
    """
    Return P10 / P50 / P90 of the handle time plus the P90/P50 ratio,
    which acts as a simple variability indicator. Empty data -> {}.
    """
    values = self.df["handle_time"].dropna().astype(float)
    if values.empty:
        return {}
    p10, p50, p90 = (float(np.percentile(values, q)) for q in (10, 50, 90))
    ratio = float(p90 / p50) if p50 > 0 else float("nan")
    return {
        "p10": round(p10, 2),
        "p50": round(p50, 2),
        "p90": round(p90, 2),
        "p90_p50_ratio": round(ratio, 3),
    }
def talk_hold_acw_p50_by_skill(self) -> pd.DataFrame:
    """Median (P50) talk, hold and after-call-work time per skill."""

    def _median(series: pd.Series) -> float:
        clean = series.dropna().astype(float)
        if clean.empty:
            return float("nan")
        return float(np.percentile(clean, 50))

    grouped = self.df.groupby("queue_skill")
    source_columns = {
        "talk_p50": "duration_talk",
        "hold_p50": "hold_time",
        "acw_p50": "wrap_up_time",
    }
    table = pd.DataFrame(
        {out_col: grouped[src].apply(_median) for out_col, src in source_columns.items()}
    )
    return table.round(2).sort_index()
# ------------------------------------------------------------------ #
# FCR, escalación, abandono, reincidencia, repetición canal
# ------------------------------------------------------------------ #
def fcr_rate(self) -> float:
    """
    FCR = percentage of interactions resolved on first contact,
    i.e. the share of rows with is_resolved == True.
    Returns NaN when the column is absent or there are no rows.
    """
    if "is_resolved" not in self.df.columns or self.df.empty:
        return float("nan")
    resolved_share = self.df["is_resolved"].mean()
    return float(round(resolved_share * 100, 2))
def escalation_rate(self) -> float:
    """
    Percentage of interactions requiring escalation (transfer_flag == True).
    Returns NaN for an empty dataset.
    """
    if self.df.empty:
        return float("nan")
    escalated_share = self.df["transfer_flag"].mean()
    return float(round(escalated_share * 100, 2))
def abandonment_rate(self) -> float:
    """
    Percentage of abandoned interactions, i.e. the share of rows with
    abandoned_flag == True.
    Returns NaN when the column is absent or there are no rows.
    """
    if "abandoned_flag" not in self.df.columns or self.df.empty:
        return float("nan")
    abandoned_share = self.df["abandoned_flag"].mean()
    return float(round(abandoned_share * 100, 2))
def recurrence_rate_7d(self) -> float:
    """
    Percentage of customers who contact again in < 7 days.

    Based on customer_id (or caller_id when there is no customer_id).
    A customer counts as "recurrent" when any two consecutive contacts,
    ordered by datetime_start, are less than 7 days apart.

    rate = recurrent customers / total customers * 100
    Returns NaN when there are no identifiable customers.

    Implementation note: vectorised with a single groupby().diff() instead
    of the previous per-customer Python loop, which was O(n * customers)
    and also carried a redundant duplicate emptiness check.
    """
    df = self.df.dropna(subset=["datetime_start"])
    if df["customer_id"].isna().all():
        return float("nan")
    valid = df.dropna(subset=["customer_id"]).sort_values(
        ["customer_id", "datetime_start"]
    )
    total_customers = valid["customer_id"].nunique()
    if total_customers == 0:
        return float("nan")
    # Time gap between consecutive contacts of the same customer; the first
    # contact of each customer yields NaT, which compares as False below.
    gaps = valid.groupby("customer_id")["datetime_start"].diff()
    recurrent_customers = valid.loc[
        gaps < pd.Timedelta(days=7), "customer_id"
    ].nunique()
    return float(round(recurrent_customers / total_customers * 100, 2))
def repeat_channel_rate(self) -> float:
    """
    Percentage of recurrences (< 7 days) in which the customer used the
    SAME channel.
    Returns NaN when there is no customer_id/caller_id, or when no
    recurrent pair exists.
    """
    df = self.df.dropna(subset=["datetime_start"]).copy()
    if df["customer_id"].isna().all():
        return float("nan")
    # Pair each row with the customer's next contact via shift(-1) on the
    # (customer, time)-sorted frame.
    df = df.sort_values(["customer_id", "datetime_start"])
    df["next_customer"] = df["customer_id"].shift(-1)
    df["next_datetime"] = df["datetime_start"].shift(-1)
    df["next_channel"] = df["channel"].shift(-1)
    # A "recurrence" is a consecutive pair of the same customer < 7 days apart.
    same_customer = df["customer_id"] == df["next_customer"]
    within_7d = (df["next_datetime"] - df["datetime_start"]) < pd.Timedelta(days=7)
    recurrent_mask = same_customer & within_7d
    if not recurrent_mask.any():
        return float("nan")
    same_channel = df["channel"] == df["next_channel"]
    same_channel_recurrent = (recurrent_mask & same_channel).sum()
    total_recurrent = recurrent_mask.sum()
    return float(round(same_channel_recurrent / total_recurrent * 100, 2))
# ------------------------------------------------------------------ #
# Occupancy
# ------------------------------------------------------------------ #
def occupancy_rate(self) -> float:
    """
    Occupancy rate:
        occupancy = sum(handle_time) / sum(logged_time) * 100.
    Requires the 'logged_time' column. Returns NaN when it is missing
    or sums to zero.
    """
    if "logged_time" not in self.df.columns:
        return float("nan")
    total_logged = self.df["logged_time"].fillna(0).sum()
    if total_logged == 0:
        return float("nan")
    total_handle = self.df["handle_time"].fillna(0).sum()
    return float(round(total_handle / total_logged * 100, 2))
# ------------------------------------------------------------------ #
# Performance score 0-10
# ------------------------------------------------------------------ #
def performance_score(self) -> Dict[str, float]:
    """
    Compute a 0-10 score combining:
      - AHT (lower is better)
      - FCR (higher is better)
      - Variability (P90/P50 ratio, lower is better)
      - Other factors (occupancy / escalation)

    Formula:
        score = 0.4 * (10 - AHT_norm) +
                0.3 * FCR_norm +
                0.2 * (10 - Var_norm) +
                0.1 * Otros_score
    where every *_norm value is on a 0-10 scale.

    Returns {"score": NaN} when there is no AHT distribution at all.
    """
    dist = self.aht_distribution()
    if not dist:
        return {"score": float("nan")}
    # AHT normalised: 0 (best) .. 10 (worst).
    aht_norm = self._scale_to_0_10(dist["p50"], self.AHT_GOOD, self.AHT_BAD)
    # FCR normalised straight from a 0-100 percentage to 0-10.
    fcr_pct = self.fcr_rate()
    fcr_norm = 0.0 if np.isnan(fcr_pct) else fcr_pct / 10.0
    # Variability normalised: 0 (good ratio) .. 10 (bad ratio).
    var_norm = self._scale_to_0_10(
        dist["p90_p50_ratio"], self.VAR_RATIO_GOOD, self.VAR_RATIO_BAD
    )
    # Other factors blend occupancy (ideal ~80%) and escalation (ideal low).
    other_score = self._compute_other_factors_score(
        self.occupancy_rate(), self.escalation_rate()
    )
    raw = (
        0.4 * (10.0 - aht_norm)
        + 0.3 * fcr_norm
        + 0.2 * (10.0 - var_norm)
        + 0.1 * other_score
    )
    clamped = min(10.0, max(0.0, raw))  # keep the final score inside 0-10
    return {
        "score": round(clamped, 2),
        "aht_norm": round(aht_norm, 2),
        "fcr_norm": round(fcr_norm, 2),
        "var_norm": round(var_norm, 2),
        "other_score": round(other_score, 2),
    }
def _scale_to_0_10(self, value: float, good: float, bad: float) -> float:
"""
Escala linealmente un valor:
- good -> 0
- bad -> 10
Con saturación fuera de rango.
"""
if np.isnan(value):
return 5.0 # neutro
if good == bad:
return 5.0
if good < bad:
# Menor es mejor
if value <= good:
return 0.0
if value >= bad:
return 10.0
return 10.0 * (value - good) / (bad - good)
else:
# Mayor es mejor
if value >= good:
return 0.0
if value <= bad:
return 10.0
return 10.0 * (good - value) / (good - bad)
def _compute_other_factors_score(self, occ_pct: float, esc_pct: float) -> float:
"""
Otros factores (0-10) basados en:
- ocupación ideal alrededor de 80%
- tasa de escalación ideal baja (<10%)
"""
# Ocupación: 0 penalización si está entre 75-85, se penaliza fuera
if np.isnan(occ_pct):
occ_penalty = 5.0
else:
deviation = abs(occ_pct - 80.0)
occ_penalty = min(10.0, deviation / 5.0 * 2.0) # cada 5 puntos se suman 2, máx 10
occ_score = max(0.0, 10.0 - occ_penalty)
# Escalación: 0-10 donde 0% -> 10 puntos, >=40% -> 0
if np.isnan(esc_pct):
esc_score = 5.0
else:
if esc_pct <= 0:
esc_score = 10.0
elif esc_pct >= 40:
esc_score = 0.0
else:
esc_score = 10.0 * (1.0 - esc_pct / 40.0)
# Media simple de ambos
return (occ_score + esc_score) / 2.0
# ------------------------------------------------------------------ #
# Plots
# ------------------------------------------------------------------ #
def plot_aht_boxplot_by_skill(self) -> Axes:
    """
    Boxplot of AHT per skill (a visual P10-P50-P90 spread).

    Falls back to a placeholder Axes with a message when there is no
    usable handle_time data.
    """

    def _placeholder(message: str) -> Axes:
        # Empty figure carrying only an informational message.
        fig, axis = plt.subplots()
        axis.text(0.5, 0.5, message, ha="center", va="center")
        axis.set_axis_off()
        return axis

    data = self.df.copy()
    if data.empty or "handle_time" not in data.columns:
        return _placeholder("Sin datos de AHT")
    data = data.dropna(subset=["handle_time"])
    if data.empty:
        return _placeholder("AHT no disponible")
    fig, ax = plt.subplots(figsize=(8, 4))
    data.boxplot(column="handle_time", by="queue_skill", ax=ax, showfliers=False)
    ax.set_xlabel("Skill / Cola")
    ax.set_ylabel("AHT (segundos)")
    ax.set_title("Distribución de AHT por skill")
    plt.suptitle("")  # drop pandas' automatic grouped-boxplot suptitle
    plt.xticks(rotation=45, ha="right")
    ax.grid(axis="y", alpha=0.3)
    return ax
def plot_resolution_funnel_by_skill(self) -> Axes:
    """
    Stacked-bar "funnel" of median Talk + Hold + ACW per skill (P50).

    Useful to see how the median handling time splits across phases for
    each skill. Returns a placeholder Axes when there is no data.
    """
    medians = self.talk_hold_acw_p50_by_skill()
    if medians.empty:
        fig, axis = plt.subplots()
        axis.text(0.5, 0.5, "Sin datos para funnel", ha="center", va="center")
        axis.set_axis_off()
        return axis
    fig, ax = plt.subplots(figsize=(10, 4))
    positions = np.arange(len(medians.index))
    talk = medians["talk_p50"]
    hold = medians["hold_p50"]
    acw = medians["acw_p50"]
    # Stack the three phases bottom-up: talk, then hold, then ACW.
    ax.bar(positions, talk, label="Talk P50")
    ax.bar(positions, hold, bottom=talk, label="Hold P50")
    ax.bar(positions, acw, bottom=talk + hold, label="ACW P50")
    ax.set_xticks(positions)
    ax.set_xticklabels(medians.index, rotation=45, ha="right")
    ax.set_ylabel("Segundos")
    ax.set_title("Funnel de resolución (P50) por skill")
    ax.legend()
    ax.grid(axis="y", alpha=0.3)
    return ax

View File

@@ -0,0 +1,298 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Dict, List, Any
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.axes import Axes
# Core dataset columns only; satisfaction columns (csat/nps/ces/aht) are optional.
REQUIRED_COLUMNS_SAT: List[str] = [
    "interaction_id",
    "datetime_start",
    "queue_skill",
    "channel",
    "duration_talk",
    "hold_time",
    "wrap_up_time",
]
@dataclass
class SatisfactionExperienceMetrics:
    """
    Dimension 3: SATISFACTION and EXPERIENCE.

    Every satisfaction column (csat/nps/ces/aht) is OPTIONAL. When one is
    missing, the metrics that depend on it return empty/NaN results but
    never break the pipeline.
    """

    # Interaction-level dataset; validated and normalised in __post_init__.
    df: pd.DataFrame

    def __post_init__(self) -> None:
        # Fail fast on missing core columns, then coerce dtypes once.
        self._validate_columns()
        self._prepare_data()

    # ------------------------------------------------------------------ #
    # Helpers
    # ------------------------------------------------------------------ #
    def _validate_columns(self) -> None:
        """Raise ValueError when any mandatory core column is absent."""
        missing = [c for c in REQUIRED_COLUMNS_SAT if c not in self.df.columns]
        if missing:
            raise ValueError(
                f"Faltan columnas obligatorias para SatisfactionExperienceMetrics: {missing}"
            )

    def _prepare_data(self) -> None:
        """Coerce dtypes, derive handle_time/aht and normalise label columns."""
        df = self.df.copy()
        df["datetime_start"] = pd.to_datetime(df["datetime_start"], errors="coerce")
        # Base durations always exist (validated above); coerce to numeric.
        for col in ["duration_talk", "hold_time", "wrap_up_time"]:
            df[col] = pd.to_numeric(df[col], errors="coerce")
        # Handle time = talk + hold + wrap-up, with NaNs treated as 0.
        df["handle_time"] = (
            df["duration_talk"].fillna(0)
            + df["hold_time"].fillna(0)
            + df["wrap_up_time"].fillna(0)
        )
        # csat_score is optional: df.get() yields NaN when the column is missing.
        df["csat_score"] = pd.to_numeric(df.get("csat_score", np.nan), errors="coerce")
        # aht is optional: prefer an explicit column, else fall back to handle_time.
        if "aht" in df.columns:
            df["aht"] = pd.to_numeric(df["aht"], errors="coerce")
        else:
            df["aht"] = df["handle_time"]
        # NPS / CES are optional as well.
        df["nps_score"] = pd.to_numeric(df.get("nps_score", np.nan), errors="coerce")
        df["ces_score"] = pd.to_numeric(df.get("ces_score", np.nan), errors="coerce")
        df["queue_skill"] = df["queue_skill"].astype(str).str.strip()
        df["channel"] = df["channel"].astype(str).str.strip()
        self.df = df

    @property
    def is_empty(self) -> bool:
        """True when the underlying DataFrame has no rows."""
        return self.df.empty

    # ------------------------------------------------------------------ #
    # KPIs
    # ------------------------------------------------------------------ #
    def csat_avg_by_skill_channel(self) -> pd.DataFrame:
        """
        Average CSAT per skill/channel.

        Returns an empty DataFrame when there is no csat_score data.
        """
        df = self.df
        if "csat_score" not in df.columns or df["csat_score"].notna().sum() == 0:
            return pd.DataFrame()
        df = df.dropna(subset=["csat_score"])
        if df.empty:
            return pd.DataFrame()
        pivot = (
            df.pivot_table(
                index="queue_skill",
                columns="channel",
                values="csat_score",
                aggfunc="mean",
            )
            .sort_index()
            .round(2)
        )
        return pivot

    def nps_avg_by_skill_channel(self) -> pd.DataFrame:
        """
        Average NPS per skill/channel, when nps_score is present.

        Returns an empty DataFrame otherwise.
        """
        df = self.df
        if "nps_score" not in df.columns or df["nps_score"].notna().sum() == 0:
            return pd.DataFrame()
        df = df.dropna(subset=["nps_score"])
        if df.empty:
            return pd.DataFrame()
        pivot = (
            df.pivot_table(
                index="queue_skill",
                columns="channel",
                values="nps_score",
                aggfunc="mean",
            )
            .sort_index()
            .round(2)
        )
        return pivot

    def ces_avg_by_skill_channel(self) -> pd.DataFrame:
        """
        Average CES per skill/channel, when ces_score is present.

        Returns an empty DataFrame otherwise.
        """
        df = self.df
        if "ces_score" not in df.columns or df["ces_score"].notna().sum() == 0:
            return pd.DataFrame()
        df = df.dropna(subset=["ces_score"])
        if df.empty:
            return pd.DataFrame()
        pivot = (
            df.pivot_table(
                index="queue_skill",
                columns="channel",
                values="ces_score",
                aggfunc="mean",
            )
            .sort_index()
            .round(2)
        )
        return pivot

    def csat_aht_correlation(self) -> Dict[str, Any]:
        """
        Pearson correlation between CSAT and AHT.

        Returns {"r", "n", "interpretation_code"} where the code is
        'sin_datos' (no data), 'insuficiente' (< 2 pairs),
        'sin_varianza' (a constant series), or one of
        'negativo'/'positivo'/'neutral' depending on r.
        """
        df = self.df
        if "csat_score" not in df.columns or df["csat_score"].notna().sum() == 0:
            return {"r": float("nan"), "n": 0.0, "interpretation_code": "sin_datos"}
        if "aht" not in df.columns or df["aht"].notna().sum() == 0:
            return {"r": float("nan"), "n": 0.0, "interpretation_code": "sin_datos"}
        df = df.dropna(subset=["csat_score", "aht"]).copy()
        n = len(df)
        if n < 2:
            return {"r": float("nan"), "n": float(n), "interpretation_code": "insuficiente"}
        x = df["aht"].astype(float)
        y = df["csat_score"].astype(float)
        if x.std(ddof=1) == 0 or y.std(ddof=1) == 0:
            # Correlation is undefined when either series is constant.
            return {"r": float("nan"), "n": float(n), "interpretation_code": "sin_varianza"}
        r = float(np.corrcoef(x, y)[0, 1])
        # |r| <= 0.3 is treated as no meaningful relationship.
        if r < -0.3:
            interpretation = "negativo"
        elif r > 0.3:
            interpretation = "positivo"
        else:
            interpretation = "neutral"
        return {"r": round(r, 3), "n": float(n), "interpretation_code": interpretation}

    def csat_aht_skill_summary(self) -> pd.DataFrame:
        """
        Per-skill summary with a "sweet spot" classification.

        Returns an empty DataFrame when csat or aht data is missing.
        """
        df = self.df
        if df["csat_score"].notna().sum() == 0 or df["aht"].notna().sum() == 0:
            return pd.DataFrame(columns=["csat_avg", "aht_avg", "classification"])
        df = df.dropna(subset=["csat_score", "aht"]).copy()
        if df.empty:
            return pd.DataFrame(columns=["csat_avg", "aht_avg", "classification"])
        grouped = df.groupby("queue_skill").agg(
            csat_avg=("csat_score", "mean"),
            aht_avg=("aht", "mean"),
        )
        # Global percentile thresholds used to classify each skill.
        aht_all = df["aht"].astype(float)
        csat_all = df["csat_score"].astype(float)
        aht_p40 = float(np.percentile(aht_all, 40))
        aht_p60 = float(np.percentile(aht_all, 60))
        csat_p40 = float(np.percentile(csat_all, 40))
        csat_p60 = float(np.percentile(csat_all, 60))

        def classify(row) -> str:
            # Fast and satisfying -> automation candidate; slow but still
            # satisfying -> keep a human in the loop.
            csat = row["csat_avg"]
            aht = row["aht_avg"]
            if aht <= aht_p40 and csat >= csat_p60:
                return "ideal_automatizar"
            if aht >= aht_p60 and csat >= csat_p40:
                return "requiere_humano"
            return "neutral"

        grouped["classification"] = grouped.apply(classify, axis=1)
        return grouped.round({"csat_avg": 2, "aht_avg": 2})

    # ------------------------------------------------------------------ #
    # Plots
    # ------------------------------------------------------------------ #
    def plot_csat_vs_aht_scatter(self) -> Axes:
        """
        CSAT vs AHT scatter plot, one colour per skill.

        Returns a placeholder Axes with a message when there is not
        enough data.
        """
        df = self.df
        if df["csat_score"].notna().sum() == 0 or df["aht"].notna().sum() == 0:
            fig, ax = plt.subplots()
            ax.text(0.5, 0.5, "Sin datos de CSAT/AHT", ha="center", va="center")
            ax.set_axis_off()
            return ax
        df = df.dropna(subset=["csat_score", "aht"]).copy()
        if df.empty:
            fig, ax = plt.subplots()
            ax.text(0.5, 0.5, "Sin datos de CSAT/AHT", ha="center", va="center")
            ax.set_axis_off()
            return ax
        fig, ax = plt.subplots(figsize=(8, 5))
        for skill, sub in df.groupby("queue_skill"):
            ax.scatter(sub["aht"], sub["csat_score"], label=skill, alpha=0.7)
        ax.set_xlabel("AHT (segundos)")
        ax.set_ylabel("CSAT")
        ax.set_title("CSAT vs AHT por skill")
        ax.grid(alpha=0.3)
        ax.legend(title="Skill", bbox_to_anchor=(1.05, 1), loc="upper left")
        plt.tight_layout()
        return ax

    def plot_csat_distribution(self) -> Axes:
        """
        Histogram of CSAT scores.

        Returns a placeholder Axes with a message when csat_score has no
        usable data.
        """
        df = self.df
        if "csat_score" not in df.columns or df["csat_score"].notna().sum() == 0:
            fig, ax = plt.subplots()
            ax.text(0.5, 0.5, "Sin datos de CSAT", ha="center", va="center")
            ax.set_axis_off()
            return ax
        df = df.dropna(subset=["csat_score"]).copy()
        if df.empty:
            fig, ax = plt.subplots()
            ax.text(0.5, 0.5, "Sin datos de CSAT", ha="center", va="center")
            ax.set_axis_off()
            return ax
        fig, ax = plt.subplots(figsize=(6, 4))
        ax.hist(df["csat_score"], bins=10, alpha=0.7)
        ax.set_xlabel("CSAT")
        ax.set_ylabel("Frecuencia")
        ax.set_title("Distribución de CSAT")
        ax.grid(axis="y", alpha=0.3)
        return ax

View File

@@ -0,0 +1,268 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import List
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.axes import Axes
# Minimum columns VolumetriaMetrics needs; any other columns are ignored.
REQUIRED_COLUMNS_VOLUMETRIA: List[str] = [
    "interaction_id",
    "datetime_start",
    "queue_skill",
    "channel",
]
@dataclass
class VolumetriaMetrics:
    """
    Volumetry metrics based on the new data schema.

    Minimum required columns:
    - interaction_id
    - datetime_start
    - queue_skill
    - channel

    Other columns may exist but are not needed by these metrics.
    """

    # Interaction-level dataset; validated and normalised in __post_init__.
    df: pd.DataFrame

    def __post_init__(self) -> None:
        # Fail fast on missing columns, then normalise dtypes once.
        self._validate_columns()
        self._prepare_data()

    # ------------------------------------------------------------------ #
    # Internal helpers
    # ------------------------------------------------------------------ #
    def _validate_columns(self) -> None:
        """Raise ValueError when any required column is absent."""
        missing = [c for c in REQUIRED_COLUMNS_VOLUMETRIA if c not in self.df.columns]
        if missing:
            raise ValueError(
                f"Faltan columnas obligatorias para VolumetriaMetrics: {missing}"
            )

    def _prepare_data(self) -> None:
        """Coerce datetime_start to datetime and strip the label columns."""
        df = self.df.copy()
        # Ensure datetime dtype (unparseable values become NaT).
        df["datetime_start"] = pd.to_datetime(df["datetime_start"], errors="coerce")
        # Normalise string labels.
        df["queue_skill"] = df["queue_skill"].astype(str).str.strip()
        df["channel"] = df["channel"].astype(str).str.strip()
        # Keep the prepared frame.
        self.df = df

    # ------------------------------------------------------------------ #
    # Useful properties
    # ------------------------------------------------------------------ #
    @property
    def is_empty(self) -> bool:
        """True when the underlying DataFrame has no rows."""
        return self.df.empty

    # ------------------------------------------------------------------ #
    # Numeric / tabular metrics
    # ------------------------------------------------------------------ #
    def volume_by_channel(self) -> pd.Series:
        """Number of unique interactions per channel, descending."""
        return self.df.groupby("channel")["interaction_id"].nunique().sort_values(
            ascending=False
        )

    def volume_by_skill(self) -> pd.Series:
        """Number of unique interactions per skill / queue, descending."""
        return self.df.groupby("queue_skill")["interaction_id"].nunique().sort_values(
            ascending=False
        )

    def channel_distribution_pct(self) -> pd.Series:
        """Percentage distribution of volume per channel."""
        counts = self.volume_by_channel()
        total = counts.sum()
        if total == 0:
            return counts * 0.0
        return (counts / total * 100).round(2)

    def skill_distribution_pct(self) -> pd.Series:
        """Percentage distribution of volume per skill."""
        counts = self.volume_by_skill()
        total = counts.sum()
        if total == 0:
            return counts * 0.0
        return (counts / total * 100).round(2)

    def heatmap_24x7(self) -> pd.DataFrame:
        """
        [weekday x hour] matrix with the number of interactions.

        dayofweek: 0=Monday ... 6=Sunday.
        """
        df = self.df.dropna(subset=["datetime_start"]).copy()
        if df.empty:
            # Return an empty frame, but with the expected index/columns.
            idx = range(7)
            cols = range(24)
            return pd.DataFrame(0, index=idx, columns=cols)
        df["dow"] = df["datetime_start"].dt.dayofweek
        df["hour"] = df["datetime_start"].dt.hour
        pivot = (
            df.pivot_table(
                index="dow",
                columns="hour",
                values="interaction_id",
                aggfunc="nunique",
                fill_value=0,
            )
            .reindex(index=range(7), fill_value=0)
            .reindex(columns=range(24), fill_value=0)
        )
        return pivot

    def monthly_seasonality_cv(self) -> float:
        """
        Coefficient of variation of the monthly volume.

        CV = std / mean (as a percentage). NaN with fewer than two
        observed months or a zero mean.
        """
        df = self.df.dropna(subset=["datetime_start"]).copy()
        if df.empty:
            return float("nan")
        df["year_month"] = df["datetime_start"].dt.to_period("M")
        monthly_counts = (
            df.groupby("year_month")["interaction_id"].nunique().astype(float)
        )
        if len(monthly_counts) < 2:
            return float("nan")
        mean = monthly_counts.mean()
        std = monthly_counts.std(ddof=1)
        if mean == 0:
            return float("nan")
        return float(round(std / mean * 100, 2))

    def peak_offpeak_ratio(self) -> float:
        """
        Volume ratio between peak and off-peak hours.

        Peak is defined as 10:00-19:59; everything else counts as
        off-peak. Returns inf when all volume falls in peak hours and
        NaN when there is no data.
        """
        df = self.df.dropna(subset=["datetime_start"]).copy()
        if df.empty:
            return float("nan")
        df["hour"] = df["datetime_start"].dt.hour
        peak_hours = list(range(10, 20))
        is_peak = df["hour"].isin(peak_hours)
        peak_vol = df.loc[is_peak, "interaction_id"].nunique()
        off_vol = df.loc[~is_peak, "interaction_id"].nunique()
        if off_vol == 0:
            return float("inf") if peak_vol > 0 else float("nan")
        return float(round(peak_vol / off_vol, 3))

    def concentration_top20_skills_pct(self) -> float:
        """
        % of total volume concentrated in the top 20% of skills
        (ranked by number of interactions).
        """
        counts = (
            self.df.groupby("queue_skill")["interaction_id"].nunique().sort_values(
                ascending=False
            )
        )
        n_skills = len(counts)
        if n_skills == 0:
            return float("nan")
        # At least one skill is always considered part of the "top".
        top_n = max(1, int(np.ceil(0.2 * n_skills)))
        top_vol = counts.head(top_n).sum()
        total = counts.sum()
        if total == 0:
            return float("nan")
        return float(round(top_vol / total * 100, 2))

    # ------------------------------------------------------------------ #
    # Plots
    # ------------------------------------------------------------------ #
    def plot_heatmap_24x7(self) -> Axes:
        """
        Heatmap of volume per weekday (0-6) and hour (0-23).

        Returns the Axes so the pipeline can persist the figure.
        """
        data = self.heatmap_24x7()
        fig, ax = plt.subplots(figsize=(10, 4))
        im = ax.imshow(data.values, aspect="auto", origin="lower")
        ax.set_xticks(range(24))
        ax.set_xticklabels([str(h) for h in range(24)])
        ax.set_yticks(range(7))
        # Spanish one-letter weekday labels (Mon..Sun).
        ax.set_yticklabels(["L", "M", "X", "J", "V", "S", "D"])
        ax.set_xlabel("Hora del día")
        ax.set_ylabel("Día de la semana")
        ax.set_title("Volumen por día de la semana y hora")
        plt.colorbar(im, ax=ax, label="Nº interacciones")
        return ax

    def plot_channel_distribution(self) -> Axes:
        """Bar chart of volume per channel."""
        series = self.volume_by_channel()
        fig, ax = plt.subplots(figsize=(6, 4))
        series.plot(kind="bar", ax=ax)
        ax.set_xlabel("Canal")
        ax.set_ylabel("Nº interacciones")
        ax.set_title("Volumen por canal")
        ax.grid(axis="y", alpha=0.3)
        return ax

    def plot_skill_pareto(self) -> Axes:
        """Simple Pareto chart of volume per skill (volume bars only)."""
        series = self.volume_by_skill()
        fig, ax = plt.subplots(figsize=(10, 4))
        series.plot(kind="bar", ax=ax)
        ax.set_xlabel("Skill / Cola")
        ax.set_ylabel("Nº interacciones")
        ax.set_title("Pareto de volumen por skill")
        ax.grid(axis="y", alpha=0.3)
        plt.xticks(rotation=45, ha="right")
        return ax

View File

@@ -0,0 +1,13 @@
from .Volumetria import VolumetriaMetrics
from .OperationalPerformance import OperationalPerformanceMetrics
from .SatisfactionExperience import SatisfactionExperienceMetrics
from .EconomyCost import EconomyCostMetrics, EconomyConfig
# Public names re-exported by beyond_metrics.dimensions.
__all__ = [
    # Dimensions
    "VolumetriaMetrics",
    "OperationalPerformanceMetrics",
    "SatisfactionExperienceMetrics",
    "EconomyCostMetrics",
    "EconomyConfig",
]

View File

@@ -0,0 +1,22 @@
from .base import DataSource, ResultsSink
from .local import LocalDataSource, LocalResultsSink
from .s3 import S3DataSource, S3ResultsSink
from .google_drive import (
GoogleDriveDataSource,
GoogleDriveConfig,
GoogleDriveResultsSink,
GoogleDriveSinkConfig,
)
# Public names re-exported by beyond_metrics.io.
__all__ = [
    "DataSource",
    "ResultsSink",
    "LocalDataSource",
    "LocalResultsSink",
    "S3DataSource",
    "S3ResultsSink",
    "GoogleDriveDataSource",
    "GoogleDriveConfig",
    "GoogleDriveResultsSink",
    "GoogleDriveSinkConfig",
]

View File

@@ -0,0 +1,36 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Any, Dict
import pandas as pd
from matplotlib.figure import Figure
class DataSource(ABC):
    """Read-side interface for input data (CSV)."""

    @abstractmethod
    def read_csv(self, path: str) -> pd.DataFrame:
        """
        Read a CSV and return a DataFrame.

        The meaning of 'path' depends on the implementation:
        - LocalDataSource: a filesystem path
        - S3DataSource: 's3://bucket/key'
        """
        raise NotImplementedError
class ResultsSink(ABC):
    """Write-side interface for results (JSON documents and images)."""

    @abstractmethod
    def write_json(self, path: str, data: Dict[str, Any]) -> None:
        """Write a dict as JSON at 'path'."""
        raise NotImplementedError

    @abstractmethod
    def write_figure(self, path: str, fig: Figure) -> None:
        """Persist a matplotlib figure at 'path'."""
        raise NotImplementedError

View File

@@ -0,0 +1,160 @@
# beyond_metrics/io/google_drive.py
from __future__ import annotations
import io
import json
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, Dict, Any
import pandas as pd
from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload, MediaIoBaseUpload
from .base import DataSource, ResultsSink
# Scopes requested for the Drive API client: read-only access for the data
# source plus per-file write access for the results sink.
GDRIVE_SCOPES = ["https://www.googleapis.com/auth/drive.readonly",
                 "https://www.googleapis.com/auth/drive.file"]
def _extract_file_id(file_id_or_url: str) -> str:
"""
Acepta:
- un ID directo de Google Drive (ej: '1AbC...')
- una URL de Google Drive compartida
y devuelve siempre el file_id.
"""
if "http://" not in file_id_or_url and "https://" not in file_id_or_url:
return file_id_or_url.strip()
patterns = [
r"/d/([a-zA-Z0-9_-]{10,})", # https://drive.google.com/file/d/<ID>/view
r"id=([a-zA-Z0-9_-]{10,})", # https://drive.google.com/open?id=<ID>
]
for pattern in patterns:
m = re.search(pattern, file_id_or_url)
if m:
return m.group(1)
raise ValueError(f"No se pudo extraer un file_id de la URL de Google Drive: {file_id_or_url}")
# -------- DataSource --------
@dataclass
class GoogleDriveConfig:
    """Credentials configuration for the Google Drive data source."""
    credentials_path: str  # path to the service-account JSON key file
    impersonate_user: Optional[str] = None  # optional domain-wide delegation subject
class GoogleDriveDataSource(DataSource):
    """
    DataSource that reads CSV files from Google Drive.
    """

    def __init__(self, config: GoogleDriveConfig) -> None:
        self._config = config
        # The read-only scope is sufficient for downloading files.
        self._service = self._build_service(readonly=True)

    def _build_service(self, readonly: bool = True):
        """Build an authenticated Drive v3 client from the service account."""
        scopes = ["https://www.googleapis.com/auth/drive.readonly"] if readonly else GDRIVE_SCOPES
        creds = service_account.Credentials.from_service_account_file(
            self._config.credentials_path,
            scopes=scopes,
        )
        if self._config.impersonate_user:
            # Domain-wide delegation: act on behalf of this user.
            creds = creds.with_subject(self._config.impersonate_user)
        service = build("drive", "v3", credentials=creds)
        return service

    def read_csv(self, path: str) -> pd.DataFrame:
        """
        Download the file identified by `path` (bare id or share URL) and
        parse it as CSV into a DataFrame.
        """
        file_id = _extract_file_id(path)
        request = self._service.files().get_media(fileId=file_id)
        fh = io.BytesIO()
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while not done:
            # next_chunk() returns (status, done); only the flag is needed.
            _, done = downloader.next_chunk()
        fh.seek(0)
        df = pd.read_csv(fh)
        return df
# -------- ResultsSink --------
@dataclass
class GoogleDriveSinkConfig:
    """Configuration for the Google Drive results sink."""
    credentials_path: str  # path to the service-account JSON key file
    base_folder_id: str  # ID of the Drive folder to write into
    impersonate_user: Optional[str] = None  # optional domain-wide delegation subject
class GoogleDriveResultsSink(ResultsSink):
    """
    ResultsSink that uploads JSON documents and images to a Google Drive folder.

    Note: for simplicity only the file name (basename of `path`) is used.
    That is, passing 'data/output/123/results.json' stores the file in Drive
    as 'results.json' inside base_folder_id.
    """

    def __init__(self, config: GoogleDriveSinkConfig) -> None:
        self._config = config
        self._service = self._build_service()

    def _build_service(self):
        """Build an authenticated Drive v3 client with write scopes."""
        creds = service_account.Credentials.from_service_account_file(
            self._config.credentials_path,
            scopes=GDRIVE_SCOPES,
        )
        if self._config.impersonate_user:
            # Domain-wide delegation: act on behalf of this user.
            creds = creds.with_subject(self._config.impersonate_user)
        service = build("drive", "v3", credentials=creds)
        return service

    def _upload_bytes(self, data: bytes, mime_type: str, target_path: str) -> str:
        """
        Upload an in-memory payload to Drive and return the created file_id.
        """
        filename = Path(target_path).name
        media = MediaIoBaseUpload(io.BytesIO(data), mimetype=mime_type, resumable=False)
        file_metadata = {
            "name": filename,
            "parents": [self._config.base_folder_id],
        }
        created = self._service.files().create(
            body=file_metadata,
            media_body=media,
            fields="id",
        ).execute()
        return created["id"]

    def write_json(self, path: str, data: Dict[str, Any]) -> None:
        """Serialise `data` as UTF-8 JSON and upload it under `path`'s basename."""
        payload = json.dumps(data, ensure_ascii=False, indent=2).encode("utf-8")
        self._upload_bytes(payload, "application/json", path)

    def write_figure(self, path: str, fig) -> None:
        """Render `fig` to PNG in memory and upload it under `path`'s basename."""
        # Imported lazily so matplotlib is only required when figures are written.
        from matplotlib.figure import Figure
        if not isinstance(fig, Figure):
            raise TypeError("write_figure espera un matplotlib.figure.Figure")
        buf = io.BytesIO()
        fig.savefig(buf, format="png", bbox_inches="tight")
        buf.seek(0)
        self._upload_bytes(buf.read(), "image/png", path)

View File

@@ -0,0 +1,57 @@
from __future__ import annotations
import json
import os
from typing import Any, Dict
import pandas as pd
from matplotlib.figure import Figure
from .base import DataSource, ResultsSink
class LocalDataSource(DataSource):
    """
    DataSource that reads CSV files from the local filesystem.

    Relative paths are resolved against `base_dir`; absolute paths are
    honoured as-is.
    """

    def __init__(self, base_dir: str = ".") -> None:
        self.base_dir = base_dir

    def _full_path(self, path: str) -> str:
        # Keep absolute paths untouched; hang everything else off base_dir.
        return path if os.path.isabs(path) else os.path.join(self.base_dir, path)

    def read_csv(self, path: str) -> pd.DataFrame:
        """Read the CSV at the resolved path into a DataFrame."""
        return pd.read_csv(self._full_path(path))
class LocalResultsSink(ResultsSink):
    """
    ResultsSink that writes JSON documents and matplotlib figures to the
    local filesystem, creating parent directories on demand.
    """

    def __init__(self, base_dir: str = ".") -> None:
        self.base_dir = base_dir

    def _full_path(self, path: str) -> str:
        # Resolve relative paths against base_dir, then make sure the
        # parent directory exists before any write happens.
        resolved = path if os.path.isabs(path) else os.path.join(self.base_dir, path)
        os.makedirs(os.path.dirname(resolved), exist_ok=True)
        return resolved

    def write_json(self, path: str, data: Dict[str, Any]) -> None:
        """Write `data` as pretty-printed UTF-8 JSON."""
        with open(self._full_path(path), "w", encoding="utf-8") as handle:
            json.dump(data, handle, ensure_ascii=False, indent=2)

    def write_figure(self, path: str, fig: Figure) -> None:
        """Save the figure at the resolved path (format inferred from it)."""
        fig.savefig(self._full_path(path), bbox_inches="tight")

View File

@@ -0,0 +1,62 @@
from __future__ import annotations
import io
import json
from typing import Any, Dict, Tuple
import boto3
import pandas as pd
from matplotlib.figure import Figure
from .base import DataSource, ResultsSink
def _split_s3_path(path: str) -> Tuple[str, str]:
"""
Convierte 's3://bucket/key' en (bucket, key).
"""
if not path.startswith("s3://"):
raise ValueError(f"Ruta S3 inválida: {path}")
without_scheme = path[len("s3://") :]
parts = without_scheme.split("/", 1)
if len(parts) != 2:
raise ValueError(f"Ruta S3 inválida: {path}")
return parts[0], parts[1]
class S3DataSource(DataSource):
    """
    DataSource that reads CSV files from S3 using boto3.
    """

    def __init__(self, boto3_client: Any | None = None) -> None:
        # Allow injecting a pre-configured (or fake) client for testing.
        self.s3 = boto3_client or boto3.client("s3")

    def read_csv(self, path: str) -> pd.DataFrame:
        """Download 's3://bucket/key' fully into memory and parse it as CSV."""
        bucket, key = _split_s3_path(path)
        obj = self.s3.get_object(Bucket=bucket, Key=key)
        body = obj["Body"].read()
        buffer = io.BytesIO(body)
        return pd.read_csv(buffer)
class S3ResultsSink(ResultsSink):
    """
    ResultsSink that writes JSON documents and images to S3.
    """

    def __init__(self, boto3_client: Any | None = None) -> None:
        # Allow injecting a pre-configured (or fake) client for testing.
        self.s3 = boto3_client or boto3.client("s3")

    def write_json(self, path: str, data: Dict[str, Any]) -> None:
        """Serialise `data` as UTF-8 JSON and upload it to 's3://bucket/key'."""
        bucket, key = _split_s3_path(path)
        body = json.dumps(data, ensure_ascii=False, indent=2).encode("utf-8")
        self.s3.put_object(Bucket=bucket, Key=key, Body=body)

    def write_figure(self, path: str, fig: Figure) -> None:
        """Render `fig` to an in-memory PNG and upload it to 's3://bucket/key'."""
        bucket, key = _split_s3_path(path)
        buf = io.BytesIO()
        fig.savefig(buf, format="png", bbox_inches="tight")
        buf.seek(0)
        self.s3.put_object(Bucket=bucket, Key=key, Body=buf.getvalue(), ContentType="image/png")

View File

@@ -0,0 +1,291 @@
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime
from importlib import import_module
from typing import Any, Dict, List, Mapping, Optional, cast, Callable
import logging
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.axes import Axes
from matplotlib.figure import Figure
from .io import (
DataSource,
ResultsSink,
)
LOGGER = logging.getLogger(__name__)
def setup_basic_logging(level: str = "INFO") -> None:
    """
    Configure root logging with a simple timestamped format, intended for
    scripts. Unknown level names silently fall back to INFO.
    """
    resolved_level = getattr(logging, level.upper(), logging.INFO)
    logging.basicConfig(
        level=resolved_level,
        format="%(asctime)s %(levelname)s [%(name)s] %(message)s",
    )
def _import_class(path: str) -> type:
"""
Import dinámico de una clase a partir de un string tipo:
"beyond_metrics.dimensions.VolumetriaMetrics"
"""
LOGGER.debug("Importando clase %s", path)
module_name, class_name = path.rsplit(".", 1)
module = import_module(module_name)
cls = getattr(module, class_name)
return cls
def _serialize_for_json(obj: Any) -> Any:
"""
Convierte objetos típicos de numpy/pandas en tipos JSON-friendly.
"""
if obj is None or isinstance(obj, (str, int, float, bool)):
return obj
if isinstance(obj, (np.integer, np.floating)):
return float(obj)
if isinstance(obj, pd.DataFrame):
return obj.to_dict(orient="records")
if isinstance(obj, pd.Series):
return obj.to_list()
if isinstance(obj, (list, tuple)):
return [_serialize_for_json(x) for x in obj]
if isinstance(obj, dict):
return {str(k): _serialize_for_json(v) for k, v in obj.items()}
return str(obj)
# Signature of callbacks executed after a pipeline run: (all_results, run_base, sink).
PostRunCallback = Callable[[Dict[str, Any], str, ResultsSink], None]
@dataclass
class BeyondMetricsPipeline:
    """
    Main BeyondMetrics pipeline.

    Responsibilities:
    - Read an input CSV through a DataSource (local, S3, Google Drive, ...).
    - Instantiate and run the dimensions described in `dimensions_config`.
    - Serialise numeric/tabular results to JSON through the configured sink.
    - Render and persist the figures produced by `plot_*` metrics.
    """

    datasource: DataSource  # where the input CSV is read from
    sink: ResultsSink  # where JSON results and images are written
    dimensions_config: Mapping[str, Any]  # per-dimension config: 'class', 'metrics', 'enabled'
    dimension_params: Optional[Mapping[str, Mapping[str, Any]]] = None  # extra ctor kwargs per dimension
    post_run: Optional[List[PostRunCallback]] = None  # callbacks run after results are written

    def run(
        self,
        input_path: str,
        run_dir: str,
        *,
        write_results_json: bool = True,
    ) -> Dict[str, Any]:
        """
        Execute every enabled dimension over the CSV at `input_path`.

        Parameters
        ----------
        input_path:
            Path understood by the configured DataSource.
        run_dir:
            Base output path for this run (JSON and images go under it).
        write_results_json:
            When True, a 'results.json' file is written through the sink.

        Returns
        -------
        Dict mapping dimension name -> {metric name -> serialised result}.

        Raises
        ------
        ValueError
            When the dimensions configuration is malformed.
        """
        LOGGER.info("Inicio de ejecución de BeyondMetricsPipeline")
        LOGGER.info("Leyendo CSV de entrada: %s", input_path)

        # 1) Read input data.
        df = self.datasource.read_csv(input_path)
        LOGGER.info("CSV leído con %d filas y %d columnas", df.shape[0], df.shape[1])

        # 2) Output base for this run (normalised without trailing slash).
        run_base = run_dir.rstrip("/")
        LOGGER.info("Ruta base de esta ejecución: %s", run_base)

        # 3) Run dimensions.
        dimensions_cfg = self.dimensions_config
        if not isinstance(dimensions_cfg, dict):
            raise ValueError("El bloque 'dimensions' debe ser un dict.")

        all_results: Dict[str, Any] = {}
        for dim_name, dim_cfg in dimensions_cfg.items():
            if not isinstance(dim_cfg, dict):
                raise ValueError(f"Config inválida para dimensión '{dim_name}' (debe ser dict).")
            if not dim_cfg.get("enabled", True):
                LOGGER.info("Dimensión '%s' desactivada; se omite.", dim_name)
                continue
            class_path = dim_cfg.get("class")
            if not class_path:
                raise ValueError(f"Falta 'class' en la dimensión '{dim_name}'.")
            metrics: List[str] = dim_cfg.get("metrics", [])
            if not metrics:
                LOGGER.info("Dimensión '%s' sin métricas configuradas; se omite.", dim_name)
                continue

            cls = _import_class(class_path)
            extra_kwargs = {}
            if self.dimension_params is not None:
                # Optional per-dimension constructor kwargs (e.g. cost config).
                extra_kwargs = self.dimension_params.get(dim_name, {}) or {}
            # Dimension classes receive the DataFrame in their constructor.
            instance = cls(df, **extra_kwargs)

            dim_results: Dict[str, Any] = {}
            for metric_name in metrics:
                LOGGER.info(" - Ejecutando métrica '%s.%s'", dim_name, metric_name)
                result = self._execute_metric(instance, metric_name, run_base, dim_name)
                dim_results[metric_name] = result
            all_results[dim_name] = dim_results

        # 4) Persist the aggregated results (optional).
        if write_results_json:
            results_json_path = f"{run_base}/results.json"
            LOGGER.info("Guardando resultados en JSON: %s", results_json_path)
            self.sink.write_json(results_json_path, all_results)

        # 5) Post-run callbacks (scorers, report agents, ...). A failing
        #    callback is logged but never aborts the run.
        if self.post_run:
            LOGGER.info("Ejecutando %d callbacks post-run...", len(self.post_run))
            for cb in self.post_run:
                try:
                    LOGGER.info("Ejecutando post-run callback: %s", cb)
                    cb(all_results, run_base, self.sink)
                except Exception:
                    LOGGER.exception("Error ejecutando post-run callback %s", cb)

        LOGGER.info("Ejecución completada correctamente.")
        return all_results

    def _execute_metric(
        self,
        instance: Any,
        metric_name: str,
        run_base: str,
        dim_name: str,
    ) -> Any:
        """
        Run a single metric and serialise its result.

        - Names starting with 'plot_' must return a matplotlib Axes; the
          figure is saved as PNG and {"type": "image", "path": ...} returned.
        - Categorical volumetry series are returned as {"labels", "values"}
          so the frontend knows which label each number belongs to.
        - Anything else is converted with _serialize_for_json.

        Raises ValueError when the metric does not exist on the instance,
        and TypeError when a plot metric does not return an Axes.
        """
        method = getattr(instance, metric_name, None)
        if method is None or not callable(method):
            raise ValueError(
                f"La métrica '{metric_name}' no existe en {type(instance).__name__}"
            )

        # Plot metrics: persist the rendered figure as a PNG.
        if metric_name.startswith("plot_"):
            ax = method()
            if not isinstance(ax, Axes):
                raise TypeError(
                    f"La métrica '{metric_name}' de '{type(instance).__name__}' "
                    f"debería devolver un matplotlib.axes.Axes"
                )
            fig = ax.get_figure()
            if fig is None:
                raise RuntimeError(
                    "Axes.get_figure() devolvió None, lo cual no debería pasar."
                )
            fig = cast(Figure, fig)
            filename = f"{dim_name}_{metric_name}.png"
            # BUG FIX: the image path previously ignored `filename` and used a
            # literal placeholder, so every figure was written to the same
            # bogus path. Build the real per-metric path instead.
            img_path = f"{run_base}/{filename}"
            LOGGER.debug("Guardando figura en %s", img_path)
            self.sink.write_figure(img_path, fig)
            plt.close(fig)  # release the figure to avoid matplotlib memory growth
            return {
                "type": "image",
                "path": img_path,
            }

        # Numeric / tabular metrics.
        value = method()

        # Special case: categorical volumetry series (per skill / channel).
        # Return {"labels": [...], "values": [...]} so the labels survive
        # JSON serialisation.
        if (
            dim_name == "volumetry"
            and isinstance(value, pd.Series)
            and metric_name
            in {
                "volume_by_channel",
                "volume_by_skill",
                "channel_distribution_pct",
                "skill_distribution_pct",
            }
        ):
            labels = [str(idx) for idx in value.index.tolist()]
            # Guarantee JSON-friendly numeric values.
            values = [float(v) for v in value.astype(float).tolist()]
            return {
                "labels": labels,
                "values": values,
            }

        return _serialize_for_json(value)
def load_dimensions_config(path: str) -> Dict[str, Any]:
    """
    Load a JSON configuration file and return its 'dimensions' block.

    Parameters
    ----------
    path:
        Path to a JSON file with a top-level 'dimensions' mapping.

    Returns
    -------
    Dict[str, Any]
        The 'dimensions' mapping (dimension name -> dimension config).

    Raises
    ------
    ValueError
        If the file has no 'dimensions' block, or the block is not a dict.
    """
    import json
    from pathlib import Path

    with Path(path).open("r", encoding="utf-8") as f:
        cfg = json.load(f)
    dimensions = cfg.get("dimensions")
    if dimensions is None:
        raise ValueError("El fichero de configuración debe contener un bloque 'dimensions'.")
    # Fail fast on malformed configs: the pipeline requires a dict here,
    # so reject anything else with the same message it would raise later.
    if not isinstance(dimensions, dict):
        raise ValueError("El bloque 'dimensions' debe ser un dict.")
    return dimensions
def build_pipeline(
    dimensions_config_path: str,
    datasource: DataSource,
    sink: ResultsSink,
    dimension_params: Optional[Mapping[str, Mapping[str, Any]]] = None,
    post_run: Optional[List[PostRunCallback]] = None,
) -> BeyondMetricsPipeline:
    """
    Construct a :class:`BeyondMetricsPipeline` from its configuration and IO.

    Parameters
    ----------
    dimensions_config_path:
        Path to the JSON file describing dimensions and their metrics.
    datasource:
        Already-built DataSource (local/S3/Drive).
    sink:
        Already-built ResultsSink (local/S3/Drive).
    dimension_params:
        Optional extra constructor kwargs, keyed by dimension name.
    post_run:
        Optional callbacks run after the pipeline finishes (useful for
        scorers, AI agents, etc.).

    Returns
    -------
    BeyondMetricsPipeline
        A fully-wired pipeline ready to be executed.
    """
    return BeyondMetricsPipeline(
        datasource=datasource,
        sink=sink,
        dimensions_config=load_dimensions_config(dimensions_config_path),
        dimension_params=dimension_params,
        post_run=post_run,
    )