Initial commit: frontend + backend integration

This commit is contained in:
Ignacio
2025-12-29 18:12:32 +01:00
commit 2cd6d6b95c
146 changed files with 31503 additions and 0 deletions

View File

@@ -0,0 +1,55 @@
"""
beyond_metrics package
======================
Capa pública del sistema BeyondMetrics.
Expone:
- Dimensiones (Volumetría, Eficiencia, ...)
- Pipeline principal
- Conectores de IO (local, S3, ...)
"""
from .dimensions import (
VolumetriaMetrics,
OperationalPerformanceMetrics,
SatisfactionExperienceMetrics,
EconomyCostMetrics,
)
from .pipeline import (
BeyondMetricsPipeline,
build_pipeline,
load_dimensions_config, # opcional, pero útil
)
from .io import (
DataSource,
ResultsSink,
LocalDataSource,
LocalResultsSink,
S3DataSource,
S3ResultsSink,
# si has añadido GoogleDrive, puedes exponerlo aquí también:
# GoogleDriveDataSource,
# GoogleDriveResultsSink,
)
__all__ = [
# Dimensiones
"VolumetriaMetrics",
"OperationalPerformanceMetrics",
"SatisfactionExperienceMetrics",
"EconomyCostMetrics",
# Pipeline
"BeyondMetricsPipeline",
"build_pipeline",
"load_dimensions_config",
# IO
"DataSource",
"ResultsSink",
"LocalDataSource",
"LocalResultsSink",
"S3DataSource",
"S3ResultsSink",
# "GoogleDriveDataSource",
# "GoogleDriveResultsSink",
]

View File

@@ -0,0 +1,310 @@
from __future__ import annotations
import json
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Optional, Sequence
from reportlab.lib.pagesizes import A4
from reportlab.pdfgen import canvas
from reportlab.lib.utils import ImageReader
from openai import OpenAI
# System prompt for the report model. Kept in Spanish — presumably the target
# audience of the generated report is Spanish-speaking; it frames the model as
# a contact-center consultant producing a business-oriented report from the
# BeyondMetrics JSON results.
DEFAULT_SYSTEM_PROMPT = (
    "Eres un consultor experto en contact centers. "
    "Vas a recibir resultados analíticos de un sistema de métricas "
    "(BeyondMetrics) en formato JSON. Tu tarea es generar un informe claro, "
    "accionable y orientado a negocio, destacando los principales hallazgos, "
    "riesgos y oportunidades de mejora."
)
@dataclass
class ReportAgentConfig:
    """
    Basic configuration for the report agent.

    openai_api_key:
        May be passed explicitly or read from the OPENAI_API_KEY
        environment variable.
    model:
        ChatGPT model to use, e.g. 'gpt-4.1-mini' or similar.
    system_prompt:
        System prompt controlling the style of the report.
    """

    openai_api_key: Optional[str] = None
    model: str = "gpt-4.1-mini"
    system_prompt: str = DEFAULT_SYSTEM_PROMPT
class BeyondMetricsReportAgent:
    """
    Minimal agent that:
    1) Reads the results JSON of a BeyondMetrics run.
    2) Builds a prompt from those results.
    3) Calls ChatGPT to generate the report text.
    4) Writes the report to a PDF on disk, EMBEDDING the PNG images
       produced by the pipeline as annexes.

    MVP: focused on text + embedded figures.
    """

    def __init__(self, config: Optional[ReportAgentConfig] = None) -> None:
        """
        Raises:
            RuntimeError: when no OpenAI API key is available, neither in
                the config nor in the OPENAI_API_KEY environment variable.
        """
        self.config = config or ReportAgentConfig()
        api_key = self.config.openai_api_key or os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise RuntimeError(
                "Falta la API key de OpenAI. "
                "Pásala en ReportAgentConfig(openai_api_key=...) o "
                "define la variable de entorno OPENAI_API_KEY."
            )
        # Client for the new-style OpenAI API.
        self._client = OpenAI(api_key=api_key)

    # ------------------------------------------------------------------
    # Main public API
    # ------------------------------------------------------------------
    def generate_pdf_report(
        self,
        run_base: str,
        output_pdf_path: Optional[str] = None,
        extra_user_prompt: str = "",
    ) -> str:
        """
        Generate a PDF report from a results folder.

        Parameters:
        - run_base:
            Base folder of the run. Must contain at least 'results.json'
            and, optionally, PNG images produced by the pipeline.
        - output_pdf_path:
            Full path of the output PDF. If None,
            'beyondmetrics_report.pdf' is created inside run_base.
        - extra_user_prompt:
            Additional text to fine-tune the request to the agent
            (e.g. "emphasise efficiency and SLA", etc.)

        Returns:
        - The path of the generated PDF.

        Raises:
            FileNotFoundError: when 'results.json' is missing in run_base.
        """
        run_dir = Path(run_base)
        results_json = run_dir / "results.json"
        if not results_json.exists():
            raise FileNotFoundError(
                f"No se ha encontrado {results_json}. "
                "Asegúrate de ejecutar primero el pipeline."
            )
        # 1) Load the results JSON.
        with results_json.open("r", encoding="utf-8") as f:
            results_data: Dict[str, Any] = json.load(f)
        # 2) Collect generated images (sorted for a stable annex order).
        image_files = sorted(run_dir.glob("*.png"))
        # 3) Build the user prompt.
        user_prompt = self._build_user_prompt(
            results=results_data,
            image_files=[p.name for p in image_files],
            extra_user_prompt=extra_user_prompt,
        )
        # 4) Ask ChatGPT for the report text.
        report_text = self._call_chatgpt(user_prompt)
        # 5) Write the PDF with text + embedded images.
        if output_pdf_path is None:
            output_pdf_path = str(run_dir / "beyondmetrics_report.pdf")
        self._write_pdf(output_pdf_path, report_text, image_files)
        return output_pdf_path

    # ------------------------------------------------------------------
    # Prompt construction
    # ------------------------------------------------------------------
    def _build_user_prompt(
        self,
        results: Dict[str, Any],
        image_files: Sequence[str],
        extra_user_prompt: str = "",
    ) -> str:
        """
        Build the user message that will be sent to the model.

        For the MVP the whole results JSON is serialised; it can be
        summarised later if the JSON grows too large.
        """
        results_str = json.dumps(results, indent=2, ensure_ascii=False)
        images_section = (
            "Imágenes generadas en la ejecución:\n"
            + "\n".join(f"- {name}" for name in image_files)
            if image_files
            else "No se han generado imágenes en esta ejecución."
        )
        extra = (
            f"\n\nInstrucciones adicionales del usuario:\n{extra_user_prompt}"
            if extra_user_prompt
            else ""
        )
        prompt = (
            "A continuación te proporciono los resultados de una ejecución de BeyondMetrics "
            "en formato JSON. Debes elaborar un INFORME EJECUTIVO para un cliente de "
            "contact center. El informe debe incluir:\n"
            "- Resumen ejecutivo en lenguaje de negocio.\n"
            "- Principales hallazgos por dimensión.\n"
            "- Riesgos o problemas detectados.\n"
            "- Recomendaciones accionables.\n\n"
            "Resultados (JSON):\n"
            f"{results_str}\n\n"
            f"{images_section}"
            f"{extra}"
        )
        return prompt

    # ------------------------------------------------------------------
    # ChatGPT call (new-style API)
    # ------------------------------------------------------------------
    def _call_chatgpt(self, user_prompt: str) -> str:
        """
        Call the ChatGPT model and return the content of the reply message.

        Raises:
            RuntimeError: when the model response contains no text.
        """
        resp = self._client.chat.completions.create(
            model=self.config.model,
            messages=[
                {"role": "system", "content": self.config.system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.3,
        )
        content = resp.choices[0].message.content
        if not isinstance(content, str):
            raise RuntimeError("La respuesta del modelo no contiene texto.")
        return content

    # ------------------------------------------------------------------
    # PDF writing (text + images)
    # ------------------------------------------------------------------
    def _write_pdf(
        self,
        output_path: str,
        text: str,
        image_paths: Sequence[Path],
    ) -> None:
        """
        Create an A4 PDF with:
        1) The report text (leading pages).
        2) An annex section embedding the PNG images produced by the
           pipeline, scaled to fit the page.
        """
        c = canvas.Canvas(str(output_path), pagesize=A4)
        width, height = A4
        margin_x = 50
        margin_y = 50
        max_width = width - 2 * margin_x
        line_height = 14
        c.setFont("Helvetica", 11)

        # --- Main text ---
        def _wrap_line(line: str, max_chars: int = 100) -> list[str]:
            # Greedy word wrap. A blank input line yields [""] so paragraph
            # breaks keep their vertical space in the PDF (previously they
            # were silently dropped).
            words = line.split()
            if not words:
                return [""]
            parts: list[str] = []
            current: list[str] = []
            count = 0
            for word in words:
                # Only break when the current segment is non-empty; this
                # avoids emitting an empty segment when the very first word
                # alone exceeds max_chars.
                if current and count + len(word) + 1 > max_chars:
                    parts.append(" ".join(current))
                    current = [word]
                    count = len(word) + 1
                else:
                    current.append(word)
                    count += len(word) + 1
            if current:
                parts.append(" ".join(current))
            return parts

        y = height - margin_y
        for raw_line in text.splitlines():
            for line in _wrap_line(raw_line):
                if y < margin_y:
                    c.showPage()
                    c.setFont("Helvetica", 11)
                    y = height - margin_y
                c.drawString(margin_x, y, line)
                y -= line_height

        # --- Annex: embedded figures ---
        if image_paths:
            # Start the figures on a fresh page.
            c.showPage()
            c.setFont("Helvetica-Bold", 14)
            c.drawString(margin_x, height - margin_y, "Anexo: Figuras")
            c.setFont("Helvetica", 11)
            current_y = height - margin_y - 2 * line_height
            for img_path in image_paths:
                # Move to a new page when the image cannot fit.
                available_height = current_y - margin_y
                if available_height < 100:  # minimum usable space
                    c.showPage()
                    c.setFont("Helvetica-Bold", 14)
                    c.drawString(margin_x, height - margin_y, "Anexo: Figuras (cont.)")
                    c.setFont("Helvetica", 11)
                    current_y = height - margin_y - 2 * line_height
                    available_height = current_y - margin_y
                # Figure caption.
                title = f"Figura: {img_path.name}"
                c.drawString(margin_x, current_y, title)
                current_y -= line_height
                # Load the image and scale it to the available space.
                try:
                    img = ImageReader(str(img_path))
                    iw, ih = img.getSize()
                    max_img_height = available_height - 2 * line_height
                    scale = min(max_width / iw, max_img_height / ih)
                    if scale <= 0:
                        scale = 1.0  # fallback
                    draw_w = iw * scale
                    draw_h = ih * scale
                    y_img = current_y - draw_h
                    c.drawImage(
                        img,
                        margin_x,
                        y_img,
                        width=draw_w,
                        height=draw_h,
                        preserveAspectRatio=True,
                        mask="auto",
                    )
                    current_y = y_img - 2 * line_height
                except Exception as e:
                    # Record the failure in the PDF instead of aborting.
                    err_msg = f"No se pudo cargar la imagen {img_path.name}: {e}"
                    c.drawString(margin_x, current_y, err_msg)
                    current_y -= 2 * line_height
        c.save()

View File

@@ -0,0 +1,27 @@
{
"dimensions": {
"volumetry": {
"class": "beyond_metrics.VolumetriaMetrics",
"enabled": true,
"metrics": [
"volume_by_channel",
"volume_by_skill"
]
},
"operational_performance": {
"class": "beyond_metrics.dimensions.OperationalPerformance.OperationalPerformanceMetrics",
"enabled": false,
"metrics": []
},
"customer_satisfaction": {
"class": "beyond_metrics.dimensions.SatisfactionExperience.SatisfactionExperienceMetrics",
"enabled": false,
"metrics": []
},
"economy_costs": {
"class": "beyond_metrics.dimensions.EconomyCost.EconomyCostMetrics",
"enabled": false,
"metrics": []
}
}
}

View File

@@ -0,0 +1,55 @@
{
"dimensions": {
"volumetry": {
"class": "beyond_metrics.VolumetriaMetrics",
"enabled": true,
"metrics": [
"volume_by_channel",
"volume_by_skill",
"channel_distribution_pct",
"skill_distribution_pct",
"heatmap_24x7",
"monthly_seasonality_cv",
"peak_offpeak_ratio",
"concentration_top20_skills_pct"
]
},
"operational_performance": {
"class": "beyond_metrics.dimensions.OperationalPerformance.OperationalPerformanceMetrics",
"enabled": true,
"metrics": [
"aht_distribution",
"talk_hold_acw_p50_by_skill",
"fcr_rate",
"escalation_rate",
"abandonment_rate",
"recurrence_rate_7d",
"repeat_channel_rate",
"occupancy_rate",
"performance_score"
]
},
"customer_satisfaction": {
"class": "beyond_metrics.dimensions.SatisfactionExperience.SatisfactionExperienceMetrics",
"enabled": true,
"metrics": [
"csat_avg_by_skill_channel",
"nps_avg_by_skill_channel",
"ces_avg_by_skill_channel",
"csat_aht_correlation",
"csat_aht_skill_summary"
]
},
"economy_costs": {
"class": "beyond_metrics.dimensions.EconomyCost.EconomyCostMetrics",
"enabled": true,
"metrics": [
"cpi_by_skill_channel",
"annual_cost_by_skill_channel",
"cost_breakdown",
"inefficiency_cost_by_skill_channel",
"potential_savings"
]
}
}
}

View File

@@ -0,0 +1,441 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Dict, List, Optional, Any
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.axes import Axes
# Minimum transactional-dataset schema needed by EconomyCostMetrics:
# identifier, timestamp, routing labels, and the handle-time components
# (in seconds, per the downstream calculations).
REQUIRED_COLUMNS_ECON: List[str] = [
    "interaction_id",
    "datetime_start",
    "queue_skill",
    "channel",
    "duration_talk",
    "hold_time",
    "wrap_up_time",
]
@dataclass
class EconomyConfig:
    """
    Manual parameters for the Economy & Costs dimension.

    - labor_cost_per_hour: fully loaded cost per agent hour.
    - overhead_rate: variable overhead share (e.g. 0.1 = 10% on top of labor).
    - tech_costs_annual: annual technology cost (licences, infra, ...).
    - automation_cpi: cost per automated interaction (e.g. 0.15€).
    - automation_volume_share: automatable share of the volume (0-1).
    - automation_success_rate: automation success rate (0-1).
    - customer_segments: optional skill -> segment mapping ("high"/"medium"/"low")
      for future per-segment ROI insights.
    """

    labor_cost_per_hour: float
    overhead_rate: float = 0.0
    tech_costs_annual: float = 0.0
    automation_cpi: Optional[float] = None
    automation_volume_share: float = 0.0
    automation_success_rate: float = 0.0
    customer_segments: Optional[Dict[str, str]] = None
@dataclass
class EconomyCostMetrics:
    """
    DIMENSION 4: ECONOMY AND COSTS

    Purpose:
        - Quantify the current COST (CPI, annual cost).
        - Estimate the impact of overhead and technology.
        - Produce a first estimate of the "inefficiency cost" and the
          potential savings.

    Requires:
        - The transactional dataset columns (see REQUIRED_COLUMNS_ECON).

    Optional inputs via EconomyConfig:
        - labor_cost_per_hour (mandatory for any monetary calculation).
        - overhead_rate, tech_costs_annual, automation_*.
        - customer_segments (for per-segment ROI insights).
    """

    df: pd.DataFrame
    config: Optional[EconomyConfig] = None

    def __post_init__(self) -> None:
        # Validate the schema and normalize the data once, up front.
        self._validate_columns()
        self._prepare_data()

    # ------------------------------------------------------------------ #
    # Internal helpers
    # ------------------------------------------------------------------ #
    def _validate_columns(self) -> None:
        """Raise ValueError listing every mandatory column that is missing."""
        missing = [c for c in REQUIRED_COLUMNS_ECON if c not in self.df.columns]
        if missing:
            raise ValueError(
                f"Faltan columnas obligatorias para EconomyCostMetrics: {missing}"
            )

    def _prepare_data(self) -> None:
        """Coerce dtypes, normalize labels and derive handle_time (seconds)."""
        df = self.df.copy()
        df["datetime_start"] = pd.to_datetime(df["datetime_start"], errors="coerce")
        for col in ["duration_talk", "hold_time", "wrap_up_time"]:
            df[col] = pd.to_numeric(df[col], errors="coerce")
        df["queue_skill"] = df["queue_skill"].astype(str).str.strip()
        df["channel"] = df["channel"].astype(str).str.strip()
        # Handle time = talk + hold + wrap-up; missing components count as 0.
        df["handle_time"] = (
            df["duration_talk"].fillna(0)
            + df["hold_time"].fillna(0)
            + df["wrap_up_time"].fillna(0)
        )  # seconds
        self.df = df

    @property
    def is_empty(self) -> bool:
        """True when the prepared dataset has no rows."""
        return self.df.empty

    def _has_cost_config(self) -> bool:
        # Monetary KPIs need at least a labor cost per hour.
        return self.config is not None and self.config.labor_cost_per_hour is not None

    def _volume_by_skill_channel(self) -> pd.Series:
        """Unique-interaction volume per (skill, channel), named 'volume'."""
        return (
            self.df.groupby(["queue_skill", "channel"])["interaction_id"]
            .nunique()
            .rename("volume")
        )

    # ------------------------------------------------------------------ #
    # KPI 1: CPI per channel/skill
    # ------------------------------------------------------------------ #
    def cpi_by_skill_channel(self) -> pd.DataFrame:
        """
        CPI (Cost Per Interaction) per skill/channel.

        CPI = labor_cost_per_interaction + variable_overhead
        - labor_cost_per_interaction = labor_cost_per_hour * AHT_hours
        - variable_overhead = overhead_rate * labor_cost_per_interaction

        Returns an empty DataFrame when no cost config is available.
        """
        if not self._has_cost_config():
            return pd.DataFrame()
        cfg = self.config
        assert cfg is not None  # for the type checker
        if self.df.empty:
            return pd.DataFrame()
        # Mean AHT per skill/channel (in seconds).
        aht_sec = self.df.groupby(["queue_skill", "channel"])["handle_time"].mean()
        if aht_sec.empty:
            return pd.DataFrame()
        aht_hours = aht_sec / 3600.0
        labor_cost = cfg.labor_cost_per_hour * aht_hours
        overhead = labor_cost * cfg.overhead_rate
        cpi = labor_cost + overhead
        out = pd.DataFrame(
            {
                "aht_seconds": aht_sec.round(2),
                "labor_cost": labor_cost.round(4),
                "overhead_cost": overhead.round(4),
                "cpi_total": cpi.round(4),
            }
        )
        return out.sort_index()

    # ------------------------------------------------------------------ #
    # KPI 2: annual cost per skill/channel
    # ------------------------------------------------------------------ #
    def annual_cost_by_skill_channel(self) -> pd.DataFrame:
        """
        Annual cost per skill/channel.

        annual_cost = CPI * volume (interaction count of the sample).

        Note: for simplicity the dataset is assumed to span one year. If it
        must be annualised later (e.g. the dataset covers one month) a
        scaling factor can be added to EconomyConfig.
        """
        cpi_table = self.cpi_by_skill_channel()
        if cpi_table.empty:
            return pd.DataFrame()
        joined = cpi_table.join(self._volume_by_skill_channel(), how="left").fillna(
            {"volume": 0}
        )
        joined["annual_cost"] = (joined["cpi_total"] * joined["volume"]).round(2)
        return joined

    # ------------------------------------------------------------------ #
    # KPI 3: cost breakdown (labor / tech / overhead)
    # ------------------------------------------------------------------ #
    def cost_breakdown(self) -> Dict[str, float]:
        """
        Percentage breakdown of costs: labor, overhead, tech.

        labor_total = sum(labor cost per interaction * volume)
        overhead_total = analogous, on the overhead component
        tech_total = tech_costs_annual (when provided)

        Returns percentages over the total plus the absolute annual figures.
        Returns {} when the cost config or the data is missing.
        """
        if not self._has_cost_config():
            return {}
        cfg = self.config
        assert cfg is not None
        cpi_table = self.cpi_by_skill_channel()
        if cpi_table.empty:
            return {}
        joined = cpi_table.join(self._volume_by_skill_channel(), how="left").fillna(
            {"volume": 0}
        )
        # Annual labor and overhead costs.
        annual_labor = (joined["labor_cost"] * joined["volume"]).sum()
        annual_overhead = (joined["overhead_cost"] * joined["volume"]).sum()
        annual_tech = cfg.tech_costs_annual
        total = annual_labor + annual_overhead + annual_tech
        if total <= 0:
            return {}
        return {
            "labor_pct": round(annual_labor / total * 100, 2),
            "overhead_pct": round(annual_overhead / total * 100, 2),
            "tech_pct": round(annual_tech / total * 100, 2),
            "labor_annual": round(annual_labor, 2),
            "overhead_annual": round(annual_overhead, 2),
            "tech_annual": round(annual_tech, 2),
            "total_annual": round(total, 2),
        }

    # ------------------------------------------------------------------ #
    # KPI 4: inefficiency cost (€ from variability/escalation)
    # ------------------------------------------------------------------ #
    def inefficiency_cost_by_skill_channel(self) -> pd.DataFrame:
        """
        Very simplified estimate of the inefficiency cost.

        For each skill/channel:
        - AHT_p50, AHT_p90 (seconds).
        - delta = max(0, AHT_p90 - AHT_p50).
        - ~40% of interactions are assumed to sit above the median.
        - ineff_seconds = delta * volume * 0.4
        - ineff_cost = labor cost per second * ineff_seconds

        ⚠️ This is an approximate, order-of-magnitude model.
        """
        if not self._has_cost_config():
            return pd.DataFrame()
        cfg = self.config
        assert cfg is not None

        def _pct(s: pd.Series, q: float) -> float:
            # np.percentile raises on empty input, so guard all-NaN groups.
            clean = s.dropna()
            return float(np.percentile(clean, q)) if not clean.empty else float("nan")

        grouped = self.df.groupby(["queue_skill", "channel"])
        stats = grouped["handle_time"].agg(
            aht_p50=lambda s: _pct(s, 50),
            aht_p90=lambda s: _pct(s, 90),
            volume="count",
        )
        if stats.empty:
            return pd.DataFrame()
        # The CPI table provides the labor cost per interaction.
        cpi_table = self.cpi_by_skill_channel()
        if cpi_table.empty:
            return pd.DataFrame()
        merged = stats.join(cpi_table[["labor_cost"]], how="left").fillna(0.0)
        delta = (merged["aht_p90"] - merged["aht_p50"]).clip(lower=0.0)
        affected_fraction = 0.4  # rough approximation
        ineff_seconds = delta * merged["volume"] * affected_fraction
        # labor_cost is the cost of an interaction with mean AHT; approximate
        # the cost per second as labor_cost / mean AHT.
        merged["aht_mean"] = grouped["handle_time"].mean()
        cost_per_second = (
            merged["labor_cost"] / merged["aht_mean"].replace(0, np.nan)
        ).fillna(0.0)
        merged["ineff_seconds"] = ineff_seconds.round(2)
        merged["ineff_cost"] = (ineff_seconds * cost_per_second).round(2)
        return merged[["aht_p50", "aht_p90", "volume", "ineff_seconds", "ineff_cost"]]

    # ------------------------------------------------------------------ #
    # KPI 5: potential annual savings from automation
    # ------------------------------------------------------------------ #
    def potential_savings(self) -> Dict[str, Any]:
        """
        Potential annual savings based on:

        savings = (human_CPI - automated_CPI) * automatable_volume * success_rate

        Where:
        - human_CPI = volume-weighted mean of cpi_total.
        - automated_CPI = config.automation_cpi
        - automatable_volume = total_volume * automation_volume_share
        - success_rate = automation_success_rate

        Returns {} when config parameters are missing.
        """
        if not self._has_cost_config():
            return {}
        cfg = self.config
        assert cfg is not None
        if (
            cfg.automation_cpi is None
            or cfg.automation_volume_share <= 0
            or cfg.automation_success_rate <= 0
        ):
            return {}
        cpi_table = self.annual_cost_by_skill_channel()
        if cpi_table.empty:
            return {}
        total_volume = cpi_table["volume"].sum()
        if total_volume <= 0:
            return {}
        # Volume-weighted mean human CPI.
        weighted_cpi = (
            (cpi_table["cpi_total"] * cpi_table["volume"]).sum() / total_volume
        )
        volume_automatizable = total_volume * cfg.automation_volume_share
        effective_volume = volume_automatizable * cfg.automation_success_rate
        delta_cpi = max(0.0, weighted_cpi - cfg.automation_cpi)
        annual_savings = delta_cpi * effective_volume
        return {
            "cpi_humano": round(weighted_cpi, 4),
            "cpi_automatizado": round(cfg.automation_cpi, 4),
            "volume_total": float(total_volume),
            "volume_automatizable": float(volume_automatizable),
            "effective_volume": float(effective_volume),
            "annual_savings": round(annual_savings, 2),
        }

    # ------------------------------------------------------------------ #
    # PLOTS
    # ------------------------------------------------------------------ #
    def plot_cost_waterfall(self) -> Axes:
        """
        Bar chart of the annual cost components (labor + overhead + tech).
        Shows a placeholder message when no cost config is available.
        """
        breakdown = self.cost_breakdown()
        if not breakdown:
            fig, ax = plt.subplots()
            ax.text(0.5, 0.5, "Sin configuración de costes", ha="center", va="center")
            ax.set_axis_off()
            return ax
        labels = ["Labor", "Overhead", "Tech"]
        values = [
            breakdown["labor_annual"],
            breakdown["overhead_annual"],
            breakdown["tech_annual"],
        ]
        fig, ax = plt.subplots(figsize=(8, 4))
        x = np.arange(len(labels))
        ax.bar(x, values)
        ax.set_xticks(x)
        ax.set_xticklabels(labels)
        ax.set_ylabel("€ anuales")
        ax.set_title("Desglose anual de costes")
        for idx, v in enumerate(values):
            ax.text(idx, v, f"{v:,.0f}", ha="center", va="bottom")
        ax.grid(axis="y", alpha=0.3)
        return ax

    def plot_cpi_by_channel(self) -> Axes:
        """
        Bar chart of the volume-weighted mean CPI per channel.
        Shows a placeholder message when no cost config is available.
        """
        cpi_table = self.cpi_by_skill_channel()
        if cpi_table.empty:
            fig, ax = plt.subplots()
            ax.text(0.5, 0.5, "Sin configuración de costes", ha="center", va="center")
            ax.set_axis_off()
            return ax
        joined = cpi_table.join(self._volume_by_skill_channel(), how="left").fillna(
            {"volume": 0}
        )
        # Volume-weighted mean CPI per channel, computed without
        # groupby().apply (avoids the pandas deprecation around operating
        # on the grouping columns).
        tmp = joined.reset_index()
        tmp["weighted_cpi"] = tmp["cpi_total"] * tmp["volume"]
        agg = tmp.groupby("channel")[["weighted_cpi", "volume"]].sum()
        per_channel = (
            (agg["weighted_cpi"] / agg["volume"].clip(lower=1))
            .rename("cpi_mean")
            .round(4)
        )
        fig, ax = plt.subplots(figsize=(6, 4))
        per_channel.plot(kind="bar", ax=ax)
        ax.set_xlabel("Canal")
        ax.set_ylabel("CPI medio (€)")
        ax.set_title("Coste por interacción (CPI) por canal")
        ax.grid(axis="y", alpha=0.3)
        return ax

View File

@@ -0,0 +1,481 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Dict, List
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.axes import Axes
# Minimum transactional-dataset schema for OperationalPerformanceMetrics:
# identifier, timestamp, routing labels, handle-time components (seconds,
# per the class docstring), agent id and the escalation flag.
REQUIRED_COLUMNS_OP: List[str] = [
    "interaction_id",
    "datetime_start",
    "queue_skill",
    "channel",
    "duration_talk",
    "hold_time",
    "wrap_up_time",
    "agent_id",
    "transfer_flag",
]
@dataclass
class OperationalPerformanceMetrics:
"""
Dimensión: RENDIMIENTO OPERACIONAL Y DE SERVICIO
Propósito: medir el balance entre rapidez (eficiencia) y calidad de resolución,
más la variabilidad del servicio.
Requiere como mínimo:
- interaction_id
- datetime_start
- queue_skill
- channel
- duration_talk (segundos)
- hold_time (segundos)
- wrap_up_time (segundos)
- agent_id
- transfer_flag (bool/int)
Columnas opcionales:
- is_resolved (bool/int) -> para FCR
- abandoned_flag (bool/int) -> para tasa de abandono
- customer_id / caller_id -> para reincidencia y repetición de canal
- logged_time (segundos) -> para occupancy_rate
"""
df: pd.DataFrame
# Benchmarks / parámetros de normalización (puedes ajustarlos)
AHT_GOOD: float = 300.0 # 5 min
AHT_BAD: float = 900.0 # 15 min
VAR_RATIO_GOOD: float = 1.2 # P90/P50 ~1.2 muy estable
VAR_RATIO_BAD: float = 3.0 # P90/P50 >=3 muy inestable
def __post_init__(self) -> None:
    # Validate the schema and normalize the data once, at construction time.
    self._validate_columns()
    self._prepare_data()
# ------------------------------------------------------------------ #
# Helpers internos
# ------------------------------------------------------------------ #
def _validate_columns(self) -> None:
    """Raise ValueError listing every mandatory column that is absent."""
    present = set(self.df.columns)
    missing = [col for col in REQUIRED_COLUMNS_OP if col not in present]
    if missing:
        raise ValueError(
            f"Faltan columnas obligatorias para OperationalPerformanceMetrics: {missing}"
        )
def _prepare_data(self) -> None:
    """
    Coerce dtypes, normalize labels and derive helper columns in place.

    Derived columns:
    - handle_time: talk + hold + wrap-up, in seconds (missing parts -> 0).
    - customer_id: from 'customer_id', else 'caller_id', else all-None.
    - logged_time: numeric series (NaN when the column is absent).
    """
    df = self.df.copy()
    # Types
    df["datetime_start"] = pd.to_datetime(df["datetime_start"], errors="coerce")
    for col in ["duration_talk", "hold_time", "wrap_up_time"]:
        df[col] = pd.to_numeric(df[col], errors="coerce")
    # Handle time
    df["handle_time"] = (
        df["duration_talk"].fillna(0)
        + df["hold_time"].fillna(0)
        + df["wrap_up_time"].fillna(0)
    )
    # Basic normalization of categorical labels.
    df["queue_skill"] = df["queue_skill"].astype(str).str.strip()
    df["channel"] = df["channel"].astype(str).str.strip()
    df["agent_id"] = df["agent_id"].astype(str).str.strip()
    # Optional flags cast to bool when they exist.
    # NOTE(review): .astype(int) raises on NaN — presumably the flag columns
    # are always fully populated; confirm upstream.
    for flag_col in ["is_resolved", "abandoned_flag", "transfer_flag"]:
        if flag_col in df.columns:
            df[flag_col] = df[flag_col].astype(int).astype(bool)
    # customer_id: prefer customer_id when present, fall back to caller_id.
    if "customer_id" in df.columns:
        df["customer_id"] = df["customer_id"].astype(str)
    elif "caller_id" in df.columns:
        df["customer_id"] = df["caller_id"].astype(str)
    else:
        df["customer_id"] = None
    # Optional logged_time.
    # Normalized so it is always a float series, NaN when absent.
    df["logged_time"] = pd.to_numeric(df.get("logged_time", np.nan), errors="coerce")
    self.df = df
@property
def is_empty(self) -> bool:
    """True when the prepared dataset has no rows."""
    return self.df.empty
# ------------------------------------------------------------------ #
# AHT y variabilidad
# ------------------------------------------------------------------ #
def aht_distribution(self) -> Dict[str, float]:
    """
    Return P10 / P50 / P90 of the handle time plus the P90/P50 ratio,
    which acts as a simple variability indicator. Empty data -> {}.
    """
    values = self.df["handle_time"].dropna().astype(float)
    if values.empty:
        return {}
    p10, p50, p90 = (float(np.percentile(values, q)) for q in (10, 50, 90))
    ratio = float(p90 / p50) if p50 > 0 else float("nan")
    return {
        "p10": round(p10, 2),
        "p50": round(p50, 2),
        "p90": round(p90, 2),
        "p90_p50_ratio": round(ratio, 3),
    }
def talk_hold_acw_p50_by_skill(self) -> pd.DataFrame:
    """Median (P50) talk, hold and after-call-work time per skill."""

    def _median(series: pd.Series) -> float:
        clean = series.dropna().astype(float)
        if clean.empty:
            return float("nan")
        return float(np.percentile(clean, 50))

    grouped = self.df.groupby("queue_skill")
    source_columns = {
        "talk_p50": "duration_talk",
        "hold_p50": "hold_time",
        "acw_p50": "wrap_up_time",
    }
    table = pd.DataFrame(
        {out_col: grouped[src].apply(_median) for out_col, src in source_columns.items()}
    )
    return table.round(2).sort_index()
# ------------------------------------------------------------------ #
# FCR, escalación, abandono, reincidencia, repetición canal
# ------------------------------------------------------------------ #
def fcr_rate(self) -> float:
    """
    FCR = percentage of interactions resolved on first contact,
    i.e. the share of rows with is_resolved == True.
    Returns NaN when the column is absent or there are no rows.
    """
    if "is_resolved" not in self.df.columns or self.df.empty:
        return float("nan")
    resolved_share = self.df["is_resolved"].mean()
    return float(round(resolved_share * 100, 2))
def escalation_rate(self) -> float:
    """
    Percentage of interactions requiring escalation (transfer_flag == True).
    Returns NaN for an empty dataset.
    """
    if self.df.empty:
        return float("nan")
    escalated_share = self.df["transfer_flag"].mean()
    return float(round(escalated_share * 100, 2))
def abandonment_rate(self) -> float:
    """
    Percentage of abandoned interactions, i.e. the share of rows with
    abandoned_flag == True.
    Returns NaN when the column is absent or there are no rows.
    """
    if "abandoned_flag" not in self.df.columns or self.df.empty:
        return float("nan")
    abandoned_share = self.df["abandoned_flag"].mean()
    return float(round(abandoned_share * 100, 2))
def recurrence_rate_7d(self) -> float:
    """
    Percentage of customers who contact again in < 7 days.

    Based on customer_id (or caller_id when there is no customer_id).
    A customer counts as "recurrent" when any two consecutive contacts,
    ordered by datetime_start, are less than 7 days apart.

    rate = recurrent customers / total customers * 100
    Returns NaN when there are no identifiable customers.

    Implementation note: vectorised with a single groupby().diff() instead
    of the previous per-customer Python loop, which was O(n * customers)
    and also carried a redundant duplicate emptiness check.
    """
    df = self.df.dropna(subset=["datetime_start"])
    if df["customer_id"].isna().all():
        return float("nan")
    valid = df.dropna(subset=["customer_id"]).sort_values(
        ["customer_id", "datetime_start"]
    )
    total_customers = valid["customer_id"].nunique()
    if total_customers == 0:
        return float("nan")
    # Time gap between consecutive contacts of the same customer; the first
    # contact of each customer yields NaT, which compares as False below.
    gaps = valid.groupby("customer_id")["datetime_start"].diff()
    recurrent_customers = valid.loc[
        gaps < pd.Timedelta(days=7), "customer_id"
    ].nunique()
    return float(round(recurrent_customers / total_customers * 100, 2))
def repeat_channel_rate(self) -> float:
    """
    Percentage of recurrences (< 7 days) in which the customer used the
    SAME channel.
    Returns NaN when there is no customer_id/caller_id, or when no
    recurrent pair exists.
    """
    df = self.df.dropna(subset=["datetime_start"]).copy()
    if df["customer_id"].isna().all():
        return float("nan")
    # Pair each row with the customer's next contact via shift(-1) on the
    # (customer, time)-sorted frame.
    df = df.sort_values(["customer_id", "datetime_start"])
    df["next_customer"] = df["customer_id"].shift(-1)
    df["next_datetime"] = df["datetime_start"].shift(-1)
    df["next_channel"] = df["channel"].shift(-1)
    # A "recurrence" is a consecutive pair of the same customer < 7 days apart.
    same_customer = df["customer_id"] == df["next_customer"]
    within_7d = (df["next_datetime"] - df["datetime_start"]) < pd.Timedelta(days=7)
    recurrent_mask = same_customer & within_7d
    if not recurrent_mask.any():
        return float("nan")
    same_channel = df["channel"] == df["next_channel"]
    same_channel_recurrent = (recurrent_mask & same_channel).sum()
    total_recurrent = recurrent_mask.sum()
    return float(round(same_channel_recurrent / total_recurrent * 100, 2))
# ------------------------------------------------------------------ #
# Occupancy
# ------------------------------------------------------------------ #
def occupancy_rate(self) -> float:
    """
    Occupancy rate:
        occupancy = sum(handle_time) / sum(logged_time) * 100.
    Requires the 'logged_time' column. Returns NaN when it is missing
    or sums to zero.
    """
    if "logged_time" not in self.df.columns:
        return float("nan")
    total_logged = self.df["logged_time"].fillna(0).sum()
    if total_logged == 0:
        return float("nan")
    total_handle = self.df["handle_time"].fillna(0).sum()
    return float(round(total_handle / total_logged * 100, 2))
# ------------------------------------------------------------------ #
# Performance score 0-10
# ------------------------------------------------------------------ #
def performance_score(self) -> Dict[str, float]:
    """
    Compute a 0-10 score combining:
      - AHT (lower is better)
      - FCR (higher is better)
      - Variability (P90/P50 ratio, lower is better)
      - Other factors (occupancy / escalation)

    Formula:
        score = 0.4 * (10 - AHT_norm) +
                0.3 * FCR_norm +
                0.2 * (10 - Var_norm) +
                0.1 * Otros_score
    where every *_norm value is on a 0-10 scale.

    Returns {"score": NaN} when there is no AHT distribution at all.
    """
    dist = self.aht_distribution()
    if not dist:
        return {"score": float("nan")}
    # AHT normalised: 0 (best) .. 10 (worst).
    aht_norm = self._scale_to_0_10(dist["p50"], self.AHT_GOOD, self.AHT_BAD)
    # FCR normalised straight from a 0-100 percentage to 0-10.
    fcr_pct = self.fcr_rate()
    fcr_norm = 0.0 if np.isnan(fcr_pct) else fcr_pct / 10.0
    # Variability normalised: 0 (good ratio) .. 10 (bad ratio).
    var_norm = self._scale_to_0_10(
        dist["p90_p50_ratio"], self.VAR_RATIO_GOOD, self.VAR_RATIO_BAD
    )
    # Other factors blend occupancy (ideal ~80%) and escalation (ideal low).
    other_score = self._compute_other_factors_score(
        self.occupancy_rate(), self.escalation_rate()
    )
    raw = (
        0.4 * (10.0 - aht_norm)
        + 0.3 * fcr_norm
        + 0.2 * (10.0 - var_norm)
        + 0.1 * other_score
    )
    clamped = min(10.0, max(0.0, raw))  # keep the final score inside 0-10
    return {
        "score": round(clamped, 2),
        "aht_norm": round(aht_norm, 2),
        "fcr_norm": round(fcr_norm, 2),
        "var_norm": round(var_norm, 2),
        "other_score": round(other_score, 2),
    }
def _scale_to_0_10(self, value: float, good: float, bad: float) -> float:
"""
Escala linealmente un valor:
- good -> 0
- bad -> 10
Con saturación fuera de rango.
"""
if np.isnan(value):
return 5.0 # neutro
if good == bad:
return 5.0
if good < bad:
# Menor es mejor
if value <= good:
return 0.0
if value >= bad:
return 10.0
return 10.0 * (value - good) / (bad - good)
else:
# Mayor es mejor
if value >= good:
return 0.0
if value <= bad:
return 10.0
return 10.0 * (good - value) / (good - bad)
def _compute_other_factors_score(self, occ_pct: float, esc_pct: float) -> float:
"""
Otros factores (0-10) basados en:
- ocupación ideal alrededor de 80%
- tasa de escalación ideal baja (<10%)
"""
# Ocupación: 0 penalización si está entre 75-85, se penaliza fuera
if np.isnan(occ_pct):
occ_penalty = 5.0
else:
deviation = abs(occ_pct - 80.0)
occ_penalty = min(10.0, deviation / 5.0 * 2.0) # cada 5 puntos se suman 2, máx 10
occ_score = max(0.0, 10.0 - occ_penalty)
# Escalación: 0-10 donde 0% -> 10 puntos, >=40% -> 0
if np.isnan(esc_pct):
esc_score = 5.0
else:
if esc_pct <= 0:
esc_score = 10.0
elif esc_pct >= 40:
esc_score = 0.0
else:
esc_score = 10.0 * (1.0 - esc_pct / 40.0)
# Media simple de ambos
return (occ_score + esc_score) / 2.0
# ------------------------------------------------------------------ #
# Plots
# ------------------------------------------------------------------ #
def plot_aht_boxplot_by_skill(self) -> Axes:
    """
    Boxplot of AHT per skill (a visual P10-P50-P90 spread).

    Falls back to a placeholder Axes with a message when there is no
    usable handle_time data.
    """

    def _placeholder(message: str) -> Axes:
        # Empty figure carrying only an informational message.
        fig, axis = plt.subplots()
        axis.text(0.5, 0.5, message, ha="center", va="center")
        axis.set_axis_off()
        return axis

    data = self.df.copy()
    if data.empty or "handle_time" not in data.columns:
        return _placeholder("Sin datos de AHT")
    data = data.dropna(subset=["handle_time"])
    if data.empty:
        return _placeholder("AHT no disponible")
    fig, ax = plt.subplots(figsize=(8, 4))
    data.boxplot(column="handle_time", by="queue_skill", ax=ax, showfliers=False)
    ax.set_xlabel("Skill / Cola")
    ax.set_ylabel("AHT (segundos)")
    ax.set_title("Distribución de AHT por skill")
    plt.suptitle("")  # drop pandas' automatic grouped-boxplot suptitle
    plt.xticks(rotation=45, ha="right")
    ax.grid(axis="y", alpha=0.3)
    return ax
def plot_resolution_funnel_by_skill(self) -> Axes:
    """
    Stacked-bar "funnel" of median Talk + Hold + ACW per skill (P50).

    Useful to see how the median handling time splits across phases for
    each skill. Returns a placeholder Axes when there is no data.
    """
    medians = self.talk_hold_acw_p50_by_skill()
    if medians.empty:
        fig, axis = plt.subplots()
        axis.text(0.5, 0.5, "Sin datos para funnel", ha="center", va="center")
        axis.set_axis_off()
        return axis
    fig, ax = plt.subplots(figsize=(10, 4))
    positions = np.arange(len(medians.index))
    talk = medians["talk_p50"]
    hold = medians["hold_p50"]
    acw = medians["acw_p50"]
    # Stack the three phases bottom-up: talk, then hold, then ACW.
    ax.bar(positions, talk, label="Talk P50")
    ax.bar(positions, hold, bottom=talk, label="Hold P50")
    ax.bar(positions, acw, bottom=talk + hold, label="ACW P50")
    ax.set_xticks(positions)
    ax.set_xticklabels(medians.index, rotation=45, ha="right")
    ax.set_ylabel("Segundos")
    ax.set_title("Funnel de resolución (P50) por skill")
    ax.legend()
    ax.grid(axis="y", alpha=0.3)
    return ax

View File

@@ -0,0 +1,298 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Dict, List, Any
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.axes import Axes
# Core dataset columns only; satisfaction columns (csat/nps/ces/aht) are optional.
REQUIRED_COLUMNS_SAT: List[str] = [
    "interaction_id",
    "datetime_start",
    "queue_skill",
    "channel",
    "duration_talk",
    "hold_time",
    "wrap_up_time",
]
@dataclass
class SatisfactionExperienceMetrics:
    """
    Dimension 3: SATISFACTION and EXPERIENCE.

    Every satisfaction column (csat/nps/ces/aht) is OPTIONAL. When one is
    missing, the metrics that depend on it return empty/NaN results but
    never break the pipeline.
    """

    # Interaction-level dataset; validated and normalised in __post_init__.
    df: pd.DataFrame

    def __post_init__(self) -> None:
        # Fail fast on missing core columns, then coerce dtypes once.
        self._validate_columns()
        self._prepare_data()

    # ------------------------------------------------------------------ #
    # Helpers
    # ------------------------------------------------------------------ #
    def _validate_columns(self) -> None:
        """Raise ValueError when any mandatory core column is absent."""
        missing = [c for c in REQUIRED_COLUMNS_SAT if c not in self.df.columns]
        if missing:
            raise ValueError(
                f"Faltan columnas obligatorias para SatisfactionExperienceMetrics: {missing}"
            )

    def _prepare_data(self) -> None:
        """Coerce dtypes, derive handle_time/aht and normalise label columns."""
        df = self.df.copy()
        df["datetime_start"] = pd.to_datetime(df["datetime_start"], errors="coerce")
        # Base durations always exist (validated above); coerce to numeric.
        for col in ["duration_talk", "hold_time", "wrap_up_time"]:
            df[col] = pd.to_numeric(df[col], errors="coerce")
        # Handle time = talk + hold + wrap-up, with NaNs treated as 0.
        df["handle_time"] = (
            df["duration_talk"].fillna(0)
            + df["hold_time"].fillna(0)
            + df["wrap_up_time"].fillna(0)
        )
        # csat_score is optional: df.get() yields NaN when the column is missing.
        df["csat_score"] = pd.to_numeric(df.get("csat_score", np.nan), errors="coerce")
        # aht is optional: prefer an explicit column, else fall back to handle_time.
        if "aht" in df.columns:
            df["aht"] = pd.to_numeric(df["aht"], errors="coerce")
        else:
            df["aht"] = df["handle_time"]
        # NPS / CES are optional as well.
        df["nps_score"] = pd.to_numeric(df.get("nps_score", np.nan), errors="coerce")
        df["ces_score"] = pd.to_numeric(df.get("ces_score", np.nan), errors="coerce")
        df["queue_skill"] = df["queue_skill"].astype(str).str.strip()
        df["channel"] = df["channel"].astype(str).str.strip()
        self.df = df

    @property
    def is_empty(self) -> bool:
        """True when the underlying DataFrame has no rows."""
        return self.df.empty

    # ------------------------------------------------------------------ #
    # KPIs
    # ------------------------------------------------------------------ #
    def csat_avg_by_skill_channel(self) -> pd.DataFrame:
        """
        Average CSAT per skill/channel.

        Returns an empty DataFrame when there is no csat_score data.
        """
        df = self.df
        if "csat_score" not in df.columns or df["csat_score"].notna().sum() == 0:
            return pd.DataFrame()
        df = df.dropna(subset=["csat_score"])
        if df.empty:
            return pd.DataFrame()
        pivot = (
            df.pivot_table(
                index="queue_skill",
                columns="channel",
                values="csat_score",
                aggfunc="mean",
            )
            .sort_index()
            .round(2)
        )
        return pivot

    def nps_avg_by_skill_channel(self) -> pd.DataFrame:
        """
        Average NPS per skill/channel, when nps_score is present.

        Returns an empty DataFrame otherwise.
        """
        df = self.df
        if "nps_score" not in df.columns or df["nps_score"].notna().sum() == 0:
            return pd.DataFrame()
        df = df.dropna(subset=["nps_score"])
        if df.empty:
            return pd.DataFrame()
        pivot = (
            df.pivot_table(
                index="queue_skill",
                columns="channel",
                values="nps_score",
                aggfunc="mean",
            )
            .sort_index()
            .round(2)
        )
        return pivot

    def ces_avg_by_skill_channel(self) -> pd.DataFrame:
        """
        Average CES per skill/channel, when ces_score is present.

        Returns an empty DataFrame otherwise.
        """
        df = self.df
        if "ces_score" not in df.columns or df["ces_score"].notna().sum() == 0:
            return pd.DataFrame()
        df = df.dropna(subset=["ces_score"])
        if df.empty:
            return pd.DataFrame()
        pivot = (
            df.pivot_table(
                index="queue_skill",
                columns="channel",
                values="ces_score",
                aggfunc="mean",
            )
            .sort_index()
            .round(2)
        )
        return pivot

    def csat_aht_correlation(self) -> Dict[str, Any]:
        """
        Pearson correlation between CSAT and AHT.

        Returns {"r", "n", "interpretation_code"} where the code is
        'sin_datos' (no data), 'insuficiente' (< 2 pairs),
        'sin_varianza' (a constant series), or one of
        'negativo'/'positivo'/'neutral' depending on r.
        """
        df = self.df
        if "csat_score" not in df.columns or df["csat_score"].notna().sum() == 0:
            return {"r": float("nan"), "n": 0.0, "interpretation_code": "sin_datos"}
        if "aht" not in df.columns or df["aht"].notna().sum() == 0:
            return {"r": float("nan"), "n": 0.0, "interpretation_code": "sin_datos"}
        df = df.dropna(subset=["csat_score", "aht"]).copy()
        n = len(df)
        if n < 2:
            return {"r": float("nan"), "n": float(n), "interpretation_code": "insuficiente"}
        x = df["aht"].astype(float)
        y = df["csat_score"].astype(float)
        if x.std(ddof=1) == 0 or y.std(ddof=1) == 0:
            # Correlation is undefined when either series is constant.
            return {"r": float("nan"), "n": float(n), "interpretation_code": "sin_varianza"}
        r = float(np.corrcoef(x, y)[0, 1])
        # |r| <= 0.3 is treated as no meaningful relationship.
        if r < -0.3:
            interpretation = "negativo"
        elif r > 0.3:
            interpretation = "positivo"
        else:
            interpretation = "neutral"
        return {"r": round(r, 3), "n": float(n), "interpretation_code": interpretation}

    def csat_aht_skill_summary(self) -> pd.DataFrame:
        """
        Per-skill summary with a "sweet spot" classification.

        Returns an empty DataFrame when csat or aht data is missing.
        """
        df = self.df
        if df["csat_score"].notna().sum() == 0 or df["aht"].notna().sum() == 0:
            return pd.DataFrame(columns=["csat_avg", "aht_avg", "classification"])
        df = df.dropna(subset=["csat_score", "aht"]).copy()
        if df.empty:
            return pd.DataFrame(columns=["csat_avg", "aht_avg", "classification"])
        grouped = df.groupby("queue_skill").agg(
            csat_avg=("csat_score", "mean"),
            aht_avg=("aht", "mean"),
        )
        # Global percentile thresholds used to classify each skill.
        aht_all = df["aht"].astype(float)
        csat_all = df["csat_score"].astype(float)
        aht_p40 = float(np.percentile(aht_all, 40))
        aht_p60 = float(np.percentile(aht_all, 60))
        csat_p40 = float(np.percentile(csat_all, 40))
        csat_p60 = float(np.percentile(csat_all, 60))

        def classify(row) -> str:
            # Fast and satisfying -> automation candidate; slow but still
            # satisfying -> keep a human in the loop.
            csat = row["csat_avg"]
            aht = row["aht_avg"]
            if aht <= aht_p40 and csat >= csat_p60:
                return "ideal_automatizar"
            if aht >= aht_p60 and csat >= csat_p40:
                return "requiere_humano"
            return "neutral"

        grouped["classification"] = grouped.apply(classify, axis=1)
        return grouped.round({"csat_avg": 2, "aht_avg": 2})

    # ------------------------------------------------------------------ #
    # Plots
    # ------------------------------------------------------------------ #
    def plot_csat_vs_aht_scatter(self) -> Axes:
        """
        CSAT vs AHT scatter plot, one colour per skill.

        Returns a placeholder Axes with a message when there is not
        enough data.
        """
        df = self.df
        if df["csat_score"].notna().sum() == 0 or df["aht"].notna().sum() == 0:
            fig, ax = plt.subplots()
            ax.text(0.5, 0.5, "Sin datos de CSAT/AHT", ha="center", va="center")
            ax.set_axis_off()
            return ax
        df = df.dropna(subset=["csat_score", "aht"]).copy()
        if df.empty:
            fig, ax = plt.subplots()
            ax.text(0.5, 0.5, "Sin datos de CSAT/AHT", ha="center", va="center")
            ax.set_axis_off()
            return ax
        fig, ax = plt.subplots(figsize=(8, 5))
        for skill, sub in df.groupby("queue_skill"):
            ax.scatter(sub["aht"], sub["csat_score"], label=skill, alpha=0.7)
        ax.set_xlabel("AHT (segundos)")
        ax.set_ylabel("CSAT")
        ax.set_title("CSAT vs AHT por skill")
        ax.grid(alpha=0.3)
        ax.legend(title="Skill", bbox_to_anchor=(1.05, 1), loc="upper left")
        plt.tight_layout()
        return ax

    def plot_csat_distribution(self) -> Axes:
        """
        Histogram of CSAT scores.

        Returns a placeholder Axes with a message when csat_score has no
        usable data.
        """
        df = self.df
        if "csat_score" not in df.columns or df["csat_score"].notna().sum() == 0:
            fig, ax = plt.subplots()
            ax.text(0.5, 0.5, "Sin datos de CSAT", ha="center", va="center")
            ax.set_axis_off()
            return ax
        df = df.dropna(subset=["csat_score"]).copy()
        if df.empty:
            fig, ax = plt.subplots()
            ax.text(0.5, 0.5, "Sin datos de CSAT", ha="center", va="center")
            ax.set_axis_off()
            return ax
        fig, ax = plt.subplots(figsize=(6, 4))
        ax.hist(df["csat_score"], bins=10, alpha=0.7)
        ax.set_xlabel("CSAT")
        ax.set_ylabel("Frecuencia")
        ax.set_title("Distribución de CSAT")
        ax.grid(axis="y", alpha=0.3)
        return ax

View File

@@ -0,0 +1,268 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import List
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.axes import Axes
# Minimum columns VolumetriaMetrics needs; any other columns are ignored.
REQUIRED_COLUMNS_VOLUMETRIA: List[str] = [
    "interaction_id",
    "datetime_start",
    "queue_skill",
    "channel",
]
@dataclass
class VolumetriaMetrics:
    """
    Volumetry metrics based on the new data schema.

    Minimum required columns:
    - interaction_id
    - datetime_start
    - queue_skill
    - channel

    Other columns may exist but are not needed by these metrics.
    """

    # Interaction-level dataset; validated and normalised in __post_init__.
    df: pd.DataFrame

    def __post_init__(self) -> None:
        # Fail fast on missing columns, then normalise dtypes once.
        self._validate_columns()
        self._prepare_data()

    # ------------------------------------------------------------------ #
    # Internal helpers
    # ------------------------------------------------------------------ #
    def _validate_columns(self) -> None:
        """Raise ValueError when any required column is absent."""
        missing = [c for c in REQUIRED_COLUMNS_VOLUMETRIA if c not in self.df.columns]
        if missing:
            raise ValueError(
                f"Faltan columnas obligatorias para VolumetriaMetrics: {missing}"
            )

    def _prepare_data(self) -> None:
        """Coerce datetime_start to datetime and strip the label columns."""
        df = self.df.copy()
        # Ensure datetime dtype (unparseable values become NaT).
        df["datetime_start"] = pd.to_datetime(df["datetime_start"], errors="coerce")
        # Normalise string labels.
        df["queue_skill"] = df["queue_skill"].astype(str).str.strip()
        df["channel"] = df["channel"].astype(str).str.strip()
        # Keep the prepared frame.
        self.df = df

    # ------------------------------------------------------------------ #
    # Useful properties
    # ------------------------------------------------------------------ #
    @property
    def is_empty(self) -> bool:
        """True when the underlying DataFrame has no rows."""
        return self.df.empty

    # ------------------------------------------------------------------ #
    # Numeric / tabular metrics
    # ------------------------------------------------------------------ #
    def volume_by_channel(self) -> pd.Series:
        """Number of unique interactions per channel, descending."""
        return self.df.groupby("channel")["interaction_id"].nunique().sort_values(
            ascending=False
        )

    def volume_by_skill(self) -> pd.Series:
        """Number of unique interactions per skill / queue, descending."""
        return self.df.groupby("queue_skill")["interaction_id"].nunique().sort_values(
            ascending=False
        )

    def channel_distribution_pct(self) -> pd.Series:
        """Percentage distribution of volume per channel."""
        counts = self.volume_by_channel()
        total = counts.sum()
        if total == 0:
            return counts * 0.0
        return (counts / total * 100).round(2)

    def skill_distribution_pct(self) -> pd.Series:
        """Percentage distribution of volume per skill."""
        counts = self.volume_by_skill()
        total = counts.sum()
        if total == 0:
            return counts * 0.0
        return (counts / total * 100).round(2)

    def heatmap_24x7(self) -> pd.DataFrame:
        """
        [weekday x hour] matrix with the number of interactions.

        dayofweek: 0=Monday ... 6=Sunday.
        """
        df = self.df.dropna(subset=["datetime_start"]).copy()
        if df.empty:
            # Return an empty frame, but with the expected index/columns.
            idx = range(7)
            cols = range(24)
            return pd.DataFrame(0, index=idx, columns=cols)
        df["dow"] = df["datetime_start"].dt.dayofweek
        df["hour"] = df["datetime_start"].dt.hour
        pivot = (
            df.pivot_table(
                index="dow",
                columns="hour",
                values="interaction_id",
                aggfunc="nunique",
                fill_value=0,
            )
            .reindex(index=range(7), fill_value=0)
            .reindex(columns=range(24), fill_value=0)
        )
        return pivot

    def monthly_seasonality_cv(self) -> float:
        """
        Coefficient of variation of the monthly volume.

        CV = std / mean (as a percentage). NaN with fewer than two
        observed months or a zero mean.
        """
        df = self.df.dropna(subset=["datetime_start"]).copy()
        if df.empty:
            return float("nan")
        df["year_month"] = df["datetime_start"].dt.to_period("M")
        monthly_counts = (
            df.groupby("year_month")["interaction_id"].nunique().astype(float)
        )
        if len(monthly_counts) < 2:
            return float("nan")
        mean = monthly_counts.mean()
        std = monthly_counts.std(ddof=1)
        if mean == 0:
            return float("nan")
        return float(round(std / mean * 100, 2))

    def peak_offpeak_ratio(self) -> float:
        """
        Volume ratio between peak and off-peak hours.

        Peak is defined as 10:00-19:59; everything else counts as
        off-peak. Returns inf when all volume falls in peak hours and
        NaN when there is no data.
        """
        df = self.df.dropna(subset=["datetime_start"]).copy()
        if df.empty:
            return float("nan")
        df["hour"] = df["datetime_start"].dt.hour
        peak_hours = list(range(10, 20))
        is_peak = df["hour"].isin(peak_hours)
        peak_vol = df.loc[is_peak, "interaction_id"].nunique()
        off_vol = df.loc[~is_peak, "interaction_id"].nunique()
        if off_vol == 0:
            return float("inf") if peak_vol > 0 else float("nan")
        return float(round(peak_vol / off_vol, 3))

    def concentration_top20_skills_pct(self) -> float:
        """
        % of total volume concentrated in the top 20% of skills
        (ranked by number of interactions).
        """
        counts = (
            self.df.groupby("queue_skill")["interaction_id"].nunique().sort_values(
                ascending=False
            )
        )
        n_skills = len(counts)
        if n_skills == 0:
            return float("nan")
        # At least one skill is always considered part of the "top".
        top_n = max(1, int(np.ceil(0.2 * n_skills)))
        top_vol = counts.head(top_n).sum()
        total = counts.sum()
        if total == 0:
            return float("nan")
        return float(round(top_vol / total * 100, 2))

    # ------------------------------------------------------------------ #
    # Plots
    # ------------------------------------------------------------------ #
    def plot_heatmap_24x7(self) -> Axes:
        """
        Heatmap of volume per weekday (0-6) and hour (0-23).

        Returns the Axes so the pipeline can persist the figure.
        """
        data = self.heatmap_24x7()
        fig, ax = plt.subplots(figsize=(10, 4))
        im = ax.imshow(data.values, aspect="auto", origin="lower")
        ax.set_xticks(range(24))
        ax.set_xticklabels([str(h) for h in range(24)])
        ax.set_yticks(range(7))
        # Spanish one-letter weekday labels (Mon..Sun).
        ax.set_yticklabels(["L", "M", "X", "J", "V", "S", "D"])
        ax.set_xlabel("Hora del día")
        ax.set_ylabel("Día de la semana")
        ax.set_title("Volumen por día de la semana y hora")
        plt.colorbar(im, ax=ax, label="Nº interacciones")
        return ax

    def plot_channel_distribution(self) -> Axes:
        """Bar chart of volume per channel."""
        series = self.volume_by_channel()
        fig, ax = plt.subplots(figsize=(6, 4))
        series.plot(kind="bar", ax=ax)
        ax.set_xlabel("Canal")
        ax.set_ylabel("Nº interacciones")
        ax.set_title("Volumen por canal")
        ax.grid(axis="y", alpha=0.3)
        return ax

    def plot_skill_pareto(self) -> Axes:
        """Simple Pareto chart of volume per skill (volume bars only)."""
        series = self.volume_by_skill()
        fig, ax = plt.subplots(figsize=(10, 4))
        series.plot(kind="bar", ax=ax)
        ax.set_xlabel("Skill / Cola")
        ax.set_ylabel("Nº interacciones")
        ax.set_title("Pareto de volumen por skill")
        ax.grid(axis="y", alpha=0.3)
        plt.xticks(rotation=45, ha="right")
        return ax

View File

@@ -0,0 +1,13 @@
from .Volumetria import VolumetriaMetrics
from .OperationalPerformance import OperationalPerformanceMetrics
from .SatisfactionExperience import SatisfactionExperienceMetrics
from .EconomyCost import EconomyCostMetrics, EconomyConfig
# Public names re-exported by beyond_metrics.dimensions.
__all__ = [
    # Dimensions
    "VolumetriaMetrics",
    "OperationalPerformanceMetrics",
    "SatisfactionExperienceMetrics",
    "EconomyCostMetrics",
    "EconomyConfig",
]

View File

@@ -0,0 +1,22 @@
from .base import DataSource, ResultsSink
from .local import LocalDataSource, LocalResultsSink
from .s3 import S3DataSource, S3ResultsSink
from .google_drive import (
GoogleDriveDataSource,
GoogleDriveConfig,
GoogleDriveResultsSink,
GoogleDriveSinkConfig,
)
# Public names re-exported by beyond_metrics.io.
__all__ = [
    "DataSource",
    "ResultsSink",
    "LocalDataSource",
    "LocalResultsSink",
    "S3DataSource",
    "S3ResultsSink",
    "GoogleDriveDataSource",
    "GoogleDriveConfig",
    "GoogleDriveResultsSink",
    "GoogleDriveSinkConfig",
]

View File

@@ -0,0 +1,36 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Any, Dict
import pandas as pd
from matplotlib.figure import Figure
class DataSource(ABC):
    """Read-side interface for input data (CSV)."""

    @abstractmethod
    def read_csv(self, path: str) -> pd.DataFrame:
        """
        Read a CSV and return a DataFrame.

        The meaning of 'path' depends on the implementation:
        - LocalDataSource: a filesystem path
        - S3DataSource: 's3://bucket/key'
        """
        raise NotImplementedError
class ResultsSink(ABC):
    """Write-side interface for results (JSON documents and images)."""

    @abstractmethod
    def write_json(self, path: str, data: Dict[str, Any]) -> None:
        """Write a dict as JSON at 'path'."""
        raise NotImplementedError

    @abstractmethod
    def write_figure(self, path: str, fig: Figure) -> None:
        """Persist a matplotlib figure at 'path'."""
        raise NotImplementedError

View File

@@ -0,0 +1,160 @@
# beyond_metrics/io/google_drive.py
from __future__ import annotations
import io
import json
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, Dict, Any
import pandas as pd
from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload, MediaIoBaseUpload
from .base import DataSource, ResultsSink
# Scopes requested for the Drive API client: read-only access for the data
# source plus per-file write access for the results sink.
GDRIVE_SCOPES = ["https://www.googleapis.com/auth/drive.readonly",
                 "https://www.googleapis.com/auth/drive.file"]
def _extract_file_id(file_id_or_url: str) -> str:
"""
Acepta:
- un ID directo de Google Drive (ej: '1AbC...')
- una URL de Google Drive compartida
y devuelve siempre el file_id.
"""
if "http://" not in file_id_or_url and "https://" not in file_id_or_url:
return file_id_or_url.strip()
patterns = [
r"/d/([a-zA-Z0-9_-]{10,})", # https://drive.google.com/file/d/<ID>/view
r"id=([a-zA-Z0-9_-]{10,})", # https://drive.google.com/open?id=<ID>
]
for pattern in patterns:
m = re.search(pattern, file_id_or_url)
if m:
return m.group(1)
raise ValueError(f"No se pudo extraer un file_id de la URL de Google Drive: {file_id_or_url}")
# -------- DataSource --------
@dataclass
class GoogleDriveConfig:
    """Credentials configuration for the Google Drive data source."""
    credentials_path: str  # path to the service-account JSON key file
    impersonate_user: Optional[str] = None  # optional domain-wide delegation subject
class GoogleDriveDataSource(DataSource):
    """
    DataSource that reads CSV files from Google Drive.
    """

    def __init__(self, config: GoogleDriveConfig) -> None:
        self._config = config
        # The read-only scope is sufficient for downloading files.
        self._service = self._build_service(readonly=True)

    def _build_service(self, readonly: bool = True):
        """Build an authenticated Drive v3 client from the service account."""
        scopes = ["https://www.googleapis.com/auth/drive.readonly"] if readonly else GDRIVE_SCOPES
        creds = service_account.Credentials.from_service_account_file(
            self._config.credentials_path,
            scopes=scopes,
        )
        if self._config.impersonate_user:
            # Domain-wide delegation: act on behalf of this user.
            creds = creds.with_subject(self._config.impersonate_user)
        service = build("drive", "v3", credentials=creds)
        return service

    def read_csv(self, path: str) -> pd.DataFrame:
        """
        Download the file identified by `path` (bare id or share URL) and
        parse it as CSV into a DataFrame.
        """
        file_id = _extract_file_id(path)
        request = self._service.files().get_media(fileId=file_id)
        fh = io.BytesIO()
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while not done:
            # next_chunk() returns (status, done); only the flag is needed.
            _, done = downloader.next_chunk()
        fh.seek(0)
        df = pd.read_csv(fh)
        return df
# -------- ResultsSink --------
@dataclass
class GoogleDriveSinkConfig:
    """Configuration for the Google Drive results sink."""
    credentials_path: str  # path to the service-account JSON key file
    base_folder_id: str  # ID of the Drive folder to write into
    impersonate_user: Optional[str] = None  # optional domain-wide delegation subject
class GoogleDriveResultsSink(ResultsSink):
    """
    ResultsSink that uploads JSON documents and images to a Google Drive folder.

    Note: for simplicity only the file name (basename of `path`) is used.
    That is, passing 'data/output/123/results.json' stores the file in Drive
    as 'results.json' inside base_folder_id.
    """

    def __init__(self, config: GoogleDriveSinkConfig) -> None:
        self._config = config
        self._service = self._build_service()

    def _build_service(self):
        """Build an authenticated Drive v3 client with write scopes."""
        creds = service_account.Credentials.from_service_account_file(
            self._config.credentials_path,
            scopes=GDRIVE_SCOPES,
        )
        if self._config.impersonate_user:
            # Domain-wide delegation: act on behalf of this user.
            creds = creds.with_subject(self._config.impersonate_user)
        service = build("drive", "v3", credentials=creds)
        return service

    def _upload_bytes(self, data: bytes, mime_type: str, target_path: str) -> str:
        """
        Upload an in-memory payload to Drive and return the created file_id.
        """
        filename = Path(target_path).name
        media = MediaIoBaseUpload(io.BytesIO(data), mimetype=mime_type, resumable=False)
        file_metadata = {
            "name": filename,
            "parents": [self._config.base_folder_id],
        }
        created = self._service.files().create(
            body=file_metadata,
            media_body=media,
            fields="id",
        ).execute()
        return created["id"]

    def write_json(self, path: str, data: Dict[str, Any]) -> None:
        """Serialise `data` as UTF-8 JSON and upload it under `path`'s basename."""
        payload = json.dumps(data, ensure_ascii=False, indent=2).encode("utf-8")
        self._upload_bytes(payload, "application/json", path)

    def write_figure(self, path: str, fig) -> None:
        """Render `fig` to PNG in memory and upload it under `path`'s basename."""
        # Imported lazily so matplotlib is only required when figures are written.
        from matplotlib.figure import Figure
        if not isinstance(fig, Figure):
            raise TypeError("write_figure espera un matplotlib.figure.Figure")
        buf = io.BytesIO()
        fig.savefig(buf, format="png", bbox_inches="tight")
        buf.seek(0)
        self._upload_bytes(buf.read(), "image/png", path)

View File

@@ -0,0 +1,57 @@
from __future__ import annotations
import json
import os
from typing import Any, Dict
import pandas as pd
from matplotlib.figure import Figure
from .base import DataSource, ResultsSink
class LocalDataSource(DataSource):
    """
    DataSource that reads CSV files from the local filesystem.

    Relative paths are resolved against `base_dir`; absolute paths are
    honoured as-is.
    """

    def __init__(self, base_dir: str = ".") -> None:
        self.base_dir = base_dir

    def _full_path(self, path: str) -> str:
        # Keep absolute paths untouched; hang everything else off base_dir.
        return path if os.path.isabs(path) else os.path.join(self.base_dir, path)

    def read_csv(self, path: str) -> pd.DataFrame:
        """Read the CSV at the resolved path into a DataFrame."""
        return pd.read_csv(self._full_path(path))
class LocalResultsSink(ResultsSink):
    """
    ResultsSink that writes JSON documents and matplotlib figures to the
    local filesystem, creating parent directories on demand.
    """

    def __init__(self, base_dir: str = ".") -> None:
        self.base_dir = base_dir

    def _full_path(self, path: str) -> str:
        # Resolve relative paths against base_dir, then make sure the
        # parent directory exists before any write happens.
        resolved = path if os.path.isabs(path) else os.path.join(self.base_dir, path)
        os.makedirs(os.path.dirname(resolved), exist_ok=True)
        return resolved

    def write_json(self, path: str, data: Dict[str, Any]) -> None:
        """Write `data` as pretty-printed UTF-8 JSON."""
        with open(self._full_path(path), "w", encoding="utf-8") as handle:
            json.dump(data, handle, ensure_ascii=False, indent=2)

    def write_figure(self, path: str, fig: Figure) -> None:
        """Save the figure at the resolved path (format inferred from it)."""
        fig.savefig(self._full_path(path), bbox_inches="tight")

View File

@@ -0,0 +1,62 @@
from __future__ import annotations
import io
import json
from typing import Any, Dict, Tuple
import boto3
import pandas as pd
from matplotlib.figure import Figure
from .base import DataSource, ResultsSink
def _split_s3_path(path: str) -> Tuple[str, str]:
"""
Convierte 's3://bucket/key' en (bucket, key).
"""
if not path.startswith("s3://"):
raise ValueError(f"Ruta S3 inválida: {path}")
without_scheme = path[len("s3://") :]
parts = without_scheme.split("/", 1)
if len(parts) != 2:
raise ValueError(f"Ruta S3 inválida: {path}")
return parts[0], parts[1]
class S3DataSource(DataSource):
    """
    DataSource that reads CSV files from S3 using boto3.
    """

    def __init__(self, boto3_client: Any | None = None) -> None:
        # Allow injecting a pre-configured (or fake) client for testing.
        self.s3 = boto3_client or boto3.client("s3")

    def read_csv(self, path: str) -> pd.DataFrame:
        """Download 's3://bucket/key' fully into memory and parse it as CSV."""
        bucket, key = _split_s3_path(path)
        obj = self.s3.get_object(Bucket=bucket, Key=key)
        body = obj["Body"].read()
        buffer = io.BytesIO(body)
        return pd.read_csv(buffer)
class S3ResultsSink(ResultsSink):
    """
    ResultsSink that writes JSON documents and images to S3.
    """

    def __init__(self, boto3_client: Any | None = None) -> None:
        # Allow injecting a pre-configured (or fake) client for testing.
        self.s3 = boto3_client or boto3.client("s3")

    def write_json(self, path: str, data: Dict[str, Any]) -> None:
        """Serialise `data` as UTF-8 JSON and upload it to 's3://bucket/key'."""
        bucket, key = _split_s3_path(path)
        body = json.dumps(data, ensure_ascii=False, indent=2).encode("utf-8")
        self.s3.put_object(Bucket=bucket, Key=key, Body=body)

    def write_figure(self, path: str, fig: Figure) -> None:
        """Render `fig` to an in-memory PNG and upload it to 's3://bucket/key'."""
        bucket, key = _split_s3_path(path)
        buf = io.BytesIO()
        fig.savefig(buf, format="png", bbox_inches="tight")
        buf.seek(0)
        self.s3.put_object(Bucket=bucket, Key=key, Body=buf.getvalue(), ContentType="image/png")

View File

@@ -0,0 +1,291 @@
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime
from importlib import import_module
from typing import Any, Dict, List, Mapping, Optional, cast, Callable
import logging
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.axes import Axes
from matplotlib.figure import Figure
from .io import (
DataSource,
ResultsSink,
)
LOGGER = logging.getLogger(__name__)
def setup_basic_logging(level: str = "INFO") -> None:
    """
    Configure root logging with a simple timestamped format, intended for
    scripts. Unknown level names silently fall back to INFO.
    """
    resolved_level = getattr(logging, level.upper(), logging.INFO)
    logging.basicConfig(
        level=resolved_level,
        format="%(asctime)s %(levelname)s [%(name)s] %(message)s",
    )
def _import_class(path: str) -> type:
"""
Import dinámico de una clase a partir de un string tipo:
"beyond_metrics.dimensions.VolumetriaMetrics"
"""
LOGGER.debug("Importando clase %s", path)
module_name, class_name = path.rsplit(".", 1)
module = import_module(module_name)
cls = getattr(module, class_name)
return cls
def _serialize_for_json(obj: Any) -> Any:
"""
Convierte objetos típicos de numpy/pandas en tipos JSON-friendly.
"""
if obj is None or isinstance(obj, (str, int, float, bool)):
return obj
if isinstance(obj, (np.integer, np.floating)):
return float(obj)
if isinstance(obj, pd.DataFrame):
return obj.to_dict(orient="records")
if isinstance(obj, pd.Series):
return obj.to_list()
if isinstance(obj, (list, tuple)):
return [_serialize_for_json(x) for x in obj]
if isinstance(obj, dict):
return {str(k): _serialize_for_json(v) for k, v in obj.items()}
return str(obj)
# Signature of callbacks executed after a pipeline run: (all_results, run_base, sink).
PostRunCallback = Callable[[Dict[str, Any], str, ResultsSink], None]
@dataclass
class BeyondMetricsPipeline:
    """
    Main BeyondMetrics pipeline.

    Responsibilities:
    - Read an input CSV through a DataSource (local, S3, Google Drive, ...).
    - Instantiate and run the dimensions described in `dimensions_config`.
    - Serialise numeric/tabular results to JSON through the configured sink.
    - Render and persist the figures produced by `plot_*` metrics.
    """

    datasource: DataSource  # where the input CSV is read from
    sink: ResultsSink  # where JSON results and images are written
    dimensions_config: Mapping[str, Any]  # per-dimension config: 'class', 'metrics', 'enabled'
    dimension_params: Optional[Mapping[str, Mapping[str, Any]]] = None  # extra ctor kwargs per dimension
    post_run: Optional[List[PostRunCallback]] = None  # callbacks run after results are written

    def run(
        self,
        input_path: str,
        run_dir: str,
        *,
        write_results_json: bool = True,
    ) -> Dict[str, Any]:
        """
        Execute every enabled dimension over the CSV at `input_path`.

        Parameters
        ----------
        input_path:
            Path understood by the configured DataSource.
        run_dir:
            Base output path for this run (JSON and images go under it).
        write_results_json:
            When True, a 'results.json' file is written through the sink.

        Returns
        -------
        Dict mapping dimension name -> {metric name -> serialised result}.

        Raises
        ------
        ValueError
            When the dimensions configuration is malformed.
        """
        LOGGER.info("Inicio de ejecución de BeyondMetricsPipeline")
        LOGGER.info("Leyendo CSV de entrada: %s", input_path)

        # 1) Read input data.
        df = self.datasource.read_csv(input_path)
        LOGGER.info("CSV leído con %d filas y %d columnas", df.shape[0], df.shape[1])

        # 2) Output base for this run (normalised without trailing slash).
        run_base = run_dir.rstrip("/")
        LOGGER.info("Ruta base de esta ejecución: %s", run_base)

        # 3) Run dimensions.
        dimensions_cfg = self.dimensions_config
        if not isinstance(dimensions_cfg, dict):
            raise ValueError("El bloque 'dimensions' debe ser un dict.")

        all_results: Dict[str, Any] = {}
        for dim_name, dim_cfg in dimensions_cfg.items():
            if not isinstance(dim_cfg, dict):
                raise ValueError(f"Config inválida para dimensión '{dim_name}' (debe ser dict).")
            if not dim_cfg.get("enabled", True):
                LOGGER.info("Dimensión '%s' desactivada; se omite.", dim_name)
                continue
            class_path = dim_cfg.get("class")
            if not class_path:
                raise ValueError(f"Falta 'class' en la dimensión '{dim_name}'.")
            metrics: List[str] = dim_cfg.get("metrics", [])
            if not metrics:
                LOGGER.info("Dimensión '%s' sin métricas configuradas; se omite.", dim_name)
                continue

            cls = _import_class(class_path)
            extra_kwargs = {}
            if self.dimension_params is not None:
                # Optional per-dimension constructor kwargs (e.g. cost config).
                extra_kwargs = self.dimension_params.get(dim_name, {}) or {}
            # Dimension classes receive the DataFrame in their constructor.
            instance = cls(df, **extra_kwargs)

            dim_results: Dict[str, Any] = {}
            for metric_name in metrics:
                LOGGER.info(" - Ejecutando métrica '%s.%s'", dim_name, metric_name)
                result = self._execute_metric(instance, metric_name, run_base, dim_name)
                dim_results[metric_name] = result
            all_results[dim_name] = dim_results

        # 4) Persist the aggregated results (optional).
        if write_results_json:
            results_json_path = f"{run_base}/results.json"
            LOGGER.info("Guardando resultados en JSON: %s", results_json_path)
            self.sink.write_json(results_json_path, all_results)

        # 5) Post-run callbacks (scorers, report agents, ...). A failing
        #    callback is logged but never aborts the run.
        if self.post_run:
            LOGGER.info("Ejecutando %d callbacks post-run...", len(self.post_run))
            for cb in self.post_run:
                try:
                    LOGGER.info("Ejecutando post-run callback: %s", cb)
                    cb(all_results, run_base, self.sink)
                except Exception:
                    LOGGER.exception("Error ejecutando post-run callback %s", cb)

        LOGGER.info("Ejecución completada correctamente.")
        return all_results

    def _execute_metric(
        self,
        instance: Any,
        metric_name: str,
        run_base: str,
        dim_name: str,
    ) -> Any:
        """
        Run a single metric and serialise its result.

        - Names starting with 'plot_' must return a matplotlib Axes; the
          figure is saved as PNG and {"type": "image", "path": ...} returned.
        - Categorical volumetry series are returned as {"labels", "values"}
          so the frontend knows which label each number belongs to.
        - Anything else is converted with _serialize_for_json.

        Raises ValueError when the metric does not exist on the instance,
        and TypeError when a plot metric does not return an Axes.
        """
        method = getattr(instance, metric_name, None)
        if method is None or not callable(method):
            raise ValueError(
                f"La métrica '{metric_name}' no existe en {type(instance).__name__}"
            )

        # Plot metrics: persist the rendered figure as a PNG.
        if metric_name.startswith("plot_"):
            ax = method()
            if not isinstance(ax, Axes):
                raise TypeError(
                    f"La métrica '{metric_name}' de '{type(instance).__name__}' "
                    f"debería devolver un matplotlib.axes.Axes"
                )
            fig = ax.get_figure()
            if fig is None:
                raise RuntimeError(
                    "Axes.get_figure() devolvió None, lo cual no debería pasar."
                )
            fig = cast(Figure, fig)
            filename = f"{dim_name}_{metric_name}.png"
            # BUG FIX: the image path previously ignored `filename` and used a
            # literal placeholder, so every figure was written to the same
            # bogus path. Build the real per-metric path instead.
            img_path = f"{run_base}/{filename}"
            LOGGER.debug("Guardando figura en %s", img_path)
            self.sink.write_figure(img_path, fig)
            plt.close(fig)  # release the figure to avoid matplotlib memory growth
            return {
                "type": "image",
                "path": img_path,
            }

        # Numeric / tabular metrics.
        value = method()

        # Special case: categorical volumetry series (per skill / channel).
        # Return {"labels": [...], "values": [...]} so the labels survive
        # JSON serialisation.
        if (
            dim_name == "volumetry"
            and isinstance(value, pd.Series)
            and metric_name
            in {
                "volume_by_channel",
                "volume_by_skill",
                "channel_distribution_pct",
                "skill_distribution_pct",
            }
        ):
            labels = [str(idx) for idx in value.index.tolist()]
            # Guarantee JSON-friendly numeric values.
            values = [float(v) for v in value.astype(float).tolist()]
            return {
                "labels": labels,
                "values": values,
            }

        return _serialize_for_json(value)
def load_dimensions_config(path: str) -> Dict[str, Any]:
    """
    Load a JSON configuration file and return its 'dimensions' block.

    Parameters
    ----------
    path:
        Path to a JSON file with a top-level 'dimensions' mapping.

    Returns
    -------
    Dict[str, Any]
        The 'dimensions' mapping (dimension name -> dimension config).

    Raises
    ------
    ValueError
        If the file has no 'dimensions' block, or the block is not a dict.
    """
    import json
    from pathlib import Path

    with Path(path).open("r", encoding="utf-8") as f:
        cfg = json.load(f)
    dimensions = cfg.get("dimensions")
    if dimensions is None:
        raise ValueError("El fichero de configuración debe contener un bloque 'dimensions'.")
    # Fail fast on malformed configs: the pipeline requires a dict here,
    # so reject anything else with the same message it would raise later.
    if not isinstance(dimensions, dict):
        raise ValueError("El bloque 'dimensions' debe ser un dict.")
    return dimensions
def build_pipeline(
    dimensions_config_path: str,
    datasource: DataSource,
    sink: ResultsSink,
    dimension_params: Optional[Mapping[str, Mapping[str, Any]]] = None,
    post_run: Optional[List[PostRunCallback]] = None,
) -> BeyondMetricsPipeline:
    """
    Construct a :class:`BeyondMetricsPipeline` from its configuration and IO.

    Parameters
    ----------
    dimensions_config_path:
        Path to the JSON file describing dimensions and their metrics.
    datasource:
        Already-built DataSource (local/S3/Drive).
    sink:
        Already-built ResultsSink (local/S3/Drive).
    dimension_params:
        Optional extra constructor kwargs, keyed by dimension name.
    post_run:
        Optional callbacks run after the pipeline finishes (useful for
        scorers, AI agents, etc.).

    Returns
    -------
    BeyondMetricsPipeline
        A fully-wired pipeline ready to be executed.
    """
    return BeyondMetricsPipeline(
        datasource=datasource,
        sink=sink,
        dimensions_config=load_dimensions_config(dimensions_config_path),
        dimension_params=dimension_params,
        post_run=post_run,
    )