Initial commit - ACME demo version

This commit is contained in:
sujucu70
2026-02-04 11:08:21 +01:00
commit 1bb0765766
180 changed files with 52249 additions and 0 deletions

13
backend/.dockerignore Normal file
View File

@@ -0,0 +1,13 @@
.venv
__pycache__
*.pyc
*.pyo
*.pyd
.git
.gitignore
test_results
dist
build
data/output
*.zip
.DS_Store

15
backend/.gitignore vendored Normal file
View File

@@ -0,0 +1,15 @@
__pycache__/
*.py[cod]
*.pyo
*.pyd
*.log
.env
.venv
venv/
env/
.idea/
.vscode/
.ipynb_checkpoints/
dist/
build/
*.egg-info/

31
backend/Dockerfile Normal file
View File

@@ -0,0 +1,31 @@
# backend/Dockerfile
# Image for the backend API: installs the project with pip and serves
# beyond_api.main:app with uvicorn on port 8000.
FROM python:3.11-slim
# Do not write .pyc files and keep stdout/stderr unbuffered
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
WORKDIR /app
# Minimal system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
&& rm -rf /var/lib/apt/lists/*
# Copy only pyproject.toml first (no lock file is copied here)
COPY pyproject.toml ./
# Install dependencies
# NOTE(review): `pip install .` runs before the source tree is copied below, so
# only pyproject.toml is present at this point - confirm the build backend can
# install the package (and not just an empty one) without the sources.
RUN pip install --upgrade pip && \
pip install .
# Copy the rest of the code (honouring .dockerignore)
COPY . .
# Basic-auth credentials read by the application at start-up
# NOTE(review): these defaults are baked into the image; override them at
# deploy time rather than shipping admin/admin.
ENV BASIC_AUTH_USERNAME=admin
ENV BASIC_AUTH_PASSWORD=admin
EXPOSE 8000
CMD ["python", "-m", "uvicorn", "beyond_api.main:app", "--host", "0.0.0.0", "--port", "8000"]

View File

@@ -0,0 +1,4 @@
# Package marker: intentionally contains nothing but this docstring.
"""
API package for BeyondCX Heatmap.
"""

View File

@@ -0,0 +1,3 @@
from .analysis import router
__all__ = ["router"]

View File

@@ -0,0 +1,221 @@
from __future__ import annotations
import os
from pathlib import Path
import json
import math
from uuid import uuid4
from typing import Optional, Any, Literal
from fastapi import APIRouter, UploadFile, File, Form, HTTPException, Depends
from fastapi.responses import JSONResponse
from beyond_api.security import get_current_user
from beyond_api.services.analysis_service import run_analysis_collect_json
# Cache paths, read from the CACHE_DIR environment variable with "/data/cache"
# as the fallback.
# NOTE(review): beyond_api/api/cache.py resolves its directory with an extra
# Windows-specific fallback, so the two modules can point at different folders
# on Windows when CACHE_DIR is unset - confirm and consider sharing one source.
CACHE_DIR = Path(os.getenv("CACHE_DIR", "/data/cache"))
CACHED_FILE = CACHE_DIR / "cached_data.csv"
# Router for the analysis endpoints; mounted without a path prefix.
router = APIRouter(
prefix="",
tags=["analysis"],
)
def sanitize_for_json(obj: Any) -> Any:
    """
    Recursively convert ``obj`` into a structure strict JSON can encode.

    - NaN, +inf and -inf become ``None``.
    - dicts are rebuilt with sanitized values (keys are left untouched).
    - lists and tuples become lists of sanitized items.
    - Numeric scalars that are not ``int``/``float`` subclasses but are
      registered with the ``numbers`` ABCs (e.g. numpy integer / float32
      scalars) are converted to plain ``int`` / ``float`` instead of being
      stringified, and their NaN/inf values also become ``None``.
    - Anything else falls back to ``str(obj)``.
    """
    import numbers  # local import: only this helper needs the numeric ABCs

    if obj is None or isinstance(obj, (str, bool, int)):
        return obj
    if isinstance(obj, float):
        return obj if math.isfinite(obj) else None
    if isinstance(obj, dict):
        return {k: sanitize_for_json(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [sanitize_for_json(v) for v in obj]
    if isinstance(obj, numbers.Integral):
        return int(obj)
    if isinstance(obj, numbers.Real):
        try:
            as_float = float(obj)
        except OverflowError:
            # Too large for a float: keep the historical string fallback.
            return str(obj)
        return as_float if math.isfinite(as_float) else None
    return str(obj)
@router.post("/analysis")
async def analysis_endpoint(
    csv_file: UploadFile = File(...),
    economy_json: Optional[str] = Form(default=None),
    analysis: Literal["basic", "premium"] = Form(default="premium"),
    current_user: str = Depends(get_current_user),
):
    """
    Run the pipeline on an uploaded CSV (multipart/form-data) and return a
    single JSON document with all results (including agentic_readiness).

    Parameters:
    - ``csv_file``: the CSV to analyse.
    - ``economy_json``: optional JSON string with economy parameters.
    - ``analysis``: "basic" or "premium"; forwarded to the analysis service,
      which picks the dimensions configuration.

    Raises ``HTTPException(400)`` when ``analysis`` or ``economy_json`` is
    invalid.
    """
    # Defensive re-check of `analysis` (the Literal type already constrains it).
    if analysis not in {"basic", "premium"}:
        raise HTTPException(
            status_code=400,
            detail="analysis debe ser 'basic' o 'premium'.",
        )

    # 1) Parse the economy parameters, if provided.
    economy_data = None
    if economy_json:
        try:
            economy_data = json.loads(economy_json)
        except json.JSONDecodeError as exc:
            raise HTTPException(
                status_code=400,
                detail="economy_json no es un JSON válido.",
            ) from exc

    # 2) Store the upload in a working folder under a per-request unique name.
    #    Using the client's filename verbatim let two concurrent uploads with
    #    the same name overwrite (and then delete) each other's file.
    base_input_dir = Path("data/input")
    base_input_dir.mkdir(parents=True, exist_ok=True)
    client_name = Path(csv_file.filename or "input.csv").name  # drops ../ parts
    safe_name = f"{uuid4().hex}_{client_name[-100:]}"  # bounded length, keeps the extension
    input_path = base_input_dir / safe_name

    try:
        with input_path.open("wb") as f:
            while chunk := await csv_file.read(1024 * 1024):  # 1 MB chunks
                f.write(chunk)

        # 3) Run the analysis and get the JSON in memory.
        results_json = run_analysis_collect_json(
            input_path=input_path,
            economy_data=economy_data,
            analysis=analysis,  # "basic" or "premium"
            company_folder=None,
        )
    finally:
        # 3b) Always remove the temporary CSV, even after a failed write.
        try:
            input_path.unlink(missing_ok=True)
        except OSError:
            # A failed cleanup must not break the response.
            pass

    # 4) Replace NaN/inf so the JSON is valid.
    safe_results = sanitize_for_json(results_json)

    # 5) Return JSON only.
    return JSONResponse(
        content={
            "user": current_user,
            "results": safe_results,
        }
    )
def extract_date_range_from_csv(file_path: Path) -> dict:
    """
    Return the first and last day found in the CSV's ``datetime_start`` column.

    The result is ``{"min": "YYYY-MM-DD" | None, "max": "YYYY-MM-DD" | None}``.
    Values that cannot be parsed as dates are ignored, so a single malformed
    row no longer voids the whole range. Any read/parse failure (missing file,
    missing column, ...) is logged and yields ``None`` for both bounds; the
    function never raises.
    """
    import logging

    import pandas as pd

    try:
        # Read only the date column for efficiency.
        df = pd.read_csv(file_path, usecols=["datetime_start"])
        # errors="coerce" turns unparseable values into NaT, which min()/max() skip.
        starts = pd.to_datetime(df["datetime_start"], errors="coerce")
        min_date = starts.min()
        max_date = starts.max()
        return {
            "min": min_date.strftime("%Y-%m-%d") if pd.notna(min_date) else None,
            "max": max_date.strftime("%Y-%m-%d") if pd.notna(max_date) else None,
        }
    except Exception as e:
        # Best-effort metadata: report and fall through to the empty range.
        logging.getLogger(__name__).warning("Error extracting date range: %s", e)
    return {"min": None, "max": None}
def count_unique_queues_from_csv(file_path: Path) -> int:
    """
    Count the distinct values in the CSV's ``queue_skill`` column.

    Returns 0 when the file or the column cannot be read; the failure is
    reported through the module logger instead of ``print`` and the function
    never raises.
    """
    import logging

    import pandas as pd

    try:
        # usecols makes read_csv fail if the column is absent, so no further
        # column check is needed after a successful read.
        queues = pd.read_csv(file_path, usecols=["queue_skill"])["queue_skill"]
        return int(queues.nunique())
    except Exception as e:
        logging.getLogger(__name__).warning("Error counting queues: %s", e)
    return 0
@router.post("/analysis/cached")
async def analysis_cached_endpoint(
    economy_json: Optional[str] = Form(default=None),
    analysis: Literal["basic", "premium"] = Form(default="premium"),
    current_user: str = Depends(get_current_user),
):
    """
    Run the pipeline on the CSV file cached on the server.

    Useful to re-analyse without uploading the file again. Besides the
    results, the response carries the date range and the number of unique
    queues found in the cached CSV.

    Raises ``HTTPException`` 404 when no cached file exists, 400 for an invalid
    ``analysis`` / ``economy_json`` and 500 when the analysis itself fails.
    """
    # The cached file must exist.
    if not CACHED_FILE.exists():
        raise HTTPException(
            status_code=404,
            detail="No hay archivo cacheado en el servidor. Sube un archivo primero.",
        )

    # Defensive re-check of `analysis`.
    if analysis not in {"basic", "premium"}:
        raise HTTPException(
            status_code=400,
            detail="analysis debe ser 'basic' o 'premium'.",
        )

    # Parse the economy parameters, if provided.
    economy_data = None
    if economy_json:
        try:
            economy_data = json.loads(economy_json)
        except json.JSONDecodeError as exc:
            raise HTTPException(
                status_code=400,
                detail="economy_json no es un JSON válido.",
            ) from exc

    try:
        # Run the analysis on the cached file.
        results_json = run_analysis_collect_json(
            input_path=CACHED_FILE,
            economy_data=economy_data,
            analysis=analysis,
            company_folder=None,
        )
    except Exception as e:
        # Chain the cause so the original traceback is kept for the logs.
        raise HTTPException(
            status_code=500,
            detail=f"Error ejecutando análisis: {str(e)}",
        ) from e

    # CSV metadata is only needed for a successful response, so the two extra
    # scans of the file are done after the analysis has succeeded.
    date_range = extract_date_range_from_csv(CACHED_FILE)
    unique_queues = count_unique_queues_from_csv(CACHED_FILE)

    # Replace NaN/inf so the JSON is valid.
    safe_results = sanitize_for_json(results_json)

    return JSONResponse(
        content={
            "user": current_user,
            "results": safe_results,
            "source": "cached",
            "dateRange": date_range,
            "uniqueQueues": unique_queues,
        }
    )

View File

@@ -0,0 +1,26 @@
# beyond_api/api/auth.py
from __future__ import annotations
from fastapi import APIRouter, Depends
from fastapi.responses import JSONResponse
from beyond_api.security import get_current_user
router = APIRouter(
prefix="/auth",
tags=["auth"],
)
@router.get("/check")
def check_auth(current_user: str = Depends(get_current_user)):
    """
    Minimal credentials probe.

    When the Basic credentials are accepted by ``get_current_user`` the
    response is 200 with the user name; otherwise that dependency raises 401
    before this function runs.
    """
    payload = {
        "user": current_user,
        "status": "ok",
    }
    return JSONResponse(content=payload)

View File

@@ -0,0 +1,288 @@
# beyond_api/api/cache.py
"""
Server-side cache for CSV files.
Stores the uploaded CSV file and metadata for later re-analysis.
"""
from __future__ import annotations
import json
import os
import shutil
import sys
from datetime import datetime
from pathlib import Path
from typing import Any, Optional
from fastapi import APIRouter, Depends, HTTPException, status, UploadFile, File, Form
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from beyond_api.security import get_current_user
router = APIRouter(
prefix="/cache",
tags=["cache"],
)
# Directory for cache files - use platform-appropriate default
def _get_default_cache_dir() -> Path:
"""Get a platform-appropriate default cache directory."""
env_cache_dir = os.getenv("CACHE_DIR")
if env_cache_dir:
return Path(env_cache_dir)
# On Windows, check if C:/data/cache exists (legacy location)
# Otherwise use a local .cache directory relative to the backend
# On Unix/Docker, use /data/cache
if sys.platform == "win32":
# Check legacy location first (for backwards compatibility)
legacy_cache = Path("C:/data/cache")
if legacy_cache.exists():
return legacy_cache
# Fallback to local .cache directory in the backend folder
backend_dir = Path(__file__).parent.parent.parent
return backend_dir / ".cache"
else:
return Path("/data/cache")
# Resolved once at import time: changing the CACHE_DIR environment variable
# afterwards does not move the cache.
CACHE_DIR = _get_default_cache_dir()
# Files kept inside the cache directory: the uploaded CSV, its metadata and
# the drilldown JSON saved by the frontend.
CACHED_FILE = CACHE_DIR / "cached_data.csv"
METADATA_FILE = CACHE_DIR / "metadata.json"
DRILLDOWN_FILE = CACHE_DIR / "drilldown_data.json"
# Log cache directory on module load
import logging
logger = logging.getLogger(__name__)
logger.info(f"[Cache] Using cache directory: {CACHE_DIR}")
logger.info(f"[Cache] Drilldown file path: {DRILLDOWN_FILE}")
class CacheMetadata(BaseModel):
    """
    Shape of the metadata describing the cached CSV.

    The field names match the keys of the metadata dict written by the
    POST /cache/file endpoint.
    NOTE(review): no visible endpoint validates against this model - confirm
    whether it is still used.
    """
    # Name of the uploaded file as reported by the client.
    fileName: str
    # File size as reported by the client (presumably bytes - TODO confirm).
    fileSize: int
    # Number of data records counted in the CSV.
    recordCount: int
    # Timestamp string of when the file was cached.
    cachedAt: str
    # Cost per hour supplied with the upload.
    costPerHour: float
def ensure_cache_dir():
    """Make sure the cache directory (and any missing parents) exists."""
    os.makedirs(CACHE_DIR, exist_ok=True)
def count_csv_records(file_path: Path) -> int:
    """
    Count the data records in a CSV file (header excluded).

    Blank lines are not counted, and the result is never negative: an empty
    file yields 0 rather than -1. Returns 0 when the file cannot be read.

    NOTE: this is a line count, not a CSV parse, so a quoted field that spans
    several lines is counted once per physical line.
    """
    try:
        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
            non_blank_lines = sum(1 for line in f if line.strip())
    except (OSError, ValueError, TypeError):
        return 0
    # Drop the header line; clamp so an empty file does not report -1.
    return max(non_blank_lines - 1, 0)
@router.get("/check")
def check_cache(current_user: str = Depends(get_current_user)):
    """
    Report whether cached data is available.

    Responds with ``{"exists": True, "metadata": ...}`` when both the metadata
    file and the cached CSV are present and the metadata can be read;
    otherwise ``exists`` is False and ``metadata`` is None (plus an ``error``
    text when reading the metadata failed).
    """
    cache_present = METADATA_FILE.exists() and CACHED_FILE.exists()
    if not cache_present:
        return JSONResponse(content={"exists": False, "metadata": None})

    try:
        metadata = json.loads(METADATA_FILE.read_text())
        return JSONResponse(content={"exists": True, "metadata": metadata})
    except Exception as e:
        failure = {"exists": False, "metadata": None, "error": str(e)}
        return JSONResponse(content=failure)
@router.get("/file")
def get_cached_file_path(current_user: str = Depends(get_current_user)):
    """
    Return the server-side path of the cached CSV file (internal use).

    Raises ``HTTPException`` 404 when there is no cached file.
    """
    if CACHED_FILE.exists():
        return JSONResponse(content={"path": str(CACHED_FILE)})

    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail="No cached file found"
    )
@router.get("/download")
def download_cached_file(current_user: str = Depends(get_current_user)):
    """
    Send the cached CSV file to the client so the frontend can parse it.

    The file is returned as ``text/csv`` under the name ``cached_data.csv``.
    Raises ``HTTPException`` 404 when there is no cached file.
    """
    from fastapi.responses import FileResponse

    cache_missing = not CACHED_FILE.exists()
    if cache_missing:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No cached file found"
        )

    response = FileResponse(
        path=CACHED_FILE,
        media_type="text/csv",
        filename="cached_data.csv"
    )
    return response
@router.post("/file")
async def save_cached_file(
    csv_file: UploadFile = File(...),
    fileName: str = Form(...),
    fileSize: int = Form(...),
    costPerHour: float = Form(...),
    current_user: str = Depends(get_current_user)
):
    """
    Save the uploaded CSV file to the server cache and record its metadata.

    The upload is streamed to a temporary file in the cache directory and only
    swapped into place once it is complete, so a failed or interrupted upload
    never leaves a truncated ``cached_data.csv`` behind and readers never see
    a half-written file.

    Raises ``HTTPException`` 500 when the file or the metadata cannot be saved.
    """
    from uuid import uuid4  # local import: only needed for the temp file name

    ensure_cache_dir()
    # Same directory as the target so os.replace stays a same-filesystem rename.
    tmp_file = CACHED_FILE.with_name(f"{CACHED_FILE.name}.{uuid4().hex}.tmp")
    try:
        # Stream the upload to the temporary file.
        with open(tmp_file, "wb") as f:
            while chunk := await csv_file.read(1024 * 1024):  # 1 MB chunks
                f.write(chunk)
        # Swap the complete file into place in one step.
        os.replace(tmp_file, CACHED_FILE)

        # Count records
        record_count = count_csv_records(CACHED_FILE)

        # Save metadata
        metadata = {
            "fileName": fileName,
            "fileSize": fileSize,
            "recordCount": record_count,
            "cachedAt": datetime.now().isoformat(),
            "costPerHour": costPerHour,
        }
        with open(METADATA_FILE, "w") as f:
            json.dump(metadata, f)

        return JSONResponse(content={
            "success": True,
            "message": f"Cached file with {record_count} records",
            "metadata": metadata
        })
    except Exception as e:
        # Do not leave the partial upload behind.
        try:
            tmp_file.unlink(missing_ok=True)
        except OSError:
            pass
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error saving cache: {str(e)}"
        ) from e
@router.get("/drilldown")
def get_cached_drilldown(current_user: str = Depends(get_current_user)):
    """
    Return the cached drilldownData JSON.

    Raises ``HTTPException`` 404 when no drilldown file exists and 500 when it
    cannot be read or parsed.
    """
    logger.info(f"[Cache] GET /drilldown - checking file: {DRILLDOWN_FILE}")
    logger.info(f"[Cache] File exists: {DRILLDOWN_FILE.exists()}")

    drilldown_missing = not DRILLDOWN_FILE.exists()
    if drilldown_missing:
        logger.warning(f"[Cache] Drilldown file not found at: {DRILLDOWN_FILE}")
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No cached drilldown data found"
        )

    try:
        with open(DRILLDOWN_FILE, "r", encoding="utf-8") as handle:
            stored = json.load(handle)
        logger.info(f"[Cache] Loaded drilldown with {len(stored)} skills")
        body = {"success": True, "drilldownData": stored}
        return JSONResponse(content=body)
    except Exception as e:
        logger.error(f"[Cache] Error reading drilldown: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error reading drilldown data: {str(e)}"
        )
@router.post("/drilldown")
async def save_cached_drilldown(
    drilldown_json: str = Form(...),
    current_user: str = Depends(get_current_user)
):
    """
    Save drilldownData JSON to the server cache.

    The JSON arrives as a form field. It must decode to an array or an
    object; anything else (a bare number, string, null, ...) is rejected as a
    client error instead of surfacing as a 500 from ``len()``.

    Raises ``HTTPException`` 400 for invalid input and 500 when the file cannot
    be written.
    """
    logger.info(f"[Cache] POST /drilldown - saving to: {DRILLDOWN_FILE}")
    logger.info(f"[Cache] Cache directory: {CACHE_DIR}")
    ensure_cache_dir()
    logger.info(f"[Cache] Cache dir exists after ensure: {CACHE_DIR.exists()}")

    # Parse and validate the payload first, outside the write's try block, so
    # client errors are answered with 400 and never rewrapped as 500.
    try:
        drilldown_data = json.loads(drilldown_json)
    except json.JSONDecodeError as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Invalid JSON: {str(e)}"
        ) from e
    if not isinstance(drilldown_data, (list, dict)):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Invalid JSON: expected an array or object"
        )
    logger.info(f"[Cache] Parsed drilldown JSON with {len(drilldown_data)} skills")

    try:
        # Save to file
        with open(DRILLDOWN_FILE, "w", encoding="utf-8") as f:
            json.dump(drilldown_data, f)
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error saving drilldown data: {str(e)}"
        ) from e

    logger.info(f"[Cache] Drilldown saved successfully, file exists: {DRILLDOWN_FILE.exists()}")
    return JSONResponse(content={
        "success": True,
        "message": f"Cached drilldown data with {len(drilldown_data)} skills"
    })
@router.delete("/file")
def clear_cache(current_user: str = Depends(get_current_user)):
    """
    Clear the server-side cache (CSV, metadata, and drilldown data).

    Files that are already absent are skipped. Raises ``HTTPException`` 500
    when a file cannot be removed.
    """
    try:
        # unlink(missing_ok=True) avoids the exists()/unlink() race: a file
        # removed by a concurrent request no longer turns into a 500.
        for cached in (CACHED_FILE, METADATA_FILE, DRILLDOWN_FILE):
            cached.unlink(missing_ok=True)
        return JSONResponse(content={"success": True, "message": "Cache cleared"})
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error clearing cache: {str(e)}"
        ) from e
# Keep old endpoints for backwards compatibility but mark as deprecated
@router.get("/interactions")
def get_cached_interactions_deprecated(current_user: str = Depends(get_current_user)):
    """Deprecated: always answers 410 Gone. Use /cache/file instead."""
    gone = HTTPException(
        status_code=status.HTTP_410_GONE,
        detail="This endpoint is deprecated. Use /cache/file with re-analysis instead."
    )
    raise gone
@router.post("/interactions")
def save_cached_interactions_deprecated(current_user: str = Depends(get_current_user)):
    """Deprecated: always answers 410 Gone. Use /cache/file instead."""
    gone = HTTPException(
        status_code=status.HTTP_410_GONE,
        detail="This endpoint is deprecated. Use /cache/file instead."
    )
    raise gone

View File

@@ -0,0 +1,37 @@
import logging
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
# importa tus routers
from beyond_api.api.analysis import router as analysis_router
from beyond_api.api.auth import router as auth_router
from beyond_api.api.cache import router as cache_router
def setup_basic_logging() -> None:
    """Configure root logging at INFO level with a timestamped line format."""
    line_format = "%(asctime)s %(levelname)s [%(name)s] %(message)s"
    logging.basicConfig(level=logging.INFO, format=line_format)
# Configure logging at import time, before the application object is built.
setup_basic_logging()
app = FastAPI()
# Browser origins allowed by CORS: only local development hosts are listed.
# NOTE(review): a deployed frontend on another origin would need adding here.
origins = [
"http://localhost:3000",
"http://localhost:3001",
"http://127.0.0.1:3000",
"http://127.0.0.1:3001",
]
# Credentials are allowed, with every method and header, for the origins above.
app.add_middleware(
CORSMiddleware,
allow_origins=origins,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Mount the API routers.
app.include_router(analysis_router)
app.include_router(auth_router)
app.include_router(cache_router)

View File

@@ -0,0 +1,37 @@
from __future__ import annotations
import os
import secrets
from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPBasic, HTTPBasicCredentials
# auto_error=False so the browser's native login popup is not triggered
# automatically; get_current_user handles the missing-credentials case itself.
security = HTTPBasic(auto_error=False)
# In production: export BASIC_AUTH_USERNAME and BASIC_AUTH_PASSWORD.
# NOTE(review): the fallback values below are hard-coded credentials that apply
# whenever the environment variables are unset - confirm this is acceptable
# outside of a demo.
BASIC_USER = os.getenv("BASIC_AUTH_USERNAME", "beyond")
BASIC_PASS = os.getenv("BASIC_AUTH_PASSWORD", "beyond2026")
def get_current_user(credentials: HTTPBasicCredentials | None = Depends(security)) -> str:
    """
    Validate the username/password sent via HTTP Basic and return the username.

    No WWW-Authenticate header is sent, to avoid the browser's native login
    popup (the frontend has its own login form).

    Raises ``HTTPException`` 401 when credentials are missing or wrong.
    """
    if credentials is None:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Credenciales requeridas",
        )

    # compare_digest only accepts ASCII-only str; comparing UTF-8 bytes keeps
    # the constant-time check and stops a non-ASCII username/password from
    # raising TypeError (which surfaced as a 500 instead of a 401).
    correct_username = secrets.compare_digest(
        credentials.username.encode("utf-8"), BASIC_USER.encode("utf-8")
    )
    correct_password = secrets.compare_digest(
        credentials.password.encode("utf-8"), BASIC_PASS.encode("utf-8")
    )
    # Both comparisons are always evaluated before being combined.
    if not (correct_username and correct_password):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Credenciales incorrectas",
        )
    return credentials.username

View File

View File

@@ -0,0 +1,262 @@
from __future__ import annotations
from pathlib import Path
from uuid import uuid4
from datetime import datetime
from typing import Optional, Literal
import json
import zipfile
from beyond_metrics.io import LocalDataSource, LocalResultsSink, ResultsSink
from beyond_metrics.pipeline import build_pipeline
from beyond_metrics.dimensions.EconomyCost import EconomyConfig
from beyond_flows.scorers import AgenticScorer
from typing import Any, Mapping, Optional, Dict
def _build_economy_config(economy_data: Optional[Mapping[str, Any]]) -> EconomyConfig:
    """
    Build an ``EconomyConfig`` from optional user-supplied parameters.

    Every missing field falls back to its default, so ``None`` and ``{}`` give
    the same configuration. ``customer_segments`` entries are merged on top of
    the default segments.

    Raises ``ValueError`` when ``economy_data`` is not a mapping, when a scalar
    field is not a number (booleans are rejected as well) or when
    ``customer_segments`` is not a mapping of strings.
    """
    # Default values
    default_customer_segments: Dict[str, str] = {
        "VIP": "high",
        "Premium": "high",
        "Soporte_General": "medium",
        "Ventas": "medium",
        "Basico": "low",
    }

    if economy_data is None:
        data: Mapping[str, Any] = {}
    elif isinstance(economy_data, Mapping):
        data = economy_data
    else:
        # The payload comes from user-supplied JSON and may be a list, a number...
        raise ValueError(
            f"economy_data debe ser un diccionario. Valor recibido: {economy_data!r}"
        )

    def _get_float(field: str, default: float) -> float:
        value = data.get(field, default)
        # bool is a subclass of int; True/False are not meaningful amounts.
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return float(value)
        raise ValueError(f"El campo '{field}' debe ser numérico (float). Valor recibido: {value!r}")

    # customer_segments is optional; validate it when present.
    customer_segments: Dict[str, str] = dict(default_customer_segments)
    cs = data.get("customer_segments")
    if cs is not None:
        if not isinstance(cs, Mapping):
            raise ValueError("customer_segments debe ser un diccionario {segment: level}")
        for k, v in cs.items():
            if not isinstance(v, str):
                raise ValueError(
                    f"El valor de customer_segments['{k}'] debe ser str. Valor recibido: {v!r}"
                )
            customer_segments[str(k)] = v

    return EconomyConfig(
        labor_cost_per_hour=_get_float("labor_cost_per_hour", 20.0),
        overhead_rate=_get_float("overhead_rate", 0.10),
        tech_costs_annual=_get_float("tech_costs_annual", 5000.0),
        automation_cpi=_get_float("automation_cpi", 0.20),
        automation_volume_share=_get_float("automation_volume_share", 0.5),
        automation_success_rate=_get_float("automation_success_rate", 0.6),
        customer_segments=customer_segments,
    )
def run_analysis(
    input_path: Path,
    economy_data: Optional[dict] = None,
    return_type: Literal["path", "zip"] = "path",
    company_folder: Optional[str] = None,
) -> tuple[Path, Optional[Path]]:
    """
    Run the pipeline on a CSV and return:
    - (results_dir, None) when return_type == "path"
    - (results_dir, zip_path) when return_type == "zip"

    ``input_path`` may be absolute or relative; results are always written in
    the CSV's own folder, inside a sub-folder named after the UTC run timestamp
    (optionally prefixed by ``company_folder``).

    Raises ``FileNotFoundError`` when the CSV does not exist and ``ValueError``
    when the path is not a regular file or the economy data is invalid.
    """
    from datetime import timezone  # local import: only needed for the run timestamp

    input_path = input_path.resolve()
    if not input_path.exists():
        raise FileNotFoundError(f"El CSV no existe: {input_path}")
    if not input_path.is_file():
        raise ValueError(f"La ruta no apunta a un fichero CSV: {input_path}")

    # Folder that holds the CSV; DataSource and ResultsSink both point at it.
    csv_dir = input_path.parent
    datasource = LocalDataSource(base_dir=str(csv_dir))
    sink = LocalResultsSink(base_dir=str(csv_dir))

    # Economy configuration
    economy_cfg = _build_economy_config(economy_data)
    dimension_params: Dict[str, Mapping[str, Any]] = {
        "economy_costs": {
            "config": economy_cfg,
        }
    }

    # Scoring callback: writes agentic_readiness.json next to the results.
    def agentic_post_run(results: Dict[str, Any], run_base: str, sink_: ResultsSink) -> None:
        scorer = AgenticScorer()
        try:
            agentic = scorer.compute_and_return(results)
        except Exception as e:
            # A scorer failure must not break the whole run.
            agentic = {
                "error": f"{type(e).__name__}: {e}",
            }
        sink_.write_json(f"{run_base}/agentic_readiness.json", agentic)

    pipeline = build_pipeline(
        dimensions_config_path="beyond_metrics/configs/beyond_metrics_config.json",
        datasource=datasource,
        sink=sink,
        dimension_params=dimension_params,
        post_run=[agentic_post_run],
    )

    # Run timestamp (name of the results folder). datetime.utcnow() is
    # deprecated; the timezone-aware call formats to the same UTC string.
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")

    # Logical results path (RELATIVE to the sink's base_dir)
    if company_folder:
        # E.g. "Cliente_X/20251208-153045"
        run_dir_rel = f"{company_folder.rstrip('/')}/{timestamp}"
    else:
        # E.g. "20251208-153045"
        run_dir_rel = timestamp

    # Run the pipeline: the CSV is passed relative to csv_dir.
    pipeline.run(
        input_path=input_path.name,
        run_dir=run_dir_rel,
    )

    # Actual folder with the results
    results_dir = csv_dir / run_dir_rel
    if return_type == "path":
        return results_dir, None

    # --- ZIP of the results ------------------------------------------------
    # The ZIP is created in the CSV's folder, named after run_dir.
    zip_name = f"{run_dir_rel.replace('/', '_')}.zip"
    zip_path = csv_dir / zip_name
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
        for file in results_dir.rglob("*"):
            if file.is_file():
                # Stored relative to the parent of the results folder.
                arcname = file.relative_to(results_dir.parent)
                zipf.write(file, arcname)
    return results_dir, zip_path
from typing import Any, Mapping, Dict # asegúrate de tener estos imports arriba
def run_analysis_collect_json(
    input_path: Path,
    economy_data: Optional[dict] = None,
    analysis: Literal["basic", "premium"] = "premium",
    company_folder: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Run the pipeline and return a single dict with all results.

    Unlike ``run_analysis``:
    - results.json is NOT written (``write_results_json=False``)
    - agentic_readiness.json is NOT written
    - agentic_readiness is embedded in the results dict

    ``analysis`` selects the dimensions configuration:
    - "basic"   -> beyond_metrics/configs/basic.json
    - anything else ("premium") -> beyond_metrics/configs/beyond_metrics_config.json

    Raises ``FileNotFoundError`` when the CSV does not exist and ``ValueError``
    when the path is not a regular file or the economy data is invalid.
    """
    from datetime import timezone  # local import: only needed for the run timestamp

    # Normalize and validate the CSV path.
    input_path = input_path.resolve()
    if not input_path.exists():
        raise FileNotFoundError(f"El CSV no existe: {input_path}")
    if not input_path.is_file():
        raise ValueError(f"La ruta no apunta a un fichero CSV: {input_path}")

    # Folder that holds the CSV; DataSource and ResultsSink both point at it.
    csv_dir = input_path.parent
    datasource = LocalDataSource(base_dir=str(csv_dir))
    sink = LocalResultsSink(base_dir=str(csv_dir))

    # Economy configuration
    economy_cfg = _build_economy_config(economy_data)
    dimension_params: Dict[str, Mapping[str, Any]] = {
        "economy_costs": {
            "config": economy_cfg,
        }
    }

    # Pick the dimensions configuration file according to `analysis`.
    if analysis == "basic":
        dimensions_config_path = "beyond_metrics/configs/basic.json"
    else:
        dimensions_config_path = "beyond_metrics/configs/beyond_metrics_config.json"

    # Post-run callback: add agentic_readiness to the final dict (no files written).
    def agentic_post_run(results: Dict[str, Any], run_base: str, sink_: ResultsSink) -> None:
        scorer = AgenticScorer()
        try:
            agentic = scorer.compute_and_return(results)
        except Exception as e:
            # A scorer failure is reported inside the results instead of raising.
            agentic = {"error": f"{type(e).__name__}: {e}"}
        results["agentic_readiness"] = agentic

    pipeline = build_pipeline(
        dimensions_config_path=dimensions_config_path,
        datasource=datasource,
        sink=sink,
        dimension_params=dimension_params,
        post_run=[agentic_post_run],
    )

    # Run timestamp (separates any artefacts the run may produce).
    # datetime.utcnow() is deprecated; the timezone-aware call formats to the
    # same UTC string.
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
    if company_folder:
        run_dir_rel = f"{company_folder.rstrip('/')}/{timestamp}"
    else:
        run_dir_rel = timestamp

    # Run the pipeline without writing results.json.
    results = pipeline.run(
        input_path=input_path.name,
        run_dir=run_dir_rel,
        write_results_json=False,
    )
    return results

View File

View File

View File

View File

@@ -0,0 +1,43 @@
from __future__ import annotations
from typing import Any, Dict
from beyond_metrics.io import LocalDataSource, LocalResultsSink, ResultsSink
from beyond_metrics.pipeline import build_pipeline
from beyond_flows.scorers import AgenticScorer
def agentic_post_run(results: Dict[str, Any], run_base: str, sink: ResultsSink) -> None:
    """
    Post-run callback that computes the Agentic Readiness and stores it in the
    final results dict under the "agentic_readiness" key.

    The dict is enriched in place; no second file is written. ``run_base`` and
    ``sink`` are accepted but not used here.
    """
    results["agentic_readiness"] = AgenticScorer().compute_and_return(results)
def run_pipeline_with_agentic(
    input_csv,
    base_results_dir,
    dimensions_config_path="beyond_metrics/configs/beyond_metrics_config.json",
):
    """
    Build the pipeline with the agentic post-run callback and run it.

    Both the data source and the results sink use the current directory as
    their base, so ``input_csv`` and ``base_results_dir`` are passed to the
    pipeline as given. Returns whatever ``pipeline.run`` returns.
    """
    local_base = "."
    pipeline = build_pipeline(
        dimensions_config_path=dimensions_config_path,
        datasource=LocalDataSource(base_dir=local_base),
        sink=LocalResultsSink(base_dir=local_base),
        post_run=[agentic_post_run],
    )
    return pipeline.run(
        input_path=input_csv,
        run_dir=base_results_dir,
    )

View File

@@ -0,0 +1,3 @@
from .agentic_score import AgenticScorer
__all__ = ["AgenticScorer"]

View File

@@ -0,0 +1,760 @@
"""
agentic_score.py
Calcula el Agentic Readiness Score de un contact center a partir
de un JSON con KPIs agregados (misma estructura que results.json).
Diseñado como clase para integrarse fácilmente en pipelines.
Características:
- Tolerante a datos faltantes: si una dimensión no se puede calcular
(porque faltan KPIs), se marca como `computed = False` y no se
incluye en el cálculo del score global.
- La llamada típica en un pipeline será:
from agentic_score import AgenticScorer
scorer = AgenticScorer()
result = scorer.run_on_folder("/ruta/a/carpeta")
Esa carpeta debe contener un `results.json` de entrada.
El módulo generará un `agentic_readiness.json` en la misma carpeta.
"""
from __future__ import annotations
import json
import math
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Union
Number = Union[int, float]
# =========================
# Helpers
# =========================
def _is_nan(x: Any) -> bool:
"""Devuelve True si x es NaN, None o el string 'NaN'."""
try:
if x is None:
return True
if isinstance(x, str) and x.lower() == "nan":
return True
return math.isnan(float(x))
except (TypeError, ValueError):
return False
def _safe_mean(values: Sequence[Optional[Number]]) -> Optional[float]:
    """
    Average the usable entries of ``values``.

    Entries that are None or that ``_is_nan`` flags are skipped; the rest are
    converted with ``float``. Returns None when nothing usable remains.
    """
    usable: List[float] = [
        float(v) for v in values if v is not None and not _is_nan(v)
    ]
    return sum(usable) / len(usable) if usable else None
def _get_nested(d: Dict[str, Any], *keys: str, default: Any = None) -> Any:
"""Acceso seguro a diccionarios anidados."""
cur: Any = d
for k in keys:
if not isinstance(cur, dict) or k not in cur:
return default
cur = cur[k]
return cur
def _clamp(value: float, lo: float = 0.0, hi: float = 10.0) -> float:
return max(lo, min(hi, value))
def _normalize_numeric_sequence(field: Any) -> Optional[List[Number]]:
"""
Normaliza un campo que representa una secuencia numérica.
Soporta:
- Formato antiguo del pipeline: [10, 20, 30]
- Formato nuevo del pipeline: {"labels": [...], "values": [10, 20, 30]}
Devuelve:
- lista de números, si hay datos numéricos válidos
- None, si el campo no tiene una secuencia numérica interpretable
"""
if field is None:
return None
# Formato nuevo: {"labels": [...], "values": [...]}
if isinstance(field, dict) and "values" in field:
seq = field.get("values")
else:
seq = field
if not isinstance(seq, Sequence):
return None
out: List[Number] = []
for v in seq:
if isinstance(v, (int, float)):
out.append(v)
else:
# Intentamos conversión suave por si viene como string numérico
try:
out.append(float(v))
except (TypeError, ValueError):
continue
return out or None
# =========================
# Scoring functions
# =========================
def score_repetitividad(volume_by_skill: Optional[List[Number]]) -> Dict[str, Any]:
    """
    Repetitiveness score derived from the mean volume per skill.

    Rule (meant per process/skill):
    - 10 when the mean volume is > 80
    - 5 when it is 40–80
    - 0 when it is < 40

    When there is no data (empty or non-numeric list) the dimension is marked
    as not computed (computed = False) and the score is None.
    """
    def _not_computed(reason: str) -> Dict[str, Any]:
        # Shared shape for both "no usable data" outcomes.
        return {
            "score": None,
            "computed": False,
            "reason": reason,
            "details": {
                "avg_volume_per_skill": None,
                "volume_by_skill": volume_by_skill,
            },
        }

    if not volume_by_skill:
        return _not_computed("sin_datos_volumen")

    mean_volume = _safe_mean(volume_by_skill)
    if mean_volume is None:
        return _not_computed("volumen_no_numerico")

    if mean_volume > 80:
        score, reason = 10.0, "alto_volumen"
    elif mean_volume >= 40:
        score, reason = 5.0, "volumen_medio"
    else:
        score, reason = 0.0, "volumen_bajo"

    details = {
        "avg_volume_per_skill": mean_volume,
        "volume_by_skill": volume_by_skill,
        "thresholds": {
            "high": 80,
            "medium": 40,
        },
    }
    return {
        "score": score,
        "computed": True,
        "reason": reason,
        "details": details,
    }
def score_predictibilidad(aht_ratio: Any,
                          escalation_rate: Any) -> Dict[str, Any]:
    """
    Predictability score based on:
    - AHT variability: P90/P50 ratio
    - Escalation rate (%)

    Rule, evaluated in this order when both inputs are available:
    - 10 when ratio < 1.5 and escalation < 10%
    - 5 when ratio is 1.5–2.0 or escalation is 10–20%
    - 0 when ratio > 2.0 and escalation > 20%
    - 3 for any remaining combination
    With only one of the two inputs available the score is 3 (partial data).
    With neither, the dimension is not computed.
    """
    # Normalize both inputs: None / NaN-like values count as missing.
    ratio: Optional[float] = (
        None if aht_ratio is None or _is_nan(aht_ratio) else float(aht_ratio)
    )
    esc: Optional[float] = (
        None
        if escalation_rate is None or _is_nan(escalation_rate)
        else float(escalation_rate)
    )

    if ratio is None and esc is None:
        return {
            "score": None,
            "computed": False,
            "reason": "sin_datos",
            "details": {
                "aht_p90_p50_ratio": None,
                "escalation_rate_pct": None,
            },
        }

    score: float
    reason: str
    if ratio is None or esc is None:
        # Partial data: penalized, but not set to 0.
        score, reason = 3.0, "datos_parciales"
    elif ratio < 1.5 and esc < 10.0:
        score, reason = 10.0, "alta_predictibilidad"
    elif (1.5 <= ratio <= 2.0) or (10.0 <= esc <= 20.0):
        score, reason = 5.0, "predictibilidad_media"
    elif ratio > 2.0 and esc > 20.0:
        score, reason = 0.0, "baja_predictibilidad"
    else:
        score, reason = 3.0, "caso_intermedio"

    rules = {
        "high": {"max_ratio": 1.5, "max_esc_pct": 10},
        "medium": {"ratio_range": [1.5, 2.0], "esc_range_pct": [10, 20]},
        "low": {"min_ratio": 2.0, "min_esc_pct": 20},
    }
    return {
        "score": score,
        "computed": True,
        "reason": reason,
        "details": {
            "aht_p90_p50_ratio": ratio,
            "escalation_rate_pct": esc,
            "rules": rules,
        },
    }
def score_estructuracion(channel_distribution_pct: Any) -> Dict[str, Any]:
    """
    Score data structuring (0-10) using the channel mix as a proxy.

    The channel with the largest share is assumed to be the text channel
    (a real project could make this mapping configurable).

    Rules on that largest share:
      - 10 if it is above 60%
      - 5 if it is between 30% and 60%
      - 0 if it is below 30%

    If there is no channel data, or none of it can be read as a number,
    the dimension is reported as not computed.
    """
    def _not_computed(reason: str) -> Dict[str, Any]:
        # Shared shape of both "not computed" answers.
        return {
            "score": None,
            "computed": False,
            "reason": reason,
            "details": {
                "estimated_text_share_pct": None,
                "channel_distribution_pct": channel_distribution_pct,
            },
        }

    if not channel_distribution_pct:
        return _not_computed("sin_datos_canal")

    try:
        shares: List[float] = [
            float(item)
            for item in channel_distribution_pct
            if not _is_nan(item)
        ]
    except Exception:
        # Any failure while reading the values means "not numeric".
        shares = []
    if not shares:
        return _not_computed("canales_no_numericos")

    max_share = max(shares)
    if max_share > 60.0:
        score, reason = 10.0, "alta_proporcion_texto"
    elif max_share >= 30.0:
        score, reason = 5.0, "proporcion_texto_media"
    else:
        score, reason = 0.0, "baja_proporcion_texto"

    return {
        "score": score,
        "computed": True,
        "reason": reason,
        "details": {
            "estimated_text_share_pct": max_share,
            "channel_distribution_pct": channel_distribution_pct,
            "thresholds_pct": {
                "high": 60,
                "medium": 30,
            },
        },
    }
def score_complejidad(aht_ratio: Any,
                      escalation_rate: Any) -> Dict[str, Any]:
    """
    Inverse process complexity on a 0-10 scale (higher = simpler process).

    1) Base: linear inverse of AHT variability (P90/P50 ratio):
         ratio = 1.0 -> 10, 1.5 -> ~7.5, 2.0 -> 5, 2.5 -> 2.5, >= 3.0 -> 0
       base = (3 - ratio) / (3 - 1) * 10, passed through _clamp.
       Without a ratio a neutral base of 5 is used.
    2) Escalation adjustment: subtract escalation_rate / 5 points
       (no adjustment when the escalation rate is missing).

    The sum is passed through _clamp again. If neither input is usable,
    the dimension is reported as not computed.
    """
    ratio: Optional[float] = None
    if aht_ratio is not None and not _is_nan(aht_ratio):
        ratio = float(aht_ratio)

    esc: Optional[float] = None
    if escalation_rate is not None and not _is_nan(escalation_rate):
        esc = float(escalation_rate)

    if ratio is None and esc is None:
        return {
            "score": None,
            "computed": False,
            "reason": "sin_datos",
            "details": {
                "aht_p90_p50_ratio": None,
                "escalation_rate_pct": None,
            },
        }

    # Base score from variability.
    if ratio is not None:
        base = _clamp((3.0 - ratio) / (3.0 - 1.0) * 10.0)
        base_reason = "calculado_desde_ratio"
    else:
        base = 5.0  # neutral fallback
        base_reason = "sin_ratio_usamos_valor_neutro"

    # Escalation adjustment: every 5 points of escalation removes 1 point.
    if esc is not None:
        adj = -(esc / 5.0)
        adj_reason = "ajuste_por_escalacion"
    else:
        adj = 0.0
        adj_reason = "sin_escalacion_sin_ajuste"

    details = {
        "aht_p90_p50_ratio": ratio,
        "escalation_rate_pct": esc,
        "base_score": base,
        "base_reason": base_reason,
        "adjustment": adj,
        "adjustment_reason": adj_reason,
    }
    return {
        "score": _clamp(base + adj),
        "computed": True,
        "reason": "complejidad_inversa",
        "details": details,
    }
def score_estabilidad(peak_offpeak_ratio: Any) -> Dict[str, Any]:
    """
    Score process stability (0-10) from the peak / off-peak ratio.

    Rules:
      - 10 if ratio < 3
      - 7 if 3 <= ratio < 5
      - 3 if 5 <= ratio < 7
      - 0 otherwise

    If the ratio is missing, the dimension is reported as not computed.
    """
    if peak_offpeak_ratio is None or _is_nan(peak_offpeak_ratio):
        return {
            "score": None,
            "computed": False,
            "reason": "sin_datos_peak_offpeak",
            "details": {
                "peak_offpeak_ratio": None,
            },
        }

    r = float(peak_offpeak_ratio)

    # (exclusive upper bound, score, reason), checked in ascending order.
    bands = (
        (3.0, 10.0, "muy_estable"),
        (5.0, 7.0, "estable_moderado"),
        (7.0, 3.0, "pico_pronunciado"),
    )
    score, reason = 0.0, "muy_inestable"
    for upper_bound, band_score, band_reason in bands:
        if r < upper_bound:
            score, reason = band_score, band_reason
            break

    return {
        "score": score,
        "computed": True,
        "reason": reason,
        "details": {
            "peak_offpeak_ratio": r,
            "thresholds": {
                "very_stable": 3.0,
                "stable": 5.0,
                "unstable": 7.0,
            },
        },
    }
def score_roi(annual_savings: Any) -> Dict[str, Any]:
    """
    Score the potential annual ROI (0-10).

    Rules on the annual savings, in EUR per year:
      - 10 if savings > 100k
      - 5 if savings are between 10k and 100k
      - 0 if savings < 10k

    If the savings figure is missing, the dimension is reported as not
    computed.
    """
    if annual_savings is None or _is_nan(annual_savings):
        return {
            "score": None,
            "computed": False,
            "reason": "sin_datos_ahorro",
            "details": {
                "annual_savings_eur": None,
            },
        }

    savings = float(annual_savings)
    if savings > 100_000:
        outcome = (10.0, "roi_alto")
    elif savings >= 10_000:
        outcome = (5.0, "roi_medio")
    else:
        outcome = (0.0, "roi_bajo")
    score, reason = outcome

    return {
        "score": score,
        "computed": True,
        "reason": reason,
        "details": {
            "annual_savings_eur": savings,
            "thresholds_eur": {
                "high": 100_000,
                "medium": 10_000,
            },
        },
    }
def classify_agentic_score(score: Optional[float]) -> Dict[str, Any]:
    """
    Map the final score to its classification (aligned with the frontend).

      - score >= 6:       COPILOT  (ready for Copilot)
      - 4 <= score < 6:   OPTIMIZE (optimise first)
      - score < 4:        HUMAN    (requires human handling)

    A score of None (no dimension was available) yields NO_DATA.
    Returns a dict with the keys "label", "emoji" and "description".
    """
    if score is None:
        return {
            "label": "NO_DATA",
            "emoji": "",
            "description": (
                "No se ha podido calcular el Agentic Readiness Score porque "
                "ninguna de las dimensiones tenía datos suficientes."
            ),
        }

    # (inclusive lower bound, label, emoji, description), highest tier first.
    tiers = (
        (
            6.0,
            "COPILOT",
            "🤖",
            "Listo para Copilot. Procesos con predictibilidad y simplicidad "
            "suficientes para asistencia IA (sugerencias en tiempo real, autocompletado).",
        ),
        (
            4.0,
            "OPTIMIZE",
            "🔧",
            "Optimizar primero. Estandarizar procesos y reducir variabilidad "
            "antes de implementar asistencia IA.",
        ),
    )
    for floor, label, emoji, description in tiers:
        if score >= floor:
            return {"label": label, "emoji": emoji, "description": description}

    return {
        "label": "HUMAN",
        "emoji": "👤",
        "description": (
            "Requiere gestión humana. Procesos complejos o variables que "
            "necesitan intervención humana antes de considerar automatización."
        ),
    }
# =========================
# Clase principal
# =========================
class AgenticScorer:
    """
    Compute the Agentic Readiness Score from aggregated results
    (results.json) and write the outcome to agentic_readiness.json in the
    same folder.
    """

    def __init__(
        self,
        input_filename: str = "results.json",
        output_filename: str = "agentic_readiness.json",
    ) -> None:
        """
        Args:
            input_filename: name of the aggregated-results file to read.
            output_filename: name of the file the score is written to.
        """
        self.input_filename = input_filename
        self.output_filename = output_filename
        # Weight of each dimension before renormalisation.
        self.base_weights: Dict[str, float] = {
            "repetitividad": 0.25,
            "predictibilidad": 0.20,
            "estructuracion": 0.15,
            "complejidad": 0.15,
            "estabilidad": 0.10,
            "roi": 0.15,
        }

    # --------- IO helpers ---------
    def load_results(self, folder_path: Union[str, Path]) -> Dict[str, Any]:
        """
        Read and parse the input JSON file from folder_path.

        Raises:
            FileNotFoundError: if the input file does not exist there.
        """
        folder = Path(folder_path)
        input_path = folder / self.input_filename
        if not input_path.exists():
            raise FileNotFoundError(
                f"No se ha encontrado el archivo de entrada '{self.input_filename}' "
                f"en la carpeta: {folder}"
            )
        with input_path.open("r", encoding="utf-8") as f:
            return json.load(f)

    def save_agentic_readiness(self, folder_path: Union[str, Path], result: Dict[str, Any]) -> Path:
        """Write result as indented UTF-8 JSON into folder_path; return the file path."""
        folder = Path(folder_path)
        output_path = folder / self.output_filename
        with output_path.open("w", encoding="utf-8") as f:
            json.dump(result, f, ensure_ascii=False, indent=2)
        return output_path

    # --------- Core computation ---------
    def compute_from_data(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Compute the Agentic Readiness Score from a results dict.

        Tolerant to missing data: a section that is absent, or present
        with a null value, is treated as empty, and the weights are
        renormalised over the dimensions whose sub-score has
        `computed = True`.

        Pipeline compatibility (per the original notes; the actual
        conversion is done by _normalize_numeric_sequence): both the old
        format
            "volume_by_skill": [10, 20, 30]
        and the new one
            "volume_by_skill": {"labels": [...], "values": [10, 20, 30]}
        are expected to be accepted.

        Returns:
            A dict with a single "agentic_readiness" key holding the final
            score (None when no dimension could be computed), its
            classification, the weights and every sub-score.
        """
        # `or {}` also covers a key that exists with a null value;
        # `data.get(key, {})` would hand back None and break the `.get`
        # calls below.
        volumetry = data.get("volumetry") or {}
        op = data.get("operational_performance") or {}
        econ = data.get("economy_costs") or {}

        # Normalise the accepted input formats here.
        volume_by_skill = _normalize_numeric_sequence(
            volumetry.get("volume_by_skill")
        )
        channel_distribution_pct = _normalize_numeric_sequence(
            volumetry.get("channel_distribution_pct")
        )
        peak_offpeak_ratio = volumetry.get("peak_offpeak_ratio")
        aht_ratio = _get_nested(op, "aht_distribution", "p90_p50_ratio")
        escalation_rate = op.get("escalation_rate")
        annual_savings = _get_nested(econ, "potential_savings", "annual_savings")

        # --- Sub-scores (each one decides whether it is 'computed') ---
        sub_scores = {
            "repetitividad": score_repetitividad(volume_by_skill),
            "predictibilidad": score_predictibilidad(aht_ratio, escalation_rate),
            "estructuracion": score_estructuracion(channel_distribution_pct),
            "complejidad": score_complejidad(aht_ratio, escalation_rate),
            "estabilidad": score_estabilidad(peak_offpeak_ratio),
            "roi": score_roi(annual_savings),
        }

        # --- Renormalise the weights over the available dimensions ---
        effective_weights: Dict[str, float] = {
            name: base_w
            for name, base_w in self.base_weights.items()
            if sub_scores.get(name, {}).get("computed")
        }
        total_effective_weight = sum(effective_weights.values())
        if total_effective_weight > 0:
            normalized_weights = {
                name: w / total_effective_weight
                for name, w in effective_weights.items()
            }
        else:
            normalized_weights = {}

        # --- Final score ---
        final_score: Optional[float]
        if not normalized_weights:
            final_score = None
        else:
            acc = 0.0
            for name, dim in sub_scores.items():
                if not dim.get("computed"):
                    continue
                w = normalized_weights.get(name, 0.0)
                acc += (dim.get("score") or 0.0) * w
            final_score = round(acc, 2)

        classification = classify_agentic_score(final_score)

        return {
            "agentic_readiness": {
                "version": "1.0",
                "final_score": final_score,
                "classification": classification,
                "weights": {
                    # Copy so that editing the result cannot alter the
                    # scorer's own configuration.
                    "base_weights": dict(self.base_weights),
                    "normalized_weights": normalized_weights,
                },
                "sub_scores": sub_scores,
                "metadata": {
                    "source_module": "agentic_score.py",
                    "notes": (
                        "Modelo simplificado basado en KPIs agregados. "
                        "Renormaliza los pesos cuando faltan dimensiones."
                    ),
                },
            }
        }

    def compute_and_return(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Compute the Agentic Readiness directly from a Python dict, with no
        folders or files involved.
        """
        return self.compute_from_data(data)

    def run_on_folder(self, folder_path: Union[str, Path]) -> Dict[str, Any]:
        """
        Typical pipeline entry point:
          - reads the input file from the folder
          - computes the Agentic Readiness
          - writes the output file into the same folder
          - returns the result dict
        """
        data = self.load_results(folder_path)
        result = self.compute_from_data(data)
        self.save_agentic_readiness(folder_path, result)
        return result
# =========================
# CLI opcional
# =========================
def main(argv: List[str]) -> None:
    """
    Command-line entry point.

    Expects the results folder as argv[1]. Prints a usage message to
    stderr and exits with status 1 when it is missing, and does the same
    with an error message when processing the folder fails. On success
    the full result is printed as JSON, followed by a one-line summary
    when a final score and label are available.
    """
    if len(argv) < 2:
        usage = (
            "Uso: python agentic_score.py <carpeta_resultados>\n"
            "La carpeta debe contener un 'results.json'. Se generará un "
            "'agentic_readiness.json' en la misma carpeta."
        )
        print(usage, file=sys.stderr)
        sys.exit(1)

    folder = argv[1]
    scorer = AgenticScorer()
    try:
        result = scorer.run_on_folder(folder)
    except Exception as e:
        print(f"Error al procesar la carpeta '{folder}': {e}", file=sys.stderr)
        sys.exit(1)

    # For convenience the final score is also shown on the console.
    readiness = result.get("agentic_readiness", {})
    print(json.dumps(result, ensure_ascii=False, indent=2))

    final_score = readiness.get("final_score")
    classification = readiness.get("classification", {})
    label = classification.get("label")
    emoji = classification.get("emoji")
    if final_score is None or not label:
        return
    print(f"\nAgentic Readiness Score: {final_score} {emoji} ({label})")


if __name__ == "__main__":
    main(sys.argv)

View File

@@ -0,0 +1,55 @@
"""
beyond_metrics package
======================
Public layer of the BeyondMetrics system.
It exposes:
- Dimensions (volumetry, operational performance, ...)
- The main pipeline
- IO connectors (local, S3, ...)
"""
from .dimensions import (
VolumetriaMetrics,
OperationalPerformanceMetrics,
SatisfactionExperienceMetrics,
EconomyCostMetrics,
)
from .pipeline import (
BeyondMetricsPipeline,
build_pipeline,
load_dimensions_config, # optional, but handy
)
from .io import (
DataSource,
ResultsSink,
LocalDataSource,
LocalResultsSink,
S3DataSource,
S3ResultsSink,
# if Google Drive connectors have been added, they can be exposed here too:
# GoogleDriveDataSource,
# GoogleDriveResultsSink,
)
__all__ = [
# Dimensions
"VolumetriaMetrics",
"OperationalPerformanceMetrics",
"SatisfactionExperienceMetrics",
"EconomyCostMetrics",
# Pipeline
"BeyondMetricsPipeline",
"build_pipeline",
"load_dimensions_config",
# IO
"DataSource",
"ResultsSink",
"LocalDataSource",
"LocalResultsSink",
"S3DataSource",
"S3ResultsSink",
# "GoogleDriveDataSource",
# "GoogleDriveResultsSink",
]

View File

@@ -0,0 +1,310 @@
from __future__ import annotations
import json
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Optional, Sequence
from reportlab.lib.pagesizes import A4
from reportlab.pdfgen import canvas
from reportlab.lib.utils import ImageReader
from openai import OpenAI
# System prompt used when ReportAgentConfig does not override it.
DEFAULT_SYSTEM_PROMPT = (
    "Eres un consultor experto en contact centers. "
    "Vas a recibir resultados analíticos de un sistema de métricas "
    "(BeyondMetrics) en formato JSON. Tu tarea es generar un informe claro, "
    "accionable y orientado a negocio, destacando los principales hallazgos, "
    "riesgos y oportunidades de mejora."
)


@dataclass
class ReportAgentConfig:
    """
    Basic configuration of the report agent.

    Attributes:
        openai_api_key: OpenAI API key. May be passed explicitly or read
            from the OPENAI_API_KEY environment variable.
        model: chat model to use, e.g. 'gpt-4.1-mini' or similar.
        system_prompt: system prompt that controls the style of the report.
    """

    openai_api_key: Optional[str] = None
    model: str = "gpt-4.1-mini"
    system_prompt: str = DEFAULT_SYSTEM_PROMPT
class BeyondMetricsReportAgent:
    """
    Minimal report agent that:
      1) Reads the results JSON of a BeyondMetrics run.
      2) Builds a prompt from those results.
      3) Calls the OpenAI chat completions API to generate the report text.
      4) Writes the report to a PDF on disk, embedding the PNG images found
         in the run folder as an annex.

    MVP: focused on text plus embedded figures.
    """

    def __init__(self, config: Optional["ReportAgentConfig"] = None) -> None:
        """
        Args:
            config: agent configuration; a default one is created when omitted.

        Raises:
            RuntimeError: if no API key is found in the config or in the
                OPENAI_API_KEY environment variable.
        """
        self.config = config or ReportAgentConfig()
        api_key = self.config.openai_api_key or os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise RuntimeError(
                "Falta la API key de OpenAI. "
                "Pásala en ReportAgentConfig(openai_api_key=...) o "
                "define la variable de entorno OPENAI_API_KEY."
            )
        self._client = OpenAI(api_key=api_key)

    # ------------------------------------------------------------------
    # Main public API
    # ------------------------------------------------------------------
    def generate_pdf_report(
        self,
        run_base: str,
        output_pdf_path: Optional[str] = None,
        extra_user_prompt: str = "",
    ) -> str:
        """
        Generate a PDF report from a results folder.

        Args:
            run_base: base folder of the run. It must contain
                'results.json'; any '*.png' files directly inside it are
                embedded as figures.
            output_pdf_path: full path of the output PDF. When None,
                'beyondmetrics_report.pdf' is created inside run_base.
            extra_user_prompt: additional text appended to the request
                (e.g. "emphasise efficiency and SLA").

        Returns:
            The path of the generated PDF.

        Raises:
            FileNotFoundError: if 'results.json' is missing from run_base.
        """
        run_dir = Path(run_base)
        results_json = run_dir / "results.json"
        if not results_json.exists():
            raise FileNotFoundError(
                f"No se ha encontrado {results_json}. "
                "Asegúrate de ejecutar primero el pipeline."
            )

        # 1) Read the results JSON.
        with results_json.open("r", encoding="utf-8") as f:
            results_data: Dict[str, Any] = json.load(f)

        # 2) Collect the generated images, in sorted order.
        image_files = sorted(run_dir.glob("*.png"))

        # 3) Build the user prompt.
        user_prompt = self._build_user_prompt(
            results=results_data,
            image_files=[p.name for p in image_files],
            extra_user_prompt=extra_user_prompt,
        )

        # 4) Ask the model for the report text.
        report_text = self._call_chatgpt(user_prompt)

        # 5) Write the PDF with the text plus the embedded images.
        if output_pdf_path is None:
            output_pdf_path = str(run_dir / "beyondmetrics_report.pdf")
        self._write_pdf(output_pdf_path, report_text, image_files)
        return output_pdf_path

    # ------------------------------------------------------------------
    # Prompt construction
    # ------------------------------------------------------------------
    def _build_user_prompt(
        self,
        results: Dict[str, Any],
        image_files: Sequence[str],
        extra_user_prompt: str = "",
    ) -> str:
        """
        Build the user message sent to the model.

        For the MVP the whole results JSON is serialised into the prompt;
        it could be summarised later if the JSON grows too large.
        """
        results_str = json.dumps(results, indent=2, ensure_ascii=False)

        if image_files:
            images_section = "Imágenes generadas en la ejecución:\n" + "\n".join(
                f"- {name}" for name in image_files
            )
        else:
            images_section = "No se han generado imágenes en esta ejecución."

        extra = (
            f"\n\nInstrucciones adicionales del usuario:\n{extra_user_prompt}"
            if extra_user_prompt
            else ""
        )

        return (
            "A continuación te proporciono los resultados de una ejecución de BeyondMetrics "
            "en formato JSON. Debes elaborar un INFORME EJECUTIVO para un cliente de "
            "contact center. El informe debe incluir:\n"
            "- Resumen ejecutivo en lenguaje de negocio.\n"
            "- Principales hallazgos por dimensión.\n"
            "- Riesgos o problemas detectados.\n"
            "- Recomendaciones accionables.\n\n"
            "Resultados (JSON):\n"
            f"{results_str}\n\n"
            f"{images_section}"
            f"{extra}"
        )

    # ------------------------------------------------------------------
    # Model call
    # ------------------------------------------------------------------
    def _call_chatgpt(self, user_prompt: str) -> str:
        """
        Send the system and user prompts to the configured model and
        return the text of the first choice.

        Raises:
            RuntimeError: if the response content is not a string.
        """
        resp = self._client.chat.completions.create(
            model=self.config.model,
            messages=[
                {"role": "system", "content": self.config.system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.3,
        )
        content = resp.choices[0].message.content
        if not isinstance(content, str):
            raise RuntimeError("La respuesta del modelo no contiene texto.")
        return content

    # ------------------------------------------------------------------
    # PDF writing (text + images)
    # ------------------------------------------------------------------
    @staticmethod
    def _wrap_line(line: str, max_chars: int = 100) -> list[str]:
        """
        Greedily split line into rows of whitespace-separated words.

        Each word is counted with one trailing space and a row is closed
        before the count would exceed max_chars. A single word longer
        than max_chars is kept whole on its own row. An empty or
        whitespace-only line yields an empty list.
        """
        parts: list[str] = []
        current: list[str] = []
        count = 0
        for word in line.split():
            # Only flush a row that actually holds words; otherwise an
            # over-long first word would emit a spurious empty row.
            if current and count + len(word) + 1 > max_chars:
                parts.append(" ".join(current))
                current = []
                count = 0
            current.append(word)
            count += len(word) + 1
        if current:
            parts.append(" ".join(current))
        return parts

    def _write_pdf(
        self,
        output_path: str,
        text: str,
        image_paths: Sequence[Path],
    ) -> None:
        """
        Create an A4 PDF with:
          1) The report text on the first pages (blank lines in the text
             are kept as vertical spacing).
          2) An annex section where the given images are embedded, scaled
             to fit the space left on the page.

        An image that cannot be loaded is replaced by an error line in the
        PDF instead of aborting the whole report.
        """
        output_path = str(output_path)
        c = canvas.Canvas(output_path, pagesize=A4)
        width, height = A4
        margin_x = 50
        margin_y = 50
        max_width = width - 2 * margin_x
        line_height = 14
        c.setFont("Helvetica", 11)

        # --- Main text ---
        y = height - margin_y
        for raw_line in text.splitlines():
            # A blank source line wraps to nothing; keep it as one empty
            # row so paragraph breaks survive in the PDF.
            wrapped_lines = self._wrap_line(raw_line) or [""]
            for line in wrapped_lines:
                if y < margin_y:
                    c.showPage()
                    c.setFont("Helvetica", 11)
                    y = height - margin_y
                if line:
                    c.drawString(margin_x, y, line)
                y -= line_height

        # --- Annex: images as figures ---
        if image_paths:

            def start_annex_page(heading: str) -> float:
                # Open a new page, draw its heading and return the y
                # coordinate where content starts.
                c.showPage()
                c.setFont("Helvetica-Bold", 14)
                c.drawString(margin_x, height - margin_y, heading)
                c.setFont("Helvetica", 11)
                return height - margin_y - 2 * line_height

            current_y = start_annex_page("Anexo: Figuras")
            for img_path in image_paths:
                # Move to a new page when too little room is left.
                available_height = current_y - margin_y
                if available_height < 100:  # minimum usable space
                    current_y = start_annex_page("Anexo: Figuras (cont.)")
                    available_height = current_y - margin_y

                # Figure title.
                title = f"Figura: {img_path.name}"
                c.drawString(margin_x, current_y, title)
                current_y -= line_height

                # Load the image and scale it to the available box.
                try:
                    img = ImageReader(str(img_path))
                    iw, ih = img.getSize()
                    max_img_height = available_height - 2 * line_height
                    scale = min(max_width / iw, max_img_height / ih)
                    if scale <= 0:
                        scale = 1.0  # fallback
                    draw_w = iw * scale
                    draw_h = ih * scale
                    x = margin_x
                    y_img = current_y - draw_h
                    c.drawImage(
                        img,
                        x,
                        y_img,
                        width=draw_w,
                        height=draw_h,
                        preserveAspectRatio=True,
                        mask="auto",
                    )
                    current_y = y_img - 2 * line_height
                except Exception as e:
                    # Best effort: report the failure inside the PDF.
                    err_msg = f"No se pudo cargar la imagen {img_path.name}: {e}"
                    c.drawString(margin_x, current_y, err_msg)
                    current_y -= 2 * line_height

        c.save()

View File

@@ -0,0 +1,27 @@
{
"dimensions": {
"volumetry": {
"class": "beyond_metrics.VolumetriaMetrics",
"enabled": true,
"metrics": [
"volume_by_channel",
"volume_by_skill"
]
},
"operational_performance": {
"class": "beyond_metrics.dimensions.OperationalPerformance.OperationalPerformanceMetrics",
"enabled": false,
"metrics": []
},
"customer_satisfaction": {
"class": "beyond_metrics.dimensions.SatisfactionExperience.SatisfactionExperienceMetrics",
"enabled": false,
"metrics": []
},
"economy_costs": {
"class": "beyond_metrics.dimensions.EconomyCost.EconomyCostMetrics",
"enabled": false,
"metrics": []
}
}
}

View File

@@ -0,0 +1,58 @@
{
"dimensions": {
"volumetry": {
"class": "beyond_metrics.VolumetriaMetrics",
"enabled": true,
"metrics": [
"volume_by_channel",
"volume_by_skill",
"channel_distribution_pct",
"skill_distribution_pct",
"heatmap_24x7",
"monthly_seasonality_cv",
"peak_offpeak_ratio",
"concentration_top20_skills_pct"
]
},
"operational_performance": {
"class": "beyond_metrics.dimensions.OperationalPerformance.OperationalPerformanceMetrics",
"enabled": true,
"metrics": [
"aht_distribution",
"talk_hold_acw_p50_by_skill",
"metrics_by_skill",
"fcr_rate",
"escalation_rate",
"abandonment_rate",
"high_hold_time_rate",
"recurrence_rate_7d",
"repeat_channel_rate",
"occupancy_rate",
"performance_score"
]
},
"customer_satisfaction": {
"class": "beyond_metrics.dimensions.SatisfactionExperience.SatisfactionExperienceMetrics",
"enabled": true,
"metrics": [
"csat_global",
"csat_avg_by_skill_channel",
"nps_avg_by_skill_channel",
"ces_avg_by_skill_channel",
"csat_aht_correlation",
"csat_aht_skill_summary"
]
},
"economy_costs": {
"class": "beyond_metrics.dimensions.EconomyCost.EconomyCostMetrics",
"enabled": true,
"metrics": [
"cpi_by_skill_channel",
"annual_cost_by_skill_channel",
"cost_breakdown",
"inefficiency_cost_by_skill_channel",
"potential_savings"
]
}
}
}

View File

@@ -0,0 +1,494 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Dict, List, Optional, Any
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.axes import Axes
REQUIRED_COLUMNS_ECON: List[str] = [
"interaction_id",
"datetime_start",
"queue_skill",
"channel",
"duration_talk",
"hold_time",
"wrap_up_time",
]
@dataclass
class EconomyConfig:
    """
    Manually supplied parameters for the Economy & Costs dimension.

    Attributes:
        labor_cost_per_hour: fully loaded cost of one agent per hour.
        overhead_rate: variable overhead as a fraction of labor cost
            (e.g. 0.1 = 10% on top of labor).
        tech_costs_annual: yearly technology cost (licences, infra, ...).
        automation_cpi: cost per automated interaction (e.g. 0.15 EUR).
        automation_volume_share: share of the volume that can be
            automated (0-1).
        automation_success_rate: success rate of the automation (0-1).
        customer_segments: optional mapping skill -> segment
            ("high"/"medium"/"low"), meant for future per-segment ROI
            insights.
    """

    labor_cost_per_hour: float
    overhead_rate: float = 0.0
    tech_costs_annual: float = 0.0
    automation_cpi: Optional[float] = None
    automation_volume_share: float = 0.0
    automation_success_rate: float = 0.0
    customer_segments: Optional[Dict[str, str]] = None
@dataclass
class EconomyCostMetrics:
"""
DIMENSIÓN 4: ECONOMÍA y COSTES
Propósito:
- Cuantificar el COSTE actual (CPI, coste anual).
- Estimar el impacto de overhead y tecnología.
- Calcular un primer estimado de "coste de ineficiencia" y ahorro potencial.
Requiere:
- Columnas del dataset transaccional (ver REQUIRED_COLUMNS_ECON).
Inputs opcionales vía EconomyConfig:
- labor_cost_per_hour (obligatorio para cualquier cálculo de €).
- overhead_rate, tech_costs_annual, automation_*.
- customer_segments (para insights de ROI por segmento).
"""
df: pd.DataFrame
config: Optional[EconomyConfig] = None
def __post_init__(self) -> None:
    """Check the required columns first, then normalise the data frame."""
    self._validate_columns()
    self._prepare_data()
# ------------------------------------------------------------------ #
# Helpers internos
# ------------------------------------------------------------------ #
def _validate_columns(self) -> None:
    """
    Ensure every column listed in REQUIRED_COLUMNS_ECON exists in self.df.

    Raises:
        ValueError: naming the missing columns, if any.
    """
    available = self.df.columns
    missing = [name for name in REQUIRED_COLUMNS_ECON if name not in available]
    if not missing:
        return
    raise ValueError(
        f"Faltan columnas obligatorias para EconomyCostMetrics: {missing}"
    )
def _prepare_data(self) -> None:
    """
    Replace self.df with a normalised copy.

    - datetime_start is parsed to datetimes and the three duration
      columns to numbers; unparseable values become NaT / NaN.
    - queue_skill and channel are cast to str and stripped.
    - handle_time = talk + hold + wrap-up, with missing parts counted as 0.
    - _is_valid_for_cost flags the rows used for AHT/CPI: rows whose
      record_status (stripped, upper-cased) is "VALID", or every row
      when the column does not exist (legacy data).
    """
    frame = self.df.copy()
    frame["datetime_start"] = pd.to_datetime(frame["datetime_start"], errors="coerce")

    duration_cols = ("duration_talk", "hold_time", "wrap_up_time")
    for name in duration_cols:
        frame[name] = pd.to_numeric(frame[name], errors="coerce")

    for name in ("queue_skill", "channel"):
        frame[name] = frame[name].astype(str).str.strip()

    # Handle time in seconds; a missing component contributes 0.
    talk = frame["duration_talk"].fillna(0)
    hold = frame["hold_time"].fillna(0)
    wrap_up = frame["wrap_up_time"].fillna(0)
    frame["handle_time"] = talk + hold + wrap_up

    # Only VALID records feed the cost calculations.
    if "record_status" not in frame.columns:
        # Legacy data without record_status: keep everything.
        frame["_is_valid_for_cost"] = True
    else:
        status = frame["record_status"].astype(str).str.strip().str.upper()
        frame["record_status"] = status
        frame["_is_valid_for_cost"] = frame["record_status"].eq("VALID")

    self.df = frame
@property
def is_empty(self) -> bool:
    """Whether the prepared data frame has no data (pandas `DataFrame.empty`)."""
    frame = self.df
    return frame.empty
def _has_cost_config(self) -> bool:
    """Return True when a config is set and it carries a labor cost per hour."""
    cfg = self.config
    if cfg is None:
        return False
    return cfg.labor_cost_per_hour is not None
# ------------------------------------------------------------------ #
# KPI 1: CPI por canal/skill
# ------------------------------------------------------------------ #
def cpi_by_skill_channel(self) -> pd.DataFrame:
    """
    CPI (cost per interaction) by skill and channel.

    CPI = (labor cost per interaction + variable overhead) / 0.70
      - labor cost per interaction = labor_cost_per_hour * AHT in hours
      - variable overhead = overhead_rate * labor cost per interaction
      - 0.70 is the effective productivity factor, which accounts for
        non-productive agent time

    Rows with is_abandoned equal to True (when that column exists) and
    rows not flagged in _is_valid_for_cost are excluded from the AHT,
    for consistency with the frontend path.

    Returns:
        One row per (queue_skill, channel), with both keys as regular
        columns so the frontend can look skills up by name, plus
        aht_seconds, labor_cost, overhead_cost and cpi_total. An empty
        DataFrame when there is no cost config or no usable rows.
    """
    if not self._has_cost_config():
        return pd.DataFrame()
    cfg = self.config
    assert cfg is not None  # narrows Optional for the type checker

    records = self.df.copy()
    if records.empty:
        return pd.DataFrame()

    # Drop abandoned interactions (consistency with the frontend).
    if "is_abandoned" in records.columns:
        records = records[records["is_abandoned"].ne(True)]
    # Keep only records flagged as valid for cost.
    if "_is_valid_for_cost" in records.columns:
        records = records[records["_is_valid_for_cost"].eq(True)]
    if records.empty:
        return pd.DataFrame()

    # Mean handle time per skill/channel, in seconds.
    aht_sec = records.groupby(["queue_skill", "channel"])["handle_time"].mean()
    if aht_sec.empty:
        return pd.DataFrame()

    effective_productivity = 0.70
    labor_cost = cfg.labor_cost_per_hour * (aht_sec / 3600.0)
    overhead = labor_cost * cfg.overhead_rate
    cpi = (labor_cost + overhead) / effective_productivity

    table = pd.DataFrame(
        {
            "aht_seconds": aht_sec.round(2),
            "labor_cost": labor_cost.round(4),
            "overhead_cost": overhead.round(4),
            "cpi_total": cpi.round(4),
        }
    )
    # Expose queue_skill and channel as columns for the frontend lookup.
    return table.sort_index().reset_index()
# ------------------------------------------------------------------ #
# KPI 2: coste anual por skill/canal
# ------------------------------------------------------------------ #
def annual_cost_by_skill_channel(self) -> pd.DataFrame:
    """
    Annual cost by skill and channel.

    annual_cost = cpi_total * volume, where volume is the number of
    distinct interaction_id values of the sample in each group.

    Note: for simplicity the dataset is assumed to cover one year. To
    annualise a shorter sample (e.g. one month) a scaling factor could
    be added to EconomyConfig.

    Returns:
        The CPI table indexed by (queue_skill, channel) with the extra
        columns volume and annual_cost, or an empty DataFrame when no
        CPI table is available.
    """
    cpi_table = self.cpi_by_skill_channel()
    if cpi_table.empty:
        return pd.DataFrame()

    keys = ["queue_skill", "channel"]
    sample = self.df.copy()
    volume = sample.groupby(keys)["interaction_id"].nunique().rename("volume")

    # Index the CPI table like `volume` so the two can be joined.
    joined = cpi_table.set_index(keys).join(volume, how="left")
    joined = joined.fillna({"volume": 0})
    annual = joined["cpi_total"] * joined["volume"]
    joined["annual_cost"] = annual.round(2)
    return joined
# ------------------------------------------------------------------ #
# KPI 3: desglose de costes (labor / tech / overhead)
# ------------------------------------------------------------------ #
def cost_breakdown(self) -> Dict[str, float]:
    """
    Cost breakdown into labor, overhead and technology.

      - labor total    = sum over groups of labor_cost * volume
      - overhead total = sum over groups of overhead_cost * volume
      - tech total     = tech_costs_annual from the config

    Returns:
        The percentage of each component over the total (*_pct), the
        absolute amounts (*_annual) and total_annual, all rounded to two
        decimals. An empty dict when there is no cost config, no CPI
        table, or the total is not positive.
    """
    if not self._has_cost_config():
        return {}
    cfg = self.config
    assert cfg is not None

    cpi_table = self.cpi_by_skill_channel()
    if cpi_table.empty:
        return {}

    keys = ["queue_skill", "channel"]
    sample = self.df.copy()
    volume = sample.groupby(keys)["interaction_id"].nunique().rename("volume")

    # Index the CPI table like `volume` so the two can be joined.
    joined = cpi_table.set_index(keys).join(volume, how="left")
    joined = joined.fillna({"volume": 0})

    # Yearly amount of each component.
    annual_labor = (joined["labor_cost"] * joined["volume"]).sum()
    annual_overhead = (joined["overhead_cost"] * joined["volume"]).sum()
    annual_tech = cfg.tech_costs_annual
    total = annual_labor + annual_overhead + annual_tech
    if total <= 0:
        return {}

    def share(amount: Any) -> Any:
        # Percentage of the total, rounded to two decimals.
        return round(amount / total * 100, 2)

    return {
        "labor_pct": share(annual_labor),
        "overhead_pct": share(annual_overhead),
        "tech_pct": share(annual_tech),
        "labor_annual": round(annual_labor, 2),
        "overhead_annual": round(annual_overhead, 2),
        "tech_annual": round(annual_tech, 2),
        "total_annual": round(total, 2),
    }
# ------------------------------------------------------------------ #
# KPI 4: coste de ineficiencia (€ por variabilidad/escalación)
# ------------------------------------------------------------------ #
def inefficiency_cost_by_skill_channel(self) -> pd.DataFrame:
    """
    Rough estimate of the cost of inefficiency per skill/channel.

    For every (queue_skill, channel) pair:
      - AHT_p50 and AHT_p90 of ``handle_time`` (seconds).
      - delta = max(0, AHT_p90 - AHT_p50).
      - ineff_seconds = delta * volume * 0.4 (the 0.4 factor is the model's
        assumed share of affected interactions).
      - ineff_cost = ineff_seconds * (labor_cost / mean AHT).

    NOTE: this is an approximate model meant to give an order of magnitude.

    Returns:
        One row per pair with ``queue_skill``, ``channel``, ``aht_p50``,
        ``aht_p90``, ``volume``, ``ineff_seconds`` and ``ineff_cost``; an
        empty DataFrame when cost configuration, data or CPI are missing.
    """
    if not self._has_cost_config():
        return pd.DataFrame()

    config = self.config
    assert config is not None

    keys = ["queue_skill", "channel"]

    records = self.df.copy()
    # When the validity marker exists, keep only rows flagged as valid for
    # cost purposes.
    if "_is_valid_for_cost" in records.columns:
        records = records[records["_is_valid_for_cost"] == True]  # noqa: E712

    by_pair = records.groupby(keys)

    def _aht_median(values: pd.Series) -> float:
        return float(np.percentile(values.dropna(), 50))

    def _aht_p90(values: pd.Series) -> float:
        return float(np.percentile(values.dropna(), 90))

    summary = by_pair["handle_time"].agg(
        aht_p50=_aht_median,
        aht_p90=_aht_p90,
        volume="count",
    )
    if summary.empty:
        return pd.DataFrame()

    # The CPI table comes with the keys as plain columns, so index it by the
    # pair before joining.
    cpi = self.cpi_by_skill_channel()
    if cpi.empty:
        return pd.DataFrame()

    labor = cpi.set_index(keys)[["labor_cost"]]
    summary = summary.join(labor, how="left").fillna(0.0)

    spread = (summary["aht_p90"] - summary["aht_p50"]).clip(lower=0.0)
    affected_share = 0.4  # approximation
    wasted_seconds = spread * summary["volume"] * affected_share

    # labor_cost is the cost of one interaction; dividing by the mean AHT
    # approximates cost per second. A zero mean becomes NaN and then 0 so the
    # division never produces inf.
    summary["aht_mean"] = by_pair["handle_time"].mean()
    per_second = summary["labor_cost"] / summary["aht_mean"].replace(0, np.nan)
    per_second = per_second.fillna(0.0)

    summary["ineff_seconds"] = wasted_seconds.round(2)
    summary["ineff_cost"] = (wasted_seconds * per_second).round(2)

    # Expose queue_skill/channel as columns for lookups by the consumer.
    output_columns = ["aht_p50", "aht_p90", "volume", "ineff_seconds", "ineff_cost"]
    return summary[output_columns].reset_index()
# ------------------------------------------------------------------ #
# KPI 5: ahorro potencial anual por automatización
# ------------------------------------------------------------------ #
def potential_savings(self) -> Dict[str, Any]:
    """
    Potential annual savings from automation.

    Savings = (human CPI - automated CPI) * automatable volume * success rate

    where:
      - human CPI            = volume-weighted mean of ``cpi_total``
      - automated CPI        = ``config.automation_cpi``
      - automatable volume   = total volume * ``config.automation_volume_share``
      - success rate         = ``config.automation_success_rate``

    The CPI difference is floored at 0, so the result is never negative.

    Returns:
        A dict with ``cpi_humano``, ``cpi_automatizado``, ``volume_total``,
        ``volume_automatizable``, ``effective_volume`` and ``annual_savings``.
        An empty dict when there is no cost configuration, when any of the
        three automation parameters is missing (``None``) or the share/success
        rate is not positive, or when there is no volume.
    """
    if not self._has_cost_config():
        return {}

    cfg = self.config
    assert cfg is not None

    automation_cpi = cfg.automation_cpi
    volume_share = cfg.automation_volume_share
    success_rate = cfg.automation_success_rate

    # All three parameters are needed; a missing one means "no estimate"
    # rather than a TypeError from comparing None with a number.
    if automation_cpi is None or volume_share is None or success_rate is None:
        return {}
    if volume_share <= 0 or success_rate <= 0:
        return {}

    cpi_table = self.annual_cost_by_skill_channel()
    if cpi_table.empty:
        return {}

    total_volume = cpi_table["volume"].sum()
    if total_volume <= 0:
        return {}

    # Volume-weighted mean human CPI.
    weighted_cpi = (cpi_table["cpi_total"] * cpi_table["volume"]).sum() / total_volume

    volume_automatizable = total_volume * volume_share
    effective_volume = volume_automatizable * success_rate

    # Automation that is more expensive than a human yields no savings.
    delta_cpi = max(0.0, weighted_cpi - automation_cpi)
    annual_savings = delta_cpi * effective_volume

    return {
        "cpi_humano": round(weighted_cpi, 4),
        "cpi_automatizado": round(automation_cpi, 4),
        "volume_total": float(total_volume),
        "volume_automatizable": float(volume_automatizable),
        "effective_volume": float(effective_volume),
        "annual_savings": round(annual_savings, 2),
    }
# ------------------------------------------------------------------ #
# PLOTS
# ------------------------------------------------------------------ #
def plot_cost_waterfall(self) -> "Axes":
    """
    Waterfall chart of annual costs (labor, overhead, tech).

    Each bar starts where the previous one ended, so the top of the last bar
    is the total annual cost. When ``cost_breakdown()`` returns nothing, a
    placeholder Axes carrying a message is returned instead.

    Returns:
        The matplotlib Axes the chart was drawn on.
    """
    breakdown = self.cost_breakdown()
    if not breakdown:
        _, ax = plt.subplots()
        ax.text(0.5, 0.5, "Sin configuración de costes", ha="center", va="center")
        ax.set_axis_off()
        return ax

    labels = ["Labor", "Overhead", "Tech"]
    values = [
        breakdown["labor_annual"],
        breakdown["overhead_annual"],
        breakdown["tech_annual"],
    ]

    # Running total before each component: this is the baseline of its bar.
    bottoms = []
    running = 0.0
    for value in values:
        bottoms.append(running)
        running += value

    _, ax = plt.subplots(figsize=(8, 4))
    x = np.arange(len(labels))
    # The baselines were previously computed but never passed on, which drew
    # three independent bars from zero instead of a waterfall.
    ax.bar(x, values, bottom=bottoms)

    ax.set_xticks(x)
    ax.set_xticklabels(labels)
    ax.set_ylabel("€ anuales")
    ax.set_title("Desglose anual de costes")

    # Label each bar with its own amount, placed at the top of the bar.
    for idx, (value, base) in enumerate(zip(values, bottoms)):
        ax.text(idx, base + value, f"{value:,.0f}", ha="center", va="bottom")

    ax.grid(axis="y", alpha=0.3)
    return ax
def plot_cpi_by_channel(self) -> "Axes":
    """
    Bar chart of the volume-weighted mean CPI per channel.

    The weight of each skill/channel row is its number of distinct
    ``interaction_id`` values. When ``cpi_by_skill_channel()`` is empty, a
    placeholder Axes carrying a message is returned instead.

    Returns:
        The matplotlib Axes the chart was drawn on.
    """
    cpi = self.cpi_by_skill_channel()
    if cpi.empty:
        _, ax = plt.subplots()
        ax.text(0.5, 0.5, "Sin configuración de costes", ha="center", va="center")
        ax.set_axis_off()
        return ax

    keys = ["queue_skill", "channel"]

    interactions = self.df.copy()
    volume_by_pair = (
        interactions.groupby(keys)["interaction_id"].nunique().rename("volume")
    )

    # Index the CPI table by the pair so the join aligns with the volume
    # series; pairs without interactions get volume 0.
    costs = cpi.set_index(keys).join(volume_by_pair, how="left")
    costs = costs.fillna({"volume": 0})

    def _weighted_cpi(rows: pd.DataFrame) -> float:
        # Dividing by at least 1 keeps a channel with zero volume at 0
        # instead of producing a division by zero.
        weight = max(rows["volume"].sum(), 1)
        return (rows["cpi_total"] * rows["volume"]).sum() / weight

    per_channel = (
        costs.reset_index()
        .groupby("channel")
        .apply(_weighted_cpi)
        .rename("cpi_mean")
        .round(4)
    )

    _, ax = plt.subplots(figsize=(6, 4))
    per_channel.plot(kind="bar", ax=ax)

    ax.set_xlabel("Canal")
    ax.set_ylabel("CPI medio (€)")
    ax.set_title("Coste por interacción (CPI) por canal")
    ax.grid(axis="y", alpha=0.3)
    return ax

View File

@@ -0,0 +1,716 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Dict, List
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.axes import Axes
import math
# Columns that must be present in the input DataFrame; checked by
# OperationalPerformanceMetrics._validate_columns(), which raises ValueError
# listing any that are missing.
REQUIRED_COLUMNS_OP: List[str] = [
    "interaction_id",
    "datetime_start",
    "queue_skill",
    "channel",
    "duration_talk",
    "hold_time",
    "wrap_up_time",
    "agent_id",
    "transfer_flag",
]
@dataclass
class OperationalPerformanceMetrics:
    """
    Dimension: OPERATIONAL AND SERVICE PERFORMANCE.

    Purpose: measure the balance between speed (efficiency) and resolution
    quality, plus the variability of the service.

    Required columns:
      - interaction_id
      - datetime_start
      - queue_skill
      - channel
      - duration_talk (seconds)
      - hold_time (seconds)
      - wrap_up_time (seconds)
      - agent_id
      - transfer_flag (bool/int)

    Optional columns:
      - record_status -> only rows equal to "VALID" feed AHT / variability
      - fcr_real_flag -> FCR
      - is_abandoned / abandoned_flag / abandoned -> abandonment rate
      - repeat_call_7d / repeat_7d / is_repeat_7d -> per-skill "real" FCR
      - customer_id / caller_id -> recurrence and channel repetition
      - logged_time (seconds) -> occupancy_rate
      - is_resolved (bool/int) -> only coerced to bool here
    """

    df: pd.DataFrame

    # Benchmarks / normalisation parameters (adjust as needed)
    AHT_GOOD: float = 300.0  # 5 min
    AHT_BAD: float = 900.0  # 15 min
    VAR_RATIO_GOOD: float = 1.2  # P90/P50 ~1.2 -> very stable
    VAR_RATIO_BAD: float = 3.0  # P90/P50 >= 3 -> very unstable

    def __post_init__(self) -> None:
        self._validate_columns()
        self._prepare_data()

    # ------------------------------------------------------------------ #
    # Internal helpers
    # ------------------------------------------------------------------ #
    def _validate_columns(self) -> None:
        """Raise ValueError when any column of REQUIRED_COLUMNS_OP is missing."""
        missing = [c for c in REQUIRED_COLUMNS_OP if c not in self.df.columns]
        if missing:
            raise ValueError(
                f"Faltan columnas obligatorias para OperationalPerformanceMetrics: {missing}"
            )

    @staticmethod
    def _truthy_mask(col: pd.Series) -> pd.Series:
        """
        Turn a flag column into a boolean mask.

        Object columns are compared, case-insensitively and stripped, against
        a fixed list of "true" spellings; any other dtype is coerced to a
        number and is true when > 0 (non-numeric / missing counts as 0).
        """
        if col.dtype == "O":
            # NOTE(review): the empty string is in the accepted list, so blank
            # cells count as true. Kept as-is to preserve existing results;
            # confirm this is intended.
            return (
                col.astype(str)
                .str.strip()
                .str.lower()
                .isin(["true", "t", "1", "yes", "y", "si", ""])
            )
        return pd.to_numeric(col, errors="coerce").fillna(0) > 0

    @staticmethod
    def _first_present(df: pd.DataFrame, candidates: List[str]) -> "str | None":
        """Return the first name in *candidates* that is a column of *df*, else None."""
        for name in candidates:
            if name in df.columns:
                return name
        return None

    @staticmethod
    def _stringify_ids(ids: pd.Series) -> pd.Series:
        """
        Cast identifiers to str while keeping missing values missing.

        A plain ``astype(str)`` would turn NaN/None into the literal strings
        "nan"/"None", which makes every anonymous contact look like one and the
        same customer and defeats later ``dropna`` / ``isna`` checks.
        """
        return ids.astype(str).where(ids.notna())

    @staticmethod
    def _placeholder_axes(message: str) -> "Axes":
        """Return an empty Axes showing only *message*, used when there is no data."""
        _, ax = plt.subplots()
        ax.text(0.5, 0.5, message, ha="center", va="center")
        ax.set_axis_off()
        return ax

    def _prepare_data(self) -> None:
        """Normalise types and add the derived columns used by the metrics."""
        df = self.df.copy()

        # Types
        df["datetime_start"] = pd.to_datetime(df["datetime_start"], errors="coerce")
        for col in ["duration_talk", "hold_time", "wrap_up_time"]:
            df[col] = pd.to_numeric(df[col], errors="coerce")

        # Handle time = talk + hold + wrap-up, missing parts counted as 0
        df["handle_time"] = (
            df["duration_talk"].fillna(0)
            + df["hold_time"].fillna(0)
            + df["wrap_up_time"].fillna(0)
        )

        # Only rows whose record_status is exactly "VALID" are used for
        # AHT / variability; every other status is excluded.
        if "record_status" in df.columns:
            df["record_status"] = df["record_status"].astype(str).str.strip().str.upper()
            df["_is_valid_for_cv"] = df["record_status"] == "VALID"

            # Status breakdown, printed for debugging
            status_counts = df["record_status"].value_counts()
            valid_count = int(df["_is_valid_for_cv"].sum())
            print("[OperationalPerformance] Record status breakdown:")
            print(f" Total rows: {len(df)}")
            for status, count in status_counts.items():
                print(f" - {status}: {count}")
            print(f" VALID rows for AHT calculation: {valid_count}")
        else:
            # Legacy data without record_status: keep every row
            df["_is_valid_for_cv"] = True
            print(f"[OperationalPerformance] No record_status column - using all {len(df)} rows")

        # Basic normalisation
        df["queue_skill"] = df["queue_skill"].astype(str).str.strip()
        df["channel"] = df["channel"].astype(str).str.strip()
        df["agent_id"] = df["agent_id"].astype(str).str.strip()

        # Flags converted to bool when present
        for flag_col in ["is_resolved", "abandoned_flag", "transfer_flag"]:
            if flag_col in df.columns:
                df[flag_col] = df[flag_col].astype(int).astype(bool)

        # customer_id: prefer customer_id, fall back to caller_id
        if "customer_id" in df.columns:
            df["customer_id"] = self._stringify_ids(df["customer_id"])
        elif "caller_id" in df.columns:
            df["customer_id"] = self._stringify_ids(df["caller_id"])
        else:
            df["customer_id"] = None

        # logged_time is optional: always end up with a numeric column, NaN
        # when the input does not provide it
        df["logged_time"] = pd.to_numeric(df.get("logged_time", np.nan), errors="coerce")

        self.df = df

    @property
    def is_empty(self) -> bool:
        return self.df.empty

    # ------------------------------------------------------------------ #
    # AHT and variability
    # ------------------------------------------------------------------ #
    def aht_distribution(self) -> Dict[str, float]:
        """
        Return P10, P50 and P90 of handle time plus the P90/P50 ratio.

        Only rows marked valid by ``_prepare_data`` are used. Returns an empty
        dict when there is no such row; the ratio is NaN when P50 is 0.
        """
        valid = self.df[self.df["_is_valid_for_cv"] == True]  # noqa: E712
        ht = valid["handle_time"].dropna().astype(float)
        if ht.empty:
            return {}

        p10 = float(np.percentile(ht, 10))
        p50 = float(np.percentile(ht, 50))
        p90 = float(np.percentile(ht, 90))
        ratio = float(p90 / p50) if p50 > 0 else float("nan")

        return {
            "p10": round(p10, 2),
            "p50": round(p50, 2),
            "p90": round(p90, 2),
            "p90_p50_ratio": round(ratio, 3),
        }

    def talk_hold_acw_p50_by_skill(self) -> pd.DataFrame:
        """
        P50 of duration_talk, hold_time and wrap_up_time per skill.

        ``queue_skill`` is returned as a regular column (not the index) so
        consumers can look rows up by skill name.
        """

        def p50(values: pd.Series) -> float:
            values = values.dropna().astype(float)
            if values.empty:
                return float("nan")
            return float(np.percentile(values, 50))

        grouped = self.df.groupby("queue_skill")
        result = pd.DataFrame(
            {
                "talk_p50": grouped["duration_talk"].apply(p50),
                "hold_p50": grouped["hold_time"].apply(p50),
                "acw_p50": grouped["wrap_up_time"].apply(p50),
            }
        )
        return result.round(2).sort_index().reset_index()

    # ------------------------------------------------------------------ #
    # FCR, escalation, abandonment, recurrence, channel repetition
    # ------------------------------------------------------------------ #
    def fcr_rate(self) -> float:
        """
        FCR (First Contact Resolution), in % clamped to 0-100.

        Priority 1: share of rows whose ``fcr_real_flag`` is truthy.
        Priority 2: 100 - escalation_rate.
        Returns NaN when there are no rows or neither source is usable.
        """
        df = self.df
        total = len(df)
        if total == 0:
            return float("nan")

        if "fcr_real_flag" in df.columns:
            fcr_count = int(self._truthy_mask(df["fcr_real_flag"]).sum())
            fcr = (fcr_count / total) * 100.0
            return float(max(0.0, min(100.0, round(fcr, 2))))

        # Fallback is best-effort by design: any failure yields NaN
        try:
            esc = self.escalation_rate()
        except Exception:
            esc = float("nan")

        if esc is not None and not math.isnan(esc):
            fcr = 100.0 - esc
            return float(max(0.0, min(100.0, round(fcr, 2))))
        return float("nan")

    def escalation_rate(self) -> float:
        """% of interactions that were escalated (transfer_flag is True)."""
        df = self.df
        total = len(df)
        if total == 0:
            return float("nan")
        escalated = df["transfer_flag"].sum()
        return float(round(escalated / total * 100, 2))

    def abandonment_rate(self) -> float:
        """
        % of abandoned interactions.

        Uses the first existing column among is_abandoned, abandoned_flag and
        abandoned. Returns NaN when none exists or there are no rows.
        """
        df = self.df
        total = len(df)
        if total == 0:
            return float("nan")

        abandon_col = self._first_present(
            df, ["is_abandoned", "abandoned_flag", "abandoned"]
        )
        if abandon_col is None:
            return float("nan")

        abandoned = int(self._truthy_mask(df[abandon_col]).sum())
        return float(round(abandoned / total * 100, 2))

    def high_hold_time_rate(self, threshold_seconds: float = 60.0) -> float:
        """
        % of interactions whose hold_time exceeds *threshold_seconds*.

        Used as a complexity proxy: a long hold suggests the agent had to
        look something up. Missing hold times count as 0.
        """
        df = self.df
        total = len(df)
        if total == 0:
            return float("nan")
        hold_times = df["hold_time"].fillna(0)
        high_hold_count = (hold_times > threshold_seconds).sum()
        return float(round(high_hold_count / total * 100, 2))

    def recurrence_rate_7d(self) -> float:
        """
        % of customers that contact again in < 7 days for the SAME skill.

        For each customer + skill pair the contacts are ordered by
        datetime_start; a customer is recurrent when two consecutive contacts
        of the same pair are less than 7 days apart. The rate is recurrent
        customers over all identified customers. Contacts without a customer
        identifier or without a valid datetime are ignored.

        Returns NaN when no customer can be identified.
        """
        df = self.df.dropna(subset=["datetime_start"]).copy()

        if "customer_id" not in df.columns:
            if "caller_id" in df.columns:
                df["customer_id"] = df["caller_id"]
            else:
                return float("nan")

        df = df.dropna(subset=["customer_id"])
        if df.empty:
            return float("nan")

        df = df.sort_values(["customer_id", "queue_skill", "datetime_start"])

        # Gap to the previous contact of the same customer AND skill
        df["delta"] = df.groupby(["customer_id", "queue_skill"])["datetime_start"].diff()
        recurrence_mask = df["delta"] < pd.Timedelta(days=7)

        recurrent_customers = df.loc[recurrence_mask, "customer_id"].nunique()
        total_customers = df["customer_id"].nunique()
        if total_customers == 0:
            return float("nan")

        rate = recurrent_customers / total_customers * 100.0
        return float(round(rate, 2))

    def repeat_channel_rate(self) -> float:
        """
        % of recontacts (< 7 days) in which the customer uses the SAME channel.

        Returns NaN when no customer identifier is available or no customer
        has two contacts less than 7 days apart.
        """
        df = self.df.dropna(subset=["datetime_start"]).copy()
        if df["customer_id"].isna().all():
            return float("nan")

        # Anonymous contacts cannot be paired with anything
        df = df.dropna(subset=["customer_id"])
        df = df.sort_values(["customer_id", "datetime_start"])

        df["next_customer"] = df["customer_id"].shift(-1)
        df["next_datetime"] = df["datetime_start"].shift(-1)
        df["next_channel"] = df["channel"].shift(-1)

        same_customer = df["customer_id"] == df["next_customer"]
        within_7d = (df["next_datetime"] - df["datetime_start"]) < pd.Timedelta(days=7)
        recurrent_mask = same_customer & within_7d
        if not recurrent_mask.any():
            return float("nan")

        same_channel = df["channel"] == df["next_channel"]
        same_channel_recurrent = (recurrent_mask & same_channel).sum()
        total_recurrent = recurrent_mask.sum()
        return float(round(same_channel_recurrent / total_recurrent * 100, 2))

    # ------------------------------------------------------------------ #
    # Occupancy
    # ------------------------------------------------------------------ #
    def occupancy_rate(self) -> float:
        """
        Occupancy = sum(handle_time) / sum(logged_time) * 100.

        Returns NaN when logged_time is absent or sums to 0.
        """
        df = self.df
        if "logged_time" not in df.columns:
            return float("nan")

        logged = df["logged_time"].fillna(0)
        handle = df["handle_time"].fillna(0)
        total_logged = logged.sum()
        if total_logged == 0:
            return float("nan")

        occ = handle.sum() / total_logged
        return float(round(occ * 100, 2))

    # ------------------------------------------------------------------ #
    # Performance score 0-10
    # ------------------------------------------------------------------ #
    def performance_score(self) -> Dict[str, float]:
        """
        Compute a 0-10 score combining AHT, FCR, variability and other factors.

            score = 0.4 * (10 - AHT_norm)
                  + 0.3 * FCR_norm
                  + 0.2 * (10 - Var_norm)
                  + 0.1 * other_score

        where every ``*_norm`` is on a 0-10 scale. Returns ``{"score": NaN}``
        when no AHT distribution is available.
        """
        dist = self.aht_distribution()
        if not dist:
            return {"score": float("nan")}

        p50 = dist["p50"]
        ratio = dist["p90_p50_ratio"]

        # 0 (best) to 10 (worst)
        aht_norm = self._scale_to_0_10(p50, self.AHT_GOOD, self.AHT_BAD)

        # FCR % (0-100) mapped straight onto 0-10; unknown FCR scores 0
        fcr_pct = self.fcr_rate()
        fcr_norm = fcr_pct / 10.0 if not np.isnan(fcr_pct) else 0.0

        # 0 (stable) to 10 (unstable)
        var_norm = self._scale_to_0_10(ratio, self.VAR_RATIO_GOOD, self.VAR_RATIO_BAD)

        occ = self.occupancy_rate()
        esc = self.escalation_rate()
        other_score = self._compute_other_factors_score(occ, esc)

        score = (
            0.4 * (10.0 - aht_norm)
            + 0.3 * fcr_norm
            + 0.2 * (10.0 - var_norm)
            + 0.1 * other_score
        )
        score = max(0.0, min(10.0, score))

        return {
            "score": round(score, 2),
            "aht_norm": round(aht_norm, 2),
            "fcr_norm": round(fcr_norm, 2),
            "var_norm": round(var_norm, 2),
            "other_score": round(other_score, 2),
        }

    def _scale_to_0_10(self, value: float, good: float, bad: float) -> float:
        """
        Linearly map *value* so that *good* -> 0 and *bad* -> 10, saturating
        outside that range. NaN and ``good == bad`` return the neutral 5.0.
        """
        if np.isnan(value):
            return 5.0
        if good == bad:
            return 5.0

        if good < bad:
            # Lower is better
            if value <= good:
                return 0.0
            if value >= bad:
                return 10.0
            return 10.0 * (value - good) / (bad - good)

        # Higher is better
        if value >= good:
            return 0.0
        if value <= bad:
            return 10.0
        return 10.0 * (good - value) / (good - bad)

    def _compute_other_factors_score(self, occ_pct: float, esc_pct: float) -> float:
        """
        Score (0-10) averaging an occupancy score and an escalation score.

        Occupancy loses 2 points per 5 percentage points of distance from 80%
        (capped at 10); escalation goes linearly from 10 points at 0% to 0
        points at 40% or more. An unknown (NaN) input scores 5.
        """
        if np.isnan(occ_pct):
            occ_penalty = 5.0
        else:
            deviation = abs(occ_pct - 80.0)
            occ_penalty = min(10.0, deviation / 5.0 * 2.0)
        occ_score = max(0.0, 10.0 - occ_penalty)

        if np.isnan(esc_pct):
            esc_score = 5.0
        elif esc_pct <= 0:
            esc_score = 10.0
        elif esc_pct >= 40:
            esc_score = 0.0
        else:
            esc_score = 10.0 * (1.0 - esc_pct / 40.0)

        return (occ_score + esc_score) / 2.0

    # ------------------------------------------------------------------ #
    # Plots
    # ------------------------------------------------------------------ #
    def plot_aht_boxplot_by_skill(self) -> "Axes":
        """Boxplot of handle time per skill (outliers hidden)."""
        df = self.df.copy()
        if df.empty or "handle_time" not in df.columns:
            return self._placeholder_axes("Sin datos de AHT")

        df = df.dropna(subset=["handle_time"])
        if df.empty:
            return self._placeholder_axes("AHT no disponible")

        _, ax = plt.subplots(figsize=(8, 4))
        df.boxplot(column="handle_time", by="queue_skill", ax=ax, showfliers=False)

        ax.set_xlabel("Skill / Cola")
        ax.set_ylabel("AHT (segundos)")
        ax.set_title("Distribución de AHT por skill")
        # Remove the automatic "Boxplot grouped by ..." super-title
        plt.suptitle("")
        plt.xticks(rotation=45, ha="right")
        ax.grid(axis="y", alpha=0.3)
        return ax

    def plot_resolution_funnel_by_skill(self) -> "Axes":
        """
        Stacked bars of Talk + Hold + ACW (P50) per skill, to compare how the
        handle time is split in each skill.
        """
        p50 = self.talk_hold_acw_p50_by_skill()
        if p50.empty:
            return self._placeholder_axes("Sin datos para funnel")

        _, ax = plt.subplots(figsize=(10, 4))

        # The skill name is a column of p50 (its index is positional), so the
        # tick labels must come from that column.
        skills = p50["queue_skill"]
        talk = p50["talk_p50"]
        hold = p50["hold_p50"]
        acw = p50["acw_p50"]

        x = np.arange(len(skills))
        ax.bar(x, talk, label="Talk P50")
        ax.bar(x, hold, bottom=talk, label="Hold P50")
        ax.bar(x, acw, bottom=talk + hold, label="ACW P50")

        ax.set_xticks(x)
        ax.set_xticklabels(skills, rotation=45, ha="right")
        ax.set_ylabel("Segundos")
        ax.set_title("Funnel de resolución (P50) por skill")
        ax.legend()
        ax.grid(axis="y", alpha=0.3)
        return ax

    # ------------------------------------------------------------------ #
    # Per-skill metrics
    # ------------------------------------------------------------------ #
    def metrics_by_skill(self) -> List[Dict[str, Any]]:
        """
        Compute operational metrics per skill.

        Each dict contains:
          - skill, volume (number of interactions)
          - transfer_rate: % of interactions with transfer_flag True
          - abandonment_rate: % abandoned (0.0 when no abandonment column)
          - fcr_tecnico: 100 - transfer_rate
          - fcr_real: % with no transfer AND no 7-day repeat; equals
            fcr_tecnico when there is no repeat column
          - aht_mean / hold_time_mean: means over valid rows only
          - aht_total: mean handle time over ALL rows, for comparison
        """
        df = self.df
        if df.empty:
            return []

        abandon_col = self._first_present(
            df, ["is_abandoned", "abandoned_flag", "abandoned"]
        )
        repeat_col = self._first_present(
            df, ["repeat_call_7d", "repeat_7d", "is_repeat_7d"]
        )

        results: List[Dict[str, Any]] = []
        for skill, group in df.groupby("queue_skill"):
            total = len(group)
            if total == 0:
                continue

            has_transfer = "transfer_flag" in group.columns
            if has_transfer:
                transfer_count = group["transfer_flag"].sum()
                transfer_rate = float(round(transfer_count / total * 100, 2))
            else:
                transfer_rate = 0.0

            fcr_tecnico = float(round(100.0 - transfer_rate, 2))

            abandonment_rate = 0.0
            if abandon_col:
                abandoned = int(self._truthy_mask(group[abandon_col]).sum())
                abandonment_rate = float(round(abandoned / total * 100, 2))

            # Defaults to the technical FCR when there is no repeat data
            fcr_real = fcr_tecnico
            if repeat_col and has_transfer:
                repeat_mask = self._truthy_mask(group[repeat_col])
                resolved_mask = (~group["transfer_flag"]) & (~repeat_mask)
                fcr_real = float(round(resolved_mask.sum() / total * 100, 2))

            if "_is_valid_for_cv" in group.columns:
                valid_records = group[group["_is_valid_for_cv"]]
            else:
                valid_records = group

            if len(valid_records) > 0 and "handle_time" in valid_records.columns:
                aht_mean = float(round(valid_records["handle_time"].mean(), 2))
            else:
                aht_mean = 0.0

            if len(group) > 0 and "handle_time" in group.columns:
                aht_total = float(round(group["handle_time"].mean(), 2))
            else:
                aht_total = 0.0

            if len(valid_records) > 0 and "hold_time" in valid_records.columns:
                hold_time_mean = float(round(valid_records["hold_time"].mean(), 2))
            else:
                hold_time_mean = 0.0

            results.append(
                {
                    "skill": str(skill),
                    "volume": int(total),
                    "transfer_rate": transfer_rate,
                    "abandonment_rate": abandonment_rate,
                    "fcr_tecnico": fcr_tecnico,
                    "fcr_real": fcr_real,
                    "aht_mean": aht_mean,
                    "aht_total": aht_total,
                    "hold_time_mean": hold_time_mean,
                }
            )

        return results

View File

@@ -0,0 +1,318 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Dict, List, Any
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.axes import Axes
# Only columns from the "core" dataset are required; checked by
# SatisfactionExperienceMetrics._validate_columns(), which raises ValueError
# listing any that are missing.
REQUIRED_COLUMNS_SAT: List[str] = [
    "interaction_id",
    "datetime_start",
    "queue_skill",
    "channel",
    "duration_talk",
    "hold_time",
    "wrap_up_time",
]
@dataclass
class SatisfactionExperienceMetrics:
"""
Dimensión 3: SATISFACCIÓN y EXPERIENCIA
Todas las columnas de satisfacción (csat/nps/ces/aht) son OPCIONALES.
Si no están, las métricas que las usan devuelven vacío/NaN pero
nunca rompen el pipeline.
"""
df: pd.DataFrame
def __post_init__(self) -> None:
self._validate_columns()
self._prepare_data()
# ------------------------------------------------------------------ #
# Helpers
# ------------------------------------------------------------------ #
def _validate_columns(self) -> None:
missing = [c for c in REQUIRED_COLUMNS_SAT if c not in self.df.columns]
if missing:
raise ValueError(
f"Faltan columnas obligatorias para SatisfactionExperienceMetrics: {missing}"
)
def _prepare_data(self) -> None:
df = self.df.copy()
df["datetime_start"] = pd.to_datetime(df["datetime_start"], errors="coerce")
# Duraciones base siempre existen
for col in ["duration_talk", "hold_time", "wrap_up_time"]:
df[col] = pd.to_numeric(df[col], errors="coerce")
# Handle time
df["handle_time"] = (
df["duration_talk"].fillna(0)
+ df["hold_time"].fillna(0)
+ df["wrap_up_time"].fillna(0)
)
# csat_score opcional
df["csat_score"] = pd.to_numeric(df.get("csat_score", np.nan), errors="coerce")
# aht opcional: si existe columna explícita la usamos, si no usamos handle_time
if "aht" in df.columns:
df["aht"] = pd.to_numeric(df["aht"], errors="coerce")
else:
df["aht"] = df["handle_time"]
# NPS / CES opcionales
df["nps_score"] = pd.to_numeric(df.get("nps_score", np.nan), errors="coerce")
df["ces_score"] = pd.to_numeric(df.get("ces_score", np.nan), errors="coerce")
df["queue_skill"] = df["queue_skill"].astype(str).str.strip()
df["channel"] = df["channel"].astype(str).str.strip()
self.df = df
@property
def is_empty(self) -> bool:
return self.df.empty
# ------------------------------------------------------------------ #
# KPIs
# ------------------------------------------------------------------ #
def csat_avg_by_skill_channel(self) -> pd.DataFrame:
"""
CSAT promedio por skill/canal.
Si no hay csat_score, devuelve DataFrame vacío.
"""
df = self.df
if "csat_score" not in df.columns or df["csat_score"].notna().sum() == 0:
return pd.DataFrame()
df = df.dropna(subset=["csat_score"])
if df.empty:
return pd.DataFrame()
pivot = (
df.pivot_table(
index="queue_skill",
columns="channel",
values="csat_score",
aggfunc="mean",
)
.sort_index()
.round(2)
)
return pivot
def nps_avg_by_skill_channel(self) -> pd.DataFrame:
"""
NPS medio por skill/canal, si existe nps_score.
"""
df = self.df
if "nps_score" not in df.columns or df["nps_score"].notna().sum() == 0:
return pd.DataFrame()
df = df.dropna(subset=["nps_score"])
if df.empty:
return pd.DataFrame()
pivot = (
df.pivot_table(
index="queue_skill",
columns="channel",
values="nps_score",
aggfunc="mean",
)
.sort_index()
.round(2)
)
return pivot
def ces_avg_by_skill_channel(self) -> pd.DataFrame:
"""
CES medio por skill/canal, si existe ces_score.
"""
df = self.df
if "ces_score" not in df.columns or df["ces_score"].notna().sum() == 0:
return pd.DataFrame()
df = df.dropna(subset=["ces_score"])
if df.empty:
return pd.DataFrame()
pivot = (
df.pivot_table(
index="queue_skill",
columns="channel",
values="ces_score",
aggfunc="mean",
)
.sort_index()
.round(2)
)
return pivot
def csat_global(self) -> float:
"""
CSAT medio global (todas las interacciones).
Usa la columna opcional `csat_score`:
- Si no existe, devuelve NaN.
- Si todos los valores son NaN / vacíos, devuelve NaN.
"""
df = self.df
if "csat_score" not in df.columns:
return float("nan")
series = pd.to_numeric(df["csat_score"], errors="coerce").dropna()
if series.empty:
return float("nan")
mean = series.mean()
return float(round(mean, 2))
def csat_aht_correlation(self) -> Dict[str, Any]:
"""
Correlación Pearson CSAT vs AHT.
Si falta csat o aht, o no hay varianza, devuelve NaN y código adecuado.
"""
df = self.df
if "csat_score" not in df.columns or df["csat_score"].notna().sum() == 0:
return {"r": float("nan"), "n": 0.0, "interpretation_code": "sin_datos"}
if "aht" not in df.columns or df["aht"].notna().sum() == 0:
return {"r": float("nan"), "n": 0.0, "interpretation_code": "sin_datos"}
df = df.dropna(subset=["csat_score", "aht"]).copy()
n = len(df)
if n < 2:
return {"r": float("nan"), "n": float(n), "interpretation_code": "insuficiente"}
x = df["aht"].astype(float)
y = df["csat_score"].astype(float)
if x.std(ddof=1) == 0 or y.std(ddof=1) == 0:
return {"r": float("nan"), "n": float(n), "interpretation_code": "sin_varianza"}
r = float(np.corrcoef(x, y)[0, 1])
if r < -0.3:
interpretation = "negativo"
elif r > 0.3:
interpretation = "positivo"
else:
interpretation = "neutral"
return {"r": round(r, 3), "n": float(n), "interpretation_code": interpretation}
def csat_aht_skill_summary(self) -> pd.DataFrame:
"""
Resumen por skill con clasificación del "sweet spot".
Si falta csat o aht, devuelve DataFrame vacío.
"""
df = self.df
if df["csat_score"].notna().sum() == 0 or df["aht"].notna().sum() == 0:
return pd.DataFrame(columns=["csat_avg", "aht_avg", "classification"])
df = df.dropna(subset=["csat_score", "aht"]).copy()
if df.empty:
return pd.DataFrame(columns=["csat_avg", "aht_avg", "classification"])
grouped = df.groupby("queue_skill").agg(
csat_avg=("csat_score", "mean"),
aht_avg=("aht", "mean"),
)
aht_all = df["aht"].astype(float)
csat_all = df["csat_score"].astype(float)
aht_p40 = float(np.percentile(aht_all, 40))
aht_p60 = float(np.percentile(aht_all, 60))
csat_p40 = float(np.percentile(csat_all, 40))
csat_p60 = float(np.percentile(csat_all, 60))
def classify(row) -> str:
csat = row["csat_avg"]
aht = row["aht_avg"]
if aht <= aht_p40 and csat >= csat_p60:
return "ideal_automatizar"
if aht >= aht_p60 and csat >= csat_p40:
return "requiere_humano"
return "neutral"
grouped["classification"] = grouped.apply(classify, axis=1)
return grouped.round({"csat_avg": 2, "aht_avg": 2})
# ------------------------------------------------------------------ #
# Plots
# ------------------------------------------------------------------ #
def plot_csat_vs_aht_scatter(self) -> Axes:
    """
    Scatter plot of CSAT against AHT, one colour per skill.

    Returns:
        The Axes holding the scatter. When ``csat_score`` or ``aht`` is
        absent, or no row has both values, an Axes showing only a
        "no data" message is returned instead.
    """

    def _placeholder():
        # Single place that builds the "no data" figure.
        _, empty_ax = plt.subplots()
        empty_ax.text(0.5, 0.5, "Sin datos de CSAT/AHT", ha="center", va="center")
        empty_ax.set_axis_off()
        return empty_ax

    df = self.df
    # A missing column must produce the placeholder, not a KeyError.
    if "csat_score" not in df.columns or "aht" not in df.columns:
        return _placeholder()
    if df["csat_score"].notna().sum() == 0 or df["aht"].notna().sum() == 0:
        return _placeholder()

    df = df.dropna(subset=["csat_score", "aht"]).copy()
    if df.empty:
        return _placeholder()

    fig, ax = plt.subplots(figsize=(8, 5))
    for skill, sub in df.groupby("queue_skill"):
        ax.scatter(sub["aht"], sub["csat_score"], label=skill, alpha=0.7)
    ax.set_xlabel("AHT (segundos)")
    ax.set_ylabel("CSAT")
    ax.set_title("CSAT vs AHT por skill")
    ax.grid(alpha=0.3)
    # Legend is placed outside the plotting area, to the right.
    ax.legend(title="Skill", bbox_to_anchor=(1.05, 1), loc="upper left")
    # Lay out this specific figure rather than whichever one is "current".
    fig.tight_layout()
    return ax
def plot_csat_distribution(self) -> Axes:
    """
    Histogram of the CSAT scores (10 bins).

    Returns:
        The Axes with the histogram, or an Axes showing only a "no data"
        message when ``csat_score`` is absent or has no values.
    """
    frame = self.df
    scores = frame["csat_score"].dropna() if "csat_score" in frame.columns else None

    if scores is None or scores.empty:
        _, ax = plt.subplots()
        ax.text(0.5, 0.5, "Sin datos de CSAT", ha="center", va="center")
        ax.set_axis_off()
        return ax

    _, ax = plt.subplots(figsize=(6, 4))
    ax.hist(scores, bins=10, alpha=0.7)
    ax.set_title("Distribución de CSAT")
    ax.set_xlabel("CSAT")
    ax.set_ylabel("Frecuencia")
    ax.grid(axis="y", alpha=0.3)
    return ax

View File

@@ -0,0 +1,268 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import List
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.axes import Axes
# Columns that must be present in the input frame for VolumetriaMetrics.
REQUIRED_COLUMNS_VOLUMETRIA: List[str] = [
    "interaction_id",
    "datetime_start",
    "queue_skill",
    "channel",
]


@dataclass
class VolumetriaMetrics:
    """
    Volume metrics computed over an interactions DataFrame.

    Minimum required columns:
      - interaction_id
      - datetime_start
      - queue_skill
      - channel

    Any additional columns are ignored by these metrics.
    """

    df: pd.DataFrame

    def __post_init__(self) -> None:
        self._validate_columns()
        self._prepare_data()

    # ------------------------------------------------------------------ #
    # Internal helpers
    # ------------------------------------------------------------------ #
    def _validate_columns(self) -> None:
        """Raise ValueError listing every required column that is absent."""
        present = self.df.columns
        missing = [name for name in REQUIRED_COLUMNS_VOLUMETRIA if name not in present]
        if missing:
            raise ValueError(
                f"Faltan columnas obligatorias para VolumetriaMetrics: {missing}"
            )

    def _prepare_data(self) -> None:
        """Replace ``self.df`` with a normalised copy of the input frame."""
        prepared = self.df.copy()
        # Unparseable timestamps become NaT instead of raising.
        prepared["datetime_start"] = pd.to_datetime(
            prepared["datetime_start"], errors="coerce"
        )
        # Text columns are cast to str and stripped of surrounding whitespace.
        for text_col in ("queue_skill", "channel"):
            prepared[text_col] = prepared[text_col].astype(str).str.strip()
        self.df = prepared

    # ------------------------------------------------------------------ #
    # Convenience properties
    # ------------------------------------------------------------------ #
    @property
    def is_empty(self) -> bool:
        """True when the prepared frame has no rows."""
        return self.df.empty

    # ------------------------------------------------------------------ #
    # Numeric / tabular metrics
    # ------------------------------------------------------------------ #
    def volume_by_channel(self) -> pd.Series:
        """
        Number of distinct interactions per channel, largest first.
        """
        per_channel = self.df.groupby("channel")["interaction_id"].nunique()
        return per_channel.sort_values(ascending=False)

    def volume_by_skill(self) -> pd.Series:
        """
        Number of distinct interactions per skill / queue, largest first.
        """
        per_skill = self.df.groupby("queue_skill")["interaction_id"].nunique()
        return per_skill.sort_values(ascending=False)

    def channel_distribution_pct(self) -> pd.Series:
        """
        Percentage share of volume per channel, rounded to 2 decimals.
        """
        counts = self.volume_by_channel()
        grand_total = counts.sum()
        if grand_total == 0:
            return counts * 0.0
        share = counts / grand_total * 100
        return share.round(2)

    def skill_distribution_pct(self) -> pd.Series:
        """
        Percentage share of volume per skill, rounded to 2 decimals.
        """
        counts = self.volume_by_skill()
        grand_total = counts.sum()
        if grand_total == 0:
            return counts * 0.0
        share = counts / grand_total * 100
        return share.round(2)

    def heatmap_24x7(self) -> pd.DataFrame:
        """
        [day-of-week x hour] matrix with the number of distinct interactions.

        Day of week follows pandas: 0=Monday ... 6=Sunday. Rows without a
        valid ``datetime_start`` are left out.
        """
        days, hours = range(7), range(24)
        dated = self.df.dropna(subset=["datetime_start"]).copy()
        if dated.empty:
            # Nothing to count: still return the expected 7x24 grid of zeros.
            return pd.DataFrame(0, index=days, columns=hours)

        stamps = dated["datetime_start"]
        dated["dow"] = stamps.dt.dayofweek
        dated["hour"] = stamps.dt.hour

        table = dated.pivot_table(
            index="dow",
            columns="hour",
            values="interaction_id",
            aggfunc="nunique",
            fill_value=0,
        )
        # Fill in the days and hours that had no traffic at all.
        table = table.reindex(index=days, fill_value=0)
        return table.reindex(columns=hours, fill_value=0)

    def monthly_seasonality_cv(self) -> float:
        """
        Coefficient of variation of the monthly volume, in percent.

        CV = sample std / mean * 100. Returns NaN when there are no dated
        rows, fewer than two months of data, or the mean is zero.
        """
        dated = self.df.dropna(subset=["datetime_start"]).copy()
        if dated.empty:
            return float("nan")

        dated["year_month"] = dated["datetime_start"].dt.to_period("M")
        per_month = dated.groupby("year_month")["interaction_id"].nunique().astype(float)
        if len(per_month) < 2:
            return float("nan")

        average = per_month.mean()
        spread = per_month.std(ddof=1)
        if average == 0:
            return float("nan")
        return float(round(spread / average * 100, 2))

    def peak_offpeak_ratio(self) -> float:
        """
        Ratio between peak-hour and off-peak volume.

        Peak is defined as the hours 10:00-19:59; every other hour is
        off-peak. Returns inf when there is peak volume but no off-peak
        volume, and NaN when there is nothing to compare.
        """
        dated = self.df.dropna(subset=["datetime_start"]).copy()
        if dated.empty:
            return float("nan")

        dated["hour"] = dated["datetime_start"].dt.hour
        in_peak = dated["hour"].isin(list(range(10, 20)))
        ids = dated["interaction_id"]
        peak_count = ids[in_peak].nunique()
        valley_count = ids[~in_peak].nunique()

        if valley_count == 0:
            return float("inf") if peak_count > 0 else float("nan")
        return float(round(peak_count / valley_count, 3))

    def concentration_top20_skills_pct(self) -> float:
        """
        Percentage of volume held by the top 20% of skills (by interactions).

        At least one skill is always taken as the "top" group.
        """
        per_skill = self.volume_by_skill()
        skill_count = len(per_skill)
        if skill_count == 0:
            return float("nan")

        head_size = max(1, int(np.ceil(0.2 * skill_count)))
        head_volume = per_skill.head(head_size).sum()
        overall = per_skill.sum()
        if overall == 0:
            return float("nan")
        return float(round(head_volume / overall * 100, 2))

    # ------------------------------------------------------------------ #
    # Plots
    # ------------------------------------------------------------------ #
    def plot_heatmap_24x7(self) -> Axes:
        """
        Heatmap of volume by day of week (0-6) and hour (0-23).

        Returns the Axes so the caller can retrieve and save the figure.
        """
        grid = self.heatmap_24x7()
        _, ax = plt.subplots(figsize=(10, 4))
        image = ax.imshow(grid.values, aspect="auto", origin="lower")

        hour_ticks = range(24)
        ax.set_xticks(hour_ticks)
        ax.set_xticklabels([str(hour) for hour in hour_ticks])
        ax.set_yticks(range(7))
        ax.set_yticklabels(["L", "M", "X", "J", "V", "S", "D"])

        ax.set_xlabel("Hora del día")
        ax.set_ylabel("Día de la semana")
        ax.set_title("Volumen por día de la semana y hora")
        plt.colorbar(image, ax=ax, label="Nº interacciones")
        return ax

    def plot_channel_distribution(self) -> Axes:
        """
        Bar chart of volume per channel.
        """
        per_channel = self.volume_by_channel()
        _, ax = plt.subplots(figsize=(6, 4))
        per_channel.plot(kind="bar", ax=ax)
        ax.set_title("Volumen por canal")
        ax.set_xlabel("Canal")
        ax.set_ylabel("Nº interacciones")
        ax.grid(axis="y", alpha=0.3)
        return ax

    def plot_skill_pareto(self) -> Axes:
        """
        Simple Pareto of volume per skill (volume bars only).
        """
        per_skill = self.volume_by_skill()
        _, ax = plt.subplots(figsize=(10, 4))
        per_skill.plot(kind="bar", ax=ax)
        ax.set_title("Pareto de volumen por skill")
        ax.set_xlabel("Skill / Cola")
        ax.set_ylabel("Nº interacciones")
        ax.grid(axis="y", alpha=0.3)
        plt.xticks(rotation=45, ha="right")
        return ax

View File

@@ -0,0 +1,13 @@
from .Volumetria import VolumetriaMetrics
from .OperationalPerformance import OperationalPerformanceMetrics
from .SatisfactionExperience import SatisfactionExperienceMetrics
from .EconomyCost import EconomyCostMetrics, EconomyConfig
__all__ = [
# Dimensiones
"VolumetriaMetrics",
"OperationalPerformanceMetrics",
"SatisfactionExperienceMetrics",
"EconomyCostMetrics",
"EconomyConfig",
]

View File

@@ -0,0 +1,22 @@
from .base import DataSource, ResultsSink
from .local import LocalDataSource, LocalResultsSink
from .s3 import S3DataSource, S3ResultsSink
from .google_drive import (
GoogleDriveDataSource,
GoogleDriveConfig,
GoogleDriveResultsSink,
GoogleDriveSinkConfig,
)
__all__ = [
"DataSource",
"ResultsSink",
"LocalDataSource",
"LocalResultsSink",
"S3DataSource",
"S3ResultsSink",
"GoogleDriveDataSource",
"GoogleDriveConfig",
"GoogleDriveResultsSink",
"GoogleDriveSinkConfig",
]

View File

@@ -0,0 +1,36 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Any, Dict
import pandas as pd
from matplotlib.figure import Figure
class DataSource(ABC):
    """Read-side interface: loads CSV data into a DataFrame."""

    @abstractmethod
    def read_csv(self, path: str) -> pd.DataFrame:
        """
        Read one CSV and return its contents as a DataFrame.

        How ``path`` is interpreted is up to the implementation:
          - LocalDataSource: a path on the local file system
          - S3DataSource: an ``s3://bucket/key`` URI
        """
        raise NotImplementedError
class ResultsSink(ABC):
    """Write-side interface: persists results as JSON documents and images."""

    @abstractmethod
    def write_json(self, path: str, data: Dict[str, Any]) -> None:
        """Serialise the ``data`` dict as JSON at ``path``."""
        raise NotImplementedError

    @abstractmethod
    def write_figure(self, path: str, fig: Figure) -> None:
        """Save the matplotlib figure ``fig`` at ``path``."""
        raise NotImplementedError

View File

@@ -0,0 +1,160 @@
# beyond_metrics/io/google_drive.py
from __future__ import annotations
import io
import json
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, Dict, Any
import pandas as pd
from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload, MediaIoBaseUpload
from .base import DataSource, ResultsSink
# OAuth scopes passed to the service-account credentials when a Drive client
# is built with the full scope list (the results sink, and the data source
# when readonly=False). The read-only data source path requests only the
# first of these.
GDRIVE_SCOPES = ["https://www.googleapis.com/auth/drive.readonly",
"https://www.googleapis.com/auth/drive.file"]
def _extract_file_id(file_id_or_url: str) -> str:
"""
Acepta:
- un ID directo de Google Drive (ej: '1AbC...')
- una URL de Google Drive compartida
y devuelve siempre el file_id.
"""
if "http://" not in file_id_or_url and "https://" not in file_id_or_url:
return file_id_or_url.strip()
patterns = [
r"/d/([a-zA-Z0-9_-]{10,})", # https://drive.google.com/file/d/<ID>/view
r"id=([a-zA-Z0-9_-]{10,})", # https://drive.google.com/open?id=<ID>
]
for pattern in patterns:
m = re.search(pattern, file_id_or_url)
if m:
return m.group(1)
raise ValueError(f"No se pudo extraer un file_id de la URL de Google Drive: {file_id_or_url}")
# -------- DataSource --------
@dataclass
class GoogleDriveConfig:
    """Settings used to build the Drive client of the data source."""

    # Path to the service-account JSON key file.
    credentials_path: str
    # When set, the credentials are re-issued with this value as subject.
    impersonate_user: Optional[str] = None
class GoogleDriveDataSource(DataSource):
    """
    DataSource implementation that downloads CSV files from Google Drive.
    """

    def __init__(self, config: GoogleDriveConfig) -> None:
        self._config = config
        self._service = self._build_service(readonly=True)

    def _build_service(self, readonly: bool = True):
        """Build a Drive v3 client from the service-account key file."""
        if readonly:
            scopes = ["https://www.googleapis.com/auth/drive.readonly"]
        else:
            scopes = GDRIVE_SCOPES

        credentials = service_account.Credentials.from_service_account_file(
            self._config.credentials_path,
            scopes=scopes,
        )
        subject = self._config.impersonate_user
        if subject:
            credentials = credentials.with_subject(subject)
        return build("drive", "v3", credentials=credentials)

    def read_csv(self, path: str) -> pd.DataFrame:
        """Download the file named by ``path`` (id or URL) and parse it as CSV."""
        media_request = self._service.files().get_media(fileId=_extract_file_id(path))
        buffer = io.BytesIO()
        downloader = MediaIoBaseDownload(buffer, media_request)

        # next_chunk() reports completion through the second item it returns.
        finished = False
        while not finished:
            _, finished = downloader.next_chunk()

        buffer.seek(0)
        return pd.read_csv(buffer)
# -------- ResultsSink --------
@dataclass
class GoogleDriveSinkConfig:
    """Settings used to build the Drive client of the results sink."""

    # Path to the service-account JSON key file.
    credentials_path: str
    # Id of the Drive folder that receives every uploaded file.
    base_folder_id: str
    # When set, the credentials are re-issued with this value as subject.
    impersonate_user: Optional[str] = None
class GoogleDriveResultsSink(ResultsSink):
    """
    ResultsSink that uploads JSON documents and images to a Drive folder.

    Note: only the file name (basename of ``path``) is used. Passing
    'data/output/123/results.json' stores the file as 'results.json'
    directly inside ``base_folder_id``.
    """

    def __init__(self, config: GoogleDriveSinkConfig) -> None:
        self._config = config
        self._service = self._build_service()

    def _build_service(self):
        """Build a Drive v3 client using the full GDRIVE_SCOPES list."""
        credentials = service_account.Credentials.from_service_account_file(
            self._config.credentials_path,
            scopes=GDRIVE_SCOPES,
        )
        subject = self._config.impersonate_user
        if subject:
            credentials = credentials.with_subject(subject)
        return build("drive", "v3", credentials=credentials)

    def _upload_bytes(self, data: bytes, mime_type: str, target_path: str) -> str:
        """
        Upload an in-memory file to Drive and return the id of the new file.
        """
        metadata = {
            "name": Path(target_path).name,
            "parents": [self._config.base_folder_id],
        }
        payload = MediaIoBaseUpload(io.BytesIO(data), mimetype=mime_type, resumable=False)
        create_request = self._service.files().create(
            body=metadata,
            media_body=payload,
            fields="id",
        )
        return create_request.execute()["id"]

    def write_json(self, path: str, data: Dict[str, Any]) -> None:
        """Upload ``data`` as a UTF-8, 2-space indented JSON document."""
        encoded = json.dumps(data, ensure_ascii=False, indent=2).encode("utf-8")
        self._upload_bytes(encoded, "application/json", path)

    def write_figure(self, path: str, fig) -> None:
        """Render ``fig`` to PNG in memory and upload it."""
        from matplotlib.figure import Figure

        if not isinstance(fig, Figure):
            raise TypeError("write_figure espera un matplotlib.figure.Figure")

        png_buffer = io.BytesIO()
        fig.savefig(png_buffer, format="png", bbox_inches="tight")
        self._upload_bytes(png_buffer.getvalue(), "image/png", path)

View File

@@ -0,0 +1,57 @@
from __future__ import annotations
import json
import os
from typing import Any, Dict
import pandas as pd
from matplotlib.figure import Figure
from .base import DataSource, ResultsSink
class LocalDataSource(DataSource):
    """
    DataSource that reads CSV files from the local file system.

    - base_dir: folder against which relative paths are resolved.
    """

    def __init__(self, base_dir: str = ".") -> None:
        self.base_dir = base_dir

    def _full_path(self, path: str) -> str:
        """Absolute paths are used verbatim; relative ones hang off base_dir."""
        return path if os.path.isabs(path) else os.path.join(self.base_dir, path)

    def read_csv(self, path: str) -> pd.DataFrame:
        """Resolve ``path`` and load it with ``pandas.read_csv``."""
        return pd.read_csv(self._full_path(path))
class LocalResultsSink(ResultsSink):
    """
    ResultsSink that writes JSON documents and images to the local file system.

    - base_dir: folder against which relative paths are resolved.
    """

    def __init__(self, base_dir: str = ".") -> None:
        self.base_dir = base_dir

    def _full_path(self, path: str) -> str:
        """
        Resolve ``path`` and make sure its parent folder exists.

        Absolute paths are used verbatim; relative ones hang off base_dir.
        """
        full = path if os.path.isabs(path) else os.path.join(self.base_dir, path)
        parent = os.path.dirname(full)
        # A bare file name (e.g. base_dir="" and path="results.json") has no
        # parent component, and os.makedirs("") would raise FileNotFoundError.
        if parent:
            os.makedirs(parent, exist_ok=True)
        return full

    def write_json(self, path: str, data: Dict[str, Any]) -> None:
        """Write ``data`` as UTF-8, 2-space indented JSON."""
        full = self._full_path(path)
        with open(full, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)

    def write_figure(self, path: str, fig: Figure) -> None:
        """Save ``fig`` at the resolved path with a tight bounding box."""
        full = self._full_path(path)
        fig.savefig(full, bbox_inches="tight")

View File

@@ -0,0 +1,62 @@
from __future__ import annotations
import io
import json
from typing import Any, Dict, Tuple
import boto3
import pandas as pd
from matplotlib.figure import Figure
from .base import DataSource, ResultsSink
def _split_s3_path(path: str) -> Tuple[str, str]:
"""
Convierte 's3://bucket/key' en (bucket, key).
"""
if not path.startswith("s3://"):
raise ValueError(f"Ruta S3 inválida: {path}")
without_scheme = path[len("s3://") :]
parts = without_scheme.split("/", 1)
if len(parts) != 2:
raise ValueError(f"Ruta S3 inválida: {path}")
return parts[0], parts[1]
class S3DataSource(DataSource):
    """
    DataSource that reads CSV objects from S3 through boto3.
    """

    def __init__(self, boto3_client: Any | None = None) -> None:
        # Fall back to a default S3 client when none is supplied.
        self.s3 = boto3_client or boto3.client("s3")

    def read_csv(self, path: str) -> pd.DataFrame:
        """Fetch the object at ``s3://bucket/key`` and parse it as CSV."""
        bucket, key = _split_s3_path(path)
        response = self.s3.get_object(Bucket=bucket, Key=key)
        raw_bytes = response["Body"].read()
        return pd.read_csv(io.BytesIO(raw_bytes))
class S3ResultsSink(ResultsSink):
    """
    ResultsSink that writes JSON documents and images to S3.
    """

    def __init__(self, boto3_client: Any | None = None) -> None:
        # Fall back to a default S3 client when none is supplied.
        self.s3 = boto3_client or boto3.client("s3")

    def write_json(self, path: str, data: Dict[str, Any]) -> None:
        """Upload ``data`` as a UTF-8, 2-space indented JSON object."""
        bucket, key = _split_s3_path(path)
        body = json.dumps(data, ensure_ascii=False, indent=2).encode("utf-8")
        # Declare the media type, as write_figure already does for PNGs.
        self.s3.put_object(
            Bucket=bucket,
            Key=key,
            Body=body,
            ContentType="application/json",
        )

    def write_figure(self, path: str, fig: Figure) -> None:
        """Render ``fig`` to PNG in memory and upload it."""
        bucket, key = _split_s3_path(path)
        buf = io.BytesIO()
        fig.savefig(buf, format="png", bbox_inches="tight")
        # getvalue() returns the whole buffer regardless of the stream position.
        self.s3.put_object(
            Bucket=bucket,
            Key=key,
            Body=buf.getvalue(),
            ContentType="image/png",
        )

View File

@@ -0,0 +1,291 @@
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime
from importlib import import_module
from typing import Any, Dict, List, Mapping, Optional, cast, Callable
import logging
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.axes import Axes
from matplotlib.figure import Figure
from .io import (
DataSource,
ResultsSink,
)
LOGGER = logging.getLogger(__name__)
def setup_basic_logging(level: str = "INFO") -> None:
    """
    Apply a basic logging configuration, for use from scripts.

    ``level`` is a level name (case-insensitive); names that the logging
    module does not define fall back to INFO.
    """
    resolved_level = getattr(logging, level.upper(), logging.INFO)
    log_format = "%(asctime)s %(levelname)s [%(name)s] %(message)s"
    logging.basicConfig(level=resolved_level, format=log_format)
def _import_class(path: str) -> type:
    """
    Dynamically import a class from a dotted path such as
    "beyond_metrics.dimensions.VolumetriaMetrics".

    Everything before the last dot is the module; the remainder is the
    attribute looked up on that module.
    """
    LOGGER.debug("Importando clase %s", path)
    module_name, class_name = path.rsplit(".", 1)
    return getattr(import_module(module_name), class_name)
def _serialize_for_json(obj: Any) -> Any:
"""
Convierte objetos típicos de numpy/pandas en tipos JSON-friendly.
"""
if obj is None or isinstance(obj, (str, int, float, bool)):
return obj
if isinstance(obj, (np.integer, np.floating)):
return float(obj)
if isinstance(obj, pd.DataFrame):
return obj.to_dict(orient="records")
if isinstance(obj, pd.Series):
return obj.to_list()
if isinstance(obj, (list, tuple)):
return [_serialize_for_json(x) for x in obj]
if isinstance(obj, dict):
return {str(k): _serialize_for_json(v) for k, v in obj.items()}
return str(obj)
PostRunCallback = Callable[[Dict[str, Any], str, ResultsSink], None]
@dataclass
class BeyondMetricsPipeline:
    """
    Main BeyondMetrics pipeline.

    - Reads a CSV through a DataSource (local, S3, Google Drive, ...).
    - Runs the dimensions listed in a configuration dict.
    - Serialises numeric/tabular results into JSON-friendly values.
    - Saves the images produced by methods whose name starts with 'plot_'.
    """

    datasource: DataSource
    sink: ResultsSink
    # Mapping dimension name -> config dict ('class', 'metrics', 'enabled').
    dimensions_config: Mapping[str, Any]
    # Optional extra constructor kwargs, keyed by dimension name.
    dimension_params: Optional[Mapping[str, Mapping[str, Any]]] = None
    # Optional callbacks invoked as cb(all_results, run_base, sink) at the end.
    post_run: Optional[List[PostRunCallback]] = None

    def run(
        self,
        input_path: str,
        run_dir: str,
        *,
        write_results_json: bool = True,
    ) -> Dict[str, Any]:
        """
        Execute every enabled dimension against the CSV at ``input_path``.

        Args:
            input_path: location of the CSV, as understood by the datasource.
            run_dir: base output location; trailing '/' characters are removed.
            write_results_json: when True, the collected results are written
                to ``<run_dir>/results.json`` through the sink.

        Returns:
            Dict mapping dimension name -> {metric name -> result}.

        Raises:
            ValueError: if the dimensions config (or one dimension's config)
                is not a dict, or a dimension has no 'class' entry.
        """
        LOGGER.info("Inicio de ejecución de BeyondMetricsPipeline")
        LOGGER.info("Leyendo CSV de entrada: %s", input_path)

        # 1) Read the input data
        df = self.datasource.read_csv(input_path)
        LOGGER.info("CSV leído con %d filas y %d columnas", df.shape[0], df.shape[1])

        # 2) Work out the output base for this run
        run_base = run_dir.rstrip("/")
        LOGGER.info("Ruta base de esta ejecución: %s", run_base)

        # 3) Run the dimensions
        dimensions_cfg = self.dimensions_config
        if not isinstance(dimensions_cfg, dict):
            raise ValueError("El bloque 'dimensions' debe ser un dict.")

        all_results: Dict[str, Any] = {}
        for dim_name, dim_cfg in dimensions_cfg.items():
            if not isinstance(dim_cfg, dict):
                raise ValueError(f"Config inválida para dimensión '{dim_name}' (debe ser dict).")

            # Dimensions are enabled unless explicitly switched off.
            if not dim_cfg.get("enabled", True):
                LOGGER.info("Dimensión '%s' desactivada; se omite.", dim_name)
                continue

            class_path = dim_cfg.get("class")
            if not class_path:
                raise ValueError(f"Falta 'class' en la dimensión '{dim_name}'.")

            metrics: List[str] = dim_cfg.get("metrics", [])
            if not metrics:
                LOGGER.info("Dimensión '%s' sin métricas configuradas; se omite.", dim_name)
                continue

            cls = _import_class(class_path)
            extra_kwargs = {}
            if self.dimension_params is not None:
                extra_kwargs = self.dimension_params.get(dim_name, {}) or {}

            # Dimension classes receive the DataFrame in their constructor.
            instance = cls(df, **extra_kwargs)

            dim_results: Dict[str, Any] = {}
            for metric_name in metrics:
                LOGGER.info(" - Ejecutando métrica '%s.%s'", dim_name, metric_name)
                result = self._execute_metric(instance, metric_name, run_base, dim_name)
                dim_results[metric_name] = result
            all_results[dim_name] = dim_results

        # 4) Persist the results as JSON (optional)
        if write_results_json:
            results_json_path = f"{run_base}/results.json"
            LOGGER.info("Guardando resultados en JSON: %s", results_json_path)
            self.sink.write_json(results_json_path, all_results)

        # 5) Run post-run callbacks (scorers, agents, ...). A failing callback
        #    is logged and does not stop the remaining ones.
        if self.post_run:
            LOGGER.info("Ejecutando %d callbacks post-run...", len(self.post_run))
            for cb in self.post_run:
                try:
                    LOGGER.info("Ejecutando post-run callback: %s", cb)
                    cb(all_results, run_base, self.sink)
                except Exception:
                    LOGGER.exception("Error ejecutando post-run callback %s", cb)

        LOGGER.info("Ejecución completada correctamente.")
        return all_results

    def _execute_metric(
        self,
        instance: Any,
        metric_name: str,
        run_base: str,
        dim_name: str,
    ) -> Any:
        """
        Run one metric method of a dimension instance.

        - Names starting with 'plot_' are expected to return an Axes: the
          owning figure is saved as PNG through the sink and
          ``{"type": "image", "path": "..."}`` is returned.
        - Any other result is converted to JSON-friendly values.

        For the categorical series of the 'volumetry' dimension (per skill /
        channel) labels and values are returned explicitly so consumers can
        tell which category each number belongs to.

        Raises:
            ValueError: if the instance has no callable named ``metric_name``.
            TypeError: if a 'plot_' metric does not return an Axes.
            RuntimeError: if the returned Axes is not attached to a figure.
        """
        method = getattr(instance, metric_name, None)
        if method is None or not callable(method):
            raise ValueError(
                f"La métrica '{metric_name}' no existe en {type(instance).__name__}"
            )

        # Plot case
        if metric_name.startswith("plot_"):
            ax = method()
            if not isinstance(ax, Axes):
                raise TypeError(
                    f"La métrica '{metric_name}' de '{type(instance).__name__}' "
                    f"debería devolver un matplotlib.axes.Axes"
                )
            fig = ax.get_figure()
            if fig is None:
                raise RuntimeError(
                    "Axes.get_figure() devolvió None, lo cual no debería pasar."
                )
            fig = cast(Figure, fig)

            filename = f"{dim_name}_{metric_name}.png"
            img_path = f"{run_base}/{filename}"
            LOGGER.debug("Guardando figura en %s", img_path)
            try:
                self.sink.write_figure(img_path, fig)
            finally:
                # Release the figure even when the sink fails; otherwise
                # pyplot keeps it registered and figures pile up across runs.
                plt.close(fig)

            return {
                "type": "image",
                "path": img_path,
            }

        # Numeric / tabular case
        value = method()

        # Special case: categorical volumetry series (per skill / channel).
        # Return {"labels": [...], "values": [...]} so the index labels are
        # preserved in the JSON output.
        if (
            dim_name == "volumetry"
            and isinstance(value, pd.Series)
            and metric_name
            in {
                "volume_by_channel",
                "volume_by_skill",
                "channel_distribution_pct",
                "skill_distribution_pct",
            }
        ):
            labels = [str(idx) for idx in value.index.tolist()]
            # Force every value to a plain float.
            values = [float(v) for v in value.astype(float).tolist()]
            return {
                "labels": labels,
                "values": values,
            }

        return _serialize_for_json(value)
def load_dimensions_config(path: str) -> Dict[str, Any]:
    """
    Load a JSON configuration file and return its 'dimensions' block.

    Raises:
        ValueError: if the file has no 'dimensions' entry (or it is null).
    """
    import json
    from pathlib import Path

    raw_text = Path(path).read_text(encoding="utf-8")
    config = json.loads(raw_text)

    dimensions = config.get("dimensions")
    if dimensions is None:
        raise ValueError("El fichero de configuración debe contener un bloque 'dimensions'.")
    return dimensions
def build_pipeline(
    dimensions_config_path: str,
    datasource: DataSource,
    sink: ResultsSink,
    dimension_params: Optional[Mapping[str, Mapping[str, Any]]] = None,
    post_run: Optional[List[PostRunCallback]] = None,
) -> BeyondMetricsPipeline:
    """
    Assemble a BeyondMetricsPipeline from:
      - the path to the JSON file holding the dimensions/metrics
      - an already-built DataSource (local/S3/Drive)
      - an already-built ResultsSink (local/S3/Drive)
      - optional per-dimension constructor parameters
      - an optional list of post_run callbacks executed at the end of a run
        (useful for scorers, AI agents, etc.)
    """
    return BeyondMetricsPipeline(
        datasource=datasource,
        sink=sink,
        dimensions_config=load_dimensions_config(dimensions_config_path),
        dimension_params=dimension_params,
        post_run=post_run,
    )

View File

@@ -0,0 +1,46 @@
version: "3.9"
services:
api:
build:
context: .
dockerfile: Dockerfile
# Si algún día subes la imagen a un registry, podrías usar:
# image: ghcr.io/TU_USUARIO/beyondcx-heatmap-api:latest
container_name: beyondcx-api
restart: unless-stopped
ports:
- "${API_PORT:-8000}:8000"
environment:
BASIC_AUTH_USERNAME: "${BASIC_AUTH_USERNAME:-admin}"
BASIC_AUTH_PASSWORD: "${BASIC_AUTH_PASSWORD:-admin}"
volumes:
- "${DATA_DIR:-./data}:/app/data"
networks:
- beyondcx-net
nginx:
image: nginx:stable
container_name: beyondcx-nginx
restart: unless-stopped
depends_on:
- api
ports:
- "80:80"
volumes:
- ./nginx/conf.d:/etc/nginx/conf.d:ro
networks:
- beyondcx-net
networks:
beyondcx-net:
driver: bridge

25
backend/docs/notas git.md Normal file
View File

@@ -0,0 +1,25 @@
git status # ver qué ha cambiado
git add . # añadir cambios
git commit -m "Describe lo que has hecho"
git push # subir al remoto
# Ejecutar tests
source .venv/bin/activate
python -m pytest -v
# Instalar el paquete
python -m pip install -e .
# Ejecutar el API
uvicorn beyond_api.main:app --reload
# Ejemplo Curl API
curl -X POST "http://127.0.0.1:8000/analysis" \
-u admin:admin \
-F "analysis=basic" \
-F "csv_file=@data/example/synthetic_interactions.csv" \
-F "economy_json={\"labor_cost_per_hour\":30,\"automation_volume_share\":0.7,\"customer_segments\":{\"VIP\":\"high\",\"Basico\":\"medium\"}}"
# Lo siguiente:
# Disponer de varios json y pasarlos en la petición
# Meter etiquetas en la respuesta por skill

21
backend/docs/notas.md Normal file
View File

@@ -0,0 +1,21 @@
# Arrancar el proyecto en dev
# Backend
source .venv/bin/activate
export BASIC_AUTH_USERNAME=admin
export BASIC_AUTH_PASSWORD=admin
python -m uvicorn beyond_api.main:app --reload --port 8000
# Frontend
npm run dev
# Siguientes pasos: que revise todo el código y quitar todo lo random para que utilice datos reales
# Comparar los sintéticos con la demo y ver que ofrecen los mismos datos. Faltan cosas
# Hacer que funcione de alguna manera el selector de JSON
# Dockerizar
# Limpieza de código
# Todo es real, menos el benchmark y sus potential savings
# Falta hacer funcionar los selectores de paquetes

1
backend/output.json Normal file

File diff suppressed because one or more lines are too long

31
backend/pyproject.toml Normal file
View File

@@ -0,0 +1,31 @@
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
[project]
name = "beyond-metrics"
version = "0.1.0"
description = "Librería de métricas de volumetría para contact centers"
authors = [{ name = "Nacho" }]
requires-python = ">=3.9"
dependencies = [
"pandas",
"numpy",
"matplotlib",
"openai",
"reportlab",
"google-api-python-client>=2.153.0",
"google-auth>=2.35.0",
"google-auth-oauthlib>=1.2.1",
# --- API REST ---
"fastapi",
"uvicorn[standard]",
"python-multipart", # necesario para subir ficheros
"boto3",
]
[tool.setuptools.packages.find]
where = ["."]
include = ["beyond_metrics", "beyond_flows", "beyond_api"]

168
backend/tests/test_api.sh Normal file
View File

@@ -0,0 +1,168 @@
#!/usr/bin/env bash
# Manual smoke test for the /analysis endpoint: sends a series of curl
# requests and stores each response body under OUT_DIR.
# Abort on errors, unset variables and failed pipeline stages.
set -euo pipefail
# ===========================
# Configuration
# ===========================
HOST="${HOST:-localhost}"
PORT="${PORT:-8000}"
API_URL="http://$HOST:$PORT/analysis"
# Basic Auth credentials (adjust if you use different ones)
API_USER="${API_USER:-beyond}"
API_PASS="${API_PASS:-beyond2026}"
# Path of the CSV on your machine that will be uploaded
LOCAL_CSV_FILE="${LOCAL_CSV_FILE:-data/example/synthetic_interactions.csv}"
# Output folder
OUT_DIR="${OUT_DIR:-./test_results}"
mkdir -p "$OUT_DIR"
# Print a banner with the title passed as first argument.
print_header() {
echo
echo "============================================================"
echo "$1"
echo "============================================================"
}
# ===========================
# 1. Simple health check (no auth)
# ===========================
print_header "1) Comprobando que el servidor responde (sin auth) - debería devolver 401"
# Exit-on-error is switched off around the request so a curl failure does not
# abort the script.
set +e
curl -s -o /dev/null -w "HTTP status: %{http_code}\n" \
-X POST "$API_URL"
set -e
# ===========================
# 2. Test: upload CSV (analysis=premium by default)
# ===========================
print_header "2) Subiendo CSV local con análisis 'premium' (default) y guardando JSON"
if [ ! -f "$LOCAL_CSV_FILE" ]; then
echo "⚠️ Aviso: el fichero LOCAL_CSV_FILE='$LOCAL_CSV_FILE' no existe."
echo " Cambia la variable LOCAL_CSV_FILE o copia el CSV a esa ruta."
else
curl -v \
-u "$API_USER:$API_PASS" \
-X POST "$API_URL" \
-F "csv_file=@${LOCAL_CSV_FILE}" \
-o "${OUT_DIR}/resultados_premium.json"
echo "✅ JSON guardado en: ${OUT_DIR}/resultados_premium.json"
echo " Primeras líneas:"
head -n 20 "${OUT_DIR}/resultados_premium.json" || true
fi
# ===========================
# 3. Test: upload CSV with analysis=basic
# ===========================
print_header "3) Subiendo CSV local con análisis 'basic' y guardando JSON"
if [ ! -f "$LOCAL_CSV_FILE" ]; then
echo "⚠️ Saltando este test porque LOCAL_CSV_FILE='$LOCAL_CSV_FILE' no existe."
else
curl -v \
-u "$API_USER:$API_PASS" \
-X POST "$API_URL" \
-F "csv_file=@${LOCAL_CSV_FILE}" \
-F "analysis=basic" \
-o "${OUT_DIR}/resultados_basic.json"
echo "✅ JSON guardado en: ${OUT_DIR}/resultados_basic.json"
echo " Primeras líneas:"
head -n 20 "${OUT_DIR}/resultados_basic.json" || true
fi
# ===========================
# 4. Test: custom economy_json (premium)
# ===========================
print_header "4) Subiendo CSV con configuración económica personalizada (analysis=premium)"
if [ ! -f "$LOCAL_CSV_FILE" ]; then
echo "⚠️ Saltando este test porque LOCAL_CSV_FILE='$LOCAL_CSV_FILE' no existe."
else
curl -v \
-u "$API_USER:$API_PASS" \
-X POST "$API_URL" \
-F "csv_file=@${LOCAL_CSV_FILE}" \
-F 'economy_json={"labor_cost_per_hour":30,"automation_volume_share":0.7,"customer_segments":{"VIP":"high","Basico":"medium"}}' \
-F "analysis=premium" \
-o "${OUT_DIR}/resultados_economy_premium.json"
echo "✅ JSON con economía personalizada guardado en: ${OUT_DIR}/resultados_economy_premium.json"
echo " Primeras líneas:"
head -n 20 "${OUT_DIR}/resultados_economy_premium.json" || true
fi
# ===========================
# 5. Error test: invalid economy_json
# ===========================
# NOTE(review): unlike tests 2-4, the error tests below do not check that
# LOCAL_CSV_FILE exists before referencing it.
print_header "5) Petición con economy_json inválido - debe devolver 400"
set +e
curl -v \
-u "$API_USER:$API_PASS" \
-X POST "$API_URL" \
-F "csv_file=@${LOCAL_CSV_FILE}" \
-F "economy_json={invalid json" \
-o "${OUT_DIR}/error_economy_invalid.json"
# curl's exit code is captured here; nothing later in this script reads it.
STATUS=$?
set -e
echo "✅ Respuesta guardada en: ${OUT_DIR}/error_economy_invalid.json"
cat "${OUT_DIR}/error_economy_invalid.json" || true
# ===========================
# 6. Error test: invalid analysis value
# ===========================
print_header "6) Petición con analysis inválido - debe devolver 400"
set +e
curl -v \
-u "$API_USER:$API_PASS" \
-X POST "$API_URL" \
-F "csv_file=@${LOCAL_CSV_FILE}" \
-F "analysis=ultra" \
-o "${OUT_DIR}/error_analysis_invalid.json"
set -e
echo "✅ Respuesta guardada en: ${OUT_DIR}/error_analysis_invalid.json"
cat "${OUT_DIR}/error_analysis_invalid.json" || true
# ===========================
# 7. Error test: no csv_file (should return 422)
# ===========================
print_header "7) Petición inválida (sin csv_file) - debe devolver 422 (FastAPI validation)"
set +e
curl -v \
-u "$API_USER:$API_PASS" \
-X POST "$API_URL" \
-o "${OUT_DIR}/error_missing_csv.json"
set -e
echo "✅ Respuesta guardada en: ${OUT_DIR}/error_missing_csv.json"
cat "${OUT_DIR}/error_missing_csv.json" || true
# ===========================
# 8. Error test: wrong credentials
# ===========================
print_header "8) Petición con credenciales incorrectas - debe devolver 401"
set +e
curl -v \
-u "wrong:wrong" \
-X POST "$API_URL" \
-F "csv_file=@${LOCAL_CSV_FILE}" \
-o "${OUT_DIR}/error_auth.json"
set -e
echo "✅ Respuesta de error de auth guardada en: ${OUT_DIR}/error_auth.json"
cat "${OUT_DIR}/error_auth.json" || true
echo
echo "✨ Tests terminados. Revisa la carpeta: ${OUT_DIR}"

View File

@@ -0,0 +1,128 @@
import math
from datetime import datetime
import matplotlib
import pandas as pd
from beyond_metrics.dimensions.EconomyCost import EconomyCostMetrics, EconomyConfig
matplotlib.use("Agg")
def _sample_df() -> pd.DataFrame:
data = [
{
"interaction_id": "id1",
"datetime_start": datetime(2024, 1, 1, 10, 0),
"queue_skill": "ventas",
"channel": "voz",
"duration_talk": 600,
"hold_time": 60,
"wrap_up_time": 30,
},
{
"interaction_id": "id2",
"datetime_start": datetime(2024, 1, 1, 10, 5),
"queue_skill": "ventas",
"channel": "voz",
"duration_talk": 300,
"hold_time": 30,
"wrap_up_time": 20,
},
{
"interaction_id": "id3",
"datetime_start": datetime(2024, 1, 1, 11, 0),
"queue_skill": "soporte",
"channel": "chat",
"duration_talk": 400,
"hold_time": 20,
"wrap_up_time": 30,
},
]
return pd.DataFrame(data)
def test_init_and_required_columns():
    """EconomyCostMetrics builds from the full fixture and raises without duration_talk."""
    import pytest

    frame = _sample_df()
    config = EconomyConfig(
        labor_cost_per_hour=20.0,
        overhead_rate=0.1,
        tech_costs_annual=10000.0,
    )
    metrics = EconomyCostMetrics(frame, config)
    assert not metrics.is_empty

    # Dropping a mandatory column must surface as ValueError.
    frame_without_talk = frame.drop(columns=["duration_talk"])
    with pytest.raises(ValueError):
        EconomyCostMetrics(frame_without_talk, config)
def test_metrics_without_config_do_not_crash():
    """With config=None, table metrics are empty and dict metrics are {}."""
    metrics = EconomyCostMetrics(_sample_df(), None)

    cpi_table = metrics.cpi_by_skill_channel()
    assert cpi_table.empty

    annual_table = metrics.annual_cost_by_skill_channel()
    assert annual_table.empty

    breakdown = metrics.cost_breakdown()
    assert breakdown == {}

    inefficiency_table = metrics.inefficiency_cost_by_skill_channel()
    assert inefficiency_table.empty

    savings = metrics.potential_savings()
    assert savings == {}
def test_basic_cpi_and_annual_cost():
    """CPI has a row per skill/channel pair and annual cost has a positive entry."""
    config = EconomyConfig(labor_cost_per_hour=20.0, overhead_rate=0.1)
    metrics = EconomyCostMetrics(_sample_df(), config)

    cpi_table = metrics.cpi_by_skill_channel()
    assert not cpi_table.empty
    # Both skill/channel combinations present in the fixture must be indexed.
    for pair in (("ventas", "voz"), ("soporte", "chat")):
        assert pair in cpi_table.index

    annual_table = metrics.annual_cost_by_skill_channel()
    assert "annual_cost" in annual_table.columns
    # At least one annual cost must be strictly positive.
    is_positive = annual_table["annual_cost"] > 0
    assert is_positive.any()
def test_cost_breakdown_and_potential_savings():
    """Cost breakdown percentages add up to ~100 and savings are non-negative."""
    frame = _sample_df()
    config = EconomyConfig(
        labor_cost_per_hour=20.0,
        overhead_rate=0.1,
        tech_costs_annual=5000.0,
        automation_cpi=0.2,
        automation_volume_share=0.5,
        automation_success_rate=0.8,
    )
    metrics = EconomyCostMetrics(frame, config)

    breakdown = metrics.cost_breakdown()
    pct_keys = ("labor_pct", "overhead_pct", "tech_pct")
    for key in pct_keys:
        assert key in breakdown

    pct_sum = sum(breakdown[key] for key in pct_keys)
    # Allow a small deviation caused by rounding to 2 decimals.
    assert abs(pct_sum - 100.0) < 0.2

    savings = metrics.potential_savings()
    assert "annual_savings" in savings
    annual_savings = savings["annual_savings"]
    assert annual_savings >= 0.0
def test_plot_methods_return_axes():
    """Both economy plot helpers must return a matplotlib Axes."""
    from matplotlib.axes import Axes

    frame = _sample_df()
    config = EconomyConfig(labor_cost_per_hour=20.0, overhead_rate=0.1)
    metrics = EconomyCostMetrics(frame, config)

    waterfall_axes = metrics.plot_cost_waterfall()
    cpi_axes = metrics.plot_cpi_by_channel()

    for axes in (waterfall_axes, cpi_axes):
        assert isinstance(axes, Axes)

View File

@@ -0,0 +1,238 @@
import math
from datetime import datetime, timedelta
import matplotlib
import numpy as np
import pandas as pd
from beyond_metrics.dimensions.OperationalPerformance import OperationalPerformanceMetrics
matplotlib.use("Agg")
def _sample_df() -> pd.DataFrame:
"""
Dataset sintético pequeño para probar la dimensión de rendimiento operacional.
Incluye:
- varios skills
- FCR, abandonos, transferencias
- reincidencia <7 días
- logged_time para occupancy
"""
base = datetime(2024, 1, 1, 10, 0, 0)
rows = [
# cliente C1, resolved, no abandon, voz, ventas
{
"interaction_id": "id1",
"datetime_start": base,
"queue_skill": "ventas",
"channel": "voz",
"duration_talk": 600,
"hold_time": 60,
"wrap_up_time": 30,
"agent_id": "A1",
"transfer_flag": 0,
"is_resolved": 1,
"abandoned_flag": 0,
"customer_id": "C1",
"logged_time": 900,
},
# C1 vuelve en 3 días mismo canal/skill
{
"interaction_id": "id2",
"datetime_start": base + timedelta(days=3),
"queue_skill": "ventas",
"channel": "voz",
"duration_talk": 700,
"hold_time": 30,
"wrap_up_time": 40,
"agent_id": "A1",
"transfer_flag": 1,
"is_resolved": 1,
"abandoned_flag": 0,
"customer_id": "C1",
"logged_time": 900,
},
# cliente C2, soporte, chat, no resuelto, transferido
{
"interaction_id": "id3",
"datetime_start": base + timedelta(hours=1),
"queue_skill": "soporte",
"channel": "chat",
"duration_talk": 400,
"hold_time": 20,
"wrap_up_time": 30,
"agent_id": "A2",
"transfer_flag": 1,
"is_resolved": 0,
"abandoned_flag": 0,
"customer_id": "C2",
"logged_time": 800,
},
# cliente C3, abandonado
{
"interaction_id": "id4",
"datetime_start": base + timedelta(hours=2),
"queue_skill": "soporte",
"channel": "voz",
"duration_talk": 100,
"hold_time": 50,
"wrap_up_time": 10,
"agent_id": "A2",
"transfer_flag": 0,
"is_resolved": 0,
"abandoned_flag": 1,
"customer_id": "C3",
"logged_time": 600,
},
# cliente C4, una sola interacción, email
{
"interaction_id": "id5",
"datetime_start": base + timedelta(days=10),
"queue_skill": "ventas",
"channel": "email",
"duration_talk": 300,
"hold_time": 0,
"wrap_up_time": 20,
"agent_id": "A1",
"transfer_flag": 0,
"is_resolved": 1,
"abandoned_flag": 0,
"customer_id": "C4",
"logged_time": 700,
},
]
return pd.DataFrame(rows)
# ----------------------------------------------------------------------
# Inicialización y validación básica
# ----------------------------------------------------------------------
def test_init_and_required_columns():
    """OperationalPerformanceMetrics builds from the fixture and raises without duration_talk."""
    df = _sample_df()
    op = OperationalPerformanceMetrics(df)
    assert not op.is_empty

    # A missing mandatory column must raise ValueError.
    df_missing = df.drop(columns=["duration_talk"])
    try:
        OperationalPerformanceMetrics(df_missing)
    except ValueError:
        pass
    else:
        # Raise explicitly instead of `assert False`: assert statements are
        # removed under `python -O`, which would make this check pass silently.
        raise AssertionError("Debería lanzar ValueError si falta duration_talk")
# ----------------------------------------------------------------------
# AHT y distribución
# ----------------------------------------------------------------------
def test_aht_distribution_basic():
    """aht_distribution exposes p10/p50/p90 and a P90/P50 ratio of at least 1."""
    metrics = OperationalPerformanceMetrics(_sample_df())
    distribution = metrics.aht_distribution()

    for key in ("p10", "p50", "p90", "p90_p50_ratio"):
        assert key in distribution

    # P90 cannot be below P50, so the ratio must be >= 1.
    ratio = distribution["p90_p50_ratio"]
    assert ratio >= 1.0
# ----------------------------------------------------------------------
# FCR, escalación, abandono
# ----------------------------------------------------------------------
def test_fcr_escalation_abandonment_rates():
    """FCR, escalation and abandonment rates match the hand-counted fixture flags."""
    metrics = OperationalPerformanceMetrics(_sample_df())

    fcr_pct = metrics.fcr_rate()
    escalation_pct = metrics.escalation_rate()
    abandonment_pct = metrics.abandonment_rate()

    # FCR: is_resolved=1 on id1, id2, id5 -> 3 of 5 interactions.
    assert math.isclose(fcr_pct, 60.0, rel_tol=1e-6)
    # Escalation: transfer_flag=1 on id2, id3 -> 2 of 5.
    assert math.isclose(escalation_pct, 40.0, rel_tol=1e-6)
    # Abandonment: abandoned_flag=1 on id4 -> 1 of 5.
    assert math.isclose(abandonment_pct, 20.0, rel_tol=1e-6)
# ----------------------------------------------------------------------
# Reincidencia y repetición de canal
# ----------------------------------------------------------------------
def test_recurrence_and_repeat_channel():
    """7-day recurrence is 25% and repeat-channel rate is 100% for the fixture."""
    metrics = OperationalPerformanceMetrics(_sample_df())

    recurrence_pct = metrics.recurrence_rate_7d()
    repeat_channel_pct = metrics.repeat_channel_rate()

    # Four customers (C1..C4); only C1 has two contacts within 3 days -> 1 of 4.
    assert math.isclose(recurrence_pct, 25.0, rel_tol=1e-6)
    # The only recurrence (<7d) is C1 going voz -> voz, i.e. same channel.
    assert math.isclose(repeat_channel_pct, 100.0, rel_tol=1e-6)
# ----------------------------------------------------------------------
# Occupancy
# ----------------------------------------------------------------------
def test_occupancy_rate():
    """occupancy_rate equals total handle time over total logged time, as a 2-decimal percentage."""
    metrics = OperationalPerformanceMetrics(_sample_df())
    occupancy = metrics.occupancy_rate()

    # handle_time = (600+60+30) + (700+30+40) + (400+20+30) + (100+50+10) + (300+0+20)
    #             = 690 + 770 + 450 + 160 + 320 = 2390
    # logged_time = 900 + 900 + 800 + 600 + 700 = 3900
    expected_pct = 2390 / 3900 * 100
    expected_rounded = round(expected_pct, 2)
    assert math.isclose(occupancy, expected_rounded, rel_tol=1e-6)
# ----------------------------------------------------------------------
# Performance Score
# ----------------------------------------------------------------------
def test_performance_score_structure_and_range():
    """performance_score returns a mapping whose "score" lies within [0, 10]."""
    metrics = OperationalPerformanceMetrics(_sample_df())
    result = metrics.performance_score()

    assert "score" in result
    score = result["score"]
    assert 0.0 <= score
    assert score <= 10.0
# ----------------------------------------------------------------------
# Plots
# ----------------------------------------------------------------------
def test_plot_methods_return_axes():
    """Both operational plot helpers must return a matplotlib Axes."""
    metrics = OperationalPerformanceMetrics(_sample_df())

    boxplot_axes = metrics.plot_aht_boxplot_by_skill()
    funnel_axes = metrics.plot_resolution_funnel_by_skill()

    from matplotlib.axes import Axes

    for axes in (boxplot_axes, funnel_axes):
        assert isinstance(axes, Axes)

View File

@@ -0,0 +1,200 @@
import math
from datetime import datetime, timedelta
import pytest
import matplotlib
import numpy as np
import pandas as pd
from beyond_metrics.dimensions.SatisfactionExperience import SatisfactionExperienceMetrics
matplotlib.use("Agg")
def _sample_df_negative_corr() -> pd.DataFrame:
"""
Dataset sintético donde CSAT decrece claramente cuando AHT aumenta,
para que la correlación sea negativa (< -0.3).
"""
base = datetime(2024, 1, 1, 10, 0, 0)
rows = []
# AHT crece, CSAT baja
aht_values = [200, 300, 400, 500, 600, 700, 800, 900]
csat_values = [5.0, 4.7, 4.3, 3.8, 3.3, 2.8, 2.3, 2.0]
skills = ["ventas", "retencion"]
channels = ["voz", "chat"]
for i, (aht, csat) in enumerate(zip(aht_values, csat_values), start=1):
rows.append(
{
"interaction_id": f"id{i}",
"datetime_start": base + timedelta(minutes=5 * i),
"queue_skill": skills[i % len(skills)],
"channel": channels[i % len(channels)],
"csat_score": csat,
"duration_talk": aht * 0.7,
"hold_time": aht * 0.2,
"wrap_up_time": aht * 0.1,
}
)
return pd.DataFrame(rows)
def _sample_df_full() -> pd.DataFrame:
"""
Dataset más completo con NPS y CES para otras pruebas.
"""
base = datetime(2024, 1, 1, 10, 0, 0)
rows = []
for i in range(1, 11):
aht = 300 + 30 * i
csat = 3.0 + 0.1 * i # ligero incremento
nps = -20 + 5 * i
ces = 4.0 - 0.05 * i
rows.append(
{
"interaction_id": f"id{i}",
"datetime_start": base + timedelta(minutes=10 * i),
"queue_skill": "ventas" if i <= 5 else "retencion",
"channel": "voz" if i % 2 == 0 else "chat",
"csat_score": csat,
"duration_talk": aht * 0.7,
"hold_time": aht * 0.2,
"wrap_up_time": aht * 0.1,
"nps_score": nps,
"ces_score": ces,
}
)
return pd.DataFrame(rows)
# ----------------------------------------------------------------------
# Inicialización y validación
# ----------------------------------------------------------------------
def test_init_and_required_columns():
    """duration_talk is mandatory for SatisfactionExperienceMetrics; csat_score is optional."""
    frame = _sample_df_negative_corr()
    metrics = SatisfactionExperienceMetrics(frame)
    assert not metrics.is_empty

    # Removing a truly mandatory column must raise ValueError.
    frame_without_talk = frame.drop(columns=["duration_talk"])
    with pytest.raises(ValueError):
        SatisfactionExperienceMetrics(frame_without_talk)

    # Removing csat_score must no longer break construction: it is optional.
    frame_without_csat = frame.drop(columns=["csat_score"])
    metrics_without_csat = SatisfactionExperienceMetrics(frame_without_csat)
    # The instance simply lacks CSAT metrics; it is still non-empty.
    assert metrics_without_csat.is_empty is False
# ----------------------------------------------------------------------
# CSAT promedio y tablas
# ----------------------------------------------------------------------
def test_csat_avg_by_skill_channel():
    """The CSAT table is indexed by both skills and has at least one channel column."""
    metrics = SatisfactionExperienceMetrics(_sample_df_full())
    csat_table = metrics.csat_avg_by_skill_channel()

    # Both skills of the fixture must appear as rows.
    for skill in ("ventas", "retencion"):
        assert skill in csat_table.index

    # At least one of the channels must appear as a column.
    assert "voz" in csat_table.columns or "chat" in csat_table.columns
def test_nps_and_ces_tables():
    """NPS and CES tables are non-empty and both contain the "ventas" skill."""
    metrics = SatisfactionExperienceMetrics(_sample_df_full())

    nps_table = metrics.nps_avg_by_skill_channel()
    ces_table = metrics.ces_avg_by_skill_channel()

    # Both must be non-empty DataFrames.
    assert not nps_table.empty
    assert not ces_table.empty

    for table in (nps_table, ces_table):
        assert "ventas" in table.index
# ----------------------------------------------------------------------
# Correlación CSAT vs AHT
# ----------------------------------------------------------------------
def test_csat_aht_correlation_negative():
    """The negative-correlation fixture yields r < -0.3 and the "negativo" code."""
    metrics = SatisfactionExperienceMetrics(_sample_df_negative_corr())
    result = metrics.csat_aht_correlation()

    coefficient = result["r"]
    interpretation = result["interpretation_code"]

    assert coefficient < -0.3
    assert interpretation == "negativo"
# ----------------------------------------------------------------------
# Clasificación por skill (sweet spot)
# ----------------------------------------------------------------------
def test_csat_aht_skill_summary_structure():
    """The per-skill summary has the expected columns and exactly the two fixture skills."""
    metrics = SatisfactionExperienceMetrics(_sample_df_full())
    summary = metrics.csat_aht_skill_summary()

    for column in ("csat_avg", "aht_avg", "classification"):
        assert column in summary.columns

    skills_in_summary = set(summary.index)
    assert skills_in_summary == {"ventas", "retencion"}
# ----------------------------------------------------------------------
# Plots
# ----------------------------------------------------------------------
def test_plot_methods_return_axes():
    """Both satisfaction plot helpers must return a matplotlib Axes."""
    metrics = SatisfactionExperienceMetrics(_sample_df_full())

    scatter_axes = metrics.plot_csat_vs_aht_scatter()
    distribution_axes = metrics.plot_csat_distribution()

    from matplotlib.axes import Axes

    for axes in (scatter_axes, distribution_axes):
        assert isinstance(axes, Axes)
def test_dataset_without_csat_does_not_break():
    """A core-only dataset (no csat/nps/ces) yields an empty CSAT table and NaN correlation."""
    core_columns = {
        "interaction_id": ["id1", "id2"],
        "datetime_start": [datetime(2024, 1, 1, 10), datetime(2024, 1, 1, 11)],
        "queue_skill": ["ventas", "soporte"],
        "channel": ["voz", "chat"],
        "duration_talk": [300, 400],
        "hold_time": [30, 20],
        "wrap_up_time": [20, 30],
    }
    frame = pd.DataFrame(core_columns)
    metrics = SatisfactionExperienceMetrics(frame)

    # Nothing should blow up; results are simply empty / NaN.
    csat_table = metrics.csat_avg_by_skill_channel()
    assert csat_table.empty

    correlation = metrics.csat_aht_correlation()
    assert math.isnan(correlation["r"])

View File

@@ -0,0 +1,221 @@
import math
from datetime import datetime
import matplotlib
import pandas as pd
from beyond_metrics.dimensions.Volumetria import VolumetriaMetrics
# Usamos backend "Agg" para que matplotlib no intente abrir ventanas
matplotlib.use("Agg")
def _sample_df() -> pd.DataFrame:
"""
DataFrame de prueba con el nuevo esquema de columnas:
Campos usados por VolumetriaMetrics:
- interaction_id
- datetime_start
- queue_skill
- channel
5 interacciones:
- 3 por canal "voz", 2 por canal "chat"
- 3 en skill "ventas", 2 en skill "soporte"
- 3 en enero, 2 en febrero
"""
data = [
{
"interaction_id": "id1",
"datetime_start": datetime(2024, 1, 1, 9, 0),
"queue_skill": "ventas",
"channel": "voz",
},
{
"interaction_id": "id2",
"datetime_start": datetime(2024, 1, 1, 9, 30),
"queue_skill": "ventas",
"channel": "voz",
},
{
"interaction_id": "id3",
"datetime_start": datetime(2024, 1, 1, 10, 0),
"queue_skill": "soporte",
"channel": "voz",
},
{
"interaction_id": "id4",
"datetime_start": datetime(2024, 2, 1, 10, 0),
"queue_skill": "ventas",
"channel": "chat",
},
{
"interaction_id": "id5",
"datetime_start": datetime(2024, 2, 2, 11, 0),
"queue_skill": "soporte",
"channel": "chat",
},
]
return pd.DataFrame(data)
# ----------------------------------------------------------------------
# VALIDACIÓN BÁSICA
# ----------------------------------------------------------------------
def test_init_validates_required_columns():
    """VolumetriaMetrics accepts the default columns and raises if any required one is missing."""
    df = _sample_df()
    # Must not raise with the default columns present.
    vm = VolumetriaMetrics(df)
    assert not vm.is_empty

    # Dropping any single required column must raise ValueError.
    for col in ["interaction_id", "datetime_start", "queue_skill", "channel"]:
        df_missing = df.drop(columns=[col])
        try:
            VolumetriaMetrics(df_missing)
        except ValueError:
            continue
        # Raise explicitly instead of `assert False`: assert statements are
        # removed under `python -O`, which would make this check pass silently.
        raise AssertionError(f"Debería fallar al faltar la columna: {col}")
# ----------------------------------------------------------------------
# VOLUMEN Y DISTRIBUCIONES
# ----------------------------------------------------------------------
def test_volume_by_channel_and_skill():
    """Volume per channel and per skill matches the fixture counts and sums to the row count."""
    frame = _sample_df()
    metrics = VolumetriaMetrics(frame)

    by_channel = metrics.volume_by_channel()
    by_skill = metrics.volume_by_skill()
    total_rows = len(frame)

    # Channels
    assert by_channel.sum() == total_rows
    assert by_channel["voz"] == 3
    assert by_channel["chat"] == 2

    # Skills
    assert by_skill.sum() == total_rows
    assert by_skill["ventas"] == 3
    assert by_skill["soporte"] == 2
def test_channel_and_skill_distribution_pct():
    """Percentage distributions are 60/40 for both channels and skills."""
    metrics = VolumetriaMetrics(_sample_df())

    channel_pct = metrics.channel_distribution_pct()
    skill_pct = metrics.skill_distribution_pct()

    # 3/5 = 60%, 2/5 = 40%
    checks = (
        (channel_pct, "voz", 60.0),
        (channel_pct, "chat", 40.0),
        (skill_pct, "ventas", 60.0),
        (skill_pct, "soporte", 40.0),
    )
    for distribution, label, expected in checks:
        assert math.isclose(distribution[label], expected, rel_tol=1e-6)
# ----------------------------------------------------------------------
# HEATMAP Y ESTACIONALIDAD
# ----------------------------------------------------------------------
def test_heatmap_24x7_shape_and_values():
    """The heatmap is 7 days x 24 hours and holds the fixture counts in the right cells."""
    metrics = VolumetriaMetrics(_sample_df())
    grid = metrics.heatmap_24x7()

    # 7 days x 24 hours
    assert grid.shape == (7, 24)

    # Spot-check specific (dayofweek, hour) cells.
    expected_cells = (
        # 2024-01-01 is a Monday (dayofweek=0): two contacts at 9h, one at 10h
        (0, 9, 2),
        (0, 10, 1),
        # 2024-02-01 is a Thursday (dayofweek=3), 10h
        (3, 10, 1),
        # 2024-02-02 is a Friday (dayofweek=4), 11h
        (4, 11, 1),
    )
    for day, hour, count in expected_cells:
        assert grid.loc[day, hour] == count
def test_monthly_seasonality_cv():
    """Monthly seasonality CV for volumes [3, 2] is about 28.28%."""
    metrics = VolumetriaMetrics(_sample_df())
    seasonality_cv = metrics.monthly_seasonality_cv()

    # Monthly volume: [3, 2]
    # mean = 2.5, std (ddof=1) ≈ 0.7071 -> CV ≈ 28.28%
    expected_cv = 28.28
    assert math.isclose(seasonality_cv, expected_cv, rel_tol=1e-2)
def test_peak_offpeak_ratio():
    """Peak/off-peak ratio for the fixture is 3 peak contacts over 2 off-peak ones."""
    metrics = VolumetriaMetrics(_sample_df())
    peak_ratio = metrics.peak_offpeak_ratio()

    # Peak hours, per the original test comment, are 10-19 as defined in the class.
    # Peak: 10h, 10h, 11h -> 3 interactions
    # Off-peak: 9h, 9h -> 2 interactions
    # Ratio = 3/2 = 1.5
    expected_ratio = 1.5
    assert math.isclose(peak_ratio, expected_ratio, rel_tol=1e-6)
def test_concentration_top20_skills_pct():
    """The top-20% skill concentration for the fixture is 60%."""
    metrics = VolumetriaMetrics(_sample_df())
    concentration = metrics.concentration_top20_skills_pct()

    # Skills: ventas=3, soporte=2, total=5
    # Top 20% of skills (ceil(0.2 * 2) = 1 skill) -> ventas=3
    # 3/5 = 60%
    expected_pct = 60.0
    assert math.isclose(concentration, expected_pct, rel_tol=1e-6)
# ----------------------------------------------------------------------
# CASO DATAFRAME VACÍO
# ----------------------------------------------------------------------
def test_empty_dataframe_behaviour():
    """An empty frame with the required columns yields empty volumes and NaN ratios."""
    required = ["interaction_id", "datetime_start", "queue_skill", "channel"]
    metrics = VolumetriaMetrics(pd.DataFrame(columns=required))

    assert metrics.is_empty

    channel_volume = metrics.volume_by_channel()
    assert channel_volume.empty
    skill_volume = metrics.volume_by_skill()
    assert skill_volume.empty

    # Scalar metrics are checked for NaN when there is no data.
    seasonality_cv = metrics.monthly_seasonality_cv()
    assert math.isnan(seasonality_cv)
    peak_ratio = metrics.peak_offpeak_ratio()
    assert math.isnan(peak_ratio)
    concentration = metrics.concentration_top20_skills_pct()
    assert math.isnan(concentration)
# ----------------------------------------------------------------------
# PLOTS
# ----------------------------------------------------------------------
def test_plot_methods_return_axes():
    """All three volumetry plot helpers must return a matplotlib Axes."""
    metrics = VolumetriaMetrics(_sample_df())

    heatmap_axes = metrics.plot_heatmap_24x7()
    channel_axes = metrics.plot_channel_distribution()
    pareto_axes = metrics.plot_skill_pareto()

    from matplotlib.axes import Axes

    for axes in (heatmap_axes, channel_axes, pareto_axes):
        assert isinstance(axes, Axes)