Commit inicial

This commit is contained in:
Susana
2026-01-18 19:15:34 +00:00
parent 522b4b6caa
commit 62454c6b6a
30 changed files with 12750 additions and 1310 deletions

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
import os
from pathlib import Path
import json
import math
@@ -12,6 +13,10 @@ from fastapi.responses import JSONResponse
from beyond_api.security import get_current_user
from beyond_api.services.analysis_service import run_analysis_collect_json
# Cache paths - same as in cache.py
CACHE_DIR = Path(os.getenv("CACHE_DIR", "/data/cache"))
CACHED_FILE = CACHE_DIR / "cached_data.csv"
router = APIRouter(
prefix="",
tags=["analysis"],
@@ -117,3 +122,100 @@ async def analysis_endpoint(
"results": safe_results,
}
)
def extract_date_range_from_csv(file_path: Path) -> dict:
    """Extract the min/max date range from the CSV's datetime_start column.

    Returns {"min": "YYYY-MM-DD", "max": "YYYY-MM-DD"}; either value is None
    when it cannot be determined (missing file/column, empty data, NaT).
    """
    import pandas as pd

    result = {"min": None, "max": None}
    try:
        # Load only the date column to keep memory usage low on big files.
        frame = pd.read_csv(
            file_path,
            usecols=['datetime_start'],
            parse_dates=['datetime_start'],
        )
        if 'datetime_start' in frame.columns and len(frame) > 0:
            earliest = frame['datetime_start'].min()
            latest = frame['datetime_start'].max()
            result = {
                "min": earliest.strftime('%Y-%m-%d') if pd.notna(earliest) else None,
                "max": latest.strftime('%Y-%m-%d') if pd.notna(latest) else None,
            }
    except Exception as e:
        # Best-effort: any read/parse failure degrades to an empty range.
        print(f"Error extracting date range: {e}")
        return {"min": None, "max": None}
    return result
def count_unique_queues_from_csv(file_path: Path) -> int:
    """Count distinct queue_skill values in the CSV; 0 on any failure."""
    import pandas as pd

    try:
        # Only the one column we need is read from disk.
        queues = pd.read_csv(file_path, usecols=['queue_skill'])
        if 'queue_skill' in queues.columns:
            return queues['queue_skill'].nunique()
    except Exception as e:
        # Best-effort: unreadable file or missing column degrades to zero.
        print(f"Error counting queues: {e}")
    return 0
@router.post("/analysis/cached")
async def analysis_cached_endpoint(
economy_json: Optional[str] = Form(default=None),
analysis: Literal["basic", "premium"] = Form(default="premium"),
current_user: str = Depends(get_current_user),
):
"""
Ejecuta el pipeline sobre el archivo CSV cacheado en el servidor.
Útil para re-analizar sin tener que subir el archivo de nuevo.
"""
# Validar que existe el archivo cacheado
if not CACHED_FILE.exists():
raise HTTPException(
status_code=404,
detail="No hay archivo cacheado en el servidor. Sube un archivo primero.",
)
# Validar `analysis`
if analysis not in {"basic", "premium"}:
raise HTTPException(
status_code=400,
detail="analysis debe ser 'basic' o 'premium'.",
)
# Parseo de economía (si viene)
economy_data = None
if economy_json:
try:
economy_data = json.loads(economy_json)
except json.JSONDecodeError:
raise HTTPException(
status_code=400,
detail="economy_json no es un JSON válido.",
)
# Extraer metadatos del CSV
date_range = extract_date_range_from_csv(CACHED_FILE)
unique_queues = count_unique_queues_from_csv(CACHED_FILE)
try:
# Ejecutar el análisis sobre el archivo cacheado
results_json = run_analysis_collect_json(
input_path=CACHED_FILE,
economy_data=economy_data,
analysis=analysis,
company_folder=None,
)
except Exception as e:
raise HTTPException(
status_code=500,
detail=f"Error ejecutando análisis: {str(e)}",
)
# Limpiar NaN/inf para que el JSON sea válido
safe_results = sanitize_for_json(results_json)
return JSONResponse(
content={
"user": current_user,
"results": safe_results,
"source": "cached",
"dateRange": date_range,
"uniqueQueues": unique_queues,
}
)

View File

@@ -0,0 +1,250 @@
# beyond_api/api/cache.py
"""
Server-side cache for CSV files.
Stores the uploaded CSV file and metadata for later re-analysis.
"""
from __future__ import annotations
import json
import os
import shutil
from datetime import datetime
from pathlib import Path
from typing import Any, Optional
from fastapi import APIRouter, Depends, HTTPException, status, UploadFile, File, Form
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from beyond_api.security import get_current_user
router = APIRouter(
prefix="/cache",
tags=["cache"],
)
# Directory for cache files
CACHE_DIR = Path(os.getenv("CACHE_DIR", "/data/cache"))
CACHED_FILE = CACHE_DIR / "cached_data.csv"
METADATA_FILE = CACHE_DIR / "metadata.json"
DRILLDOWN_FILE = CACHE_DIR / "drilldown_data.json"
class CacheMetadata(BaseModel):
    """Schema of the JSON stored in METADATA_FILE next to the cached CSV."""

    fileName: str  # original name of the uploaded file (as sent by the client)
    fileSize: int  # size in bytes, as reported by the uploader
    recordCount: int  # data rows in the CSV, header excluded (see count_csv_records)
    cachedAt: str  # ISO-8601 timestamp of when the file was cached
    costPerHour: float  # cost-per-hour value supplied with the upload form
def ensure_cache_dir():
    """Make sure the cache directory exists (parents included, idempotent)."""
    os.makedirs(CACHE_DIR, exist_ok=True)
def count_csv_records(file_path: Path) -> int:
    """Count data records in a CSV file (header excluded).

    Returns 0 for an unreadable or completely empty file; a file containing
    only a header line also counts as 0 records.
    """
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            total_lines = sum(1 for _ in f)
        # Subtract the header, but never go negative: the previous
        # `lines - 1` returned -1 for a zero-byte file.
        return max(0, total_lines - 1)
    except Exception:
        # Best-effort: any I/O problem is reported as "no records".
        return 0
@router.get("/check")
def check_cache(current_user: str = Depends(get_current_user)):
"""
Check if there's cached data available.
Returns metadata if cache exists, null otherwise.
"""
if not METADATA_FILE.exists() or not CACHED_FILE.exists():
return JSONResponse(content={"exists": False, "metadata": None})
try:
with open(METADATA_FILE, "r") as f:
metadata = json.load(f)
return JSONResponse(content={"exists": True, "metadata": metadata})
except Exception as e:
return JSONResponse(content={"exists": False, "metadata": None, "error": str(e)})
@router.get("/file")
def get_cached_file_path(current_user: str = Depends(get_current_user)):
"""
Returns the path to the cached CSV file for internal use.
"""
if not CACHED_FILE.exists():
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="No cached file found"
)
return JSONResponse(content={"path": str(CACHED_FILE)})
@router.get("/download")
def download_cached_file(current_user: str = Depends(get_current_user)):
"""
Download the cached CSV file for frontend parsing.
Returns the file as a streaming response.
"""
from fastapi.responses import FileResponse
if not CACHED_FILE.exists():
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="No cached file found"
)
return FileResponse(
path=CACHED_FILE,
media_type="text/csv",
filename="cached_data.csv"
)
@router.post("/file")
async def save_cached_file(
csv_file: UploadFile = File(...),
fileName: str = Form(...),
fileSize: int = Form(...),
costPerHour: float = Form(...),
current_user: str = Depends(get_current_user)
):
"""
Save uploaded CSV file to server cache.
"""
ensure_cache_dir()
try:
# Save the CSV file
with open(CACHED_FILE, "wb") as f:
while True:
chunk = await csv_file.read(1024 * 1024) # 1 MB chunks
if not chunk:
break
f.write(chunk)
# Count records
record_count = count_csv_records(CACHED_FILE)
# Save metadata
metadata = {
"fileName": fileName,
"fileSize": fileSize,
"recordCount": record_count,
"cachedAt": datetime.now().isoformat(),
"costPerHour": costPerHour,
}
with open(METADATA_FILE, "w") as f:
json.dump(metadata, f)
return JSONResponse(content={
"success": True,
"message": f"Cached file with {record_count} records",
"metadata": metadata
})
except Exception as e:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Error saving cache: {str(e)}"
)
@router.get("/drilldown")
def get_cached_drilldown(current_user: str = Depends(get_current_user)):
"""
Get the cached drilldownData JSON.
Returns the pre-calculated drilldown data for fast cache usage.
"""
if not DRILLDOWN_FILE.exists():
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="No cached drilldown data found"
)
try:
with open(DRILLDOWN_FILE, "r", encoding="utf-8") as f:
drilldown_data = json.load(f)
return JSONResponse(content={"success": True, "drilldownData": drilldown_data})
except Exception as e:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Error reading drilldown data: {str(e)}"
)
@router.post("/drilldown")
async def save_cached_drilldown(
drilldown_json: str = Form(...),
current_user: str = Depends(get_current_user)
):
"""
Save drilldownData JSON to server cache.
Called by frontend after calculating drilldown from uploaded file.
Receives JSON as form field.
"""
ensure_cache_dir()
try:
# Parse and validate JSON
drilldown_data = json.loads(drilldown_json)
# Save to file
with open(DRILLDOWN_FILE, "w", encoding="utf-8") as f:
json.dump(drilldown_data, f)
return JSONResponse(content={
"success": True,
"message": f"Cached drilldown data with {len(drilldown_data)} skills"
})
except json.JSONDecodeError as e:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Invalid JSON: {str(e)}"
)
except Exception as e:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Error saving drilldown data: {str(e)}"
)
@router.delete("/file")
def clear_cache(current_user: str = Depends(get_current_user)):
"""
Clear the server-side cache (CSV, metadata, and drilldown data).
"""
try:
if CACHED_FILE.exists():
CACHED_FILE.unlink()
if METADATA_FILE.exists():
METADATA_FILE.unlink()
if DRILLDOWN_FILE.exists():
DRILLDOWN_FILE.unlink()
return JSONResponse(content={"success": True, "message": "Cache cleared"})
except Exception as e:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Error clearing cache: {str(e)}"
)
# Keep old endpoints for backwards compatibility but mark as deprecated
@router.get("/interactions")
def get_cached_interactions_deprecated(current_user: str = Depends(get_current_user)):
"""DEPRECATED: Use /cache/file instead."""
raise HTTPException(
status_code=status.HTTP_410_GONE,
detail="This endpoint is deprecated. Use /cache/file with re-analysis instead."
)
@router.post("/interactions")
def save_cached_interactions_deprecated(current_user: str = Depends(get_current_user)):
"""DEPRECATED: Use /cache/file instead."""
raise HTTPException(
status_code=status.HTTP_410_GONE,
detail="This endpoint is deprecated. Use /cache/file instead."
)

View File

@@ -4,7 +4,8 @@ from fastapi.middleware.cors import CORSMiddleware
# importa tus routers
from beyond_api.api.analysis import router as analysis_router
from beyond_api.api.auth import router as auth_router # 👈 nuevo
from beyond_api.api.auth import router as auth_router
from beyond_api.api.cache import router as cache_router
def setup_basic_logging() -> None:
logging.basicConfig(
@@ -30,4 +31,5 @@ app.add_middleware(
)
app.include_router(analysis_router)
app.include_router(auth_router) # 👈 registrar el router de auth
app.include_router(auth_router)
app.include_router(cache_router)

View File

@@ -5,26 +5,33 @@ import secrets
from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPBasic, HTTPBasicCredentials
security = HTTPBasic()
# auto_error=False para que no dispare el popup nativo del navegador automáticamente
security = HTTPBasic(auto_error=False)
# En producción: export BASIC_AUTH_USERNAME y BASIC_AUTH_PASSWORD.
BASIC_USER = os.getenv("BASIC_AUTH_USERNAME", "beyond")
BASIC_PASS = os.getenv("BASIC_AUTH_PASSWORD", "beyond2026")
def get_current_user(credentials: HTTPBasicCredentials = Depends(security)) -> str:
def get_current_user(credentials: HTTPBasicCredentials | None = Depends(security)) -> str:
    """
    Validate the request's HTTP Basic username/password; return the username.

    `credentials` is None when the client sent no Authorization header:
    `security` is built with auto_error=False, so FastAPI hands us None
    instead of rejecting the request itself. That branch raises 401 without
    a WWW-Authenticate header, avoiding the browser's native login popup
    (the frontend has its own login form).
    """
    if credentials is None:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Credenciales requeridas",
        )
    # compare_digest performs constant-time comparison (timing-attack safe).
    correct_username = secrets.compare_digest(credentials.username, BASIC_USER)
    correct_password = secrets.compare_digest(credentials.password, BASIC_PASS)
    if not (correct_username and correct_password):
        # NOTE(review): this branch still sends WWW-Authenticate, which makes
        # browsers show their native credential prompt — seemingly at odds
        # with the auto_error=False choice above that exists to avoid it.
        # Confirm whether this header should be dropped here as well.
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Credenciales incorrectas",
            headers={"WWW-Authenticate": "Basic"},
        )
    return credentials.username

View File

@@ -506,11 +506,10 @@ def score_roi(annual_savings: Any) -> Dict[str, Any]:
def classify_agentic_score(score: Optional[float]) -> Dict[str, Any]:
"""
Clasificación final:
- 8–10: AUTOMATE 🤖
- 5–7.99: ASSIST 🤝
- 3–4.99: AUGMENT 🧠
- 0–2.99: HUMAN_ONLY 👤
Clasificación final (alineada con frontend):
- ≥6: COPILOT 🤖 (Listo para Copilot)
- 4–5.99: OPTIMIZE 🔧 (Optimizar Primero)
- <4: HUMAN 👤 (Requiere Gestión Humana)
Si score es None (ninguna dimensión disponible), devuelve NO_DATA.
"""
@@ -524,33 +523,26 @@ def classify_agentic_score(score: Optional[float]) -> Dict[str, Any]:
),
}
if score >= 8.0:
label = "AUTOMATE"
if score >= 6.0:
label = "COPILOT"
emoji = "🤖"
description = (
"Alta repetitividad, alta predictibilidad y ROI elevado. "
"Candidato a automatización completa (chatbot/IVR inteligente)."
"Listo para Copilot. Procesos con predictibilidad y simplicidad "
"suficientes para asistencia IA (sugerencias en tiempo real, autocompletado)."
)
elif score >= 5.0:
label = "ASSIST"
emoji = "🤝"
elif score >= 4.0:
label = "OPTIMIZE"
emoji = "🔧"
description = (
"Complejidad media o ROI limitado. Recomendado enfoque de copilot "
"para agentes (sugerencias en tiempo real, autocompletado, etc.)."
)
elif score >= 3.0:
label = "AUGMENT"
emoji = "🧠"
description = (
"Alta complejidad o bajo volumen. Mejor usar herramientas de apoyo "
"(knowledge base, guías dinámicas, scripts)."
"Optimizar primero. Estandarizar procesos y reducir variabilidad "
"antes de implementar asistencia IA."
)
else:
label = "HUMAN_ONLY"
label = "HUMAN"
emoji = "👤"
description = (
"Procesos de muy bajo volumen o extremadamente complejos. Mejor "
"mantener operación 100% humana de momento."
"Requiere gestión humana. Procesos complejos o variables que "
"necesitan intervención humana antes de considerar automatización."
)
return {

View File

@@ -23,6 +23,7 @@
"fcr_rate",
"escalation_rate",
"abandonment_rate",
"high_hold_time_rate",
"recurrence_rate_7d",
"repeat_channel_rate",
"occupancy_rate",

View File

@@ -86,6 +86,16 @@ class OperationalPerformanceMetrics:
+ df["wrap_up_time"].fillna(0)
)
# v3.0: Filtrar NOISE y ZOMBIE para cálculos de variabilidad
# record_status: 'valid', 'noise', 'zombie', 'abandon'
# Para AHT/CV solo usamos 'valid' (o sin status = legacy data)
if "record_status" in df.columns:
df["record_status"] = df["record_status"].astype(str).str.strip().str.upper()
# Crear máscara para registros válidos (para cálculos de CV/variabilidad)
df["_is_valid_for_cv"] = df["record_status"].isin(["VALID", "NAN", ""]) | df["record_status"].isna()
else:
df["_is_valid_for_cv"] = True
# Normalización básica
df["queue_skill"] = df["queue_skill"].astype(str).str.strip()
df["channel"] = df["channel"].astype(str).str.strip()
@@ -121,8 +131,13 @@ class OperationalPerformanceMetrics:
def aht_distribution(self) -> Dict[str, float]:
"""
Devuelve P10, P50, P90 del AHT y el ratio P90/P50 como medida de variabilidad.
v3.0: Filtra NOISE y ZOMBIE para el cálculo de variabilidad.
Solo usa registros con record_status='valid' o sin status (legacy).
"""
ht = self.df["handle_time"].dropna().astype(float)
# Filtrar solo registros válidos para cálculo de variabilidad
df_valid = self.df[self.df["_is_valid_for_cv"] == True]
ht = df_valid["handle_time"].dropna().astype(float)
if ht.empty:
return {}
@@ -165,56 +180,45 @@ class OperationalPerformanceMetrics:
# ------------------------------------------------------------------ #
def fcr_rate(self) -> float:
"""
FCR proxy = 100 - escalation_rate.
FCR (First Contact Resolution).
Usamos la métrica de escalación ya calculada a partir de transfer_flag.
Si no se puede calcular escalation_rate, intentamos derivarlo
directamente de la columna transfer_flag. Si todo falla, devolvemos NaN.
Prioridad 1: Usar fcr_real_flag del CSV si existe
Prioridad 2: Calcular como 100 - escalation_rate
"""
df = self.df
total = len(df)
if total == 0:
return float("nan")
# Prioridad 1: Usar fcr_real_flag si existe
if "fcr_real_flag" in df.columns:
col = df["fcr_real_flag"]
# Normalizar a booleano
if col.dtype == "O":
fcr_mask = (
col.astype(str)
.str.strip()
.str.lower()
.isin(["true", "t", "1", "yes", "y", "si", ""])
)
else:
fcr_mask = pd.to_numeric(col, errors="coerce").fillna(0) > 0
fcr_count = int(fcr_mask.sum())
fcr = (fcr_count / total) * 100.0
return float(max(0.0, min(100.0, round(fcr, 2))))
# Prioridad 2: Fallback a 100 - escalation_rate
try:
esc = self.escalation_rate()
except Exception:
esc = float("nan")
# Si escalation_rate es válido, usamos el proxy simple
if esc is not None and not math.isnan(esc):
fcr = 100.0 - esc
return float(max(0.0, min(100.0, round(fcr, 2))))
# Fallback: calcular directamente desde transfer_flag
df = self.df
if "transfer_flag" not in df.columns or len(df) == 0:
return float("nan")
col = df["transfer_flag"]
# Normalizar a booleano: TRUE/FALSE, 1/0, etc.
if col.dtype == "O":
col_norm = (
col.astype(str)
.str.strip()
.str.lower()
.map({
"true": True,
"t": True,
"1": True,
"yes": True,
"y": True,
})
).fillna(False)
transfer_mask = col_norm
else:
transfer_mask = pd.to_numeric(col, errors="coerce").fillna(0) > 0
total = len(df)
transfers = int(transfer_mask.sum())
esc_rate = transfers / total if total > 0 else float("nan")
if math.isnan(esc_rate):
return float("nan")
fcr = 100.0 - esc_rate * 100.0
return float(max(0.0, min(100.0, round(fcr, 2))))
return float("nan")
def escalation_rate(self) -> float:
@@ -233,20 +237,57 @@ class OperationalPerformanceMetrics:
"""
% de interacciones abandonadas.
Definido como % de filas con abandoned_flag == True.
Si la columna no existe, devuelve NaN.
Busca en orden: is_abandoned, abandoned_flag, abandoned
Si ninguna columna existe, devuelve NaN.
"""
df = self.df
if "abandoned_flag" not in df.columns:
return float("nan")
total = len(df)
if total == 0:
return float("nan")
abandoned = df["abandoned_flag"].sum()
# Buscar columna de abandono en orden de prioridad
abandon_col = None
for col_name in ["is_abandoned", "abandoned_flag", "abandoned"]:
if col_name in df.columns:
abandon_col = col_name
break
if abandon_col is None:
return float("nan")
col = df[abandon_col]
# Normalizar a booleano
if col.dtype == "O":
abandon_mask = (
col.astype(str)
.str.strip()
.str.lower()
.isin(["true", "t", "1", "yes", "y", "si", ""])
)
else:
abandon_mask = pd.to_numeric(col, errors="coerce").fillna(0) > 0
abandoned = int(abandon_mask.sum())
return float(round(abandoned / total * 100, 2))
def high_hold_time_rate(self, threshold_seconds: float = 60.0) -> float:
    """
    % of interactions whose hold_time exceeds `threshold_seconds` (default 60s).

    Complexity proxy: if the agent had to keep the customer on hold for more
    than a minute, they probably had to look something up or consult someone.

    Returns NaN when the frame is empty or has no hold_time column.
    """
    df = self.df
    total = len(df)
    # Guard against a missing column as well as an empty frame: the previous
    # version raised KeyError when the CSV lacked hold_time, while sibling
    # metrics return NaN in that situation.
    if total == 0 or "hold_time" not in df.columns:
        return float("nan")
    # Missing hold times count as 0 seconds (i.e. never placed on hold).
    hold_times = df["hold_time"].fillna(0)
    high_hold_count = (hold_times > threshold_seconds).sum()
    return float(round(high_hold_count / total * 100, 2))
def recurrence_rate_7d(self) -> float:
"""
% de clientes que vuelven a contactar en < 7 días.