Commit inicial

This commit is contained in:
Susana
2026-01-18 19:15:34 +00:00
parent 522b4b6caa
commit 62454c6b6a
30 changed files with 12750 additions and 1310 deletions

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
import os
from pathlib import Path
import json
import math
@@ -12,6 +13,10 @@ from fastapi.responses import JSONResponse
from beyond_api.security import get_current_user
from beyond_api.services.analysis_service import run_analysis_collect_json
# Cache paths - same as in cache.py
CACHE_DIR = Path(os.getenv("CACHE_DIR", "/data/cache"))
CACHED_FILE = CACHE_DIR / "cached_data.csv"
router = APIRouter(
prefix="",
tags=["analysis"],
@@ -117,3 +122,100 @@ async def analysis_endpoint(
"results": safe_results,
}
)
def extract_date_range_from_csv(file_path: Path) -> dict:
    """Extract the min/max date range from the CSV's datetime_start column.

    Returns {"min": "YYYY-MM-DD", "max": "YYYY-MM-DD"}; either value is None
    when it cannot be determined (missing file/column, empty data, NaT).
    """
    import pandas as pd

    result = {"min": None, "max": None}
    try:
        # Load only the date column to keep memory usage low on big files.
        frame = pd.read_csv(
            file_path,
            usecols=['datetime_start'],
            parse_dates=['datetime_start'],
        )
        if 'datetime_start' in frame.columns and len(frame) > 0:
            earliest = frame['datetime_start'].min()
            latest = frame['datetime_start'].max()
            result = {
                "min": earliest.strftime('%Y-%m-%d') if pd.notna(earliest) else None,
                "max": latest.strftime('%Y-%m-%d') if pd.notna(latest) else None,
            }
    except Exception as e:
        # Best-effort: any read/parse failure degrades to an empty range.
        print(f"Error extracting date range: {e}")
        return {"min": None, "max": None}
    return result
def count_unique_queues_from_csv(file_path: Path) -> int:
    """Count distinct queue_skill values in the CSV; 0 on any failure."""
    import pandas as pd

    try:
        # Only the one column we need is read from disk.
        queues = pd.read_csv(file_path, usecols=['queue_skill'])
        if 'queue_skill' in queues.columns:
            return queues['queue_skill'].nunique()
    except Exception as e:
        # Best-effort: unreadable file or missing column degrades to zero.
        print(f"Error counting queues: {e}")
    return 0
@router.post("/analysis/cached")
async def analysis_cached_endpoint(
economy_json: Optional[str] = Form(default=None),
analysis: Literal["basic", "premium"] = Form(default="premium"),
current_user: str = Depends(get_current_user),
):
"""
Ejecuta el pipeline sobre el archivo CSV cacheado en el servidor.
Útil para re-analizar sin tener que subir el archivo de nuevo.
"""
# Validar que existe el archivo cacheado
if not CACHED_FILE.exists():
raise HTTPException(
status_code=404,
detail="No hay archivo cacheado en el servidor. Sube un archivo primero.",
)
# Validar `analysis`
if analysis not in {"basic", "premium"}:
raise HTTPException(
status_code=400,
detail="analysis debe ser 'basic' o 'premium'.",
)
# Parseo de economía (si viene)
economy_data = None
if economy_json:
try:
economy_data = json.loads(economy_json)
except json.JSONDecodeError:
raise HTTPException(
status_code=400,
detail="economy_json no es un JSON válido.",
)
# Extraer metadatos del CSV
date_range = extract_date_range_from_csv(CACHED_FILE)
unique_queues = count_unique_queues_from_csv(CACHED_FILE)
try:
# Ejecutar el análisis sobre el archivo cacheado
results_json = run_analysis_collect_json(
input_path=CACHED_FILE,
economy_data=economy_data,
analysis=analysis,
company_folder=None,
)
except Exception as e:
raise HTTPException(
status_code=500,
detail=f"Error ejecutando análisis: {str(e)}",
)
# Limpiar NaN/inf para que el JSON sea válido
safe_results = sanitize_for_json(results_json)
return JSONResponse(
content={
"user": current_user,
"results": safe_results,
"source": "cached",
"dateRange": date_range,
"uniqueQueues": unique_queues,
}
)

View File

@@ -0,0 +1,250 @@
# beyond_api/api/cache.py
"""
Server-side cache for CSV files.
Stores the uploaded CSV file and metadata for later re-analysis.
"""
from __future__ import annotations
import json
import os
import shutil
from datetime import datetime
from pathlib import Path
from typing import Any, Optional
from fastapi import APIRouter, Depends, HTTPException, status, UploadFile, File, Form
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from beyond_api.security import get_current_user
router = APIRouter(
prefix="/cache",
tags=["cache"],
)
# Directory for cache files
CACHE_DIR = Path(os.getenv("CACHE_DIR", "/data/cache"))
CACHED_FILE = CACHE_DIR / "cached_data.csv"
METADATA_FILE = CACHE_DIR / "metadata.json"
DRILLDOWN_FILE = CACHE_DIR / "drilldown_data.json"
class CacheMetadata(BaseModel):
    """Schema of the JSON stored in METADATA_FILE next to the cached CSV."""

    fileName: str  # original name of the uploaded file (as sent by the client)
    fileSize: int  # size in bytes, as reported by the uploader
    recordCount: int  # data rows in the CSV, header excluded (see count_csv_records)
    cachedAt: str  # ISO-8601 timestamp of when the file was cached
    costPerHour: float  # cost-per-hour value supplied with the upload form
def ensure_cache_dir():
    """Make sure the cache directory exists (parents included, idempotent)."""
    os.makedirs(CACHE_DIR, exist_ok=True)
def count_csv_records(file_path: Path) -> int:
    """Count data records in a CSV file (header excluded).

    Returns 0 for an unreadable or completely empty file; a file containing
    only a header line also counts as 0 records.
    """
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            total_lines = sum(1 for _ in f)
        # Subtract the header, but never go negative: the previous
        # `lines - 1` returned -1 for a zero-byte file.
        return max(0, total_lines - 1)
    except Exception:
        # Best-effort: any I/O problem is reported as "no records".
        return 0
@router.get("/check")
def check_cache(current_user: str = Depends(get_current_user)):
"""
Check if there's cached data available.
Returns metadata if cache exists, null otherwise.
"""
if not METADATA_FILE.exists() or not CACHED_FILE.exists():
return JSONResponse(content={"exists": False, "metadata": None})
try:
with open(METADATA_FILE, "r") as f:
metadata = json.load(f)
return JSONResponse(content={"exists": True, "metadata": metadata})
except Exception as e:
return JSONResponse(content={"exists": False, "metadata": None, "error": str(e)})
@router.get("/file")
def get_cached_file_path(current_user: str = Depends(get_current_user)):
"""
Returns the path to the cached CSV file for internal use.
"""
if not CACHED_FILE.exists():
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="No cached file found"
)
return JSONResponse(content={"path": str(CACHED_FILE)})
@router.get("/download")
def download_cached_file(current_user: str = Depends(get_current_user)):
"""
Download the cached CSV file for frontend parsing.
Returns the file as a streaming response.
"""
from fastapi.responses import FileResponse
if not CACHED_FILE.exists():
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="No cached file found"
)
return FileResponse(
path=CACHED_FILE,
media_type="text/csv",
filename="cached_data.csv"
)
@router.post("/file")
async def save_cached_file(
csv_file: UploadFile = File(...),
fileName: str = Form(...),
fileSize: int = Form(...),
costPerHour: float = Form(...),
current_user: str = Depends(get_current_user)
):
"""
Save uploaded CSV file to server cache.
"""
ensure_cache_dir()
try:
# Save the CSV file
with open(CACHED_FILE, "wb") as f:
while True:
chunk = await csv_file.read(1024 * 1024) # 1 MB chunks
if not chunk:
break
f.write(chunk)
# Count records
record_count = count_csv_records(CACHED_FILE)
# Save metadata
metadata = {
"fileName": fileName,
"fileSize": fileSize,
"recordCount": record_count,
"cachedAt": datetime.now().isoformat(),
"costPerHour": costPerHour,
}
with open(METADATA_FILE, "w") as f:
json.dump(metadata, f)
return JSONResponse(content={
"success": True,
"message": f"Cached file with {record_count} records",
"metadata": metadata
})
except Exception as e:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Error saving cache: {str(e)}"
)
@router.get("/drilldown")
def get_cached_drilldown(current_user: str = Depends(get_current_user)):
"""
Get the cached drilldownData JSON.
Returns the pre-calculated drilldown data for fast cache usage.
"""
if not DRILLDOWN_FILE.exists():
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="No cached drilldown data found"
)
try:
with open(DRILLDOWN_FILE, "r", encoding="utf-8") as f:
drilldown_data = json.load(f)
return JSONResponse(content={"success": True, "drilldownData": drilldown_data})
except Exception as e:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Error reading drilldown data: {str(e)}"
)
@router.post("/drilldown")
async def save_cached_drilldown(
drilldown_json: str = Form(...),
current_user: str = Depends(get_current_user)
):
"""
Save drilldownData JSON to server cache.
Called by frontend after calculating drilldown from uploaded file.
Receives JSON as form field.
"""
ensure_cache_dir()
try:
# Parse and validate JSON
drilldown_data = json.loads(drilldown_json)
# Save to file
with open(DRILLDOWN_FILE, "w", encoding="utf-8") as f:
json.dump(drilldown_data, f)
return JSONResponse(content={
"success": True,
"message": f"Cached drilldown data with {len(drilldown_data)} skills"
})
except json.JSONDecodeError as e:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Invalid JSON: {str(e)}"
)
except Exception as e:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Error saving drilldown data: {str(e)}"
)
@router.delete("/file")
def clear_cache(current_user: str = Depends(get_current_user)):
"""
Clear the server-side cache (CSV, metadata, and drilldown data).
"""
try:
if CACHED_FILE.exists():
CACHED_FILE.unlink()
if METADATA_FILE.exists():
METADATA_FILE.unlink()
if DRILLDOWN_FILE.exists():
DRILLDOWN_FILE.unlink()
return JSONResponse(content={"success": True, "message": "Cache cleared"})
except Exception as e:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Error clearing cache: {str(e)}"
)
# Keep old endpoints for backwards compatibility but mark as deprecated
@router.get("/interactions")
def get_cached_interactions_deprecated(current_user: str = Depends(get_current_user)):
"""DEPRECATED: Use /cache/file instead."""
raise HTTPException(
status_code=status.HTTP_410_GONE,
detail="This endpoint is deprecated. Use /cache/file with re-analysis instead."
)
@router.post("/interactions")
def save_cached_interactions_deprecated(current_user: str = Depends(get_current_user)):
"""DEPRECATED: Use /cache/file instead."""
raise HTTPException(
status_code=status.HTTP_410_GONE,
detail="This endpoint is deprecated. Use /cache/file instead."
)

View File

@@ -4,7 +4,8 @@ from fastapi.middleware.cors import CORSMiddleware
# importa tus routers
from beyond_api.api.analysis import router as analysis_router
from beyond_api.api.auth import router as auth_router # 👈 nuevo
from beyond_api.api.auth import router as auth_router
from beyond_api.api.cache import router as cache_router
def setup_basic_logging() -> None:
logging.basicConfig(
@@ -30,4 +31,5 @@ app.add_middleware(
)
app.include_router(analysis_router)
app.include_router(auth_router) # 👈 registrar el router de auth
app.include_router(auth_router)
app.include_router(cache_router)

View File

@@ -5,26 +5,33 @@ import secrets
from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPBasic, HTTPBasicCredentials
security = HTTPBasic()
# auto_error=False para que no dispare el popup nativo del navegador automáticamente
security = HTTPBasic(auto_error=False)
# En producción: export BASIC_AUTH_USERNAME y BASIC_AUTH_PASSWORD.
BASIC_USER = os.getenv("BASIC_AUTH_USERNAME", "beyond")
BASIC_PASS = os.getenv("BASIC_AUTH_PASSWORD", "beyond2026")
def get_current_user(credentials: HTTPBasicCredentials = Depends(security)) -> str:
def get_current_user(credentials: HTTPBasicCredentials | None = Depends(security)) -> str:
    """
    Validate the request's HTTP Basic username/password; return the username.

    `credentials` is None when the client sent no Authorization header:
    `security` is built with auto_error=False, so FastAPI hands us None
    instead of rejecting the request itself. That branch raises 401 without
    a WWW-Authenticate header, avoiding the browser's native login popup
    (the frontend has its own login form).
    """
    if credentials is None:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Credenciales requeridas",
        )
    # compare_digest performs constant-time comparison (timing-attack safe).
    correct_username = secrets.compare_digest(credentials.username, BASIC_USER)
    correct_password = secrets.compare_digest(credentials.password, BASIC_PASS)
    if not (correct_username and correct_password):
        # NOTE(review): this branch still sends WWW-Authenticate, which makes
        # browsers show their native credential prompt — seemingly at odds
        # with the auto_error=False choice above that exists to avoid it.
        # Confirm whether this header should be dropped here as well.
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Credenciales incorrectas",
            headers={"WWW-Authenticate": "Basic"},
        )
    return credentials.username

View File

@@ -506,11 +506,10 @@ def score_roi(annual_savings: Any) -> Dict[str, Any]:
def classify_agentic_score(score: Optional[float]) -> Dict[str, Any]:
"""
Clasificación final:
- 8–10: AUTOMATE 🤖
- 5–7.99: ASSIST 🤝
- 3–4.99: AUGMENT 🧠
- 0–2.99: HUMAN_ONLY 👤
Clasificación final (alineada con frontend):
- ≥6: COPILOT 🤖 (Listo para Copilot)
- 4–5.99: OPTIMIZE 🔧 (Optimizar Primero)
- <4: HUMAN 👤 (Requiere Gestión Humana)
Si score es None (ninguna dimensión disponible), devuelve NO_DATA.
"""
@@ -524,33 +523,26 @@ def classify_agentic_score(score: Optional[float]) -> Dict[str, Any]:
),
}
if score >= 8.0:
label = "AUTOMATE"
if score >= 6.0:
label = "COPILOT"
emoji = "🤖"
description = (
"Alta repetitividad, alta predictibilidad y ROI elevado. "
"Candidato a automatización completa (chatbot/IVR inteligente)."
"Listo para Copilot. Procesos con predictibilidad y simplicidad "
"suficientes para asistencia IA (sugerencias en tiempo real, autocompletado)."
)
elif score >= 5.0:
label = "ASSIST"
emoji = "🤝"
elif score >= 4.0:
label = "OPTIMIZE"
emoji = "🔧"
description = (
"Complejidad media o ROI limitado. Recomendado enfoque de copilot "
"para agentes (sugerencias en tiempo real, autocompletado, etc.)."
)
elif score >= 3.0:
label = "AUGMENT"
emoji = "🧠"
description = (
"Alta complejidad o bajo volumen. Mejor usar herramientas de apoyo "
"(knowledge base, guías dinámicas, scripts)."
"Optimizar primero. Estandarizar procesos y reducir variabilidad "
"antes de implementar asistencia IA."
)
else:
label = "HUMAN_ONLY"
label = "HUMAN"
emoji = "👤"
description = (
"Procesos de muy bajo volumen o extremadamente complejos. Mejor "
"mantener operación 100% humana de momento."
"Requiere gestión humana. Procesos complejos o variables que "
"necesitan intervención humana antes de considerar automatización."
)
return {

View File

@@ -23,6 +23,7 @@
"fcr_rate",
"escalation_rate",
"abandonment_rate",
"high_hold_time_rate",
"recurrence_rate_7d",
"repeat_channel_rate",
"occupancy_rate",

View File

@@ -86,6 +86,16 @@ class OperationalPerformanceMetrics:
+ df["wrap_up_time"].fillna(0)
)
# v3.0: Filtrar NOISE y ZOMBIE para cálculos de variabilidad
# record_status: 'valid', 'noise', 'zombie', 'abandon'
# Para AHT/CV solo usamos 'valid' (o sin status = legacy data)
if "record_status" in df.columns:
df["record_status"] = df["record_status"].astype(str).str.strip().str.upper()
# Crear máscara para registros válidos (para cálculos de CV/variabilidad)
df["_is_valid_for_cv"] = df["record_status"].isin(["VALID", "NAN", ""]) | df["record_status"].isna()
else:
df["_is_valid_for_cv"] = True
# Normalización básica
df["queue_skill"] = df["queue_skill"].astype(str).str.strip()
df["channel"] = df["channel"].astype(str).str.strip()
@@ -121,8 +131,13 @@ class OperationalPerformanceMetrics:
def aht_distribution(self) -> Dict[str, float]:
"""
Devuelve P10, P50, P90 del AHT y el ratio P90/P50 como medida de variabilidad.
v3.0: Filtra NOISE y ZOMBIE para el cálculo de variabilidad.
Solo usa registros con record_status='valid' o sin status (legacy).
"""
ht = self.df["handle_time"].dropna().astype(float)
# Filtrar solo registros válidos para cálculo de variabilidad
df_valid = self.df[self.df["_is_valid_for_cv"] == True]
ht = df_valid["handle_time"].dropna().astype(float)
if ht.empty:
return {}
@@ -165,56 +180,45 @@ class OperationalPerformanceMetrics:
# ------------------------------------------------------------------ #
def fcr_rate(self) -> float:
"""
FCR proxy = 100 - escalation_rate.
FCR (First Contact Resolution).
Usamos la métrica de escalación ya calculada a partir de transfer_flag.
Si no se puede calcular escalation_rate, intentamos derivarlo
directamente de la columna transfer_flag. Si todo falla, devolvemos NaN.
Prioridad 1: Usar fcr_real_flag del CSV si existe
Prioridad 2: Calcular como 100 - escalation_rate
"""
df = self.df
total = len(df)
if total == 0:
return float("nan")
# Prioridad 1: Usar fcr_real_flag si existe
if "fcr_real_flag" in df.columns:
col = df["fcr_real_flag"]
# Normalizar a booleano
if col.dtype == "O":
fcr_mask = (
col.astype(str)
.str.strip()
.str.lower()
.isin(["true", "t", "1", "yes", "y", "si", ""])
)
else:
fcr_mask = pd.to_numeric(col, errors="coerce").fillna(0) > 0
fcr_count = int(fcr_mask.sum())
fcr = (fcr_count / total) * 100.0
return float(max(0.0, min(100.0, round(fcr, 2))))
# Prioridad 2: Fallback a 100 - escalation_rate
try:
esc = self.escalation_rate()
except Exception:
esc = float("nan")
# Si escalation_rate es válido, usamos el proxy simple
if esc is not None and not math.isnan(esc):
fcr = 100.0 - esc
return float(max(0.0, min(100.0, round(fcr, 2))))
# Fallback: calcular directamente desde transfer_flag
df = self.df
if "transfer_flag" not in df.columns or len(df) == 0:
return float("nan")
col = df["transfer_flag"]
# Normalizar a booleano: TRUE/FALSE, 1/0, etc.
if col.dtype == "O":
col_norm = (
col.astype(str)
.str.strip()
.str.lower()
.map({
"true": True,
"t": True,
"1": True,
"yes": True,
"y": True,
})
).fillna(False)
transfer_mask = col_norm
else:
transfer_mask = pd.to_numeric(col, errors="coerce").fillna(0) > 0
total = len(df)
transfers = int(transfer_mask.sum())
esc_rate = transfers / total if total > 0 else float("nan")
if math.isnan(esc_rate):
return float("nan")
fcr = 100.0 - esc_rate * 100.0
return float(max(0.0, min(100.0, round(fcr, 2))))
return float("nan")
def escalation_rate(self) -> float:
@@ -233,20 +237,57 @@ class OperationalPerformanceMetrics:
"""
% de interacciones abandonadas.
Definido como % de filas con abandoned_flag == True.
Si la columna no existe, devuelve NaN.
Busca en orden: is_abandoned, abandoned_flag, abandoned
Si ninguna columna existe, devuelve NaN.
"""
df = self.df
if "abandoned_flag" not in df.columns:
return float("nan")
total = len(df)
if total == 0:
return float("nan")
abandoned = df["abandoned_flag"].sum()
# Buscar columna de abandono en orden de prioridad
abandon_col = None
for col_name in ["is_abandoned", "abandoned_flag", "abandoned"]:
if col_name in df.columns:
abandon_col = col_name
break
if abandon_col is None:
return float("nan")
col = df[abandon_col]
# Normalizar a booleano
if col.dtype == "O":
abandon_mask = (
col.astype(str)
.str.strip()
.str.lower()
.isin(["true", "t", "1", "yes", "y", "si", ""])
)
else:
abandon_mask = pd.to_numeric(col, errors="coerce").fillna(0) > 0
abandoned = int(abandon_mask.sum())
return float(round(abandoned / total * 100, 2))
def high_hold_time_rate(self, threshold_seconds: float = 60.0) -> float:
    """
    % of interactions whose hold_time exceeds `threshold_seconds` (default 60s).

    Complexity proxy: if the agent had to keep the customer on hold for more
    than a minute, they probably had to look something up or consult someone.

    Returns NaN when the frame is empty or has no hold_time column.
    """
    df = self.df
    total = len(df)
    # Guard against a missing column as well as an empty frame: the previous
    # version raised KeyError when the CSV lacked hold_time, while sibling
    # metrics return NaN in that situation.
    if total == 0 or "hold_time" not in df.columns:
        return float("nan")
    # Missing hold times count as 0 seconds (i.e. never placed on hold).
    hold_times = df["hold_time"].fillna(0)
    high_hold_count = (hold_times > threshold_seconds).sum()
    return float(round(high_hold_count / total * 100, 2))
def recurrence_rate_7d(self) -> float:
"""
% de clientes que vuelven a contactar en < 7 días.