Initial commit - ACME demo version

This commit is contained in:
sujucu70
2026-02-04 11:08:21 +01:00
commit 1bb0765766
180 changed files with 52249 additions and 0 deletions

View File

@@ -0,0 +1,4 @@
# vacío o con un pequeño comentario
"""
Paquete de API para BeyondCX Heatmap.
"""

View File

@@ -0,0 +1,3 @@
from .analysis import router
__all__ = ["router"]

View File

@@ -0,0 +1,221 @@
from __future__ import annotations
import os
from pathlib import Path
import json
import math
from uuid import uuid4
from typing import Optional, Any, Literal
from fastapi import APIRouter, UploadFile, File, Form, HTTPException, Depends
from fastapi.responses import JSONResponse
from beyond_api.security import get_current_user
from beyond_api.services.analysis_service import run_analysis_collect_json
# Cache locations are imported from the cache module so that both modules
# always point at the same file. Recomputing the path here from the CACHE_DIR
# environment variable (with a fixed "/data/cache" default) diverged from
# cache.py, which applies different fallbacks on Windows when CACHE_DIR is unset.
from beyond_api.api.cache import CACHE_DIR, CACHED_FILE

# Router for the analysis endpoints; no URL prefix is applied.
router = APIRouter(
    prefix="",
    tags=["analysis"],
)
def sanitize_for_json(obj: Any) -> Any:
    """Return a JSON-compliant copy of ``obj``.

    Conversion rules, applied recursively:
    - floats that are NaN, +inf or -inf become ``None``;
    - ``None``, ``str``, ``int`` and ``bool`` are returned unchanged;
    - dicts are rebuilt with sanitized values; keys that JSON cannot encode
      (anything other than str/int/float/bool/None) are converted with ``str``;
    - lists and tuples become lists of sanitized items;
    - objects exposing a ``tolist()`` method (numpy-style scalars and arrays)
      are converted through it, so numeric scalars stay numbers instead of
      being turned into strings;
    - anything else is converted with ``str``.
    """
    if isinstance(obj, float):
        return obj if math.isfinite(obj) else None
    if obj is None or isinstance(obj, (str, int, bool)):
        return obj
    if isinstance(obj, dict):
        return {
            (
                key
                if key is None or isinstance(key, (str, int, float, bool))
                else str(key)
            ): sanitize_for_json(value)
            for key, value in obj.items()
        }
    if isinstance(obj, (list, tuple)):
        return [sanitize_for_json(item) for item in obj]

    # Duck-typed support for array-like objects and scalar wrappers.
    to_list = getattr(obj, "tolist", None)
    if callable(to_list):
        try:
            converted = to_list()
        except Exception:
            # Best effort only: fall back to the string form below.
            converted = obj
        # Only recurse when the conversion produced a different kind of
        # object; otherwise we could loop forever on the same value.
        if type(converted) is not type(obj):
            return sanitize_for_json(converted)
    return str(obj)
@router.post("/analysis")
async def analysis_endpoint(
    csv_file: UploadFile = File(...),
    economy_json: Optional[str] = Form(default=None),
    analysis: Literal["basic", "premium"] = Form(default="premium"),
    current_user: str = Depends(get_current_user),
):
    """Run the analysis pipeline on an uploaded CSV and return only JSON.

    The CSV arrives as multipart/form-data, is stored temporarily under
    ``data/input`` and is removed again once the analysis has finished
    (successfully or not).

    Parameters:
        csv_file: uploaded CSV file.
        economy_json: optional JSON object (as a string) with economy
            parameters; forwarded to ``run_analysis_collect_json``.
        analysis: ``"basic"`` or ``"premium"``; forwarded to
            ``run_analysis_collect_json``.
        current_user: user name resolved by ``get_current_user``.

    Returns:
        A ``JSONResponse`` with the keys ``user`` and ``results``.

    Raises:
        HTTPException(400): invalid ``analysis`` value, or ``economy_json``
            is not valid JSON or is not a JSON object.
    """
    # Defensive check in case an unexpected value gets through.
    if analysis not in {"basic", "premium"}:
        raise HTTPException(
            status_code=400,
            detail="analysis debe ser 'basic' o 'premium'.",
        )

    # 1) Parse the optional economy parameters.
    economy_data = None
    if economy_json:
        try:
            economy_data = json.loads(economy_json)
        except json.JSONDecodeError as exc:
            raise HTTPException(
                status_code=400,
                detail="economy_json no es un JSON válido.",
            ) from exc
        # Valid JSON that is not an object (a list, a number, ...) would make
        # the service fail later with an unhandled error; reject it up front.
        if economy_data is not None and not isinstance(economy_data, dict):
            raise HTTPException(
                status_code=400,
                detail="economy_json debe ser un objeto JSON.",
            )

    # 2) Store the uploaded CSV in a working folder.
    base_input_dir = Path("data/input")
    base_input_dir.mkdir(parents=True, exist_ok=True)
    original_name = csv_file.filename or "input.csv"
    # Keep only the final path component (drops any ../ prefix) and cap its
    # length so the prefixed name stays within file-name limits.
    safe_name = Path(original_name).name[-100:]
    # A unique prefix keeps concurrent uploads that share a file name from
    # overwriting, and later deleting, each other's temporary file.
    input_path = base_input_dir / f"{uuid4().hex}_{safe_name}"

    try:
        with input_path.open("wb") as f:
            while True:
                chunk = await csv_file.read(1024 * 1024)  # 1 MB
                if not chunk:
                    break
                f.write(chunk)

        # 3) Run the analysis and get the results in memory.
        # NOTE(review): this is a synchronous call inside an async handler;
        # presumably it blocks the event loop while it runs — confirm whether
        # it should be offloaded to a worker thread.
        results_json = run_analysis_collect_json(
            input_path=input_path,
            economy_data=economy_data,
            analysis=analysis,  # "basic" or "premium"
            company_folder=None,
        )
    finally:
        # 3b) Remove the temporary CSV, including after a failed write.
        try:
            input_path.unlink(missing_ok=True)
        except OSError:
            # A failed delete must not break the response.
            pass

    # 4) Replace NaN/inf so the payload is valid JSON.
    safe_results = sanitize_for_json(results_json)

    # 5) Return JSON only.
    return JSONResponse(
        content={
            "user": current_user,
            "results": safe_results,
        }
    )
def extract_date_range_from_csv(file_path: Path) -> dict:
    """Return the earliest and latest ``datetime_start`` date in a CSV file.

    Only the ``datetime_start`` column is read. Values that cannot be parsed
    as dates are ignored instead of invalidating the whole range.

    Parameters:
        file_path: path of the CSV file to inspect.

    Returns:
        A dict ``{"min": ..., "max": ...}`` whose values are ``YYYY-MM-DD``
        strings, or ``None`` when no valid date is available or the file
        cannot be read (the error is printed, not raised).
    """
    import pandas as pd

    try:
        # Read just the date column to keep the load small.
        df = pd.read_csv(file_path, usecols=['datetime_start'])
        # errors="coerce" turns unparseable cells into NaT, which min()/max()
        # skip; a single bad row no longer discards the whole range.
        starts = pd.to_datetime(df['datetime_start'], errors="coerce")
        min_date = starts.min()
        max_date = starts.max()
        return {
            "min": min_date.strftime('%Y-%m-%d') if pd.notna(min_date) else None,
            "max": max_date.strftime('%Y-%m-%d') if pd.notna(max_date) else None,
        }
    except Exception as e:
        print(f"Error extracting date range: {e}")
    return {"min": None, "max": None}
def count_unique_queues_from_csv(file_path: Path) -> int:
    """Return the number of distinct ``queue_skill`` values in a CSV file.

    Only the ``queue_skill`` column is read. Any failure (missing file,
    missing column, unreadable content) is printed and reported as ``0``.
    """
    import pandas as pd

    column = 'queue_skill'
    try:
        frame = pd.read_csv(file_path, usecols=[column])
        if column not in frame.columns:
            return 0
        return frame[column].nunique()
    except Exception as e:
        print(f"Error counting queues: {e}")
        return 0
@router.post("/analysis/cached")
async def analysis_cached_endpoint(
    economy_json: Optional[str] = Form(default=None),
    analysis: Literal["basic", "premium"] = Form(default="premium"),
    current_user: str = Depends(get_current_user),
):
    """Run the analysis pipeline on the CSV file cached on the server.

    Lets a client re-run the analysis without uploading the file again.

    Parameters:
        economy_json: optional JSON object (as a string) with economy
            parameters; forwarded to ``run_analysis_collect_json``.
        analysis: ``"basic"`` or ``"premium"``.
        current_user: user name resolved by ``get_current_user``.

    Returns:
        A ``JSONResponse`` with the keys ``user``, ``results``, ``source``
        (always ``"cached"``), ``dateRange`` and ``uniqueQueues``.

    Raises:
        HTTPException(404): there is no cached file.
        HTTPException(400): invalid ``analysis`` value, or ``economy_json``
            is not valid JSON or is not a JSON object.
        HTTPException(500): the analysis itself failed.
    """
    # The cached file must exist before anything else is attempted.
    if not CACHED_FILE.exists():
        raise HTTPException(
            status_code=404,
            detail="No hay archivo cacheado en el servidor. Sube un archivo primero.",
        )

    # Defensive check in case an unexpected value gets through.
    if analysis not in {"basic", "premium"}:
        raise HTTPException(
            status_code=400,
            detail="analysis debe ser 'basic' o 'premium'.",
        )

    # Parse the optional economy parameters.
    economy_data = None
    if economy_json:
        try:
            economy_data = json.loads(economy_json)
        except json.JSONDecodeError as exc:
            raise HTTPException(
                status_code=400,
                detail="economy_json no es un JSON válido.",
            ) from exc
        # Valid JSON that is not an object (a list, a number, ...) is a
        # client error, not a server failure; reject it before the analysis.
        if economy_data is not None and not isinstance(economy_data, dict):
            raise HTTPException(
                status_code=400,
                detail="economy_json debe ser un objeto JSON.",
            )

    # Metadata derived directly from the CSV.
    date_range = extract_date_range_from_csv(CACHED_FILE)
    unique_queues = count_unique_queues_from_csv(CACHED_FILE)

    try:
        results_json = run_analysis_collect_json(
            input_path=CACHED_FILE,
            economy_data=economy_data,
            analysis=analysis,
            company_folder=None,
        )
    except Exception as e:
        # Chain the original error so the traceback is kept in the logs.
        raise HTTPException(
            status_code=500,
            detail=f"Error ejecutando análisis: {str(e)}",
        ) from e

    # Replace NaN/inf so the payload is valid JSON.
    safe_results = sanitize_for_json(results_json)

    return JSONResponse(
        content={
            "user": current_user,
            "results": safe_results,
            "source": "cached",
            "dateRange": date_range,
            "uniqueQueues": unique_queues,
        }
    )

View File

@@ -0,0 +1,26 @@
# beyond_api/api/auth.py
from __future__ import annotations
from fastapi import APIRouter, Depends
from fastapi.responses import JSONResponse
from beyond_api.security import get_current_user
# Router for authentication helpers; its routes are served under /auth.
router = APIRouter(prefix="/auth", tags=["auth"])
@router.get("/check")
def check_auth(current_user: str = Depends(get_current_user)):
    """Report the authenticated user.

    The ``get_current_user`` dependency does the credential validation; when
    it succeeds, this handler answers with the user name and an ``"ok"``
    status.
    """
    payload = {
        "user": current_user,
        "status": "ok",
    }
    return JSONResponse(content=payload)

View File

@@ -0,0 +1,288 @@
# beyond_api/api/cache.py
"""
Server-side cache for CSV files.
Stores the uploaded CSV file and metadata for later re-analysis.
"""
from __future__ import annotations
import json
import os
import shutil
import sys
from datetime import datetime
from pathlib import Path
from typing import Any, Optional
from fastapi import APIRouter, Depends, HTTPException, status, UploadFile, File, Form
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from beyond_api.security import get_current_user
# Router for the server-side cache; its routes are served under /cache.
router = APIRouter(prefix="/cache", tags=["cache"])
# Directory for cache files - use platform-appropriate default
def _get_default_cache_dir() -> Path:
    """Pick the cache directory for the current platform.

    Resolution order:
    1. the ``CACHE_DIR`` environment variable, when set and non-empty;
    2. on non-Windows platforms, ``/data/cache``;
    3. on Windows, ``C:/data/cache`` if it already exists (legacy location),
       otherwise a ``.cache`` folder three levels above this file.
    """
    configured = os.getenv("CACHE_DIR")
    if configured:
        return Path(configured)

    if sys.platform != "win32":
        return Path("/data/cache")

    # Windows: prefer the legacy location when present, for backwards
    # compatibility with existing installations.
    legacy_dir = Path("C:/data/cache")
    if legacy_dir.exists():
        return legacy_dir

    # Otherwise fall back to a local .cache directory.
    return Path(__file__).parent.parent.parent / ".cache"
# Resolved once at import time; every cache artifact lives in this directory.
CACHE_DIR = _get_default_cache_dir()
CACHED_FILE = CACHE_DIR.joinpath("cached_data.csv")
METADATA_FILE = CACHE_DIR.joinpath("metadata.json")
DRILLDOWN_FILE = CACHE_DIR.joinpath("drilldown_data.json")

# Record the resolved locations when the module is loaded.
import logging

logger = logging.getLogger(__name__)
logger.info("[Cache] Using cache directory: %s", CACHE_DIR)
logger.info("[Cache] Drilldown file path: %s", DRILLDOWN_FILE)
class CacheMetadata(BaseModel):
    """Schema with the same fields as the metadata JSON stored with the CSV.

    The field names match the keys that ``save_cached_file`` writes to the
    metadata file.
    """

    # File name as reported by the client.
    fileName: str
    # File size as reported by the client.
    fileSize: int
    # Number of data records counted in the cached CSV.
    recordCount: int
    # ISO-formatted timestamp of when the file was cached.
    cachedAt: str
    # Cost-per-hour value supplied together with the upload.
    costPerHour: float
def ensure_cache_dir():
    """Make sure the cache directory exists, creating parents as needed."""
    if not CACHE_DIR.is_dir():
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
def count_csv_records(file_path: Path) -> int:
    """Count the data records in a CSV file, excluding the header line.

    Blank lines are not counted. The result is never negative: an empty
    file and a header-only file both yield ``0``.

    Parameters:
        file_path: path of the CSV file.

    Returns:
        Number of non-blank lines after the header, or ``0`` when the file
        cannot be opened or read.
    """
    try:
        # errors='ignore' lets files with stray non-UTF-8 bytes be counted.
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            non_blank_lines = sum(1 for line in f if line.strip())
    except (OSError, ValueError):
        return 0
    # Subtract the header; clamp so an empty file reports 0 instead of -1.
    return max(non_blank_lines - 1, 0)
@router.get("/check")
def check_cache(current_user: str = Depends(get_current_user)):
    """Tell the client whether cached data is available.

    Responds with ``exists: True`` and the stored metadata when both the
    metadata file and the cached CSV exist. Otherwise responds with
    ``exists: False`` and ``metadata: None``; if reading the metadata fails,
    the error text is included under ``error``.
    """
    both_present = METADATA_FILE.exists() and CACHED_FILE.exists()
    if not both_present:
        return JSONResponse(content={"exists": False, "metadata": None})

    try:
        with open(METADATA_FILE, "r") as metadata_fh:
            stored_metadata = json.load(metadata_fh)
        return JSONResponse(content={"exists": True, "metadata": stored_metadata})
    except Exception as e:
        failure = {"exists": False, "metadata": None, "error": str(e)}
        return JSONResponse(content=failure)
@router.get("/file")
def get_cached_file_path(current_user: str = Depends(get_current_user)):
    """Return the server-side path of the cached CSV file.

    Raises:
        HTTPException(404): there is no cached file.
    """
    if CACHED_FILE.exists():
        return JSONResponse(content={"path": str(CACHED_FILE)})

    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail="No cached file found"
    )
@router.get("/download")
def download_cached_file(current_user: str = Depends(get_current_user)):
    """Send the cached CSV file to the client as ``cached_data.csv``.

    Raises:
        HTTPException(404): there is no cached file.
    """
    from fastapi.responses import FileResponse

    if CACHED_FILE.exists():
        return FileResponse(
            path=CACHED_FILE,
            media_type="text/csv",
            filename="cached_data.csv"
        )

    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail="No cached file found"
    )
@router.post("/file")
async def save_cached_file(
    csv_file: UploadFile = File(...),
    fileName: str = Form(...),
    fileSize: int = Form(...),
    costPerHour: float = Form(...),
    current_user: str = Depends(get_current_user)
):
    """Store an uploaded CSV file and its metadata in the server cache.

    The upload is written to a temporary file next to the cache file and
    only swapped in once it has been received completely, so an interrupted
    upload never leaves a truncated CSV in place of the previous one.

    Parameters:
        csv_file: uploaded CSV file.
        fileName: original file name, stored as-is in the metadata.
        fileSize: file size reported by the client, stored as-is.
        costPerHour: cost-per-hour value, stored as-is.
        current_user: user name resolved by ``get_current_user``.

    Returns:
        A ``JSONResponse`` with ``success``, ``message`` and ``metadata``.

    Raises:
        HTTPException(500): the file or its metadata could not be saved.
    """
    ensure_cache_dir()
    temp_file = CACHED_FILE.with_name(CACHED_FILE.name + ".tmp")
    try:
        # Receive the upload into the temporary file in 1 MB chunks.
        with open(temp_file, "wb") as f:
            while True:
                chunk = await csv_file.read(1024 * 1024)  # 1 MB chunks
                if not chunk:
                    break
                f.write(chunk)

        # Swap the complete file into place.
        temp_file.replace(CACHED_FILE)

        # Count records
        record_count = count_csv_records(CACHED_FILE)

        # Save metadata
        metadata = {
            "fileName": fileName,
            "fileSize": fileSize,
            "recordCount": record_count,
            "cachedAt": datetime.now().isoformat(),
            "costPerHour": costPerHour,
        }
        with open(METADATA_FILE, "w") as f:
            json.dump(metadata, f)

        return JSONResponse(content={
            "success": True,
            "message": f"Cached file with {record_count} records",
            "metadata": metadata
        })
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error saving cache: {str(e)}"
        ) from e
    finally:
        # Remove a partially written upload; after a successful swap the
        # temporary file no longer exists and this is a no-op.
        try:
            temp_file.unlink(missing_ok=True)
        except OSError:
            pass
@router.get("/drilldown")
def get_cached_drilldown(current_user: str = Depends(get_current_user)):
    """Return the drilldown JSON stored in the server cache.

    Raises:
        HTTPException(404): no drilldown file has been cached.
        HTTPException(500): the file exists but could not be read or parsed.
    """
    logger.info("[Cache] GET /drilldown - checking file: %s", DRILLDOWN_FILE)
    logger.info("[Cache] File exists: %s", DRILLDOWN_FILE.exists())

    if not DRILLDOWN_FILE.exists():
        logger.warning("[Cache] Drilldown file not found at: %s", DRILLDOWN_FILE)
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No cached drilldown data found"
        )

    try:
        with open(DRILLDOWN_FILE, "r", encoding="utf-8") as drilldown_fh:
            drilldown_data = json.load(drilldown_fh)
        logger.info("[Cache] Loaded drilldown with %s skills", len(drilldown_data))
        payload = {"success": True, "drilldownData": drilldown_data}
        return JSONResponse(content=payload)
    except Exception as e:
        logger.error("[Cache] Error reading drilldown: %s", e)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error reading drilldown data: {str(e)}"
        )
@router.post("/drilldown")
async def save_cached_drilldown(
    drilldown_json: str = Form(...),
    current_user: str = Depends(get_current_user)
):
    """Store the drilldown JSON in the server cache.

    The JSON arrives as a form field and must be an array or an object.

    Parameters:
        drilldown_json: drilldown data serialized as a JSON string.
        current_user: user name resolved by ``get_current_user``.

    Returns:
        A ``JSONResponse`` with ``success`` and ``message``.

    Raises:
        HTTPException(400): the field is not valid JSON, or it is valid JSON
            but not an array or object.
        HTTPException(500): the data could not be written.
    """
    logger.info(f"[Cache] POST /drilldown - saving to: {DRILLDOWN_FILE}")
    logger.info(f"[Cache] Cache directory: {CACHE_DIR}")
    ensure_cache_dir()
    logger.info(f"[Cache] Cache dir exists after ensure: {CACHE_DIR.exists()}")

    # Parse and validate JSON
    try:
        drilldown_data = json.loads(drilldown_json)
    except json.JSONDecodeError as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Invalid JSON: {str(e)}"
        ) from e

    # Scalars and null are valid JSON but have no length; without this check
    # len() below would fail and be reported as a server error.
    if not isinstance(drilldown_data, (list, dict)):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Invalid JSON: drilldown data must be an array or an object"
        )

    try:
        logger.info(f"[Cache] Parsed drilldown JSON with {len(drilldown_data)} skills")

        # Save to file
        with open(DRILLDOWN_FILE, "w", encoding="utf-8") as f:
            json.dump(drilldown_data, f)
        logger.info(f"[Cache] Drilldown saved successfully, file exists: {DRILLDOWN_FILE.exists()}")

        return JSONResponse(content={
            "success": True,
            "message": f"Cached drilldown data with {len(drilldown_data)} skills"
        })
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error saving drilldown data: {str(e)}"
        ) from e
@router.delete("/file")
def clear_cache(current_user: str = Depends(get_current_user)):
    """Delete the cached CSV, its metadata and the drilldown data.

    Files that do not exist are skipped.

    Raises:
        HTTPException(500): a file could not be removed.
    """
    try:
        for cached_path in (CACHED_FILE, METADATA_FILE, DRILLDOWN_FILE):
            if cached_path.exists():
                cached_path.unlink()
        return JSONResponse(content={"success": True, "message": "Cache cleared"})
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error clearing cache: {str(e)}"
        )
# Keep old endpoints for backwards compatibility but mark as deprecated
@router.get("/interactions")
def get_cached_interactions_deprecated(current_user: str = Depends(get_current_user)):
    """DEPRECATED: always answers 410 Gone; use /cache/file instead."""
    gone = HTTPException(
        status_code=status.HTTP_410_GONE,
        detail="This endpoint is deprecated. Use /cache/file with re-analysis instead."
    )
    raise gone
@router.post("/interactions")
def save_cached_interactions_deprecated(current_user: str = Depends(get_current_user)):
    """DEPRECATED: always answers 410 Gone; use /cache/file instead."""
    gone = HTTPException(
        status_code=status.HTTP_410_GONE,
        detail="This endpoint is deprecated. Use /cache/file instead."
    )
    raise gone

View File

@@ -0,0 +1,37 @@
import logging
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
# importa tus routers
from beyond_api.api.analysis import router as analysis_router
from beyond_api.api.auth import router as auth_router
from beyond_api.api.cache import router as cache_router
def setup_basic_logging() -> None:
    """Configure root logging at INFO level with a timestamped format."""
    log_format = "%(asctime)s %(levelname)s [%(name)s] %(message)s"
    logging.basicConfig(level=logging.INFO, format=log_format)
# Configure logging before the application object is created.
setup_basic_logging()

app = FastAPI()

# Browser origins allowed to call the API (local development front ends).
origins = [
    "http://localhost:3000",
    "http://localhost:3001",
    "http://127.0.0.1:3000",
    "http://127.0.0.1:3001",
]

# CORS: only the origins above, with credentials, any method and any header.
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Register the API routers.
app.include_router(analysis_router)
app.include_router(auth_router)
app.include_router(cache_router)

View File

@@ -0,0 +1,37 @@
from __future__ import annotations
import os
import secrets
from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPBasic, HTTPBasicCredentials
# auto_error=False so that missing credentials reach get_current_user as None
# instead of the scheme answering on its own, which would trigger the
# browser's native login popup.
security = HTTPBasic(auto_error=False)

# In production: export BASIC_AUTH_USERNAME and BASIC_AUTH_PASSWORD.
# NOTE(review): the fallback values below are hard-coded credentials that
# apply whenever the variables are unset — confirm this is acceptable outside
# demo environments.
BASIC_USER = os.environ.get("BASIC_AUTH_USERNAME", "beyond")
BASIC_PASS = os.environ.get("BASIC_AUTH_PASSWORD", "beyond2026")
def get_current_user(credentials: HTTPBasicCredentials | None = Depends(security)) -> str:
    """Validate HTTP Basic credentials and return the user name.

    The 401 responses raised here carry no ``WWW-Authenticate`` header, which
    avoids the browser's native login popup (the frontend has its own login
    form).

    Parameters:
        credentials: credentials extracted by the ``security`` scheme, or
            ``None`` when the request carried none.

    Returns:
        The authenticated user name.

    Raises:
        HTTPException(401): credentials are missing or do not match.
    """
    if credentials is None:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Credenciales requeridas",
        )

    # compare_digest() raises TypeError for str arguments that contain
    # non-ASCII characters, which would surface as a 500 error. Comparing the
    # UTF-8 bytes keeps the constant-time comparison and accepts any input.
    correct_username = secrets.compare_digest(
        credentials.username.encode("utf-8"), BASIC_USER.encode("utf-8")
    )
    correct_password = secrets.compare_digest(
        credentials.password.encode("utf-8"), BASIC_PASS.encode("utf-8")
    )
    # Both comparisons always run, so the response does not reveal which of
    # the two values was wrong.
    if not (correct_username and correct_password):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Credenciales incorrectas",
        )
    return credentials.username

View File

View File

@@ -0,0 +1,262 @@
from __future__ import annotations
from pathlib import Path
from uuid import uuid4
from datetime import datetime
from typing import Optional, Literal
import json
import zipfile
from beyond_metrics.io import LocalDataSource, LocalResultsSink, ResultsSink
from beyond_metrics.pipeline import build_pipeline
from beyond_metrics.dimensions.EconomyCost import EconomyConfig
from beyond_flows.scorers import AgenticScorer
from typing import Any, Mapping, Optional, Dict
def _build_economy_config(economy_data: Optional[Mapping[str, Any]]) -> EconomyConfig:
    """Build an ``EconomyConfig`` from optional user-supplied parameters.

    Every field missing from ``economy_data`` takes its default value, and
    ``None`` is treated as "no overrides". Supplied ``customer_segments``
    entries are merged on top of the default segments.

    Parameters:
        economy_data: mapping with any of ``labor_cost_per_hour``,
            ``overhead_rate``, ``tech_costs_annual``, ``automation_cpi``,
            ``automation_volume_share``, ``automation_success_rate`` and
            ``customer_segments``; or ``None``.

    Returns:
        The populated ``EconomyConfig``.

    Raises:
        ValueError: ``economy_data`` is not a mapping, a scalar field is not
            numeric (booleans are rejected), or ``customer_segments`` is not
            a mapping of string values.
    """
    # Default values
    default_customer_segments: Dict[str, str] = {
        "VIP": "high",
        "Premium": "high",
        "Soporte_General": "medium",
        "Ventas": "medium",
        "Basico": "low",
    }

    # A single code path for "no data" and "partial data" keeps the defaults
    # defined in one place only.
    if economy_data is None:
        data: Mapping[str, Any] = {}
    elif isinstance(economy_data, Mapping):
        data = economy_data
    else:
        raise ValueError(
            f"economy_data debe ser un diccionario. Valor recibido: {economy_data!r}"
        )

    def _get_float(field: str, default: float) -> float:
        value = data.get(field, default)
        # bool is a subclass of int; True/False must not be read as 1.0/0.0.
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return float(value)
        raise ValueError(f"El campo '{field}' debe ser numérico (float). Valor recibido: {value!r}")

    # Scalar fields
    labor_cost_per_hour = _get_float("labor_cost_per_hour", 20.0)
    overhead_rate = _get_float("overhead_rate", 0.10)
    tech_costs_annual = _get_float("tech_costs_annual", 5000.0)
    automation_cpi = _get_float("automation_cpi", 0.20)
    automation_volume_share = _get_float("automation_volume_share", 0.5)
    automation_success_rate = _get_float("automation_success_rate", 0.6)

    # customer_segments is optional; validate it when present.
    customer_segments: Dict[str, str] = dict(default_customer_segments)
    cs = data.get("customer_segments")
    if cs is not None:
        if not isinstance(cs, Mapping):
            raise ValueError("customer_segments debe ser un diccionario {segment: level}")
        for k, v in cs.items():
            if not isinstance(v, str):
                raise ValueError(
                    f"El valor de customer_segments['{k}'] debe ser str. Valor recibido: {v!r}"
                )
            customer_segments[str(k)] = v

    return EconomyConfig(
        labor_cost_per_hour=labor_cost_per_hour,
        overhead_rate=overhead_rate,
        tech_costs_annual=tech_costs_annual,
        automation_cpi=automation_cpi,
        automation_volume_share=automation_volume_share,
        automation_success_rate=automation_success_rate,
        customer_segments=customer_segments,
    )
def run_analysis(
    input_path: Path,
    economy_data: Optional[dict] = None,
    return_type: Literal["path", "zip"] = "path",
    company_folder: Optional[str] = None,
) -> tuple[Path, Optional[Path]]:
    """Run the pipeline on a CSV file and return where the results were written.

    ``input_path`` may be absolute or relative. The data source and the
    results sink are both rooted at the folder containing the CSV, and the
    run directory is named after the UTC timestamp of the run, optionally
    nested under ``company_folder``.

    Parameters:
        input_path: path of the CSV file to analyse.
        economy_data: optional economy parameters, validated by
            ``_build_economy_config``.
        return_type: ``"path"`` to return only the results directory, or
            ``"zip"`` to also create a ZIP archive of it.
        company_folder: optional folder name placed before the timestamp.

    Returns:
        ``(results_dir, None)`` when ``return_type == "path"``;
        ``(results_dir, zip_path)`` when ``return_type == "zip"``.

    Raises:
        FileNotFoundError: ``input_path`` does not exist.
        ValueError: ``input_path`` is not a file, or ``economy_data`` is
            invalid.
    """
    # Local import: the module-level import only brings in ``datetime``.
    from datetime import timezone

    input_path = input_path.resolve()
    if not input_path.exists():
        raise FileNotFoundError(f"El CSV no existe: {input_path}")
    if not input_path.is_file():
        raise ValueError(f"La ruta no apunta a un fichero CSV: {input_path}")

    # Folder that contains the CSV
    csv_dir = input_path.parent

    # Data source and results sink both point at that folder
    datasource = LocalDataSource(base_dir=str(csv_dir))
    sink = LocalResultsSink(base_dir=str(csv_dir))

    # Economy configuration
    economy_cfg = _build_economy_config(economy_data)
    dimension_params: Dict[str, Mapping[str, Any]] = {
        "economy_costs": {
            "config": economy_cfg,
        }
    }

    # Scoring callback: writes agentic_readiness.json through the sink
    def agentic_post_run(results: Dict[str, Any], run_base: str, sink_: ResultsSink) -> None:
        scorer = AgenticScorer()
        try:
            agentic = scorer.compute_and_return(results)
        except Exception as e:
            # A scorer failure must not abort the whole run; record it instead.
            agentic = {
                "error": f"{type(e).__name__}: {e}",
            }
        sink_.write_json(f"{run_base}/agentic_readiness.json", agentic)

    pipeline = build_pipeline(
        dimensions_config_path="beyond_metrics/configs/beyond_metrics_config.json",
        datasource=datasource,
        sink=sink,
        dimension_params=dimension_params,
        post_run=[agentic_post_run],
    )

    # Run timestamp (name of the results folder). datetime.utcnow() is
    # deprecated; an aware UTC datetime formats to the same string.
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")

    # Logical results path, RELATIVE to the sink's base_dir
    if company_folder:
        # e.g. "Cliente_X/20251208-153045"
        run_dir_rel = f"{company_folder.rstrip('/')}/{timestamp}"
    else:
        # e.g. "20251208-153045"
        run_dir_rel = timestamp

    # Run the pipeline: the CSV is passed relative to csv_dir
    pipeline.run(
        input_path=input_path.name,
        run_dir=run_dir_rel,
    )

    # Actual folder holding the results
    results_dir = csv_dir / run_dir_rel

    if return_type == "path":
        return results_dir, None

    # --- ZIP of the results ------------------------------------------------
    # The archive is created in the same folder as the CSV, named after the
    # run directory.
    zip_name = f"{run_dir_rel.replace('/', '_')}.zip"
    zip_path = csv_dir / zip_name
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
        for file in results_dir.rglob("*"):
            if file.is_file():
                # Archive names are relative to the parent of the results
                # folder, so the run folder name is kept inside the ZIP.
                arcname = file.relative_to(results_dir.parent)
                zipf.write(file, arcname)
    return results_dir, zip_path
from typing import Any, Mapping, Dict # asegúrate de tener estos imports arriba
def run_analysis_collect_json(
    input_path: Path,
    economy_data: Optional[dict] = None,
    analysis: Literal["basic", "premium"] = "premium",
    company_folder: Optional[str] = None,
) -> Dict[str, Any]:
    """Run the pipeline and return all results as a single dict.

    Differences from ``run_analysis``:
    - the pipeline is asked not to write results.json
      (``write_results_json=False``);
    - agentic_readiness.json is not written; the scorer output is stored in
      the results dict under ``"agentic_readiness"`` instead.

    Parameters:
        input_path: path of the CSV file to analyse.
        economy_data: optional economy parameters, validated by
            ``_build_economy_config``.
        analysis: level of analysis; selects the dimensions config file:
            ``"basic"`` -> beyond_metrics/configs/basic.json, anything else
            -> beyond_metrics/configs/beyond_metrics_config.json.
        company_folder: optional folder name placed before the run timestamp.

    Returns:
        The value returned by ``pipeline.run``.

    Raises:
        FileNotFoundError: ``input_path`` does not exist.
        ValueError: ``input_path`` is not a file, or ``economy_data`` is
            invalid.
    """
    # Local import: the module-level import only brings in ``datetime``.
    from datetime import timezone

    # Normalize and validate the CSV path
    input_path = input_path.resolve()
    if not input_path.exists():
        raise FileNotFoundError(f"El CSV no existe: {input_path}")
    if not input_path.is_file():
        raise ValueError(f"La ruta no apunta a un fichero CSV: {input_path}")

    # Folder that contains the CSV
    csv_dir = input_path.parent

    # Data source and results sink both point at that folder
    datasource = LocalDataSource(base_dir=str(csv_dir))
    sink = LocalResultsSink(base_dir=str(csv_dir))

    # Economy configuration
    economy_cfg = _build_economy_config(economy_data)
    dimension_params: Dict[str, Mapping[str, Any]] = {
        "economy_costs": {
            "config": economy_cfg,
        }
    }

    # Pick the dimensions config file according to `analysis`
    if analysis == "basic":
        dimensions_config_path = "beyond_metrics/configs/basic.json"
    else:
        dimensions_config_path = "beyond_metrics/configs/beyond_metrics_config.json"

    # Post-run callback: embed agentic_readiness in the results dict
    # (no files are written by this callback).
    def agentic_post_run(results: Dict[str, Any], run_base: str, sink_: ResultsSink) -> None:
        scorer = AgenticScorer()
        try:
            agentic = scorer.compute_and_return(results)
        except Exception as e:
            # A scorer failure is recorded in the output instead of raised.
            agentic = {"error": f"{type(e).__name__}: {e}"}
        results["agentic_readiness"] = agentic

    pipeline = build_pipeline(
        dimensions_config_path=dimensions_config_path,
        datasource=datasource,
        sink=sink,
        dimension_params=dimension_params,
        post_run=[agentic_post_run],
    )

    # Run timestamp, used as the run directory name. datetime.utcnow() is
    # deprecated; an aware UTC datetime formats to the same string.
    # NOTE(review): anything the pipeline writes under this run directory is
    # not removed here — confirm whether callers need to clean it up.
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
    if company_folder:
        run_dir_rel = f"{company_folder.rstrip('/')}/{timestamp}"
    else:
        run_dir_rel = timestamp

    # Run the pipeline without writing results.json
    results = pipeline.run(
        input_path=input_path.name,
        run_dir=run_dir_rel,
        write_results_json=False,
    )
    return results