Initial commit - ACME demo version
0
backend/beyond_api/services/__init__.py
Normal file
262
backend/beyond_api/services/analysis_service.py
Normal file
@@ -0,0 +1,262 @@
from __future__ import annotations

from pathlib import Path
from datetime import datetime, timezone
from typing import Any, Dict, Literal, Mapping, Optional
import zipfile

from beyond_metrics.io import LocalDataSource, LocalResultsSink, ResultsSink
from beyond_metrics.pipeline import build_pipeline
from beyond_metrics.dimensions.EconomyCost import EconomyConfig
from beyond_flows.scorers import AgenticScorer


def _build_economy_config(economy_data: Optional[Mapping[str, Any]]) -> EconomyConfig:
    """
    Build an EconomyConfig, validating types so the type checker
    does not end up mixing floats and dicts in a single dictionary.
    """

    # Default values
    default_customer_segments: Dict[str, str] = {
        "VIP": "high",
        "Premium": "high",
        "Soporte_General": "medium",
        "Ventas": "medium",
        "Basico": "low",
    }

    if economy_data is None:
        return EconomyConfig(
            labor_cost_per_hour=20.0,
            overhead_rate=0.10,
            tech_costs_annual=5000.0,
            automation_cpi=0.20,
            automation_volume_share=0.5,
            automation_success_rate=0.6,
            customer_segments=default_customer_segments,
        )

    def _get_float(field: str, default: float) -> float:
        value = economy_data.get(field, default)
        # Exclude bool explicitly: bool is a subclass of int and would
        # otherwise be silently coerced to 0.0/1.0
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return float(value)
        raise ValueError(f"Field '{field}' must be numeric (float). Got: {value!r}")

    # Scalar fields
    labor_cost_per_hour = _get_float("labor_cost_per_hour", 20.0)
    overhead_rate = _get_float("overhead_rate", 0.10)
    tech_costs_annual = _get_float("tech_costs_annual", 5000.0)
    automation_cpi = _get_float("automation_cpi", 0.20)
    automation_volume_share = _get_float("automation_volume_share", 0.5)
    automation_success_rate = _get_float("automation_success_rate", 0.6)

    # customer_segments is optional; validate it when present
    customer_segments: Dict[str, str] = dict(default_customer_segments)
    if "customer_segments" in economy_data and economy_data["customer_segments"] is not None:
        cs = economy_data["customer_segments"]
        if not isinstance(cs, Mapping):
            raise ValueError("customer_segments must be a dict of {segment: level}")
        for k, v in cs.items():
            if not isinstance(v, str):
                raise ValueError(
                    f"customer_segments['{k}'] must be a str. Got: {v!r}"
                )
            customer_segments[str(k)] = v

    return EconomyConfig(
        labor_cost_per_hour=labor_cost_per_hour,
        overhead_rate=overhead_rate,
        tech_costs_annual=tech_costs_annual,
        automation_cpi=automation_cpi,
        automation_volume_share=automation_volume_share,
        automation_success_rate=automation_success_rate,
        customer_segments=customer_segments,
    )
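
# A minimal usage sketch for _build_economy_config (illustrative only; the
# payload values below are hypothetical). Fields missing from the payload fall
# back to the defaults above, and non-numeric scalar fields raise ValueError:
#
#     cfg = _build_economy_config(
#         {
#             "labor_cost_per_hour": 25.0,
#             "customer_segments": {"VIP": "high", "Basico": "low"},
#         }
#     )
#     # cfg.labor_cost_per_hour == 25.0; the other scalars keep their defaults
#     _build_economy_config({"overhead_rate": "10%"})  # raises ValueError
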
def run_analysis(
    input_path: Path,
    economy_data: Optional[dict] = None,
    return_type: Literal["path", "zip"] = "path",
    company_folder: Optional[str] = None,
) -> tuple[Path, Optional[Path]]:
    """
    Run the pipeline on a CSV and return:
    - (results_dir, None) if return_type == "path"
    - (results_dir, zip_path) if return_type == "zip"

    input_path may be absolute or relative, but results are ALWAYS
    written to the CSV's folder, inside a subfolder named after the
    run timestamp (optionally prefixed by company_folder).
    """

    input_path = input_path.resolve()

    if not input_path.exists():
        raise FileNotFoundError(f"CSV does not exist: {input_path}")
    if not input_path.is_file():
        raise ValueError(f"Path does not point to a CSV file: {input_path}")

    # Folder containing the CSV
    csv_dir = input_path.parent

    # DataSource and ResultsSink both point at THAT folder
    datasource = LocalDataSource(base_dir=str(csv_dir))
    sink = LocalResultsSink(base_dir=str(csv_dir))

    # Economy config
    economy_cfg = _build_economy_config(economy_data)

    dimension_params: Dict[str, Mapping[str, Any]] = {
        "economy_costs": {
            "config": economy_cfg,
        }
    }

    # Scoring callback
    def agentic_post_run(results: Dict[str, Any], run_base: str, sink_: ResultsSink) -> None:
        scorer = AgenticScorer()
        try:
            agentic = scorer.compute_and_return(results)
        except Exception as e:
            # Do not break the whole run if the scorer fails
            agentic = {
                "error": f"{type(e).__name__}: {e}",
            }
        sink_.write_json(f"{run_base}/agentic_readiness.json", agentic)

    pipeline = build_pipeline(
        dimensions_config_path="beyond_metrics/configs/beyond_metrics_config.json",
        datasource=datasource,
        sink=sink,
        dimension_params=dimension_params,
        post_run=[agentic_post_run],
    )

    # Run timestamp (name of the results folder)
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")

    # Logical results path (RELATIVE to the sink's base_dir)
    if company_folder:
        # e.g. "Cliente_X/20251208-153045"
        run_dir_rel = f"{company_folder.rstrip('/')}/{timestamp}"
    else:
        # e.g. "20251208-153045"
        run_dir_rel = timestamp

    # Run the pipeline: the CSV path is passed relative to csv_dir
    pipeline.run(
        input_path=input_path.name,
        run_dir=run_dir_rel,
    )

    # Actual folder holding the results
    results_dir = csv_dir / run_dir_rel

    if return_type == "path":
        return results_dir, None

    # --- Results ZIP -------------------------------------------------------
    # Build the ZIP in the SAME folder as the CSV, named after run_dir
    zip_name = f"{run_dir_rel.replace('/', '_')}.zip"
    zip_path = csv_dir / zip_name

    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
        for file in results_dir.rglob("*"):
            if file.is_file():
                # Archive paths are relative to the PARENT of the results dir,
                # so the run folder itself stays as the top-level entry
                arcname = file.relative_to(results_dir.parent)
                zipf.write(file, arcname)

    return results_dir, zip_path
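
# Usage sketch for run_analysis (illustrative; the paths are hypothetical).
# "path" mode only returns the results folder; "zip" mode also builds an
# archive next to the CSV:
#
#     results_dir, _ = run_analysis(Path("data/tickets.csv"))
#     results_dir, zip_path = run_analysis(
#         Path("data/tickets.csv"),
#         economy_data={"labor_cost_per_hour": 25.0},
#         return_type="zip",
#         company_folder="Cliente_X",
#     )
#     # results_dir -> data/Cliente_X/<timestamp>
#     # zip_path    -> data/Cliente_X_<timestamp>.zip
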
def run_analysis_collect_json(
    input_path: Path,
    economy_data: Optional[dict] = None,
    analysis: Literal["basic", "premium"] = "premium",
    company_folder: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Run the pipeline and return a single JSON dict with all the results.

    Unlike run_analysis, this function:
    - does NOT write results.json
    - does NOT write agentic_readiness.json
    - embeds agentic_readiness in the results dict

    The `analysis` parameter selects the analysis level:
    - "basic" -> beyond_metrics/configs/basic.json
    - "premium" -> beyond_metrics/configs/beyond_metrics_config.json
    """

    # Normalize and validate the CSV path
    input_path = input_path.resolve()
    if not input_path.exists():
        raise FileNotFoundError(f"CSV does not exist: {input_path}")
    if not input_path.is_file():
        raise ValueError(f"Path does not point to a CSV file: {input_path}")

    # Folder containing the CSV
    csv_dir = input_path.parent

    # DataSource and ResultsSink both point at THAT folder
    datasource = LocalDataSource(base_dir=str(csv_dir))
    sink = LocalResultsSink(base_dir=str(csv_dir))

    # Economy config
    economy_cfg = _build_economy_config(economy_data)

    dimension_params: Dict[str, Mapping[str, Any]] = {
        "economy_costs": {
            "config": economy_cfg,
        }
    }

    # Choose the dimensions config file based on `analysis`
    if analysis == "basic":
        dimensions_config_path = "beyond_metrics/configs/basic.json"
    else:
        dimensions_config_path = "beyond_metrics/configs/beyond_metrics_config.json"

    # Post-run callback: embed agentic_readiness in the final JSON (no files written)
    def agentic_post_run(results: Dict[str, Any], run_base: str, sink_: ResultsSink) -> None:
        scorer = AgenticScorer()
        try:
            agentic = scorer.compute_and_return(results)
        except Exception as e:
            agentic = {"error": f"{type(e).__name__}: {e}"}
        results["agentic_readiness"] = agentic

    pipeline = build_pipeline(
        dimensions_config_path=dimensions_config_path,
        datasource=datasource,
        sink=sink,
        dimension_params=dimension_params,
        post_run=[agentic_post_run],
    )

    # Run timestamp (keeps possible artifacts such as plots separated)
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
    if company_folder:
        run_dir_rel = f"{company_folder.rstrip('/')}/{timestamp}"
    else:
        run_dir_rel = timestamp

    # Run the pipeline without writing results.json
    results = pipeline.run(
        input_path=input_path.name,
        run_dir=run_dir_rel,
        write_results_json=False,
    )

    return results
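
# Usage sketch for run_analysis_collect_json (illustrative; the path is
# hypothetical). Unlike run_analysis, no extra files are written and the
# agentic readiness scores come back embedded in the returned dict:
#
#     results = run_analysis_collect_json(
#         Path("data/tickets.csv"),
#         analysis="basic",
#     )
#     agentic = results["agentic_readiness"]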