Files
BeyondCXAnalytics-Demo/backend/beyond_metrics/dimensions/Volumetria.py
Claude 9caa382010 Translate Phase 3 low-priority backend files (complete Spanish-to-English translation)
Phase 3 of Spanish-to-English translation for low-priority backend files:

Backend core modules (4 files):
- Volumetria.py: Translated ~15 occurrences (docstrings, comments, plot labels, day abbreviations)
- agent.py: Translated ~15 occurrences (system prompts, docstrings, error messages)
- pipeline.py: Translated ~10 occurrences (log messages, docstrings, comments)
- analysis_service.py: Translated ~10 occurrences (docstrings, error messages, comments)

All function names, class names, and variable names preserved for API compatibility.
Frontend and backend compilation tested and verified successful.

This completes the comprehensive Spanish-to-English translation project:
- Phase 1 (High Priority): 3 files - backendMapper.ts, analysisGenerator.ts, realDataAnalysis.ts
- Phase 2 (Medium Priority): 5 files - dataTransformation.ts, segmentClassifier.ts, + 3 dimension files
- Phase 3 (Low Priority): 4 files - Volumetria.py, agent.py, pipeline.py, analysis_service.py

Total files translated: 12 files (5 frontend TypeScript + 7 backend Python)
All critical path translations complete.

Related to TRANSLATION_STATUS.md Phase 3 completion.

https://claude.ai/code/session_01GNbnkFoESkRcnPr3bLCYDg
2026-02-07 11:15:47 +00:00

269 lines
7.7 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
from dataclasses import dataclass
from typing import List
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.axes import Axes
# Columns that the input dataframe must contain; VolumetriaMetrics raises
# a ValueError at construction time when any of them is missing.
REQUIRED_COLUMNS_VOLUMETRIA: List[str] = [
    "interaction_id",
    "datetime_start",
    "queue_skill",
    "channel",
]
@dataclass
class VolumetriaMetrics:
    """
    Volumetry metrics based on the new data schema.

    Minimum required columns:
    - interaction_id
    - datetime_start
    - queue_skill
    - channel

    Other columns may exist but are not required for these metrics.

    On construction the dataframe is validated and then replaced by a
    prepared copy (parsed datetimes, stripped skill/channel strings); the
    dataframe object passed in by the caller is not modified.
    """

    df: pd.DataFrame

    def __post_init__(self) -> None:
        self._validate_columns()
        self._prepare_data()

    # ------------------------------------------------------------------ #
    # Internal helpers
    # ------------------------------------------------------------------ #
    def _validate_columns(self) -> None:
        """Raise ValueError listing every required column that is absent."""
        missing = [c for c in REQUIRED_COLUMNS_VOLUMETRIA if c not in self.df.columns]
        if missing:
            raise ValueError(
                f"Missing required columns for VolumetriaMetrics: {missing}"
            )

    def _prepare_data(self) -> None:
        """Replace ``self.df`` with a normalized copy of itself."""
        df = self.df.copy()

        # Ensure datetime type; unparseable values become NaT and are
        # dropped later by the time-based metrics.
        df["datetime_start"] = pd.to_datetime(df["datetime_start"], errors="coerce")

        # Normalize strings.
        # NOTE(review): astype(str) may turn missing values into the literal
        # string "nan" (pandas-version dependent), which would then be
        # counted as its own skill/channel -- confirm this is intended.
        df["queue_skill"] = df["queue_skill"].astype(str).str.strip()
        df["channel"] = df["channel"].astype(str).str.strip()

        # Store the prepared dataframe
        self.df = df

    def _unique_interactions_by(self, column: str) -> pd.Series:
        """Distinct ``interaction_id`` count per value of *column*, descending."""
        return (
            self.df.groupby(column)["interaction_id"]
            .nunique()
            .sort_values(ascending=False)
        )

    @staticmethod
    def _as_percentages(counts: pd.Series) -> pd.Series:
        """Convert counts to percentages of their sum, rounded to 2 decimals.

        When the sum is zero (including an empty series) the counts are
        returned multiplied by 0.0 so no division by zero takes place.
        """
        total = counts.sum()
        if total == 0:
            return counts * 0.0
        return (counts / total * 100).round(2)

    # ------------------------------------------------------------------ #
    # Useful properties
    # ------------------------------------------------------------------ #
    @property
    def is_empty(self) -> bool:
        """True when the prepared dataframe has no rows."""
        return self.df.empty

    # ------------------------------------------------------------------ #
    # Numeric / tabular metrics
    # ------------------------------------------------------------------ #
    def volume_by_channel(self) -> pd.Series:
        """
        Number of interactions by channel.

        Counts distinct ``interaction_id`` values, sorted descending.
        """
        return self._unique_interactions_by("channel")

    def volume_by_skill(self) -> pd.Series:
        """
        Number of interactions by skill / queue.

        Counts distinct ``interaction_id`` values, sorted descending.
        """
        return self._unique_interactions_by("queue_skill")

    def channel_distribution_pct(self) -> pd.Series:
        """
        Percentage distribution of volume by channel.
        """
        return self._as_percentages(self.volume_by_channel())

    def skill_distribution_pct(self) -> pd.Series:
        """
        Percentage distribution of volume by skill.
        """
        return self._as_percentages(self.volume_by_skill())

    def heatmap_24x7(self) -> pd.DataFrame:
        """
        Matrix [day_of_week x hour] with number of interactions.

        dayofweek: 0=Monday ... 6=Sunday

        Rows without a valid ``datetime_start`` are ignored. The result is
        always 7 x 24, zero-filled where there is no data, with the index
        named "dow" and the columns named "hour" in every case.
        """
        df = self.df.dropna(subset=["datetime_start"]).copy()
        if df.empty:
            # Nothing to count: all-zero grid with the expected index/columns
            pivot = pd.DataFrame(0, index=range(7), columns=range(24))
        else:
            df["dow"] = df["datetime_start"].dt.dayofweek
            df["hour"] = df["datetime_start"].dt.hour
            pivot = (
                df.pivot_table(
                    index="dow",
                    columns="hour",
                    values="interaction_id",
                    aggfunc="nunique",
                    fill_value=0,
                )
                .reindex(index=range(7), fill_value=0)
                .reindex(columns=range(24), fill_value=0)
            )
        # Label the axes on both paths so empty and non-empty results have
        # the same shape and the same axis names.
        return pivot.rename_axis(index="dow", columns="hour")

    def monthly_seasonality_cv(self) -> float:
        """
        Coefficient of variation of monthly volume.

        CV = std / mean (in %), using the sample standard deviation (ddof=1)
        of distinct interactions per calendar month.

        Returns NaN when there are no dated rows, fewer than two months of
        data, or a zero mean.
        """
        df = self.df.dropna(subset=["datetime_start"]).copy()
        if df.empty:
            return float("nan")

        df["year_month"] = df["datetime_start"].dt.to_period("M")
        monthly_counts = (
            df.groupby("year_month")["interaction_id"].nunique().astype(float)
        )
        # A sample standard deviation needs at least two observations.
        if len(monthly_counts) < 2:
            return float("nan")

        mean = monthly_counts.mean()
        std = monthly_counts.std(ddof=1)
        if mean == 0:
            return float("nan")
        return float(round(std / mean * 100, 2))

    def peak_offpeak_ratio(self) -> float:
        """
        Volume ratio between peak and off-peak hours.

        We define peak as hours 10:00-19:59, rest as off-peak.

        Returns NaN when there are no dated rows, and +inf when there is
        peak volume but no off-peak volume.
        """
        df = self.df.dropna(subset=["datetime_start"]).copy()
        if df.empty:
            return float("nan")

        df["hour"] = df["datetime_start"].dt.hour
        peak_hours = list(range(10, 20))
        is_peak = df["hour"].isin(peak_hours)

        peak_vol = df.loc[is_peak, "interaction_id"].nunique()
        off_vol = df.loc[~is_peak, "interaction_id"].nunique()
        if off_vol == 0:
            return float("inf") if peak_vol > 0 else float("nan")
        return float(round(peak_vol / off_vol, 3))

    def concentration_top20_skills_pct(self) -> float:
        """
        % of volume concentrated in the top 20% of skills (by number of interactions).

        The number of top skills is ceil(20% of skills), with a minimum of
        one. Returns NaN when there are no skills or no volume.
        """
        counts = self.volume_by_skill()
        n_skills = len(counts)
        if n_skills == 0:
            return float("nan")

        top_n = max(1, int(np.ceil(0.2 * n_skills)))
        top_vol = counts.head(top_n).sum()
        total = counts.sum()
        if total == 0:
            return float("nan")
        return float(round(top_vol / total * 100, 2))

    # ------------------------------------------------------------------ #
    # Plots
    # ------------------------------------------------------------------ #
    def plot_heatmap_24x7(self) -> "Axes":
        """
        Heatmap of volume by day of week (0-6) and hour (0-23).

        Returns Axes so the pipeline can save the figure.
        """
        data = self.heatmap_24x7()
        fig, ax = plt.subplots(figsize=(10, 4))
        im = ax.imshow(data.values, aspect="auto", origin="lower")

        ax.set_xticks(range(24))
        ax.set_xticklabels([str(h) for h in range(24)])
        ax.set_yticks(range(7))
        ax.set_yticklabels(["M", "T", "W", "T", "F", "S", "S"])
        ax.set_xlabel("Hour of day")
        ax.set_ylabel("Day of week")
        ax.set_title("Volume by day of week and hour")

        # Attach the colorbar through the figure created above rather than
        # through pyplot's implicit "current figure".
        fig.colorbar(im, ax=ax, label="# interactions")
        return ax

    def plot_channel_distribution(self) -> "Axes":
        """
        Volume distribution by channel.

        Returns the Axes holding the bar chart.
        """
        series = self.volume_by_channel()
        _, ax = plt.subplots(figsize=(6, 4))
        series.plot(kind="bar", ax=ax)

        ax.set_xlabel("Channel")
        ax.set_ylabel("# interactions")
        ax.set_title("Volume by channel")
        ax.grid(axis="y", alpha=0.3)
        return ax

    def plot_skill_pareto(self) -> "Axes":
        """
        Simple Pareto chart of volume by skill (volume bars only).

        Returns the Axes holding the bar chart.
        """
        series = self.volume_by_skill()
        _, ax = plt.subplots(figsize=(10, 4))
        series.plot(kind="bar", ax=ax)

        ax.set_xlabel("Skill / Queue")
        ax.set_ylabel("# interactions")
        ax.set_title("Pareto chart of volume by skill")
        ax.grid(axis="y", alpha=0.3)

        # Rotate the labels on this Axes explicitly instead of relying on
        # pyplot's notion of the "current axes".
        for label in ax.get_xticklabels():
            label.set_rotation(45)
            label.set_horizontalalignment("right")
        return ax