Phase 3 of Spanish-to-English translation for low-priority backend files: Backend core modules (4 files): - Volumetria.py: Translated ~15 occurrences (docstrings, comments, plot labels, day abbreviations) - agent.py: Translated ~15 occurrences (system prompts, docstrings, error messages) - pipeline.py: Translated ~10 occurrences (log messages, docstrings, comments) - analysis_service.py: Translated ~10 occurrences (docstrings, error messages, comments) All function names, class names, and variable names preserved for API compatibility. Frontend and backend compilation tested and verified successful. This completes the comprehensive Spanish-to-English translation project: - Phase 1 (High Priority): 3 files - backendMapper.ts, analysisGenerator.ts, realDataAnalysis.ts - Phase 2 (Medium Priority): 5 files - dataTransformation.ts, segmentClassifier.ts, + 3 dimension files - Phase 3 (Low Priority): 4 files - Volumetria.py, agent.py, pipeline.py, analysis_service.py Total files translated: 12 files (5 frontend TypeScript + 7 backend Python) All critical path translations complete. Related to TRANSLATION_STATUS.md Phase 3 completion. https://claude.ai/code/session_01GNbnkFoESkRcnPr3bLCYDg
269 lines
7.7 KiB
Python
269 lines
7.7 KiB
Python
from __future__ import annotations
|
||
|
||
from dataclasses import dataclass
|
||
from typing import List
|
||
|
||
import numpy as np
|
||
import pandas as pd
|
||
import matplotlib.pyplot as plt
|
||
from matplotlib.axes import Axes
|
||
|
||
|
||
REQUIRED_COLUMNS_VOLUMETRIA: List[str] = [
    "interaction_id",
    "datetime_start",
    "queue_skill",
    "channel",
]


@dataclass
class VolumetriaMetrics:
    """
    Volumetry metrics based on the new data schema.

    Minimum required columns:
    - interaction_id
    - datetime_start
    - queue_skill
    - channel

    Other columns may exist but are not required for these metrics.

    On construction the input frame is validated and normalized
    (``datetime_start`` coerced to datetime, grouping-key strings stripped).
    """

    # Input interactions, one row per interaction event.
    df: pd.DataFrame

    def __post_init__(self) -> None:
        self._validate_columns()
        self._prepare_data()

    # ------------------------------------------------------------------ #
    # Internal helpers
    # ------------------------------------------------------------------ #
    def _validate_columns(self) -> None:
        """Raise ValueError if any required column is absent."""
        missing = [c for c in REQUIRED_COLUMNS_VOLUMETRIA if c not in self.df.columns]
        if missing:
            raise ValueError(
                f"Missing required columns for VolumetriaMetrics: {missing}"
            )

    def _prepare_data(self) -> None:
        """Normalize dtypes on a copy and store it back on ``self.df``."""
        df = self.df.copy()

        # Ensure datetime type; unparseable values become NaT and are
        # dropped later by the time-based metrics.
        df["datetime_start"] = pd.to_datetime(df["datetime_start"], errors="coerce")

        # Normalize strings so grouping keys are not split by stray whitespace.
        df["queue_skill"] = df["queue_skill"].astype(str).str.strip()
        df["channel"] = df["channel"].astype(str).str.strip()

        # Store the prepared dataframe
        self.df = df

    @staticmethod
    def _pct_distribution(counts: pd.Series) -> pd.Series:
        """
        Convert an absolute-count Series into percentages (2 decimals).

        Returns an all-zero Series when the total is zero so callers
        never divide by zero.  Shared by the channel and skill
        distribution methods, which previously duplicated this logic.
        """
        total = counts.sum()
        if total == 0:
            return counts * 0.0
        return (counts / total * 100).round(2)

    # ------------------------------------------------------------------ #
    # Useful properties
    # ------------------------------------------------------------------ #
    @property
    def is_empty(self) -> bool:
        """True when the prepared dataframe has no rows."""
        return self.df.empty

    # ------------------------------------------------------------------ #
    # Numeric / tabular metrics
    # ------------------------------------------------------------------ #
    def volume_by_channel(self) -> pd.Series:
        """
        Number of distinct interactions by channel, sorted descending.
        """
        return self.df.groupby("channel")["interaction_id"].nunique().sort_values(
            ascending=False
        )

    def volume_by_skill(self) -> pd.Series:
        """
        Number of distinct interactions by skill / queue, sorted descending.
        """
        return self.df.groupby("queue_skill")["interaction_id"].nunique().sort_values(
            ascending=False
        )

    def channel_distribution_pct(self) -> pd.Series:
        """
        Percentage distribution of volume by channel.
        """
        return self._pct_distribution(self.volume_by_channel())

    def skill_distribution_pct(self) -> pd.Series:
        """
        Percentage distribution of volume by skill.
        """
        return self._pct_distribution(self.volume_by_skill())

    def heatmap_24x7(self) -> pd.DataFrame:
        """
        Matrix [day_of_week x hour] with number of interactions.

        dayofweek: 0=Monday ... 6=Sunday

        Rows with NaT ``datetime_start`` are ignored.  Missing day/hour
        cells are filled with 0 so the result is always 7x24.
        """
        df = self.df.dropna(subset=["datetime_start"]).copy()
        if df.empty:
            # Return an empty dataframe with expected index/columns
            return pd.DataFrame(0, index=range(7), columns=range(24))

        df["dow"] = df["datetime_start"].dt.dayofweek
        df["hour"] = df["datetime_start"].dt.hour

        pivot = (
            df.pivot_table(
                index="dow",
                columns="hour",
                values="interaction_id",
                aggfunc="nunique",
                fill_value=0,
            )
            .reindex(index=range(7), fill_value=0)
            .reindex(columns=range(24), fill_value=0)
        )

        return pivot

    def monthly_seasonality_cv(self) -> float:
        """
        Coefficient of variation of monthly volume.

        CV = std / mean (in %), using the sample std (ddof=1).
        Returns NaN when fewer than two months are present or the
        mean monthly volume is zero.
        """
        df = self.df.dropna(subset=["datetime_start"]).copy()
        if df.empty:
            return float("nan")

        df["year_month"] = df["datetime_start"].dt.to_period("M")
        monthly_counts = (
            df.groupby("year_month")["interaction_id"].nunique().astype(float)
        )

        # A dispersion measure needs at least two observations.
        if len(monthly_counts) < 2:
            return float("nan")

        mean = monthly_counts.mean()
        std = monthly_counts.std(ddof=1)
        if mean == 0:
            return float("nan")

        return float(round(std / mean * 100, 2))

    def peak_offpeak_ratio(self) -> float:
        """
        Volume ratio between peak and off-peak hours.

        We define peak as hours 10:00–19:59, rest as off-peak.
        Returns inf when all volume falls in peak hours and NaN when
        there is no usable volume at all.
        """
        df = self.df.dropna(subset=["datetime_start"]).copy()
        if df.empty:
            return float("nan")

        df["hour"] = df["datetime_start"].dt.hour

        # Peak window: hours 10 through 19 inclusive.
        is_peak = df["hour"].isin(range(10, 20))

        peak_vol = df.loc[is_peak, "interaction_id"].nunique()
        off_vol = df.loc[~is_peak, "interaction_id"].nunique()

        if off_vol == 0:
            return float("inf") if peak_vol > 0 else float("nan")

        return float(round(peak_vol / off_vol, 3))

    def concentration_top20_skills_pct(self) -> float:
        """
        % of volume concentrated in the top 20% of skills (by number of interactions).

        The top group always contains at least one skill; returns NaN
        when there are no skills or no volume.
        """
        counts = (
            self.df.groupby("queue_skill")["interaction_id"].nunique().sort_values(
                ascending=False
            )
        )

        n_skills = len(counts)
        if n_skills == 0:
            return float("nan")

        # ceil so e.g. 3 skills -> top 1 (20% rounded up), never 0.
        top_n = max(1, int(np.ceil(0.2 * n_skills)))
        top_vol = counts.head(top_n).sum()
        total = counts.sum()

        if total == 0:
            return float("nan")

        return float(round(top_vol / total * 100, 2))

    # ------------------------------------------------------------------ #
    # Plots
    # ------------------------------------------------------------------ #
    def plot_heatmap_24x7(self) -> Axes:
        """
        Heatmap of volume by day of week (0-6) and hour (0-23).
        Returns Axes so the pipeline can save the figure.
        """
        data = self.heatmap_24x7()

        fig, ax = plt.subplots(figsize=(10, 4))
        im = ax.imshow(data.values, aspect="auto", origin="lower")

        ax.set_xticks(range(24))
        ax.set_xticklabels([str(h) for h in range(24)])

        ax.set_yticks(range(7))
        # Three-letter abbreviations: the previous single letters made
        # Tuesday/Thursday and Saturday/Sunday indistinguishable.
        ax.set_yticklabels(["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"])

        ax.set_xlabel("Hour of day")
        ax.set_ylabel("Day of week")
        ax.set_title("Volume by day of week and hour")

        plt.colorbar(im, ax=ax, label="# interactions")

        return ax

    def plot_channel_distribution(self) -> Axes:
        """
        Volume distribution by channel (bar chart).
        Returns Axes so the pipeline can save the figure.
        """
        series = self.volume_by_channel()

        fig, ax = plt.subplots(figsize=(6, 4))
        series.plot(kind="bar", ax=ax)

        ax.set_xlabel("Channel")
        ax.set_ylabel("# interactions")
        ax.set_title("Volume by channel")
        ax.grid(axis="y", alpha=0.3)

        return ax

    def plot_skill_pareto(self) -> Axes:
        """
        Simple Pareto chart of volume by skill (volume bars only).
        Returns Axes so the pipeline can save the figure.
        """
        series = self.volume_by_skill()

        fig, ax = plt.subplots(figsize=(10, 4))
        series.plot(kind="bar", ax=ax)

        ax.set_xlabel("Skill / Queue")
        ax.set_ylabel("# interactions")
        ax.set_title("Pareto chart of volume by skill")
        ax.grid(axis="y", alpha=0.3)

        plt.xticks(rotation=45, ha="right")

        return ax