from __future__ import annotations from dataclasses import dataclass from typing import List import numpy as np import pandas as pd import matplotlib.pyplot as plt from matplotlib.axes import Axes REQUIRED_COLUMNS_VOLUMETRIA: List[str] = [ "interaction_id", "datetime_start", "queue_skill", "channel", ] @dataclass class VolumetriaMetrics: """ Volumetry metrics based on the new data schema. Minimum required columns: - interaction_id - datetime_start - queue_skill - channel Other columns may exist but are not required for these metrics. """ df: pd.DataFrame def __post_init__(self) -> None: self._validate_columns() self._prepare_data() # ------------------------------------------------------------------ # # Internal helpers # ------------------------------------------------------------------ # def _validate_columns(self) -> None: missing = [c for c in REQUIRED_COLUMNS_VOLUMETRIA if c not in self.df.columns] if missing: raise ValueError( f"Missing required columns for VolumetriaMetrics: {missing}" ) def _prepare_data(self) -> None: df = self.df.copy() # Ensure datetime type df["datetime_start"] = pd.to_datetime(df["datetime_start"], errors="coerce") # Normalize strings df["queue_skill"] = df["queue_skill"].astype(str).str.strip() df["channel"] = df["channel"].astype(str).str.strip() # Store the prepared dataframe self.df = df # ------------------------------------------------------------------ # # Useful properties # ------------------------------------------------------------------ # @property def is_empty(self) -> bool: return self.df.empty # ------------------------------------------------------------------ # # Numeric / tabular metrics # ------------------------------------------------------------------ # def volume_by_channel(self) -> pd.Series: """ Number of interactions by channel. """ return self.df.groupby("channel")["interaction_id"].nunique().sort_values( ascending=False ) def volume_by_skill(self) -> pd.Series: """ Number of interactions by skill / queue. """ return self.df.groupby("queue_skill")["interaction_id"].nunique().sort_values( ascending=False ) def channel_distribution_pct(self) -> pd.Series: """ Percentage distribution of volume by channel. """ counts = self.volume_by_channel() total = counts.sum() if total == 0: return counts * 0.0 return (counts / total * 100).round(2) def skill_distribution_pct(self) -> pd.Series: """ Percentage distribution of volume by skill. """ counts = self.volume_by_skill() total = counts.sum() if total == 0: return counts * 0.0 return (counts / total * 100).round(2) def heatmap_24x7(self) -> pd.DataFrame: """ Matrix [day_of_week x hour] with number of interactions. dayofweek: 0=Monday ... 6=Sunday """ df = self.df.dropna(subset=["datetime_start"]).copy() if df.empty: # Return an empty dataframe with expected index/columns idx = range(7) cols = range(24) return pd.DataFrame(0, index=idx, columns=cols) df["dow"] = df["datetime_start"].dt.dayofweek df["hour"] = df["datetime_start"].dt.hour pivot = ( df.pivot_table( index="dow", columns="hour", values="interaction_id", aggfunc="nunique", fill_value=0, ) .reindex(index=range(7), fill_value=0) .reindex(columns=range(24), fill_value=0) ) return pivot def monthly_seasonality_cv(self) -> float: """ Coefficient of variation of monthly volume. CV = std / mean (in %). """ df = self.df.dropna(subset=["datetime_start"]).copy() if df.empty: return float("nan") df["year_month"] = df["datetime_start"].dt.to_period("M") monthly_counts = ( df.groupby("year_month")["interaction_id"].nunique().astype(float) ) if len(monthly_counts) < 2: return float("nan") mean = monthly_counts.mean() std = monthly_counts.std(ddof=1) if mean == 0: return float("nan") return float(round(std / mean * 100, 2)) def peak_offpeak_ratio(self) -> float: """ Volume ratio between peak and off-peak hours. We define peak as hours 10:00–19:59, rest as off-peak. """ df = self.df.dropna(subset=["datetime_start"]).copy() if df.empty: return float("nan") df["hour"] = df["datetime_start"].dt.hour peak_hours = list(range(10, 20)) is_peak = df["hour"].isin(peak_hours) peak_vol = df.loc[is_peak, "interaction_id"].nunique() off_vol = df.loc[~is_peak, "interaction_id"].nunique() if off_vol == 0: return float("inf") if peak_vol > 0 else float("nan") return float(round(peak_vol / off_vol, 3)) def concentration_top20_skills_pct(self) -> float: """ % of volume concentrated in the top 20% of skills (by number of interactions). """ counts = ( self.df.groupby("queue_skill")["interaction_id"].nunique().sort_values( ascending=False ) ) n_skills = len(counts) if n_skills == 0: return float("nan") top_n = max(1, int(np.ceil(0.2 * n_skills))) top_vol = counts.head(top_n).sum() total = counts.sum() if total == 0: return float("nan") return float(round(top_vol / total * 100, 2)) # ------------------------------------------------------------------ # # Plots # ------------------------------------------------------------------ # def plot_heatmap_24x7(self) -> Axes: """ Heatmap of volume by day of week (0-6) and hour (0-23). Returns Axes so the pipeline can save the figure. """ data = self.heatmap_24x7() fig, ax = plt.subplots(figsize=(10, 4)) im = ax.imshow(data.values, aspect="auto", origin="lower") ax.set_xticks(range(24)) ax.set_xticklabels([str(h) for h in range(24)]) ax.set_yticks(range(7)) ax.set_yticklabels(["M", "T", "W", "T", "F", "S", "S"]) ax.set_xlabel("Hour of day") ax.set_ylabel("Day of week") ax.set_title("Volume by day of week and hour") plt.colorbar(im, ax=ax, label="# interactions") return ax def plot_channel_distribution(self) -> Axes: """ Volume distribution by channel. """ series = self.volume_by_channel() fig, ax = plt.subplots(figsize=(6, 4)) series.plot(kind="bar", ax=ax) ax.set_xlabel("Channel") ax.set_ylabel("# interactions") ax.set_title("Volume by channel") ax.grid(axis="y", alpha=0.3) return ax def plot_skill_pareto(self) -> Axes: """ Simple Pareto chart of volume by skill (volume bars only). """ series = self.volume_by_skill() fig, ax = plt.subplots(figsize=(10, 4)) series.plot(kind="bar", ax=ax) ax.set_xlabel("Skill / Queue") ax.set_ylabel("# interactions") ax.set_title("Pareto chart of volume by skill") ax.grid(axis="y", alpha=0.3) plt.xticks(rotation=45, ha="right") return ax