// BeyondCXAnalytics-Demo/frontend/utils/dataTransformation.ts

// utils/dataTransformation.ts
// Raw data to processed metrics transformation pipeline

import type { RawInteraction } from '../types';

/**
 * Step 1: Noise Cleanup
 * Removes interactions whose total handle time (talk + hold + wrap-up) is
 * below 10 seconds (false contacts or system errors).
 *
 * Logs how many interactions were removed and how many remain. The input
 * array is left untouched; a new array is returned.
 *
 * @param interactions - Raw interactions to filter.
 * @returns The interactions lasting at least 10 seconds, in their original order.
 */
export function cleanNoiseFromData(interactions: RawInteraction[]): RawInteraction[] {
  const MIN_DURATION_SECONDS = 10;

  const cleaned = interactions.filter(interaction => {
    const totalDuration =
      interaction.duration_talk +
      interaction.hold_time +
      interaction.wrap_up_time;

    return totalDuration >= MIN_DURATION_SECONDS;
  });

  const removedCount = interactions.length - cleaned.length;
  // Guard the empty-input case: 0 / 0 would otherwise be reported as "NaN%".
  const removedPercentage = interactions.length > 0
    ? ((removedCount / interactions.length) * 100).toFixed(1)
    : '0.0';

  console.log(`🧹 Noise Cleanup: ${removedCount} interactions removed (${removedPercentage}% of total)`);
  console.log(`✅ Clean interactions: ${cleaned.length}`);

  return cleaned;
}

/**
 * Base metrics calculated by skill
 *
 * One record per skill, as produced by calculateSkillBaseMetrics. AHT here is
 * the sum of talk, hold and wrap-up time of an interaction.
 */
export interface SkillBaseMetrics {
  skill: string;
  volume: number;                  // Number of interactions
  aht_mean: number;                // Average AHT (seconds)
  aht_std: number;                 // AHT standard deviation (population form: divided by volume)
  transfer_rate: number;           // Transfer rate as a percentage (0-100)
  total_cost: number;              // Total cost (€): sum of each AHT × (cost per hour / 3600)

  // Auxiliary data for subsequent calculations
  aht_values: number[];            // Array of all AHT values for percentiles
}

/**
 * Step 2: Calculate Base Metrics by Skill
 * Buckets interactions by `queue_skill` and derives, for each bucket, the
 * volume, mean AHT, AHT standard deviation (population form), transfer rate
 * (percentage) and total cost.
 *
 * @param interactions - Interactions to aggregate.
 * @param costPerHour - Hourly cost; converted to a per-second rate and applied to every AHT.
 * @returns One metrics record per skill, ordered by descending volume.
 */
export function calculateSkillBaseMetrics(
  interactions: RawInteraction[],
  costPerHour: number
): SkillBaseMetrics[] {
  const costPerSecond = costPerHour / 3600;

  // Bucket interactions by skill (Map keeps first-seen order)
  const bySkill = new Map<string, RawInteraction[]>();
  for (const row of interactions) {
    const bucket = bySkill.get(row.queue_skill);
    if (bucket === undefined) {
      bySkill.set(row.queue_skill, [row]);
    } else {
      bucket.push(row);
    }
  }

  const result: SkillBaseMetrics[] = [];

  for (const [skill, rows] of bySkill) {
    const volume = rows.length;

    // Single pass: collect AHT values and accumulate sum, cost and transfers
    const ahtValues: number[] = [];
    let ahtSum = 0;
    let totalCost = 0;
    let transferCount = 0;
    for (const row of rows) {
      const aht = row.duration_talk + row.hold_time + row.wrap_up_time;
      ahtValues.push(aht);
      ahtSum = ahtSum + aht;
      totalCost = totalCost + (aht * costPerSecond);
      if (row.transfer_flag) {
        transferCount += 1;
      }
    }

    const ahtMean = ahtSum / volume;

    // Population standard deviation of the AHT values
    let squaredDeviationSum = 0;
    for (const aht of ahtValues) {
      squaredDeviationSum = squaredDeviationSum + Math.pow(aht - ahtMean, 2);
    }
    const ahtStd = Math.sqrt(squaredDeviationSum / volume);

    result.push({
      skill,
      volume,
      aht_mean: ahtMean,
      aht_std: ahtStd,
      transfer_rate: (transferCount / volume) * 100,
      total_cost: totalCost,
      aht_values: ahtValues
    });
  }

  // Largest skills first
  result.sort((left, right) => right.volume - left.volume);

  console.log(`📊 Base Metrics calculated for ${result.length} skills`);

  return result;
}

/**
 * Transformed dimensions for Agentic Readiness Score
 *
 * One record per skill, as produced by transformToDimensions. Scores are
 * clamped to the 0-10 range and rounded to 1 decimal place.
 */
export interface SkillDimensions {
  skill: string;
  volume: number;

  // Dimension 1: Predictability (0-10)
  predictability_score: number;
  predictability_cv: number;       // Coefficient of Variation, rounded to 2 decimals (for reference)

  // Dimension 2: Inverse Complexity (0-10)
  complexity_inverse_score: number;
  complexity_transfer_rate: number; // Transfer rate in percent, rounded to 1 decimal (for reference)

  // Dimension 3: Repetitiveness/Impact (0-10)
  repetitivity_score: number;

  // Auxiliary data (both rounded to whole numbers)
  aht_mean: number;
  total_cost: number;
}

/**
 * Step 3: Transform Base Metrics to Dimensions
 * Applies normalization formulas to obtain 0-10 scores
 *
 * - Predictability: from the AHT coefficient of variation (CV = std / mean).
 * - Inverse complexity: from the transfer rate.
 * - Repetitiveness: from the interaction volume.
 *
 * Scores are rounded to 1 decimal, the CV to 2 decimals, and the auxiliary
 * `aht_mean` / `total_cost` to whole numbers.
 *
 * @param baseMetrics - Per-skill base metrics (`transfer_rate` expressed as 0-100).
 * @returns One dimensions record per input metric, in the same order.
 */
export function transformToDimensions(
  baseMetrics: SkillBaseMetrics[]
): SkillDimensions[] {
  return baseMetrics.map(metric => {
    // Dimension 1: Predictability (Proxy: AHT Variability)
    // CV = standard deviation / mean
    // A zero standard deviation means no variability at all, so CV is 0 even
    // when the mean is also 0 (0 / 0 would otherwise yield NaN and poison
    // every downstream score).
    const cv = metric.aht_std === 0 ? 0 : metric.aht_std / metric.aht_mean;

    // Normalization: CV <= 0.3 → 10, CV >= 1.5 → 0
    // Formula: MAX(0, MIN(10, 10 - ((CV - 0.3) / 1.2 * 10)))
    const predictabilityScore = Math.max(0, Math.min(10,
      10 - ((cv - 0.3) / 1.2 * 10)
    ));

    // Dimension 2: Inverse Complexity (Proxy: Transfer Rate)
    // T = transfer rate (%), converted to a 0-1 fraction inside the formula
    const transferRate = metric.transfer_rate;

    // Normalization: T <= 5% → 10, T >= 30% → 0
    // Formula: MAX(0, MIN(10, 10 - ((T - 0.05) / 0.25 * 10)))
    const complexityInverseScore = Math.max(0, Math.min(10,
      10 - ((transferRate / 100 - 0.05) / 0.25 * 10)
    ));

    // Dimension 3: Repetitiveness/Impact (Proxy: Volume)
    // Fixed normalization: >= 5,000 interactions = 10, <= 100 = 0
    let repetitivityScore: number;
    if (metric.volume >= 5000) {
      repetitivityScore = 10;
    } else if (metric.volume <= 100) {
      repetitivityScore = 0;
    } else {
      // Linear interpolation between 100 and 5000
      repetitivityScore = ((metric.volume - 100) / (5000 - 100)) * 10;
    }

    return {
      skill: metric.skill,
      volume: metric.volume,
      predictability_score: Math.round(predictabilityScore * 10) / 10, // 1 decimal place
      predictability_cv: Math.round(cv * 100) / 100, // 2 decimal places
      complexity_inverse_score: Math.round(complexityInverseScore * 10) / 10,
      complexity_transfer_rate: Math.round(transferRate * 10) / 10,
      repetitivity_score: Math.round(repetitivityScore * 10) / 10,
      aht_mean: Math.round(metric.aht_mean),
      total_cost: Math.round(metric.total_cost)
    };
  });
}

/**
 * Final result with Agentic Readiness Score
 *
 * Extends the per-skill dimensions with the weighted score and the category
 * assigned by calculateAgenticReadinessScore (score >= 8.0 → 'automate_now',
 * score >= 5.0 → 'assist_copilot', otherwise 'optimize_first').
 */
export interface SkillAgenticReadiness extends SkillDimensions {
  agentic_readiness_score: number;  // 0-10 with the default weights, rounded to 1 decimal
  readiness_category: 'automate_now' | 'assist_copilot' | 'optimize_first';
  readiness_label: string;          // Human-readable label matching readiness_category
}

/**
 * Step 4: Calculate Agentic Readiness Score
 * Weighted average of the 3 dimensions
 *
 * The weighted sum is rounded to 1 decimal place and the category is derived
 * from that rounded value, so the reported score and its category always
 * agree (a skill reported as 8.0 is never labelled "Assist / Copilot").
 *
 * @param dimensions - Per-skill dimension scores (each 0-10).
 * @param weights - Optional weights per dimension. They are applied as given
 *   (not normalized); defaults are 0.40 / 0.35 / 0.25.
 * @returns The input records extended with score, category and label, in the same order.
 */
export function calculateAgenticReadinessScore(
  dimensions: SkillDimensions[],
  weights?: { predictability: number; complexity: number; repetitivity: number }
): SkillAgenticReadiness[] {
  // Default weights (adjustable)
  const w = weights ?? {
    predictability: 0.40,   // 40% - Most important
    complexity: 0.35,       // 35%
    repetitivity: 0.25      // 25%
  };

  return dimensions.map(dim => {
    // Weighted average
    const rawScore =
      dim.predictability_score * w.predictability +
      dim.complexity_inverse_score * w.complexity +
      dim.repetitivity_score * w.repetitivity;

    // Round first (1 decimal) so categorization uses the value that is reported
    const score = Math.round(rawScore * 10) / 10;

    // Categorize
    let category: 'automate_now' | 'assist_copilot' | 'optimize_first';
    let label: string;

    if (score >= 8.0) {
      category = 'automate_now';
      label = '🟢 Automate Now';
    } else if (score >= 5.0) {
      category = 'assist_copilot';
      label = '🟡 Assist / Copilot';
    } else {
      category = 'optimize_first';
      label = '🔴 Optimize First';
    }

    return {
      ...dim,
      agentic_readiness_score: score,
      readiness_category: category,
      readiness_label: label
    };
  });
}

/**
 * Complete pipeline: Raw Data → Agentic Readiness Score
 *
 * Chains the four steps (noise cleanup → base metrics → dimensions →
 * readiness score) and logs the resulting category distribution.
 *
 * @param rawInteractions - Unfiltered interactions.
 * @param costPerHour - Hourly cost forwarded to the base-metrics step.
 * @param weights - Optional dimension weights forwarded to the scoring step.
 * @returns The scored skills as returned by the scoring step.
 */
export function transformRawDataToAgenticReadiness(
  rawInteractions: RawInteraction[],
  costPerHour: number,
  weights?: { predictability: number; complexity: number; repetitivity: number }
): SkillAgenticReadiness[] {
  console.log(`🚀 Starting transformation pipeline with ${rawInteractions.length} interactions...`);

  // Steps 1-4, innermost call runs first: cleanup → base metrics → dimensions → score
  const scoredSkills = calculateAgenticReadinessScore(
    transformToDimensions(
      calculateSkillBaseMetrics(cleanNoiseFromData(rawInteractions), costPerHour)
    ),
    weights
  );

  // Tally skills per readiness category in a single pass
  const tally = { automate_now: 0, assist_copilot: 0, optimize_first: 0 };
  for (const { readiness_category } of scoredSkills) {
    tally[readiness_category] += 1;
  }

  console.log(`✅ Pipeline completed: ${scoredSkills.length} skills processed`);
  console.log(`📈 Distribution:`);
  console.log(`   🟢 Automate Now: ${tally.automate_now} skills`);
  console.log(`   🟡 Assist/Copilot: ${tally.assist_copilot} skills`);
  console.log(`   🔴 Optimize First: ${tally.optimize_first} skills`);

  return scoredSkills;
}

/**
 * Utility: Generate statistics summary
 *
 * Builds a multi-line, human-readable report of the cleanup step and of the
 * readiness category distribution. Percentages fall back to '0' when their
 * denominator is 0.
 *
 * @param originalCount - Number of interactions before cleanup.
 * @param cleanedCount - Number of interactions after cleanup.
 * @param skillsCount - Number of skills, used as the denominator for category percentages.
 * @param agenticReadiness - Scored skills whose categories are counted.
 * @returns The formatted summary text, trimmed of surrounding whitespace.
 */
export function generateTransformationSummary(
  originalCount: number,
  cleanedCount: number,
  skillsCount: number,
  agenticReadiness: SkillAgenticReadiness[]
): string {
  // Number of skills that landed in the given category
  const countIn = (category: string): number =>
    agenticReadiness.filter(entry => entry.readiness_category === category).length;

  // Whole-number share of all skills; '0' when there are no skills (avoids division by zero)
  const shareOfSkills = (count: number): string => {
    if (skillsCount > 0) {
      return ((count / skillsCount) * 100).toFixed(0);
    }
    return '0';
  };

  const noiseCount = originalCount - cleanedCount;
  let noiseShare = '0';
  if (originalCount > 0) {
    noiseShare = ((noiseCount / originalCount) * 100).toFixed(1);
  }

  const automate = countIn('automate_now');
  const assist = countIn('assist_copilot');
  const optimize = countIn('optimize_first');

  return `
📊 Transformation Summary:
   • Original interactions: ${originalCount.toLocaleString()}
   • Noise removed: ${noiseCount.toLocaleString()} (${noiseShare}%)
   • Clean interactions: ${cleanedCount.toLocaleString()}
   • Unique skills: ${skillsCount}

🎯 Agentic Readiness:
   • 🟢 Automate Now: ${automate} skills (${shareOfSkills(automate)}%)
   • 🟡 Assist/Copilot: ${assist} skills (${shareOfSkills(assist)}%)
   • 🔴 Optimize First: ${optimize} skills (${shareOfSkills(optimize)}%)
  `.trim();
}