# BeyondCX_Insights/config/settings.yaml

# ============================================
# CXInsights - Settings Configuration
# ============================================
# Non-secret configuration values
# Secrets (API keys) go in .env
# ============================================

# ============================================
# GENERAL
# ============================================

project:
  # Project identification values.
  name: "CXInsights"
  # Quoted so the version is loaded as a string rather than parsed as a number.
  version: "0.1.0"
  language: "es"  # Primary language for analysis

# ============================================
# BATCH PROCESSING
# ============================================

batch:
  # Per-batch limits (cost protection) and the default used for cost estimates.

  # Maximum calls per batch (cost protection)
  max_calls: 5000

  # Maximum audio minutes per batch (cost protection)
  max_audio_minutes: 40000

  # Default AHT assumption for cost estimation (minutes)
  # NOTE(review): with these values, max_calls * default_aht_minutes is
  # 5000 * 7 = 35000 minutes, which is below max_audio_minutes. Presumably
  # AHT means "average handle time" -- confirm with the consumer.
  default_aht_minutes: 7

# ============================================
# TRANSCRIPTION (STT)
# ============================================

transcription:
  # Speech-to-text settings: the default provider, its options, and the
  # limits used for audio validation.

  # Default provider
  provider: "assemblyai"

  # AssemblyAI settings
  assemblyai:
    language_code: "es"
    speaker_labels: true
    auto_chapters: false
    entity_detection: false

  # Audio validation
  audio:
    # Format identifiers, one per line.
    supported_formats:
      - "mp3"
      - "wav"
      - "m4a"
    # Duration bounds in seconds.
    max_duration_seconds: 18000  # 18000 s = 5 hours
    min_duration_seconds: 30

# ============================================
# FEATURES (Deterministic Extraction)
# ============================================

features:
  # Settings for deterministic feature extraction. Every value below is a
  # duration in seconds, as the key suffixes indicate.

  # Silence detection
  # NOTE(review): how threshold_seconds and min_gap_seconds interact is not
  # visible from this file -- confirm against the silence detector.
  silence:
    threshold_seconds: 5.0
    min_gap_seconds: 1.0

  # Turn metrics
  turn_metrics:
    min_turn_duration_seconds: 0.5
    interruption_overlap_seconds: 0.3

# ============================================
# COMPRESSION
# ============================================

compression:
  # Target token reduction percentage
  # (written as a percentage: 60, not 0.6)
  target_reduction_percent: 60

  # Max tokens after compression
  max_compressed_tokens: 2000

  # Preserve elements
  # NOTE(review): presumably these identifiers name the kinds of content that
  # must survive compression and must match names the consumer recognizes --
  # confirm.
  preserve:
    - customer_intent
    - agent_offers
    - objections
    - resolution_statements
    - key_timestamps

# ============================================
# INFERENCE (LLM)
# ============================================

inference:
  # LLM inference settings: model choice, sampling, batching, retries, and
  # response validation.

  # Default model
  model: "gpt-4o-mini"

  # Model settings
  temperature: 0.1
  max_tokens: 4000

  # Batch processing
  # NOTE(review): the unit of checkpoint_interval (calls vs. batches) is not
  # stated in this file -- confirm.
  batch_size: 10
  checkpoint_interval: 50

  # Retry settings
  # NOTE(review): backoff_base / backoff_max are presumably the base and the
  # cap (in seconds) of an exponential backoff -- confirm.
  max_retries: 5
  backoff_base: 2.0
  backoff_max: 60.0

  # Response validation
  # These carry the same values as validation.evidence.required and
  # validation.evidence.min_spans in this file; keep them in sync if the
  # consumer reads both.
  require_evidence: true
  min_evidence_spans: 1

# ============================================
# VALIDATION (Quality Gate)
# ============================================

validation:
  # Quality-gate settings: confidence thresholds, evidence requirements, and
  # schema validation.

  # Confidence thresholds
  # NOTE(review): `reject` (0.3) and `review` (0.4) are not adjacent, so how a
  # confidence in the 0.3-0.4 range is classified depends on the comparison
  # logic in the consumer -- confirm the intended handling of that range.
  confidence:
    accept: 0.6
    review: 0.4
    reject: 0.3

  # Evidence requirements
  # required / min_spans carry the same values as inference.require_evidence
  # and inference.min_evidence_spans in this file.
  evidence:
    required: true
    min_spans: 1
    max_span_length_chars: 500

  # Schema validation
  schema:
    strict: true
    # Quoted so the version is loaded as a string.
    version: "1.0.0"

# ============================================
# AGGREGATION (RCA Building)
# ============================================

aggregation:
  # Settings for aggregating results and building the RCA tree.

  # Minimum sample size for statistics
  min_sample_size: 10

  # Severity score calculation
  severity:
    # Weights for severity formula
    # The three weights below sum to 1.0 (0.4 + 0.4 + 0.2).
    # NOTE(review): whether the formula requires them to sum to 1.0 is not
    # visible here -- confirm before changing one in isolation.
    frequency_weight: 0.4
    impact_weight: 0.4
    confidence_weight: 0.2

  # RCA Tree building
  rca_tree:
    # Minimum percentage to include in tree
    # (written as a percentage: 1.0 means 1%, per the key name)
    min_percentage: 1.0

    # Maximum drivers per category
    max_drivers_per_category: 10

    # Include emergent in separate section
    separate_emergent: true

# ============================================
# EXPORTS
# ============================================

exports:
  # Per-format export options (PDF, Excel, JSON).

  # PDF Report
  pdf:
    # NOTE(review): presumably the name of a report template known to the
    # exporter -- confirm the accepted values.
    template: "executive_summary"
    max_pages: 5
    include_charts: true

  # Excel Export
  excel:
    include_raw_data: true
    include_pivot_tables: true

  # JSON Export
  json:
    pretty_print: true
    include_metadata: true

# ============================================
# LOGGING
# ============================================

logging:
  # Log level, format, retention, and content switches.

  # Log level (DEBUG, INFO, WARNING, ERROR)
  level: "INFO"

  # Log format
  format: "structured"  # "structured" or "plain"

  # Retention
  # Both values are in days, per the key names.
  # NOTE(review): error_retention_days presumably applies to error-level logs
  # only -- confirm.
  retention_days: 30
  error_retention_days: 90

  # What to log
  log_transcripts: false  # Never log full transcripts
  log_evidence_spans: true
  log_token_usage: true

# ============================================
# PROMPT VERSIONS
# ============================================

prompts:
  # Active prompt versions
  # Values are quoted version labels, loaded as strings.
  call_analysis: "v1.0"
  rca_synthesis: "v1.0"